From 714518e550f3c6ee36fa6e3e2447b943898a2ac5 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Thu, 11 Apr 2002 09:56:38 +0000 Subject: this adds a completely new hash based mangling scheme the hash for this scheme is *much* larger (approximately 31 bits) and the code is written to be very fast, correctly handling multibyte while not doing any actual multi-byte conversions in the vast majority of cases you can select this scheme using "mangling method = hash2", although I may make it the default if it works out well. (This used to be commit bb173c1a7e2408ced967ebac40b5e3f852ccd3a1) --- source3/smbd/mangle_hash2.c | 510 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 510 insertions(+) create mode 100644 source3/smbd/mangle_hash2.c (limited to 'source3/smbd/mangle_hash2.c') diff --git a/source3/smbd/mangle_hash2.c b/source3/smbd/mangle_hash2.c new file mode 100644 index 0000000000..661ae7eb44 --- /dev/null +++ b/source3/smbd/mangle_hash2.c @@ -0,0 +1,510 @@ +/* + Unix SMB/CIFS implementation. + new hash based name mangling implementation + Copyright (C) Andrew Tridgell 2002 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +/* + this mangling scheme uses the following format + + Annnn~n.AAA + + where nnnnn is a base 36 hash, and A represents characters from the original string + + The hash is taken of the leading part of the long filename, in uppercase + + for simplicity, we only allow ascii characters in 8.3 names + */ + + +/* + =============================================================================== + NOTE NOTE NOTE!!! + + This file deliberately uses non-multibyte string functions in many places. This + is *not* a mistake. This code is multi-byte safe, but it gets this property + through some very subtle knowledge of the way multi-byte strings are encoded + and the fact that this mangling algorithm only supports ascii characters in + 8.3 names. + + please don't convert this file to use the *_m() functions!! + =============================================================================== +*/ + + +#include "includes.h" + + +#define FLAG_BASECHAR 1 +#define FLAG_ASCII 2 +#define FLAG_ILLEGAL 4 +#define FLAG_POSSIBLE1 8 +#define FLAG_POSSIBLE2 16 +#define FLAG_POSSIBLE3 32 + +#define CACHE_SIZE 8192 + +/* these tables are used to provide fast tests for characters */ +static unsigned char char_flags[256]; + +/* we will use a very simple direct mapped prefix cache. The big + advantage of this cache structure is speed and low memory usage + + The cache is indexed by the low-order bits of the hash, and confirmed by + hashing the resulting cache entry to match the known hash +*/ +static char **prefix_cache; + +/* these are the characters we use in the 8.3 hash. Must be 36 chars long */ +const char *basechars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; +static unsigned char base_reverse[256]; +#define base_forward(v) basechars[v] + +/* the list of reserved dos names - all of these are illegal */ +const char *reserved_names[] = { "AUX", "LOCK$", "CON", "COM1", "COM2", "COM3", "COM4", + "LPT1", "LPT2", "LPT3", "NUL", "PRN", NULL }; + +/* + hash a string of the specified length. The string does not need to be + null terminated + + this hash needs to be fast with a low collision rate (what hash doesn't?) +*/ +static u32 mangle_hash(const char *key, unsigned length) +{ + u32 value; + u32 i; + fstring str; + + strncpy(str, key, length); + str[length] = 0; + strupper_m(str); + + /* the length of a multi-byte string can change after a strupper */ + length = strlen(str); + + /* Set the initial value from the key size. */ + for (value = 0x238F13AF * length, i=0; i < length; i++) { + value = (value + (((unsigned char)str[i]) << (i*5 % 24))); + } + + return (1103515243 * value + 12345); +} + +/* + initialise the prefix cache + */ +static BOOL cache_init(void) +{ + if (prefix_cache) return True; + + prefix_cache = malloc(sizeof(char *) * CACHE_SIZE); + if (!prefix_cache) return False; + + memset(prefix_cache, 0, sizeof(char *) * CACHE_SIZE); + return True; +} + +/* + insert an entry into the prefix cache. The string may not be null terminated +*/ +static void cache_insert(const char *prefix, int length, u32 hash) +{ + int i = hash % CACHE_SIZE; + + if (prefix_cache[i]) { + free(prefix_cache[i]); + } + + prefix_cache[i] = strndup(prefix, length); +} + +/* + lookup an entry in the prefix cache. Return NULL if not found. +*/ +static const char *cache_lookup(u32 hash) +{ + int i = hash % CACHE_SIZE; + + if (!prefix_cache[i]) return NULL; + + /* we have a possible match - compute the hash to confirm */ + if (hash != mangle_hash(prefix_cache[i], strlen(prefix_cache[i]))) { + return NULL; + } + + /* yep, it matched */ + return prefix_cache[i]; +} + + +/* + determine if a string is possibly in a mangled format, ignoring + case + + In this algorithm, mangled names use only pure ascii characters (no + multi-byte) so we can avoid doing a UCS2 conversion +*/ +static BOOL is_mangled(const char *name) +{ + /* the best distinguishing characteristic is the ~ */ + if (name[6] != '~') return False; + + /* check extension */ + /* check first character */ + /* check rest of hash */ + + return True; +} + + +/* + see if a filename is an allowable 8.3 name. + + we are only going to allow ascii characters in 8.3 names, as this + simplifies things greatly (it means that we know the string won't + get larger when converted from UNIX to DOS formats) +*/ +static BOOL is_8_3(const char *name, BOOL check_case) +{ + int len, i; + char *dot_p; + + /* as a special case, the names '.' and '..' are allowable 8.3 names */ + if (name[0] == '.') { + if (!name[1] || (name[1] == '.' && !name[2])) { + return True; + } + } + + /* the simplest test is on the overall length of the + filename. Note that we deliberately use the ascii string + length (not the multi-byte one) as it is faster, and gives us + the result we need in this case. Using strlen_m would not + only be slower, it would be incorrect */ + len = strlen(name); + if (len > 12) return False; + + /* find the '.'. Note that once again we use the non-multibyte + function */ + dot_p = strchr(name, '.'); + + if (!dot_p) { + /* if the name doesn't contain a '.' then its length + must be less than 8 */ + if (len > 8) { + return False; + } + } else { + int prefix_len, suffix_len; + + /* if it does contain a dot then the prefix must be <= + 8 and the suffix <= 3 in length */ + prefix_len = PTR_DIFF(dot_p, name); + suffix_len = len - (prefix_len+1); + + if (prefix_len > 8 || suffix_len > 3) { + return False; + } + + /* a 8.3 name cannot contain more than 1 '.' */ + if (strchr(dot_p+1, '.')) { + return False; + } + } + + /* the length are all OK. Now check to see if the characters themselves are OK */ + for (i=0; name[i]; i++) { + if (!(char_flags[(unsigned)(name[i])] & FLAG_ASCII)) { + return False; + } + } + + /* it is a good 8.3 name */ + return True; +} + + +/* + reset the mangling cache on a smb.conf reload. This only really makes sense for + mangling backends that have parameters in smb.conf, and as this backend doesn't + this is a NULL operation +*/ +static void mangle_reset(void) +{ + /* noop */ +} + + +/* + try to find a 8.3 name in the cache, and if found then + replace the string with the original long name. + + The filename must be able to hold at least sizeof(fstring) +*/ +static BOOL check_cache(char *name) +{ + u32 hash, multiplier; + int i; + const char *prefix; + char extension[4]; + + /* make sure that this is a mangled name from this cache */ + if (!is_mangled(name)) { + return False; + } + + /* we need to extract the hash from the 8.3 name */ + hash = base_reverse[(unsigned char)name[7]]; + for (multiplier=36, i=5;i>=1;i--) { + u32 v = base_reverse[(unsigned char)name[i]]; + hash += multiplier * v; + multiplier *= 36; + } + + /* now look in the prefix cache for that hash */ + prefix = cache_lookup(hash); + if (!prefix) { + return False; + } + + /* we found it - construct the full name */ + strncpy(extension, name+9, 3); + + if (extension[0]) { + slprintf(name, sizeof(fstring), "%s.%s", prefix, extension); + } else { + fstrcpy(name, prefix); + } + return True; +} + + +/* + look for a DOS reserved name +*/ +static BOOL is_reserved_name(const char *name) +{ + if ((char_flags[(unsigned char)name[0]] & FLAG_POSSIBLE1) && + (char_flags[(unsigned char)name[1]] & FLAG_POSSIBLE2) && + (char_flags[(unsigned char)name[2]] & FLAG_POSSIBLE3)) { + /* a likely match, scan the lot */ + int i; + for (i=0; reserved_names[i]; i++) { + int len = strlen(reserved_names[i]); + /* note that we match on COM1 as well as COM1.foo */ + if (strncasecmp(name, reserved_names[i], len) == 0 && + (name[len] == '.' || name[len] == 0)) { + return True; + } + } + } + + return False; +} + +/* + see if a filename is a legal long filename +*/ +static BOOL is_legal_name(const char *name) +{ + while (*name) { + if (char_flags[(unsigned char)*name] & FLAG_ILLEGAL) { + return False; + } + name++; + } + + return True; +} + +/* + the main forward mapping function, which converts a long filename to + a 8.3 name + + if need83 is not set then we only do the mangling if the name is illegal + as a long name + + if cache83 is not set then we don't cache the result + + the name parameter must be able to hold 13 bytes +*/ +static BOOL name_map(char *name, BOOL need83, BOOL cache83) +{ + char *dot_p; + char lead_char; + char extension[4]; + int extension_length, i; + int prefix_len; + u32 hash, v; + char new_name[13]; + + /* reserved names are handled specially */ + if (!is_reserved_name(name)) { + /* if the name is already a valid 8.3 name then we don't need to + do anything */ + if (is_8_3(name, False)) { + return True; + } + + /* if the caller doesn't strictly need 8.3 then just check for illegal + filenames */ + if (!need83 && is_legal_name(name)) { + return True; + } + } + + /* find the '.' if any */ + dot_p = strchr(name, '.'); + + /* the leading character in the mangled name is taken from + the first character of the name, if it is ascii + otherwise '_' is used + */ + lead_char = name[0]; + if (! (char_flags[(unsigned char)lead_char] & FLAG_ASCII)) { + lead_char = '_'; + } + + /* the prefix is anything up to the first dot */ + if (dot_p) { + prefix_len = PTR_DIFF(dot_p, name); + } else { + prefix_len = strlen(name); + } + + /* the extension of the mangled name is taken from the first 3 + ascii chars after the dot */ + extension_length = 0; + if (dot_p) { + for (i=1; extension_length < 3 && dot_p[i]; i++) { + char c = dot_p[i]; + if (char_flags[(unsigned char)c] & FLAG_ASCII) { + extension[extension_length++] = c; + } + } + } + + /* find the hash for this prefix */ + v = hash = mangle_hash(name, prefix_len); + + /* now form the mangled name */ + new_name[0] = lead_char; + new_name[7] = base_forward(v % 36); + new_name[6] = '~'; + for (i=5; i>=1; i--) { + v = v / 36; + new_name[i] = base_forward(v % 36); + } + + /* add the extension */ + if (extension_length) { + new_name[8] = '.'; + memcpy(&new_name[9], extension, extension_length); + new_name[9+extension_length] = 0; + } else { + new_name[8] = 0; + } + + if (cache83) { + /* put it in the cache */ + cache_insert(name, prefix_len, hash); + } + + /* and overwrite the old name */ + fstrcpy(name, new_name); + + /* all done, we've managed to mangle it */ + return True; +} + + +/* initialise the flags table + + we allow only a very restricted set of characters as 'ascii' in this + mangling backend. This isn't a significant problem as modern clients + use the 'long' filenames anyway, and those don't have these + restrictions. +*/ +static void init_tables(void) +{ + int i; + + memset(char_flags, 0, sizeof(char_flags)); + + for (i=0;i<128;i++) { + if ((i >= '0' && i <= '9') || + (i >= 'a' && i <= 'z') || + (i >= 'A' && i <= 'Z')) { + char_flags[i] |= (FLAG_ASCII | FLAG_BASECHAR); + } + if (strchr("._-$~", i)) { + char_flags[i] |= FLAG_ASCII; + } + + if (strchr("*\\/?<>|\":", i)) { + char_flags[i] |= FLAG_ILLEGAL; + } + } + + memset(base_reverse, 0, sizeof(base_reverse)); + for (i=0;i<36;i++) { + base_reverse[(unsigned char)base_forward(i)] = i; + } + + /* fill in the reserved names flags. These are used as a very + fast filter for finding possible DOS reserved filenames */ + for (i=0; reserved_names[i]; i++) { + unsigned char c1, c2, c3; + + c1 = (unsigned char)reserved_names[i][0]; + c2 = (unsigned char)reserved_names[i][1]; + c3 = (unsigned char)reserved_names[i][2]; + + char_flags[c1] |= FLAG_POSSIBLE1; + char_flags[c2] |= FLAG_POSSIBLE2; + char_flags[c3] |= FLAG_POSSIBLE3; + char_flags[tolower(c1)] |= FLAG_POSSIBLE1; + char_flags[tolower(c2)] |= FLAG_POSSIBLE2; + char_flags[tolower(c3)] |= FLAG_POSSIBLE3; + } +} + + +/* + the following provides the abstraction layer to make it easier + to drop in an alternative mangling implementation */ +static struct mangle_fns mangle_fns = { + is_mangled, + is_8_3, + mangle_reset, + check_cache, + name_map +}; + +/* return the methods for this mangling implementation */ +struct mangle_fns *mangle_hash2_init(void) +{ + init_tables(); + mangle_reset(); + + if (!cache_init()) { + return NULL; + } + + return &mangle_fns; +} -- cgit