diff options
Diffstat (limited to 'source3/smbd/mangle_hash2.c')
-rw-r--r-- | source3/smbd/mangle_hash2.c | 587 |
1 files changed, 587 insertions, 0 deletions
diff --git a/source3/smbd/mangle_hash2.c b/source3/smbd/mangle_hash2.c new file mode 100644 index 0000000000..96ca7360b8 --- /dev/null +++ b/source3/smbd/mangle_hash2.c @@ -0,0 +1,587 @@ +/* + Unix SMB/CIFS implementation. + new hash based name mangling implementation + Copyright (C) Andrew Tridgell 2002 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +/* + this mangling scheme uses the following format + + Annnn~n.AAA + + where nnnnn is a base 36 hash, and A represents characters from the original string + + The hash is taken of the leading part of the long filename, in uppercase + + for simplicity, we only allow ascii characters in 8.3 names + */ + + +/* + =============================================================================== + NOTE NOTE NOTE!!! + + This file deliberately uses non-multibyte string functions in many places. This + is *not* a mistake. This code is multi-byte safe, but it gets this property + through some very subtle knowledge of the way multi-byte strings are encoded + and the fact that this mangling algorithm only supports ascii characters in + 8.3 names. + + please don't convert this file to use the *_m() functions!! + =============================================================================== +*/ + + +#include "includes.h" + +#if 0 +#define M_DEBUG(level, x) DEBUG(level, x) +#else +#define M_DEBUG(level, x) +#endif + +/* these flags are used to mark characters in as having particular + properties */ +#define FLAG_BASECHAR 1 +#define FLAG_ASCII 2 +#define FLAG_ILLEGAL 4 +#define FLAG_WILDCARD 8 + +/* the "possible" flags are used as a fast way to find possible DOS + reserved filenames */ +#define FLAG_POSSIBLE1 16 +#define FLAG_POSSIBLE2 32 +#define FLAG_POSSIBLE3 64 +#define FLAG_POSSIBLE4 128 + +/* by default have a max of 4096 entries in the cache. */ +#ifndef MANGLE_CACHE_SIZE +#define MANGLE_CACHE_SIZE 4096 +#endif + +/* these tables are used to provide fast tests for characters */ +static unsigned char char_flags[256]; + +#define FLAG_CHECK(c, flag) (char_flags[(unsigned char)(c)] & (flag)) + +/* we will use a very simple direct mapped prefix cache. The big + advantage of this cache structure is speed and low memory usage + + The cache is indexed by the low-order bits of the hash, and confirmed by + hashing the resulting cache entry to match the known hash +*/ +static char **prefix_cache; +static u32 *prefix_cache_hashes; + +/* these are the characters we use in the 8.3 hash. Must be 36 chars long */ +const char *basechars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; +static unsigned char base_reverse[256]; +#define base_forward(v) basechars[v] + +/* the list of reserved dos names - all of these are illegal */ +const char *reserved_names[] = { "AUX", "LOCK$", "CON", "COM1", "COM2", "COM3", "COM4", + "LPT1", "LPT2", "LPT3", "NUL", "PRN", NULL }; + +/* + hash a string of the specified length. The string does not need to be + null terminated + + this hash needs to be fast with a low collision rate (what hash doesn't?) +*/ +static u32 mangle_hash(const char *key, unsigned length) +{ + u32 value; + u32 i; + fstring str; + + /* we have to uppercase here to ensure that the mangled name + doesn't depend on the case of the long name. Note that this + is the only place where we need to use a multi-byte string + function */ + strncpy(str, key, length); + str[length] = 0; + strupper_m(str); + + /* the length of a multi-byte string can change after a strupper_m */ + length = strlen(str); + + /* Set the initial value from the key size. */ + for (value = 0x238F13AF * length, i=0; i < length; i++) { + value = (value + (((unsigned char)str[i]) << (i*5 % 24))); + } + + /* note that we force it to a 31 bit hash, to keep within the limits + of the 36^6 mangle space */ + return (1103515243 * value + 12345) & ~0x80000000; +} + +/* + initialise (ie. allocate) the prefix cache + */ +static BOOL cache_init(void) +{ + if (prefix_cache) return True; + + prefix_cache = malloc(sizeof(char *) * MANGLE_CACHE_SIZE); + if (!prefix_cache) return False; + + prefix_cache_hashes = malloc(sizeof(u32) * MANGLE_CACHE_SIZE); + if (!prefix_cache_hashes) return False; + + memset(prefix_cache, 0, sizeof(char *) * MANGLE_CACHE_SIZE); + memset(prefix_cache_hashes, 0, sizeof(char *) * MANGLE_CACHE_SIZE); + return True; +} + +/* + insert an entry into the prefix cache. The string might not be null + terminated */ +static void cache_insert(const char *prefix, int length, u32 hash) +{ + int i = hash % MANGLE_CACHE_SIZE; + + if (prefix_cache[i]) { + free(prefix_cache[i]); + } + + prefix_cache[i] = strndup(prefix, length); + prefix_cache_hashes[i] = hash; +} + +/* + lookup an entry in the prefix cache. Return NULL if not found. +*/ +static const char *cache_lookup(u32 hash) +{ + int i = hash % MANGLE_CACHE_SIZE; + + if (!prefix_cache[i] || hash != prefix_cache_hashes[i]) { + return NULL; + } + + /* yep, it matched */ + return prefix_cache[i]; +} + + +/* + determine if a string is possibly in a mangled format, ignoring + case + + In this algorithm, mangled names use only pure ascii characters (no + multi-byte) so we can avoid doing a UCS2 conversion +*/ +static BOOL is_mangled(const char *name) +{ + int len, i; + + M_DEBUG(0,("is_mangled %s ?\n", name)); + + /* the best distinguishing characteristic is the ~ */ + if (name[6] != '~') return False; + + /* check the length */ + len = strlen(name); + if (len > 12 || len < 8) return False; + + /* check extension */ + if (len > 8) { + if (name[8] != '.') return False; + for (i=9; name[i]; i++) { + if (! FLAG_CHECK(name[i], FLAG_ASCII)) { + return False; + } + } + } + + /* check first character */ + if (! FLAG_CHECK(name[0], FLAG_ASCII)) { + return False; + } + + /* check rest of hash */ + if (! FLAG_CHECK(name[7], FLAG_BASECHAR)) { + return False; + } + for (i=1;i<6;i++) { + if (! FLAG_CHECK(name[i], FLAG_BASECHAR)) { + return False; + } + } + + M_DEBUG(0,("is_mangled %s -> yes\n", name)); + + return True; +} + + +/* + see if a filename is an allowable 8.3 name. + + we are only going to allow ascii characters in 8.3 names, as this + simplifies things greatly (it means that we know the string won't + get larger when converted from UNIX to DOS formats) +*/ +static BOOL is_8_3(const char *name, BOOL check_case) +{ + int len, i; + char *dot_p; + + /* as a special case, the names '.' and '..' are allowable 8.3 names */ + if (name[0] == '.') { + if (!name[1] || (name[1] == '.' && !name[2])) { + return True; + } + } + + /* the simplest test is on the overall length of the + filename. Note that we deliberately use the ascii string + length (not the multi-byte one) as it is faster, and gives us + the result we need in this case. Using strlen_m would not + only be slower, it would be incorrect */ + len = strlen(name); + if (len > 12) return False; + + /* find the '.'. Note that once again we use the non-multibyte + function */ + dot_p = strchr(name, '.'); + + if (!dot_p) { + /* if the name doesn't contain a '.' then its length + must be less than 8 */ + if (len > 8) { + return False; + } + } else { + int prefix_len, suffix_len; + + /* if it does contain a dot then the prefix must be <= + 8 and the suffix <= 3 in length */ + prefix_len = PTR_DIFF(dot_p, name); + suffix_len = len - (prefix_len+1); + + if (prefix_len > 8 || suffix_len > 3) { + return False; + } + + /* a 8.3 name cannot contain more than 1 '.' */ + if (strchr(dot_p+1, '.')) { + return False; + } + } + + /* the length are all OK. Now check to see if the characters themselves are OK */ + for (i=0; name[i]; i++) { + /* note that we allow wildcard petterns! */ + if (!FLAG_CHECK(name[i], FLAG_ASCII|FLAG_WILDCARD) && name[i] != '.') { + return False; + } + } + + /* it is a good 8.3 name */ + return True; +} + + +/* + reset the mangling cache on a smb.conf reload. This only really makes sense for + mangling backends that have parameters in smb.conf, and as this backend doesn't + this is a NULL operation +*/ +static void mangle_reset(void) +{ + /* noop */ +} + + +/* + try to find a 8.3 name in the cache, and if found then + replace the string with the original long name. + + The filename must be able to hold at least sizeof(fstring) +*/ +static BOOL check_cache(char *name) +{ + u32 hash, multiplier; + int i; + const char *prefix; + char extension[4]; + + /* make sure that this is a mangled name from this cache */ + if (!is_mangled(name)) { + M_DEBUG(0,("check_cache: %s -> not mangled\n", name)); + return False; + } + + /* we need to extract the hash from the 8.3 name */ + hash = base_reverse[(unsigned char)name[7]]; + for (multiplier=36, i=5;i>=1;i--) { + u32 v = base_reverse[(unsigned char)name[i]]; + hash += multiplier * v; + multiplier *= 36; + } + + /* now look in the prefix cache for that hash */ + prefix = cache_lookup(hash); + if (!prefix) { + M_DEBUG(0,("check_cache: %s -> %08X -> not found\n", name, hash)); + return False; + } + + /* we found it - construct the full name */ + strncpy(extension, name+9, 3); + + if (extension[0]) { + M_DEBUG(0,("check_cache: %s -> %s.%s\n", name, prefix, extension)); + slprintf(name, sizeof(fstring), "%s.%s", prefix, extension); + } else { + M_DEBUG(0,("check_cache: %s -> %s\n", name, prefix)); + fstrcpy(name, prefix); + } + + return True; +} + + +/* + look for a DOS reserved name +*/ +static BOOL is_reserved_name(const char *name) +{ + if (FLAG_CHECK(name[0], FLAG_POSSIBLE1) && + FLAG_CHECK(name[1], FLAG_POSSIBLE2) && + FLAG_CHECK(name[2], FLAG_POSSIBLE3) && + FLAG_CHECK(name[3], FLAG_POSSIBLE4)) { + /* a likely match, scan the lot */ + int i; + for (i=0; reserved_names[i]; i++) { + int len = strlen(reserved_names[i]); + /* note that we match on COM1 as well as COM1.foo */ + if (strncasecmp(name, reserved_names[i], len) == 0 && + (name[len] == '.' || name[len] == 0)) { + return True; + } + } + } + + return False; +} + +/* + see if a filename is a legal long filename +*/ +static BOOL is_legal_name(const char *name) +{ + while (*name) { + if (FLAG_CHECK(name[0], FLAG_ILLEGAL)) { + return False; + } + name++; + } + + return True; +} + +/* + the main forward mapping function, which converts a long filename to + a 8.3 name + + if need83 is not set then we only do the mangling if the name is illegal + as a long name + + if cache83 is not set then we don't cache the result + + the name parameter must be able to hold 13 bytes +*/ +static BOOL name_map(char *name, BOOL need83, BOOL cache83) +{ + char *dot_p; + char lead_char; + char extension[4]; + int extension_length, i; + int prefix_len; + u32 hash, v; + char new_name[13]; + + /* reserved names are handled specially */ + if (!is_reserved_name(name)) { + /* if the name is already a valid 8.3 name then we don't need to + do anything */ + if (is_8_3(name, False)) { + return True; + } + + /* if the caller doesn't strictly need 8.3 then just check for illegal + filenames */ + if (!need83 && is_legal_name(name)) { + return True; + } + } + + /* find the '.' if any */ + dot_p = strrchr(name, '.'); + + /* the leading character in the mangled name is taken from + the first character of the name, if it is ascii + otherwise '_' is used + */ + lead_char = name[0]; + if (! FLAG_CHECK(lead_char, FLAG_ASCII)) { + lead_char = '_'; + } + lead_char = toupper(lead_char); + + /* the prefix is anything up to the first dot */ + if (dot_p) { + prefix_len = PTR_DIFF(dot_p, name); + } else { + prefix_len = strlen(name); + } + + /* the extension of the mangled name is taken from the first 3 + ascii chars after the dot */ + extension_length = 0; + if (dot_p) { + for (i=1; extension_length < 3 && dot_p[i]; i++) { + char c = dot_p[i]; + if (FLAG_CHECK(c, FLAG_ASCII)) { + extension[extension_length++] = toupper(c); + } + } + } + + /* find the hash for this prefix */ + v = hash = mangle_hash(name, prefix_len); + + /* now form the mangled name. */ + new_name[0] = lead_char; + new_name[7] = base_forward(v % 36); + new_name[6] = '~'; + for (i=5; i>=1; i--) { + v = v / 36; + new_name[i] = base_forward(v % 36); + } + + /* add the extension */ + if (extension_length) { + new_name[8] = '.'; + memcpy(&new_name[9], extension, extension_length); + new_name[9+extension_length] = 0; + } else { + new_name[8] = 0; + } + + if (cache83) { + /* put it in the cache */ + cache_insert(name, prefix_len, hash); + } + + M_DEBUG(0,("name_map: %s -> %08X -> %s (cache=%d)\n", + name, hash, new_name, cache83)); + + /* and overwrite the old name */ + fstrcpy(name, new_name); + + /* all done, we've managed to mangle it */ + return True; +} + + +/* initialise the flags table + + we allow only a very restricted set of characters as 'ascii' in this + mangling backend. This isn't a significant problem as modern clients + use the 'long' filenames anyway, and those don't have these + restrictions. +*/ +static void init_tables(void) +{ + int i; + + memset(char_flags, 0, sizeof(char_flags)); + + for (i=0;i<128;i++) { + if ((i >= '0' && i <= '9') || + (i >= 'a' && i <= 'z') || + (i >= 'A' && i <= 'Z')) { + char_flags[i] |= (FLAG_ASCII | FLAG_BASECHAR); + } + if (strchr("_-$~", i)) { + char_flags[i] |= FLAG_ASCII; + } + + if (strchr("*\\/?<>|\":", i)) { + char_flags[i] |= FLAG_ILLEGAL; + } + + if (strchr("*?\"<>", i)) { + char_flags[i] |= FLAG_WILDCARD; + } + } + + memset(base_reverse, 0, sizeof(base_reverse)); + for (i=0;i<36;i++) { + base_reverse[(unsigned char)base_forward(i)] = i; + } + + /* fill in the reserved names flags. These are used as a very + fast filter for finding possible DOS reserved filenames */ + for (i=0; reserved_names[i]; i++) { + unsigned char c1, c2, c3, c4; + + c1 = (unsigned char)reserved_names[i][0]; + c2 = (unsigned char)reserved_names[i][1]; + c3 = (unsigned char)reserved_names[i][2]; + c4 = (unsigned char)reserved_names[i][3]; + + char_flags[c1] |= FLAG_POSSIBLE1; + char_flags[c2] |= FLAG_POSSIBLE2; + char_flags[c3] |= FLAG_POSSIBLE3; + char_flags[c4] |= FLAG_POSSIBLE4; + char_flags[tolower(c1)] |= FLAG_POSSIBLE1; + char_flags[tolower(c2)] |= FLAG_POSSIBLE2; + char_flags[tolower(c3)] |= FLAG_POSSIBLE3; + char_flags[tolower(c4)] |= FLAG_POSSIBLE4; + + char_flags[(unsigned char)'.'] |= FLAG_POSSIBLE4; + } +} + + +/* + the following provides the abstraction layer to make it easier + to drop in an alternative mangling implementation */ +static struct mangle_fns mangle_fns = { + is_mangled, + is_8_3, + mangle_reset, + check_cache, + name_map +}; + +/* return the methods for this mangling implementation */ +struct mangle_fns *mangle_hash2_init(void) +{ + init_tables(); + mangle_reset(); + + if (!cache_init()) { + return NULL; + } + + return &mangle_fns; +} |