From 92faeaeea841dd0bce9460d25429846ae2fdc0af Mon Sep 17 00:00:00 2001 From: Andrew Bartlett Date: Wed, 16 Feb 2011 16:30:56 +1100 Subject: charset Remove use of {isupper,islower,toupper,tolower}_w functions These now call the common _m functions that consider UTF16 code points. This removes the code which will make up a 'lame' table in memory, as this can just as correctly be handled by running the algorithm at runtime (which is to call toupper() and tolower() on characters < 128). When used, a top level waf build will always locate the correct table - in the build tree or outside - due to relinking the installed binary. Andrew Bartlett --- lib/util/charset/codepoints.c | 2 + source3/Makefile.in | 2 +- source3/include/proto.h | 4 - source3/lib/charcnv.c | 2 +- source3/lib/ms_fnmatch.c | 2 +- source3/lib/util_str.c | 4 +- source3/lib/util_unistr.c | 171 ++++-------------------------------------- source3/wscript_build | 4 +- 8 files changed, 24 insertions(+), 167 deletions(-) diff --git a/lib/util/charset/codepoints.c b/lib/util/charset/codepoints.c index 5dc76143e2..53febb8b5e 100644 --- a/lib/util/charset/codepoints.c +++ b/lib/util/charset/codepoints.c @@ -48,9 +48,11 @@ void load_case_tables(void) lowcase_table = map_file(talloc_asprintf(mem_ctx, "%s/lowcase.dat", get_dyn_CODEPAGEDIR()), 0x20000); talloc_free(mem_ctx); if (upcase_table == NULL) { + DEBUG(1, ("Failed to load upcase.dat, will use lame ASCII-only case sensitivity rules\n")); upcase_table = (void *)-1; } if (lowcase_table == NULL) { + DEBUG(1, ("Failed to load lowcase.dat, will use lame ASCII-only case sensitivity rules\n")); lowcase_table = (void *)-1; } } diff --git a/source3/Makefile.in b/source3/Makefile.in index 8e338fd82f..f037314247 100644 --- a/source3/Makefile.in +++ b/source3/Makefile.in @@ -451,7 +451,7 @@ LIB_OBJ = $(LIBSAMBAUTIL_OBJ) $(UTIL_OBJ) $(CRYPTO_OBJ) \ lib/bitmap.o lib/dprintf.o $(UTIL_REG_OBJ) \ lib/wins_srv.o \ lib/util_str.o lib/clobber.o lib/util_sid.o \ - lib/util_unistr.o lib/util_file.o \ + lib/util_unistr.o ../lib/util/charset/codepoints.o lib/util_file.o \ lib/util.o lib/util_names.o \ lib/util_sock.o lib/sock_exec.o lib/util_sec.o \ lib/substitute.o lib/dbwrap_util.o \ diff --git a/source3/include/proto.h b/source3/include/proto.h index 07852527a8..8ca4db2b61 100644 --- a/source3/include/proto.h +++ b/source3/include/proto.h @@ -1479,10 +1479,6 @@ size_t dos_PutUniCode(char *dst,const char *src, size_t len, bool null_terminate char *skip_unibuf(char *src, size_t len); int rpcstr_push(void *dest, const char *src, size_t dest_len, int flags); int rpcstr_push_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src); -smb_ucs2_t toupper_w(smb_ucs2_t val); -smb_ucs2_t tolower_w( smb_ucs2_t val ); -bool islower_w(smb_ucs2_t c); -bool isupper_w(smb_ucs2_t c); bool isvalid83_w(smb_ucs2_t c); size_t strlen_w(const smb_ucs2_t *src); size_t strnlen_w(const smb_ucs2_t *src, size_t max); diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c index 3b6dfc5099..f79785ef15 100644 --- a/source3/lib/charcnv.c +++ b/source3/lib/charcnv.c @@ -1229,7 +1229,7 @@ size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_ terminated if STR_TERMINATE isn't set. */ for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) { - smb_ucs2_t v = toupper_w(dest_ucs2[i]); + smb_ucs2_t v = toupper_m(dest_ucs2[i]); if (v != dest_ucs2[i]) { dest_ucs2[i] = v; } diff --git a/source3/lib/ms_fnmatch.c b/source3/lib/ms_fnmatch.c index bff7382940..31c66953a9 100644 --- a/source3/lib/ms_fnmatch.c +++ b/source3/lib/ms_fnmatch.c @@ -129,7 +129,7 @@ static int ms_fnmatch_core(const smb_ucs2_t *p, const smb_ucs2_t *n, if (is_case_sensitive) { return -1; } - if (toupper_w(c) != toupper_w(*n)) { + if (toupper_m(c) != toupper_m(*n)) { return -1; } } diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c index 6edf64deea..b26bde81c4 100644 --- a/source3/lib/util_str.c +++ b/source3/lib/util_str.c @@ -460,7 +460,7 @@ bool strhasupper(const char *s) } for(p = tmp; *p != 0; p++) { - if(isupper_w(*p)) { + if(isupper_m(*p)) { break; } } @@ -485,7 +485,7 @@ bool strhaslower(const char *s) } for(p = tmp; *p != 0; p++) { - if(islower_w(*p)) { + if(islower_m(*p)) { break; } } diff --git a/source3/lib/util_unistr.c b/source3/lib/util_unistr.c index 4cda38dc19..6199a35fc8 100644 --- a/source3/lib/util_unistr.c +++ b/source3/lib/util_unistr.c @@ -27,11 +27,7 @@ /* these 3 tables define the unicode case handling. They are loaded at startup either via mmap() or read() from the lib directory */ -static smb_ucs2_t *upcase_table; -static smb_ucs2_t *lowcase_table; static uint8 *valid_table; -static bool upcase_table_use_unmap; -static bool lowcase_table_use_unmap; static bool valid_table_use_unmap; static bool initialized; @@ -40,20 +36,6 @@ static bool initialized; **/ void gfree_case_tables(void) { - if ( upcase_table ) { - if ( upcase_table_use_unmap ) - unmap_file(upcase_table, 0x20000); - else - SAFE_FREE(upcase_table); - } - - if ( lowcase_table ) { - if ( lowcase_table_use_unmap ) - unmap_file(lowcase_table, 0x20000); - else - SAFE_FREE(lowcase_table); - } - if ( valid_table ) { if ( valid_table_use_unmap ) unmap_file(valid_table, 0x10000); @@ -63,99 +45,6 @@ void gfree_case_tables(void) initialized = false; } -/** - * Load or generate the case handling tables. - * - * The case tables are defined in UCS2 and don't depend on any - * configured parameters, so they never need to be reloaded. - **/ - -void load_case_tables(void) -{ - char *old_locale = NULL, *saved_locale = NULL; - int i; - TALLOC_CTX *frame = NULL; - - if (initialized) { - return; - } - initialized = true; - - frame = talloc_stackframe(); - - upcase_table = (smb_ucs2_t *)map_file(data_path("upcase.dat"), - 0x20000); - upcase_table_use_unmap = ( upcase_table != NULL ); - - lowcase_table = (smb_ucs2_t *)map_file(data_path("lowcase.dat"), - 0x20000); - lowcase_table_use_unmap = ( lowcase_table != NULL ); - -#ifdef HAVE_SETLOCALE - /* Get the name of the current locale. */ - old_locale = setlocale(LC_ALL, NULL); - - if (old_locale) { - /* Save it as it is in static storage. */ - saved_locale = SMB_STRDUP(old_locale); - } - - /* We set back the locale to C to get ASCII-compatible toupper/lower functions. */ - setlocale(LC_ALL, "C"); -#endif - - /* we would like Samba to limp along even if these tables are - not available */ - if (!upcase_table) { - DEBUG(1,("creating lame upcase table\n")); - upcase_table = (smb_ucs2_t *)SMB_MALLOC(0x20000); - if (!upcase_table) { - smb_panic("lame upcase table malloc fail"); - /* notreached. */ - return; - } - for (i=0;i<0x10000;i++) { - smb_ucs2_t v; - SSVAL(&v, 0, i); - upcase_table[v] = i; - } - for (i=0;i<256;i++) { - smb_ucs2_t v; - SSVAL(&v, 0, UCS2_CHAR(i)); - upcase_table[v] = UCS2_CHAR(islower(i)?toupper(i):i); - } - } - - if (!lowcase_table) { - DEBUG(1,("creating lame lowcase table\n")); - lowcase_table = (smb_ucs2_t *)SMB_MALLOC(0x20000); - if (!lowcase_table) { - smb_panic("lame lowcase table malloc fail"); - /* notreached. */ - return; - } - for (i=0;i<0x10000;i++) { - smb_ucs2_t v; - SSVAL(&v, 0, i); - lowcase_table[v] = i; - } - for (i=0;i<256;i++) { - smb_ucs2_t v; - SSVAL(&v, 0, UCS2_CHAR(i)); - lowcase_table[v] = UCS2_CHAR(isupper(i)?tolower(i):i); - } - } - -#ifdef HAVE_SETLOCALE - /* Restore the old locale. */ - if (saved_locale) { - setlocale (LC_ALL, saved_locale); - SAFE_FREE(saved_locale); - } -#endif - TALLOC_FREE(frame); -} - static int check_dos_char_slowly(smb_ucs2_t c) { char buf[10]; @@ -287,42 +176,6 @@ int rpcstr_push_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src) return -1; } -/******************************************************************* - Convert a wchar to upper case. -********************************************************************/ - -smb_ucs2_t toupper_w(smb_ucs2_t val) -{ - return upcase_table[SVAL(&val,0)]; -} - -/******************************************************************* - Convert a wchar to lower case. -********************************************************************/ - -smb_ucs2_t tolower_w( smb_ucs2_t val ) -{ - return lowcase_table[SVAL(&val,0)]; -} - -/******************************************************************* - Determine if a character is lowercase. -********************************************************************/ - -bool islower_w(smb_ucs2_t c) -{ - return upcase_table[SVAL(&c,0)] != c; -} - -/******************************************************************* - Determine if a character is uppercase. -********************************************************************/ - -bool isupper_w(smb_ucs2_t c) -{ - return lowcase_table[SVAL(&c,0)] != c; -} - /******************************************************************* Determine if a character is valid in a 8.3 name. ********************************************************************/ @@ -466,6 +319,8 @@ smb_ucs2_t *strstr_w(const smb_ucs2_t *s, const smb_ucs2_t *ins) /******************************************************************* Convert a string to lower case. return True if any char is converted + + This is unsafe for any string involving a UTF16 character ********************************************************************/ bool strlower_w(smb_ucs2_t *s) @@ -474,7 +329,7 @@ bool strlower_w(smb_ucs2_t *s) bool ret = False; while (*(COPY_UCS2_CHAR(&cp,s))) { - smb_ucs2_t v = tolower_w(cp); + smb_ucs2_t v = tolower_m(cp); if (v != cp) { COPY_UCS2_CHAR(s,&v); ret = True; @@ -487,6 +342,8 @@ bool strlower_w(smb_ucs2_t *s) /******************************************************************* Convert a string to upper case. return True if any char is converted + + This is unsafe for any string involving a UTF16 character ********************************************************************/ bool strupper_w(smb_ucs2_t *s) @@ -494,7 +351,7 @@ bool strupper_w(smb_ucs2_t *s) smb_ucs2_t cp; bool ret = False; while (*(COPY_UCS2_CHAR(&cp,s))) { - smb_ucs2_t v = toupper_w(cp); + smb_ucs2_t v = toupper_m(cp); if (v != cp) { COPY_UCS2_CHAR(s,&v); ret = True; @@ -552,11 +409,11 @@ int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b) { smb_ucs2_t cpa, cpb; - while ((*COPY_UCS2_CHAR(&cpb,b)) && toupper_w(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_w(cpb)) { + while ((*COPY_UCS2_CHAR(&cpb,b)) && toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb)) { a++; b++; } - return (tolower_w(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_w(*(COPY_UCS2_CHAR(&cpb,b)))); + return (tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))); } /******************************************************************* @@ -568,12 +425,12 @@ int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len) smb_ucs2_t cpa, cpb; size_t n = 0; - while ((n < len) && *COPY_UCS2_CHAR(&cpb,b) && (toupper_w(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_w(cpb))) { + while ((n < len) && *COPY_UCS2_CHAR(&cpb,b) && (toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb))) { a++; b++; n++; } - return (len - n)?(tolower_w(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_w(*(COPY_UCS2_CHAR(&cpb,b)))):0; + return (len - n)?(tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))):0; } /******************************************************************* @@ -831,7 +688,7 @@ smb_ucs2_t *strstr_wa(const smb_ucs2_t *s, const char *ins) int toupper_ascii(int c) { - smb_ucs2_t uc = toupper_w(UCS2_CHAR(c)); + smb_ucs2_t uc = toupper_m(UCS2_CHAR(c)); return UCS2_TO_CHAR(uc); } @@ -841,7 +698,7 @@ int toupper_ascii(int c) int tolower_ascii(int c) { - smb_ucs2_t uc = tolower_w(UCS2_CHAR(c)); + smb_ucs2_t uc = tolower_m(UCS2_CHAR(c)); return UCS2_TO_CHAR(uc); } @@ -851,7 +708,7 @@ int tolower_ascii(int c) int isupper_ascii(int c) { - return isupper_w(UCS2_CHAR(c)); + return isupper_m(UCS2_CHAR(c)); } /************************************************************* @@ -860,5 +717,5 @@ int isupper_ascii(int c) int islower_ascii(int c) { - return islower_w(UCS2_CHAR(c)); + return islower_m(UCS2_CHAR(c)); } diff --git a/source3/wscript_build b/source3/wscript_build index 8b68089731..9a78d5a676 100644 --- a/source3/wscript_build +++ b/source3/wscript_build @@ -990,13 +990,14 @@ bld.SAMBA3_SUBSYSTEM('tdb-wrap', bld.SAMBA3_SUBSYSTEM('CHARSET', source='''lib/util_str.c lib/util_unistr.c lib/charcnv.c lib/iconv.c''', - public_deps='iconv', + public_deps='iconv CODEPOINTS', deps='DYNCONFIG') bld.SAMBA3_SUBSYSTEM('samba-util', source=UTIL_SRC, deps='talloc CHARSET LIBCRYPTO', vars=locals()) + #FIXME bld.SAMBA3_SUBSYSTEM('ndr-standard', source='', @@ -1359,6 +1360,7 @@ bld.INSTALL_FILES('${SWATDIR}', swat_files, base_name='../swat') ########################## INCLUDES ################################# +bld.RECURSE('../lib/util/charset') bld.RECURSE('../auth') bld.RECURSE('../lib/addns') bld.RECURSE('../lib/async_req') -- cgit