charset: Remove use of {isupper,islower,toupper,tolower}_w functions

These now call the common _m functions that consider UTF16 code points. This removes the code that builds a 'lame' fallback table in memory, since the same result can be obtained just as correctly by running the algorithm at runtime (that is, by calling toupper() and tolower() on characters < 128). When used, a top level waf build will always locate the correct table - in the build tree or outside - due to relinking the installed binary. Andrew Bartlett
author: Andrew Bartlett <abartlet@samba.org> 2011-02-16 16:30:56 +1100
committer: Andrew Bartlett <abartlet@samba.org> 2011-02-18 17:00:34 +1100
commit: 92faeaeea841dd0bce9460d25429846ae2fdc0af (patch)
tree: 1280a8722d974c46e767047fe8ec9a65442dd5d7
parent: 0bad0e3ff2063f009557ab6ad7a442ceaed593ee (diff)
download: samba-92faeaeea841dd0bce9460d25429846ae2fdc0af.tar.gz
samba-92faeaeea841dd0bce9460d25429846ae2fdc0af.tar.bz2
samba-92faeaeea841dd0bce9460d25429846ae2fdc0af.zip
8 files changed, 24 insertions, 167 deletions
diff --git a/lib/util/charset/codepoints.c b/lib/util/charset/codepoints.c
index 5dc76143e2..53febb8b5e 100644
--- a/lib/util/charset/codepoints.c
+++ b/lib/util/charset/codepoints.c
@@ -48,9 +48,11 @@ void load_case_tables(void)
 	lowcase_table = map_file(talloc_asprintf(mem_ctx, "%s/lowcase.dat", get_dyn_CODEPAGEDIR()), 0x20000);
 	talloc_free(mem_ctx);
 	if (upcase_table == NULL) {
+		DEBUG(1, ("Failed to load upcase.dat, will use lame ASCII-only case sensitivity rules\n"));
 		upcase_table = (void *)-1;
 	}
 	if (lowcase_table == NULL) {
+		DEBUG(1, ("Failed to load lowcase.dat, will use lame ASCII-only case sensitivity rules\n"));
 		lowcase_table = (void *)-1;
 	}
 }
diff --git a/source3/Makefile.in b/source3/Makefile.in
index 8e338fd82f..f037314247 100644
--- a/source3/Makefile.in
+++ b/source3/Makefile.in
@@ -451,7 +451,7 @@ LIB_OBJ = $(LIBSAMBAUTIL_OBJ) $(UTIL_OBJ) $(CRYPTO_OBJ) \
 	  lib/bitmap.o lib/dprintf.o $(UTIL_REG_OBJ) \
 	  lib/wins_srv.o \
 	  lib/util_str.o lib/clobber.o lib/util_sid.o \
-	  lib/util_unistr.o lib/util_file.o \
+	  lib/util_unistr.o ../lib/util/charset/codepoints.o lib/util_file.o \
 	  lib/util.o lib/util_names.o \
 	  lib/util_sock.o lib/sock_exec.o lib/util_sec.o \
 	  lib/substitute.o lib/dbwrap_util.o \
diff --git a/source3/include/proto.h b/source3/include/proto.h
index 07852527a8..8ca4db2b61 100644
--- a/source3/include/proto.h
+++ b/source3/include/proto.h
@@ -1479,10 +1479,6 @@ size_t dos_PutUniCode(char *dst,const char *src, size_t len, bool null_terminate
 char *skip_unibuf(char *src, size_t len);
 int rpcstr_push(void *dest, const char *src, size_t dest_len, int flags);
 int rpcstr_push_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src);
-smb_ucs2_t toupper_w(smb_ucs2_t val);
-smb_ucs2_t tolower_w( smb_ucs2_t val );
-bool islower_w(smb_ucs2_t c);
-bool isupper_w(smb_ucs2_t c);
 bool isvalid83_w(smb_ucs2_t c);
 size_t strlen_w(const smb_ucs2_t *src);
 size_t strnlen_w(const smb_ucs2_t *src, size_t max);
diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c
index 3b6dfc5099..f79785ef15 100644
--- a/source3/lib/charcnv.c
+++ b/source3/lib/charcnv.c
@@ -1229,7 +1229,7 @@ size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_
 		   terminated if STR_TERMINATE isn't set. */
 
 		for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
-			smb_ucs2_t v = toupper_w(dest_ucs2[i]);
+			smb_ucs2_t v = toupper_m(dest_ucs2[i]);
 			if (v != dest_ucs2[i]) {
 				dest_ucs2[i] = v;
 			}
diff --git a/source3/lib/ms_fnmatch.c b/source3/lib/ms_fnmatch.c
index bff7382940..31c66953a9 100644
--- a/source3/lib/ms_fnmatch.c
+++ b/source3/lib/ms_fnmatch.c
@@ -129,7 +129,7 @@ static int ms_fnmatch_core(const smb_ucs2_t *p, const smb_ucs2_t *n,
 				if (is_case_sensitive) {
 					return -1;
 				}
-				if (toupper_w(c) != toupper_w(*n)) {
+				if (toupper_m(c) != toupper_m(*n)) {
 					return -1;
 				}
 			}
diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c
index 6edf64deea..b26bde81c4 100644
--- a/source3/lib/util_str.c
+++ b/source3/lib/util_str.c
@@ -460,7 +460,7 @@ bool strhasupper(const char *s)
 	}
 
 	for(p = tmp; *p != 0; p++) {
-		if(isupper_w(*p)) {
+		if(isupper_m(*p)) {
 			break;
 		}
 	}
@@ -485,7 +485,7 @@ bool strhaslower(const char *s)
 	}
 
 	for(p = tmp; *p != 0; p++) {
-		if(islower_w(*p)) {
+		if(islower_m(*p)) {
 			break;
 		}
 	}
diff --git a/source3/lib/util_unistr.c b/source3/lib/util_unistr.c
index 4cda38dc19..6199a35fc8 100644
--- a/source3/lib/util_unistr.c
+++ b/source3/lib/util_unistr.c
@@ -27,11 +27,7 @@
 
 /* these 3 tables define the unicode case handling.  They are loaded
    at startup either via mmap() or read() from the lib directory */
-static smb_ucs2_t *upcase_table;
-static smb_ucs2_t *lowcase_table;
 static uint8 *valid_table;
-static bool upcase_table_use_unmap;
-static bool lowcase_table_use_unmap;
 static bool valid_table_use_unmap;
 static bool initialized;
 
@@ -40,20 +36,6 @@ static bool initialized;
  **/
 void gfree_case_tables(void)
 {
-	if ( upcase_table ) {
-		if ( upcase_table_use_unmap )
-			unmap_file(upcase_table, 0x20000);
-		else
-			SAFE_FREE(upcase_table);
-	}
-
-	if ( lowcase_table ) {
-		if ( lowcase_table_use_unmap )
-			unmap_file(lowcase_table, 0x20000);
-		else
-			SAFE_FREE(lowcase_table);
-	}
-
 	if ( valid_table ) {
 		if ( valid_table_use_unmap )
 			unmap_file(valid_table, 0x10000);
@@ -63,99 +45,6 @@ void gfree_case_tables(void)
 	initialized = false;
 }
 
-/**
- * Load or generate the case handling tables.
- *
- * The case tables are defined in UCS2 and don't depend on any
- * configured parameters, so they never need to be reloaded.
- **/
-
-void load_case_tables(void)
-{
-	char *old_locale = NULL, *saved_locale = NULL;
-	int i;
-	TALLOC_CTX *frame = NULL;
-
-	if (initialized) {
-		return;
-	}
-	initialized = true;
-
-	frame = talloc_stackframe();
-
-	upcase_table = (smb_ucs2_t *)map_file(data_path("upcase.dat"),
-					      0x20000);
-	upcase_table_use_unmap = ( upcase_table != NULL );
-
-	lowcase_table = (smb_ucs2_t *)map_file(data_path("lowcase.dat"),
-					       0x20000);
-	lowcase_table_use_unmap = ( lowcase_table != NULL );
-
-#ifdef HAVE_SETLOCALE
-	/* Get the name of the current locale.  */
-	old_locale = setlocale(LC_ALL, NULL);
-
-	if (old_locale) {
-		/* Save it as it is in static storage. */
-		saved_locale = SMB_STRDUP(old_locale);
-	}
-
-	/* We set back the locale to C to get ASCII-compatible toupper/lower functions. */
-	setlocale(LC_ALL, "C");
-#endif
-
-	/* we would like Samba to limp along even if these tables are
-	   not available */
-	if (!upcase_table) {
-		DEBUG(1,("creating lame upcase table\n"));
-		upcase_table = (smb_ucs2_t *)SMB_MALLOC(0x20000);
-		if (!upcase_table) {
-			smb_panic("lame upcase table malloc fail");
-			/* notreached. */
-			return;
-		}
-		for (i=0;i<0x10000;i++) {
-			smb_ucs2_t v;
-			SSVAL(&v, 0, i);
-			upcase_table[v] = i;
-		}
-		for (i=0;i<256;i++) {
-			smb_ucs2_t v;
-			SSVAL(&v, 0, UCS2_CHAR(i));
-			upcase_table[v] = UCS2_CHAR(islower(i)?toupper(i):i);
-		}
-	}
-
-	if (!lowcase_table) {
-		DEBUG(1,("creating lame lowcase table\n"));
-		lowcase_table = (smb_ucs2_t *)SMB_MALLOC(0x20000);
-		if (!lowcase_table) {
-			smb_panic("lame lowcase table malloc fail");
-			/* notreached. */
-			return;
-		}
-		for (i=0;i<0x10000;i++) {
-			smb_ucs2_t v;
-			SSVAL(&v, 0, i);
-			lowcase_table[v] = i;
-		}
-		for (i=0;i<256;i++) {
-			smb_ucs2_t v;
-			SSVAL(&v, 0, UCS2_CHAR(i));
-			lowcase_table[v] = UCS2_CHAR(isupper(i)?tolower(i):i);
-		}
-	}
-
-#ifdef HAVE_SETLOCALE
-	/* Restore the old locale. */
-	if (saved_locale) {
-		setlocale (LC_ALL, saved_locale);
-		SAFE_FREE(saved_locale);
-	}
-#endif
-	TALLOC_FREE(frame);
-}
-
 static int check_dos_char_slowly(smb_ucs2_t c)
 {
 	char buf[10];
@@ -288,42 +177,6 @@ int rpcstr_push_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
 }
 
 /*******************************************************************
- Convert a wchar to upper case.
-********************************************************************/
-
-smb_ucs2_t toupper_w(smb_ucs2_t val)
-{
-	return upcase_table[SVAL(&val,0)];
-}
-
-/*******************************************************************
- Convert a wchar to lower case.
-********************************************************************/
-
-smb_ucs2_t tolower_w( smb_ucs2_t val )
-{
-	return lowcase_table[SVAL(&val,0)];
-}
-
-/*******************************************************************
- Determine if a character is lowercase.
-********************************************************************/
-
-bool islower_w(smb_ucs2_t c)
-{
-	return upcase_table[SVAL(&c,0)] != c;
-}
-
-/*******************************************************************
- Determine if a character is uppercase.
-********************************************************************/
-
-bool isupper_w(smb_ucs2_t c)
-{
-	return lowcase_table[SVAL(&c,0)] != c;
-}
-
-/*******************************************************************
  Determine if a character is valid in a 8.3 name.
 ********************************************************************/
 
@@ -466,6 +319,8 @@ smb_ucs2_t *strstr_w(const smb_ucs2_t *s, const smb_ucs2_t *ins)
 /*******************************************************************
  Convert a string to lower case.
  return True if any char is converted
+
+ This is unsafe for any string involving a UTF16 character
 ********************************************************************/
 
 bool strlower_w(smb_ucs2_t *s)
@@ -474,7 +329,7 @@ bool strlower_w(smb_ucs2_t *s)
 	bool ret = False;
 
 	while (*(COPY_UCS2_CHAR(&cp,s))) {
-		smb_ucs2_t v = tolower_w(cp);
+		smb_ucs2_t v = tolower_m(cp);
 		if (v != cp) {
 			COPY_UCS2_CHAR(s,&v);
 			ret = True;
@@ -487,6 +342,8 @@ bool strlower_w(smb_ucs2_t *s)
 /*******************************************************************
  Convert a string to upper case.
  return True if any char is converted
+
+ This is unsafe for any string involving a UTF16 character
 ********************************************************************/
 
 bool strupper_w(smb_ucs2_t *s)
@@ -494,7 +351,7 @@ bool strupper_w(smb_ucs2_t *s)
 	smb_ucs2_t cp;
 	bool ret = False;
 	while (*(COPY_UCS2_CHAR(&cp,s))) {
-		smb_ucs2_t v = toupper_w(cp);
+		smb_ucs2_t v = toupper_m(cp);
 		if (v != cp) {
 			COPY_UCS2_CHAR(s,&v);
 			ret = True;
@@ -552,11 +409,11 @@ int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
 {
 	smb_ucs2_t cpa, cpb;
 
-	while ((*COPY_UCS2_CHAR(&cpb,b)) && toupper_w(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_w(cpb)) {
+	while ((*COPY_UCS2_CHAR(&cpb,b)) && toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb)) {
 		a++;
 		b++;
 	}
-	return (tolower_w(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_w(*(COPY_UCS2_CHAR(&cpb,b))));
+	return (tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b))));
 }
 
 /*******************************************************************
@@ -568,12 +425,12 @@ int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
 	smb_ucs2_t cpa, cpb;
 	size_t n = 0;
 
-	while ((n < len) && *COPY_UCS2_CHAR(&cpb,b) && (toupper_w(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_w(cpb))) {
+	while ((n < len) && *COPY_UCS2_CHAR(&cpb,b) && (toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb))) {
 		a++;
 		b++;
 		n++;
 	}
-	return (len - n)?(tolower_w(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_w(*(COPY_UCS2_CHAR(&cpb,b)))):0;
+	return (len - n)?(tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))):0;
 }
 
 /*******************************************************************
@@ -831,7 +688,7 @@ smb_ucs2_t *strstr_wa(const smb_ucs2_t *s, const char *ins)
 
 int toupper_ascii(int c)
 {
-	smb_ucs2_t uc = toupper_w(UCS2_CHAR(c));
+	smb_ucs2_t uc = toupper_m(UCS2_CHAR(c));
 	return UCS2_TO_CHAR(uc);
 }
 
@@ -841,7 +698,7 @@ int toupper_ascii(int c)
 
 int tolower_ascii(int c)
 {
-	smb_ucs2_t uc = tolower_w(UCS2_CHAR(c));
+	smb_ucs2_t uc = tolower_m(UCS2_CHAR(c));
 	return UCS2_TO_CHAR(uc);
 }
 
@@ -851,7 +708,7 @@ int tolower_ascii(int c)
 
 int isupper_ascii(int c)
 {
-	return isupper_w(UCS2_CHAR(c));
+	return isupper_m(UCS2_CHAR(c));
 }
 
 /*************************************************************
@@ -860,5 +717,5 @@ int isupper_ascii(int c)
 
 int islower_ascii(int c)
 {
-	return islower_w(UCS2_CHAR(c));
+	return islower_m(UCS2_CHAR(c));
 }
diff --git a/source3/wscript_build b/source3/wscript_build
index 8b68089731..9a78d5a676 100644
--- a/source3/wscript_build
+++ b/source3/wscript_build
@@ -990,13 +990,14 @@ bld.SAMBA3_SUBSYSTEM('tdb-wrap',
 
 bld.SAMBA3_SUBSYSTEM('CHARSET',
                     source='''lib/util_str.c lib/util_unistr.c lib/charcnv.c lib/iconv.c''',
-                    public_deps='iconv',
+                    public_deps='iconv CODEPOINTS',
                     deps='DYNCONFIG')
 
 bld.SAMBA3_SUBSYSTEM('samba-util',
                     source=UTIL_SRC,
                     deps='talloc CHARSET LIBCRYPTO',
                     vars=locals())
+
 #FIXME
 bld.SAMBA3_SUBSYSTEM('ndr-standard',
                     source='',
@@ -1359,6 +1360,7 @@ bld.INSTALL_FILES('${SWATDIR}', swat_files, base_name='../swat')
 
 ########################## INCLUDES #################################
 
+bld.RECURSE('../lib/util/charset')
 bld.RECURSE('../auth')
 bld.RECURSE('../lib/addns')
 bld.RECURSE('../lib/async_req')
author	Andrew Bartlett <abartlet@samba.org>	2011-02-16 16:30:56 +1100
committer	Andrew Bartlett <abartlet@samba.org>	2011-02-18 17:00:34 +1100
commit	92faeaeea841dd0bce9460d25429846ae2fdc0af (patch)
tree	1280a8722d974c46e767047fe8ec9a65442dd5d7
parent	0bad0e3ff2063f009557ab6ad7a442ceaed593ee (diff)
download	samba-92faeaeea841dd0bce9460d25429846ae2fdc0af.tar.gz samba-92faeaeea841dd0bce9460d25429846ae2fdc0af.tar.bz2 samba-92faeaeea841dd0bce9460d25429846ae2fdc0af.zip