Merge mbp's HEAD changes.

Jeremy. (This used to be commit da1271a95fce7fd217555fb161d4669d0b9b80e2)
author: Jeremy Allison <jra@samba.org> 2003-03-18 21:21:21 +0000
committer: Jeremy Allison <jra@samba.org> 2003-03-18 21:21:21 +0000
commit: 84e99fe8986dac9fa95857bf15bbd633db78e7ea (patch)
tree: 7a0133a8562c4bc2912b85baa9d800dea7d9abf5 /source3
parent: 6cbe33161d195c720aaa7149d082ee72ca222c07 (diff)
download: samba-84e99fe8986dac9fa95857bf15bbd633db78e7ea.tar.gz
samba-84e99fe8986dac9fa95857bf15bbd633db78e7ea.tar.bz2
samba-84e99fe8986dac9fa95857bf15bbd633db78e7ea.zip
2 files changed, 72 insertions, 34 deletions
diff --git a/source3/Makefile.in b/source3/Makefile.in
index a773c1bed4..fc2e8fe821 100644
--- a/source3/Makefile.in
+++ b/source3/Makefile.in
@@ -151,7 +151,7 @@ LIB_OBJ = lib/charcnv.o lib/debug.o lib/fault.o \
 	  lib/util_getent.o lib/util_pw.o lib/access.o lib/smbrun.o \
 	  lib/bitmap.o lib/crc32.o lib/snprintf.o lib/dprintf.o \
 	  lib/xfile.o lib/wins_srv.o \
-	  lib/util_str.o lib/util_sid.o lib/util_uuid.o \
+	  lib/util_str.o lib/clobber.o lib/util_sid.o lib/util_uuid.o \
 	  lib/util_unistr.o lib/util_file.o lib/data_blob.o \
 	  lib/util.o lib/util_sock.o lib/util_sec.o \
 	  lib/talloc.o lib/hash.o lib/substitute.o lib/fsusage.o \
diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c
index 7643c2807e..5157de0d91 100644
--- a/source3/lib/util_str.c
+++ b/source3/lib/util_str.c
@@ -1,8 +1,10 @@
 /* 
    Unix SMB/CIFS implementation.
    Samba utility functions
+   
    Copyright (C) Andrew Tridgell 1992-2001
    Copyright (C) Simo Sorce      2001-2002
+   Copyright (C) Martin Pool     2003
    
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -21,10 +23,10 @@
 
 #include "includes.h"
 
-#ifdef DEVELOPER
-const char *global_clobber_region_function;
-unsigned int global_clobber_region_line;
-#endif
+/**
+ * @file
+ * @brief String utilities.
+ **/
 
 /**
  * Get the next token from a string, return False if none found.
@@ -145,21 +147,79 @@ char **toktocliplist(int *ctok, const char *sep)
 }
 
 /**
- Case insensitive string compararison.
-**/
-
+ * Case insensitive string compararison.
+ *
+ * iconv does not directly give us a way to compare strings in
+ * arbitrary unix character sets -- all we can is convert and then
+ * compare.  This is expensive.
+ *
+ * As an optimization, we do a first pass that considers only the
+ * prefix of the strings that is entirely 7-bit.  Within this, we
+ * check whether they have the same value.
+ *
+ * Hopefully this will often give the answer without needing to copy.
+ * In particular it should speed comparisons to literal ascii strings
+ * or comparisons of strings that are "obviously" different.
+ *
+ * If we find a non-ascii character we fall back to converting via
+ * iconv.
+ *
+ * This should never be slower than convering the whole thing, and
+ * often faster.
+ *
+ * A different optimization would be to compare for bitwise equality
+ * in the binary encoding.  (It would be possible thought hairy to do
+ * both simultaneously.)  But in that case if they turn out to be
+ * different, we'd need to restart the whole thing.
+ *
+ * Even better is to implement strcasecmp for each encoding and use a
+ * function pointer. 
+ **/
 int StrCaseCmp(const char *s, const char *t)
 {
+
+	const char * ps, * pt;
 	pstring buf1, buf2;
-	unix_strupper(s, strlen(s)+1, buf1, sizeof(buf1));
-	unix_strupper(t, strlen(t)+1, buf2, sizeof(buf2));
-	return strcmp(buf1,buf2);
+
+	for (ps = s, pt = t; ; ps++, pt++) {
+		char us, ut;
+
+		if (!*ps && !*pt)
+			return 0; /* both ended */
+ 		else if (!*ps)
+			return -1; /* s is a prefix */
+		else if (!*pt)
+			return +1; /* t is a prefix */
+		else if ((*ps & 0x80) || (*pt & 0x80))
+			/* not ascii anymore, do it the hard way from here on in */
+			break;
+
+		us = toupper(*ps);
+		ut = toupper(*pt);
+		if (us == ut)
+			continue;
+		else if (us < ut)
+			return -1;
+		else if (us > ut)
+			return +1;
+	}
+
+	/* TODO: Don't do this with a fixed-length buffer.  This could
+	 * still be much more efficient. */
+	/* TODO: Hardcode a char-by-char comparison for UTF-8, which
+	 * can be much faster. */
+	/* TODO: Test case for this! */
+
+	unix_strupper(ps, strlen(ps)+1, buf1, sizeof(buf1));
+	unix_strupper(pt, strlen(pt)+1, buf2, sizeof(buf2));
+
+	return strcmp(buf1, buf2);
 }
 
+
 /**
  Case insensitive string compararison, length limited.
 **/
-
 int StrnCaseCmp(const char *s, const char *t, size_t n)
 {
 	pstring buf1, buf2;
@@ -415,28 +475,6 @@ size_t count_chars(const char *s,char c)
 }
 
 /**
- * In developer builds, clobber a region of memory.
- *
- * If we think a string buffer is longer than it really is, this ought
- * to make the failure obvious, by segfaulting (if in the heap) or by
- * killing the return address (on the stack), or by trapping under a
- * memory debugger.
- *
- * This is meant to catch possible string overflows, even if the
- * actual string copied is not big enough to cause an overflow.
- **/
-void clobber_region(const char *fn, unsigned int line, char *dest, size_t len)
-{
-#ifdef DEVELOPER
-	/* F1 is odd and 0xf1f1f1f1 shouldn't be a valid pointer */
-	memset(dest, 0xF1, len);
-	global_clobber_region_function = fn;
-	global_clobber_region_line = line;
-#endif
-}
-
-
-/**
  Safe string copy into a known length string. maxlength does not
  include the terminating zero.
 **/
author	Jeremy Allison <jra@samba.org>	2003-03-18 21:21:21 +0000
committer	Jeremy Allison <jra@samba.org>	2003-03-18 21:21:21 +0000
commit	84e99fe8986dac9fa95857bf15bbd633db78e7ea (patch)
tree	7a0133a8562c4bc2912b85baa9d800dea7d9abf5 /source3
parent	6cbe33161d195c720aaa7149d082ee72ca222c07 (diff)
download	samba-84e99fe8986dac9fa95857bf15bbd633db78e7ea.tar.gz samba-84e99fe8986dac9fa95857bf15bbd633db78e7ea.tar.bz2 samba-84e99fe8986dac9fa95857bf15bbd633db78e7ea.zip