From 84e99fe8986dac9fa95857bf15bbd633db78e7ea Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Tue, 18 Mar 2003 21:21:21 +0000 Subject: Merge mbp's HEAD changes. Jeremy. (This used to be commit da1271a95fce7fd217555fb161d4669d0b9b80e2) --- source3/lib/util_str.c | 104 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 71 insertions(+), 33 deletions(-) (limited to 'source3/lib/util_str.c') diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c index 7643c2807e..5157de0d91 100644 --- a/source3/lib/util_str.c +++ b/source3/lib/util_str.c @@ -1,8 +1,10 @@ /* Unix SMB/CIFS implementation. Samba utility functions + Copyright (C) Andrew Tridgell 1992-2001 Copyright (C) Simo Sorce 2001-2002 + Copyright (C) Martin Pool 2003 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,10 +23,10 @@ #include "includes.h" -#ifdef DEVELOPER -const char *global_clobber_region_function; -unsigned int global_clobber_region_line; -#endif +/** + * @file + * @brief String utilities. + **/ /** * Get the next token from a string, return False if none found. @@ -145,21 +147,79 @@ char **toktocliplist(int *ctok, const char *sep) } /** - Case insensitive string compararison. -**/ - + * Case insensitive string compararison. + * + * iconv does not directly give us a way to compare strings in + * arbitrary unix character sets -- all we can is convert and then + * compare. This is expensive. + * + * As an optimization, we do a first pass that considers only the + * prefix of the strings that is entirely 7-bit. Within this, we + * check whether they have the same value. + * + * Hopefully this will often give the answer without needing to copy. + * In particular it should speed comparisons to literal ascii strings + * or comparisons of strings that are "obviously" different. + * + * If we find a non-ascii character we fall back to converting via + * iconv. + * + * This should never be slower than convering the whole thing, and + * often faster. + * + * A different optimization would be to compare for bitwise equality + * in the binary encoding. (It would be possible thought hairy to do + * both simultaneously.) But in that case if they turn out to be + * different, we'd need to restart the whole thing. + * + * Even better is to implement strcasecmp for each encoding and use a + * function pointer. + **/ int StrCaseCmp(const char *s, const char *t) { + + const char * ps, * pt; pstring buf1, buf2; - unix_strupper(s, strlen(s)+1, buf1, sizeof(buf1)); - unix_strupper(t, strlen(t)+1, buf2, sizeof(buf2)); - return strcmp(buf1,buf2); + + for (ps = s, pt = t; ; ps++, pt++) { + char us, ut; + + if (!*ps && !*pt) + return 0; /* both ended */ + else if (!*ps) + return -1; /* s is a prefix */ + else if (!*pt) + return +1; /* t is a prefix */ + else if ((*ps & 0x80) || (*pt & 0x80)) + /* not ascii anymore, do it the hard way from here on in */ + break; + + us = toupper(*ps); + ut = toupper(*pt); + if (us == ut) + continue; + else if (us < ut) + return -1; + else if (us > ut) + return +1; + } + + /* TODO: Don't do this with a fixed-length buffer. This could + * still be much more efficient. */ + /* TODO: Hardcode a char-by-char comparison for UTF-8, which + * can be much faster. */ + /* TODO: Test case for this! */ + + unix_strupper(ps, strlen(ps)+1, buf1, sizeof(buf1)); + unix_strupper(pt, strlen(pt)+1, buf2, sizeof(buf2)); + + return strcmp(buf1, buf2); } + /** Case insensitive string compararison, length limited. **/ - int StrnCaseCmp(const char *s, const char *t, size_t n) { pstring buf1, buf2; @@ -414,28 +474,6 @@ size_t count_chars(const char *s,char c) return(count); } -/** - * In developer builds, clobber a region of memory. - * - * If we think a string buffer is longer than it really is, this ought - * to make the failure obvious, by segfaulting (if in the heap) or by - * killing the return address (on the stack), or by trapping under a - * memory debugger. - * - * This is meant to catch possible string overflows, even if the - * actual string copied is not big enough to cause an overflow. - **/ -void clobber_region(const char *fn, unsigned int line, char *dest, size_t len) -{ -#ifdef DEVELOPER - /* F1 is odd and 0xf1f1f1f1 shouldn't be a valid pointer */ - memset(dest, 0xF1, len); - global_clobber_region_function = fn; - global_clobber_region_line = line; -#endif -} - - /** Safe string copy into a known length string. maxlength does not include the terminating zero. -- cgit