summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Pool <mbp@samba.org>2003-03-18 07:31:49 +0000
committerMartin Pool <mbp@samba.org>2003-03-18 07:31:49 +0000
commit743d429d9b425aa84d89bd65c08beb06374026c8 (patch)
treeb61f53d922f5283abea4ce12bbb6dcc456702cb3
parent6b2fb1e2fe23816cb21fda9424e6eb1d2537794d (diff)
downloadsamba-743d429d9b425aa84d89bd65c08beb06374026c8.tar.gz
samba-743d429d9b425aa84d89bd65c08beb06374026c8.tar.bz2
samba-743d429d9b425aa84d89bd65c08beb06374026c8.zip
Step one of optimizations for StrCaseCmp:
First of all, do a char-by-char walk through both buffers until we get to a non-ascii character, or a difference between the strings. This prefix can be directly compared without needing to call into iconv. This should be much faster for strings that are either all ascii, or differ near the start. (This used to be commit f7f692b2db4dd513068d6d8fed2792186933ddda)
-rw-r--r--source3/lib/util_str.c79
1 files changed, 72 insertions, 7 deletions
diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c
index 8ef4ddade6..5157de0d91 100644
--- a/source3/lib/util_str.c
+++ b/source3/lib/util_str.c
@@ -1,8 +1,10 @@
/*
Unix SMB/CIFS implementation.
Samba utility functions
+
Copyright (C) Andrew Tridgell 1992-2001
Copyright (C) Simo Sorce 2001-2002
+ Copyright (C) Martin Pool 2003
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -22,6 +24,11 @@
#include "includes.h"
/**
+ * @file
+ * @brief String utilities.
+ **/
+
+/**
* Get the next token from a string, return False if none found.
* Handles double-quotes.
*
@@ -140,21 +147,79 @@ char **toktocliplist(int *ctok, const char *sep)
}
/**
- Case insensitive string compararison.
-**/
-
+ * Case insensitive string comparison.
+ *
+ * iconv does not directly give us a way to compare strings in
+ * arbitrary unix character sets -- all we can do is convert and then
+ * compare. This is expensive.
+ *
+ * As an optimization, we do a first pass that considers only the
+ * prefix of the strings that is entirely 7-bit. Within this, we
+ * check whether they have the same value.
+ *
+ * Hopefully this will often give the answer without needing to copy.
+ * In particular it should speed comparisons to literal ascii strings
+ * or comparisons of strings that are "obviously" different.
+ *
+ * If we find a non-ascii character we fall back to converting via
+ * iconv.
+ *
+ * This should never be slower than converting the whole thing, and
+ * often faster.
+ *
+ * A different optimization would be to compare for bitwise equality
+ * in the binary encoding. (It would be possible though hairy to do
+ * both simultaneously.) But in that case if they turn out to be
+ * different, we'd need to restart the whole thing.
+ *
+ * Even better is to implement strcasecmp for each encoding and use a
+ * function pointer.
+ **/
int StrCaseCmp(const char *s, const char *t)
{
+
+ const char * ps, * pt;
pstring buf1, buf2;
- unix_strupper(s, strlen(s)+1, buf1, sizeof(buf1));
- unix_strupper(t, strlen(t)+1, buf2, sizeof(buf2));
- return strcmp(buf1,buf2);
+
+ for (ps = s, pt = t; ; ps++, pt++) {
+ char us, ut;
+
+ if (!*ps && !*pt)
+ return 0; /* both ended */
+ else if (!*ps)
+ return -1; /* s is a prefix */
+ else if (!*pt)
+ return +1; /* t is a prefix */
+ else if ((*ps & 0x80) || (*pt & 0x80))
+ /* not ascii anymore, do it the hard way from here on in */
+ break;
+
+ us = toupper(*ps);
+ ut = toupper(*pt);
+ if (us == ut)
+ continue;
+ else if (us < ut)
+ return -1;
+ else if (us > ut)
+ return +1;
+ }
+
+ /* TODO: Don't do this with a fixed-length buffer. This could
+ * still be much more efficient. */
+ /* TODO: Hardcode a char-by-char comparison for UTF-8, which
+ * can be much faster. */
+ /* TODO: Test case for this! */
+
+ unix_strupper(ps, strlen(ps)+1, buf1, sizeof(buf1));
+ unix_strupper(pt, strlen(pt)+1, buf2, sizeof(buf2));
+
+ return strcmp(buf1, buf2);
}
+
/**
Case insensitive string compararison, length limited.
**/
-
int StrnCaseCmp(const char *s, const char *t, size_t n)
{
pstring buf1, buf2;