diff options
author | Simo Sorce <idra@samba.org> | 2006-02-04 16:44:27 +0000 |
---|---|---|
committer | Gerald (Jerry) Carter <jerry@samba.org> | 2007-10-10 13:51:47 -0500 |
commit | e1e693792c1af66283e869dc427d03c6e9983776 (patch) | |
tree | 06d45e5373ecfea9ba43d6bb83407da7fea5f20c /source4 | |
parent | 1a53c1dc927efbc6a594ed513feb9ab9247078e8 (diff) | |
download | samba-e1e693792c1af66283e869dc427d03c6e9983776.tar.gz samba-e1e693792c1af66283e869dc427d03c6e9983776.tar.bz2 samba-e1e693792c1af66283e869dc427d03c6e9983776.zip |
r13347: - Now we compare values with an optimized utf8-safe
function if the user provides a utf8-compliant
casefold function to ldb.
- Fix toupper_w and tolower_w so they do not crash if
the case tables are not found
- Let load_case_tables() also search the codepages
directory in the source tree for the case
tables so that we can test utf8
Simo
(This used to be commit e12f070958eb3c144beb81c5cb878db122249021)
Diffstat (limited to 'source4')
-rw-r--r-- | source4/lib/ldb/common/attrib_handlers.c | 35 | ||||
-rw-r--r-- | source4/lib/util_unistr.c | 24 |
2 files changed, 50 insertions, 9 deletions
diff --git a/source4/lib/ldb/common/attrib_handlers.c b/source4/lib/ldb/common/attrib_handlers.c index 4b6a7af1ee..7d1eff6d9a 100644 --- a/source4/lib/ldb/common/attrib_handlers.c +++ b/source4/lib/ldb/common/attrib_handlers.c @@ -145,15 +145,24 @@ int ldb_comparison_binary(struct ldb_context *ldb, void *mem_ctx, compare two case insensitive strings, ignoring multiple whitespaces and leading and trailing whitespaces see rfc2252 section 8.1 + + try to optimize for the ascii case, + but if we find out an utf8 codepoint revert to slower but correct function */ static int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx, const struct ldb_val *v1, const struct ldb_val *v2) { const char *s1=(const char *)v1->data, *s2=(const char *)v2->data; + char *b1, *b2, *u1, *u2; + int ret; while (*s1 == ' ') s1++; while (*s2 == ' ') s2++; /* TODO: make utf8 safe, possibly with helper function from application */ while (*s1 && *s2) { + /* the first 127 (0x7F) chars are ascii and utf8 guarantes they + * never appear in multibyte sequences */ + if (((unsigned char)s1[0]) & 0x80) goto utf8str; + if (((unsigned char)s2[0]) & 0x80) goto utf8str; if (toupper((unsigned char)*s1) != toupper((unsigned char)*s2)) break; if (*s1 == ' ') { @@ -163,7 +172,7 @@ static int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx, s1++; s2++; } if (! (*s1 && *s2)) { - /* remove trailing spaces only if one of the pointers + /* check for trailing spaces only if one of the pointers * has reached the end of the strings otherwise we * can mistakenly match. * ex. 
"domain users" <-> "domainUpdates" @@ -172,6 +181,30 @@ static int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx, while (*s2 == ' ') s2++; } return (int)(toupper(*s1)) - (int)(toupper(*s2)); + +utf8str: + /* non need to recheck from the start, just from the first utf8 char found */ + b1 = u1 = ldb_casefold(ldb, mem_ctx, s1); + b2 = u2 = ldb_casefold(ldb, mem_ctx, s2); + + while (*u1 & *u2) { + if (*u1 != *u2) + break; + if (*u1 == ' ') { + while (u1[0] == u1[1]) u1++; + while (u2[0] == u2[1]) u2++; + } + u1++; u2++; + } + if (! (*u1 && *u2)) { + while (*u1 == ' ') u1++; + while (*u2 == ' ') u2++; + } + ret = (int)(*u1 - *u2); + talloc_free(b1); + talloc_free(b2); + + return ret; } /* diff --git a/source4/lib/util_unistr.c b/source4/lib/util_unistr.c index e589e6493a..b35822877c 100644 --- a/source4/lib/util_unistr.c +++ b/source4/lib/util_unistr.c @@ -43,10 +43,18 @@ static void load_case_tables(void) lowcase_table = map_file(lib_path(mem_ctx, "lowcase.dat"), 0x20000); talloc_free(mem_ctx); if (upcase_table == NULL) { - upcase_table = (void *)-1; + /* try also under codepages for testing purposes */ + upcase_table = map_file("codepages/upcase.dat", 0x20000); + if (upcase_table == NULL) { + upcase_table = (void *)-1; + } } if (lowcase_table == NULL) { - lowcase_table = (void *)-1; + /* try also under codepages for testing purposes */ + lowcase_table = map_file("codepages/lowcase.dat", 0x20000); + if (lowcase_table == NULL) { + lowcase_table = (void *)-1; + } } } @@ -58,12 +66,12 @@ codepoint_t toupper_w(codepoint_t val) if (val < 128) { return toupper(val); } - if (upcase_table == (void *)-1) { - return val; - } if (upcase_table == NULL) { load_case_tables(); } + if (upcase_table == (void *)-1) { + return val; + } if (val & 0xFFFF0000) { return val; } @@ -78,12 +86,12 @@ codepoint_t tolower_w(codepoint_t val) if (val < 128) { return tolower(val); } - if (lowcase_table == (void *)-1) { - return val; - } if (lowcase_table == NULL) { load_case_tables(); } 
+ if (lowcase_table == (void *)-1) { + return val; + } if (val & 0xFFFF0000) { return val; } |