diff options
-rw-r--r-- | source4/lib/ldb/common/attrib_handlers.c | 35 | ||||
-rw-r--r-- | source4/lib/util_unistr.c | 24 |
2 files changed, 50 insertions, 9 deletions
diff --git a/source4/lib/ldb/common/attrib_handlers.c b/source4/lib/ldb/common/attrib_handlers.c index 4b6a7af1ee..7d1eff6d9a 100644 --- a/source4/lib/ldb/common/attrib_handlers.c +++ b/source4/lib/ldb/common/attrib_handlers.c @@ -145,15 +145,24 @@ int ldb_comparison_binary(struct ldb_context *ldb, void *mem_ctx, compare two case insensitive strings, ignoring multiple whitespaces and leading and trailing whitespaces see rfc2252 section 8.1 + + try to optimize for the ascii case, + but if we find out an utf8 codepoint revert to slower but correct function */ static int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx, const struct ldb_val *v1, const struct ldb_val *v2) { const char *s1=(const char *)v1->data, *s2=(const char *)v2->data; + char *b1, *b2, *u1, *u2; + int ret; while (*s1 == ' ') s1++; while (*s2 == ' ') s2++; /* TODO: make utf8 safe, possibly with helper function from application */ while (*s1 && *s2) { + /* the first 127 (0x7F) chars are ascii and utf8 guarantes they + * never appear in multibyte sequences */ + if (((unsigned char)s1[0]) & 0x80) goto utf8str; + if (((unsigned char)s2[0]) & 0x80) goto utf8str; if (toupper((unsigned char)*s1) != toupper((unsigned char)*s2)) break; if (*s1 == ' ') { @@ -163,7 +172,7 @@ static int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx, s1++; s2++; } if (! (*s1 && *s2)) { - /* remove trailing spaces only if one of the pointers + /* check for trailing spaces only if one of the pointers * has reached the end of the strings otherwise we * can mistakenly match. * ex. "domain users" <-> "domainUpdates" @@ -172,6 +181,30 @@ static int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx, while (*s2 == ' ') s2++; } return (int)(toupper(*s1)) - (int)(toupper(*s2)); + +utf8str: + /* non need to recheck from the start, just from the first utf8 char found */ + b1 = u1 = ldb_casefold(ldb, mem_ctx, s1); + b2 = u2 = ldb_casefold(ldb, mem_ctx, s2); + + while (*u1 & *u2) { + if (*u1 != *u2) + break; + if (*u1 == ' ') { + while (u1[0] == u1[1]) u1++; + while (u2[0] == u2[1]) u2++; + } + u1++; u2++; + } + if (! (*u1 && *u2)) { + while (*u1 == ' ') u1++; + while (*u2 == ' ') u2++; + } + ret = (int)(*u1 - *u2); + talloc_free(b1); + talloc_free(b2); + + return ret; } /* diff --git a/source4/lib/util_unistr.c b/source4/lib/util_unistr.c index e589e6493a..b35822877c 100644 --- a/source4/lib/util_unistr.c +++ b/source4/lib/util_unistr.c @@ -43,10 +43,18 @@ static void load_case_tables(void) lowcase_table = map_file(lib_path(mem_ctx, "lowcase.dat"), 0x20000); talloc_free(mem_ctx); if (upcase_table == NULL) { - upcase_table = (void *)-1; + /* try also under codepages for testing purposes */ + upcase_table = map_file("codepages/upcase.dat", 0x20000); + if (upcase_table == NULL) { + upcase_table = (void *)-1; + } } if (lowcase_table == NULL) { - lowcase_table = (void *)-1; + /* try also under codepages for testing purposes */ + lowcase_table = map_file("codepages/lowcase.dat", 0x20000); + if (lowcase_table == NULL) { + lowcase_table = (void *)-1; + } } } @@ -58,12 +66,12 @@ codepoint_t toupper_w(codepoint_t val) if (val < 128) { return toupper(val); } - if (upcase_table == (void *)-1) { - return val; - } if (upcase_table == NULL) { load_case_tables(); } + if (upcase_table == (void *)-1) { + return val; + } if (val & 0xFFFF0000) { return val; } @@ -78,12 +86,12 @@ codepoint_t tolower_w(codepoint_t val) if (val < 128) { return tolower(val); } - if (lowcase_table == (void *)-1) { - return val; - } if (lowcase_table == NULL) { load_case_tables(); } + if (lowcase_table == (void *)-1) { + return val; + } if (val & 0xFFFF0000) { return val; } |