summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--source4/lib/ldb/common/attrib_handlers.c35
-rw-r--r--source4/lib/util_unistr.c24
2 files changed, 50 insertions, 9 deletions
diff --git a/source4/lib/ldb/common/attrib_handlers.c b/source4/lib/ldb/common/attrib_handlers.c
index 4b6a7af1ee..7d1eff6d9a 100644
--- a/source4/lib/ldb/common/attrib_handlers.c
+++ b/source4/lib/ldb/common/attrib_handlers.c
@@ -145,15 +145,24 @@ int ldb_comparison_binary(struct ldb_context *ldb, void *mem_ctx,
compare two case insensitive strings, ignoring multiple whitespaces
and leading and trailing whitespaces
see rfc2252 section 8.1
+
+ try to optimize for the ascii case,
+ but if we find a utf8 codepoint, revert to the slower but correct function
*/
static int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx,
const struct ldb_val *v1, const struct ldb_val *v2)
{
const char *s1=(const char *)v1->data, *s2=(const char *)v2->data;
+ char *b1, *b2, *u1, *u2;
+ int ret;
while (*s1 == ' ') s1++;
while (*s2 == ' ') s2++;
/* TODO: make utf8 safe, possibly with helper function from application */
while (*s1 && *s2) {
+ /* the first 128 (0x00-0x7F) chars are ascii and utf8 guarantees they
+ * never appear in multibyte sequences */
+ if (((unsigned char)s1[0]) & 0x80) goto utf8str;
+ if (((unsigned char)s2[0]) & 0x80) goto utf8str;
if (toupper((unsigned char)*s1) != toupper((unsigned char)*s2))
break;
if (*s1 == ' ') {
@@ -163,7 +172,7 @@ static int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx,
s1++; s2++;
}
if (! (*s1 && *s2)) {
- /* remove trailing spaces only if one of the pointers
+ /* check for trailing spaces only if one of the pointers
* has reached the end of the strings otherwise we
* can mistakenly match.
* ex. "domain users" <-> "domainUpdates"
@@ -172,6 +181,30 @@ static int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx,
while (*s2 == ' ') s2++;
}
return (int)(toupper(*s1)) - (int)(toupper(*s2));
+
+utf8str:
+ /* no need to recheck from the start, just from the first utf8 char found */
+ b1 = u1 = ldb_casefold(ldb, mem_ctx, s1);
+ b2 = u2 = ldb_casefold(ldb, mem_ctx, s2);
+
+ while (*u1 & *u2) {
+ if (*u1 != *u2)
+ break;
+ if (*u1 == ' ') {
+ while (u1[0] == u1[1]) u1++;
+ while (u2[0] == u2[1]) u2++;
+ }
+ u1++; u2++;
+ }
+ if (! (*u1 && *u2)) {
+ while (*u1 == ' ') u1++;
+ while (*u2 == ' ') u2++;
+ }
+ ret = (int)(*u1 - *u2);
+ talloc_free(b1);
+ talloc_free(b2);
+
+ return ret;
}
/*
diff --git a/source4/lib/util_unistr.c b/source4/lib/util_unistr.c
index e589e6493a..b35822877c 100644
--- a/source4/lib/util_unistr.c
+++ b/source4/lib/util_unistr.c
@@ -43,10 +43,18 @@ static void load_case_tables(void)
lowcase_table = map_file(lib_path(mem_ctx, "lowcase.dat"), 0x20000);
talloc_free(mem_ctx);
if (upcase_table == NULL) {
- upcase_table = (void *)-1;
+ /* try also under codepages for testing purposes */
+ upcase_table = map_file("codepages/upcase.dat", 0x20000);
+ if (upcase_table == NULL) {
+ upcase_table = (void *)-1;
+ }
}
if (lowcase_table == NULL) {
- lowcase_table = (void *)-1;
+ /* try also under codepages for testing purposes */
+ lowcase_table = map_file("codepages/lowcase.dat", 0x20000);
+ if (lowcase_table == NULL) {
+ lowcase_table = (void *)-1;
+ }
}
}
@@ -58,12 +66,12 @@ codepoint_t toupper_w(codepoint_t val)
if (val < 128) {
return toupper(val);
}
- if (upcase_table == (void *)-1) {
- return val;
- }
if (upcase_table == NULL) {
load_case_tables();
}
+ if (upcase_table == (void *)-1) {
+ return val;
+ }
if (val & 0xFFFF0000) {
return val;
}
@@ -78,12 +86,12 @@ codepoint_t tolower_w(codepoint_t val)
if (val < 128) {
return tolower(val);
}
- if (lowcase_table == (void *)-1) {
- return val;
- }
if (lowcase_table == NULL) {
load_case_tables();
}
+ if (lowcase_table == (void *)-1) {
+ return val;
+ }
if (val & 0xFFFF0000) {
return val;
}