From 4aad79041b2ababc3336db3bea90f115c5634427 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Fri, 7 Aug 2009 14:34:01 +1000
Subject: make sure we never look past the end of either string in
 ldb_comparison_fold()

This fixes a bug in the samba3sam test with the Python libraries, as
noticed by abartlet
---
 source4/lib/ldb/common/attrib_handlers.c | 70 ++++++++++++++++++++------------
 1 file changed, 44 insertions(+), 26 deletions(-)

(limited to 'source4/lib/ldb')

diff --git a/source4/lib/ldb/common/attrib_handlers.c b/source4/lib/ldb/common/attrib_handlers.c
index 9cb9ff886c..3ea9857d52 100644
--- a/source4/lib/ldb/common/attrib_handlers.c
+++ b/source4/lib/ldb/common/attrib_handlers.c
@@ -187,13 +187,13 @@ int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx,
 {
 	const char *s1=(const char *)v1->data, *s2=(const char *)v2->data;
 	size_t n1 = v1->length, n2 = v2->length;
-	const char *u1, *u2;
 	char *b1, *b2;
+	const char *u1, *u2;
 	int ret;
-	while (*s1 == ' ' && n1) { s1++; n1--; };
-	while (*s2 == ' ' && n2) { s2++; n2--; };
-	/* TODO: make utf8 safe, possibly with helper function from application */
-	while (*s1 && *s2 && n1 && n2) {
+	while (n1 && *s1 == ' ') { s1++; n1--; };
+	while (n2 && *s2 == ' ') { s2++; n2--; };
+
+	while (n1 && n2 && *s1 && *s2) {
 		/* the first 127 (0x7F) chars are ascii and utf8 guarantes they
 		 * never appear in multibyte sequences */
 		if (((unsigned char)s1[0]) & 0x80) goto utf8str;
@@ -201,40 +201,58 @@ int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx,
 		if (toupper((unsigned char)*s1) != toupper((unsigned char)*s2))
 			break;
 		if (*s1 == ' ') {
-			while (s1[0] == s1[1] && n1) { s1++; n1--; }
-			while (s2[0] == s2[1] && n2) { s2++; n2--; }
+			while (n1 && s1[0] == s1[1]) { s1++; n1--; }
+			while (n2 && s2[0] == s2[1]) { s2++; n2--; }
 		}
 		s1++; s2++;
 		n1--; n2--;
 	}
-	if (! (*s1 && *s2)) {
-		/* check for trailing spaces only if one of the pointers
-		 * has reached the end of the strings otherwise we
-		 * can mistakenly match.
-		 * ex. "domain users" <-> "domainUpdates"
-		 */
-		while (*s1 == ' ') { s1++; n1--; }
-		while (*s2 == ' ') { s2++; n2--; }
+
+	/* check for trailing spaces only if the other pointer has
+	 * reached the end of its string, otherwise we can
+	 * mistakenly match.  e.g. "domain users" <->
+	 * "domainUpdates"
+	 */
+	if (n1 && *s1 == ' ' && (!n2 || !*s2)) {
+		while (n1 && *s1 == ' ') { s1++; n1--; }		
+	}
+	if (n2 && *s2 == ' ' && (!n1 || !*s1)) {
+		while (n2 && *s2 == ' ') { s2++; n2--; }		
+	}
+	if (n1 == 0 && n2 != 0) {
+		return -(int)toupper(*s2);
+	}
+	if (n2 == 0 && n1 != 0) {
+		return (int)toupper(*s1);
 	}
-	return (int)(toupper(*s1)) - (int)(toupper(*s2));
+	if (n2 == 0 && n2 == 0) {
+		return 0;
+	}
+	return (int)toupper(*s1) - (int)toupper(*s2);
 
 utf8str:
 	/* no need to recheck from the start, just from the first utf8 char found */
 	b1 = ldb_casefold(ldb, mem_ctx, s1, n1);
 	b2 = ldb_casefold(ldb, mem_ctx, s2, n2);
 
-	if (b1 && b2) {
-		/* Both strings converted correctly */
-
-		u1 = b1;
-		u2 = b2;
-	} else {
-		/* One of the strings was not UTF8, so we have no options but to do a binary compare */
-
-		u1 = s1;
-		u2 = s2;
+	if (!b1 || !b2) {
+		/* One of the strings was not UTF8, so we have no
+		 * options but to do a binary compare */
+		talloc_free(b1);
+		talloc_free(b2);
+		if (memcmp(s1, s2, MIN(n1, n2)) == 0) {
+			if (n1 == n2) return 0;
+			if (n1 > n2) {
+				return (int)toupper(s1[n2]);
+			} else {
+				return -(int)toupper(s2[n1]);
+			}
+		}
 	}
 
+	u1 = b1;
+	u2 = b2;
+
 	while (*u1 & *u2) {
 		if (*u1 != *u2)
 			break;
-- 
cgit