r2857: this commit gets rid of smb_ucs2_t, wpstring and fpstring, plus lots of associated functions.

The motivation for this change was to avoid having to convert to/from ucs2 strings for so many operations. Doing that was slow, used many static buffers, and was also incorrect as it didn't cope properly with Unicode codepoints above 65536 (which could not be represented correctly as smb_ucs2_t chars). The two core functions that allowed this change are next_codepoint() and push_codepoint(). These functions allow you to correctly walk an arbitrary multi-byte string a character at a time without converting the whole string to ucs2. While doing this cleanup I also fixed several ucs2 string handling bugs. See the commit for details. The following code (which counts the number of occurrences of 'c' in a string) shows how to use the new interface: size_t count_chars(const char *s, char c) { size_t count = 0; while (*s) { size_t size; codepoint_t c2 = next_codepoint(s, &size); if (c2 == c) count++; s += size; } return count; } (This used to be commit 814881f0e50019196b3aa9fbe4aeadbb98172040)
author: Andrew Tridgell <tridge@samba.org> 2004-10-08 08:13:00 +0000
committer: Gerald (Jerry) Carter <jerry@samba.org> 2007-10-10 12:59:39 -0500
commit: 7d32679e9683c81aca538f0267684332a28a286f (patch)
tree: 445aecfad24e8dab1fe7a200904a712212fa7091 /source4/lib
parent: 48f960ab47707ca24898834da4da440d1f7fb0d9 (diff)
download: samba-7d32679e9683c81aca538f0267684332a28a286f.tar.gz
samba-7d32679e9683c81aca538f0267684332a28a286f.tar.bz2
samba-7d32679e9683c81aca538f0267684332a28a286f.zip
9 files changed, 759 insertions, 752 deletions
diff --git a/source4/lib/basic.mk b/source4/lib/basic.mk
index ccd0935407..f8bb46a95d 100644
--- a/source4/lib/basic.mk
+++ b/source4/lib/basic.mk
@@ -24,6 +24,7 @@ ADD_OBJ_FILES = \
 		lib/xfile.o \
 		lib/wins_srv.o \
 		lib/util_str.o \
+		lib/util_strlist.o \
 		lib/util_sid.o \
 		lib/util_secdesc.o \
 		lib/util_uuid.o \
diff --git a/source4/lib/charcnv.c b/source4/lib/charcnv.c
index 7d00c7e78f..392ad3cc72 100644
--- a/source4/lib/charcnv.c
+++ b/source4/lib/charcnv.c
@@ -55,18 +55,6 @@ static const char *charset_name(charset_t ch)
 	return ret;
 }
 
-static void lazy_initialize_conv(void)
-{
-	static int initialized = False;
-
-	if (!initialized) {
-		initialized = True;
-		load_case_tables();
-		init_iconv();
-	}
-}
-
-
 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
 
 /*
@@ -107,6 +95,7 @@ void init_iconv(void)
 
 }
 
+
 /**
  * Convert string from one encoding to another, making error checking etc
  *
@@ -129,8 +118,6 @@ ssize_t convert_string(charset_t from, charset_t to,
 	if (srclen == (size_t)-1)
 		srclen = strlen(src)+1;
 
-	lazy_initialize_conv();
-
 	descriptor = get_conv_handle(from, to);
 
 	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
@@ -194,8 +181,6 @@ ssize_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 	if (src == NULL || srclen == (size_t)-1 || srclen == 0)
 		return (size_t)-1;
 
-	lazy_initialize_conv();
-
 	descriptor = get_conv_handle(from, to);
 
 	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
@@ -271,27 +256,27 @@ ssize_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
 {
 	size_t src_len;
 	ssize_t ret;
-	char *tmpbuf = NULL;
-
-	/* treat a pstring as "unlimited" length */
-	if (dest_len == (size_t)-1)
-		dest_len = sizeof(pstring);
 
 	if (flags & STR_UPPER) {
-		tmpbuf = strupper_talloc(NULL, src);
-		if (!tmpbuf) {
+		char *tmpbuf = strupper_talloc(NULL, src);
+		if (tmpbuf == NULL) {
 			return -1;
 		}
-		src = tmpbuf;
+		ret = push_ascii(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
+		talloc_free(tmpbuf);
+		return ret;
 	}
+
+	/* treat a pstring as "unlimited" length */
+	if (dest_len == (size_t)-1)
+		dest_len = sizeof(pstring);
+
 	src_len = strlen(src);
 
 	if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
 		src_len++;
 
-	ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len);
-	talloc_free(tmpbuf);
-	return ret;
+	return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len);
 }
 
 /**
@@ -375,6 +360,16 @@ ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
 	size_t src_len = strlen(src);
 	size_t ret;
 
+	if (flags & STR_UPPER) {
+		char *tmpbuf = strupper_talloc(NULL, src);
+		if (tmpbuf == NULL) {
+			return -1;
+		}
+		ret = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
+		talloc_free(tmpbuf);
+		return ret;
+	}
+
 	/* treat a pstring as "unlimited" length */
 	if (dest_len == (size_t)-1)
 		dest_len = sizeof(pstring);
@@ -399,17 +394,6 @@ ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
 
 	len += ret;
 
-	if (flags & STR_UPPER) {
-		smb_ucs2_t *dest_ucs2 = dest;
-		size_t i;
-		for (i = 0; i < (dest_len / 2) && dest_ucs2[i]; i++) {
-			smb_ucs2_t v = toupper_w(dest_ucs2[i]);
-			if (v != dest_ucs2[i]) {
-				dest_ucs2[i] = v;
-			}
-		}
-	}
-
 	return len;
 }
 
@@ -423,12 +407,11 @@ ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
  * @returns The number of bytes occupied by the string in the destination
  *         or -1 in case of error.
  **/
-ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
+ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, void **dest, const char *src)
 {
 	size_t src_len = strlen(src)+1;
-
 	*dest = NULL;
-	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (void **)dest);
+	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, dest);
 }
 
 
@@ -474,12 +457,9 @@ size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, i
 
 	if (flags & STR_TERMINATE) {
 		if (src_len == (size_t)-1) {
-			src_len = strlen_w(src)*2 + 2;
+			src_len = utf16_len(src);
 		} else {
-			size_t len = strnlen_w(src, src_len/2);
-			if (len < src_len/2)
-				len++;
-			src_len = len*2;
+			src_len = utf16_len_n(src, src_len);
 		}
 	}
 
@@ -507,9 +487,9 @@ ssize_t pull_ucs2_pstring(char *dest, const void *src)
  * @returns The number of bytes occupied by the string in the destination
  **/
 
-ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
+ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const void *src)
 {
-	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
+	size_t src_len = utf16_len(src);
 	*dest = NULL;
 	return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest);
 }
@@ -582,3 +562,131 @@ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len
 	}
 }
 
+
+/*
+  return the unicode codepoint for the next multi-byte CH_UNIX character
+  in the string
+
+  also return the number of bytes consumed (which tells the caller
+  how many bytes to skip to get to the next CH_UNIX character)
+
+  return INVALID_CODEPOINT if the next character cannot be converted
+*/
+codepoint_t next_codepoint(const char *str, size_t *size)
+{
+	/* it cannot occupy more than 4 bytes in UTF16 format */
+	uint8_t buf[4];
+	smb_iconv_t descriptor;
+	size_t ilen_orig;
+	size_t ilen;
+	size_t olen;
+	char *outbuf;
+
+	if ((str[0] & 0x80) == 0) {
+		*size = 1;
+		return (codepoint_t)str[0];
+	}
+
+	/* we assume that no multi-byte character can take
+	   more than 5 bytes. This is OK as we only
+	   support codepoints up to 1M */
+	ilen_orig = strnlen(str, 5);
+	ilen = ilen_orig;
+
+	descriptor = get_conv_handle(CH_UNIX, CH_UTF16);
+	if (descriptor == (smb_iconv_t)-1) {
+		*size = 1;
+		return INVALID_CODEPOINT;
+	}
+
+	/* this looks a little strange, but it is needed to cope
+	   with codepoints above 64k */
+	olen = 2;
+	outbuf = buf;
+	smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
+	if (olen == 2) {
+		olen = 4;
+		outbuf = buf;
+		smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
+		if (olen == 4) {
+			/* we didn't convert any bytes */
+			*size = 1;
+			return INVALID_CODEPOINT;
+		}
+		olen = 4 - olen;
+	} else {
+		olen = 2 - olen;
+	}
+
+	*size = ilen_orig - ilen;
+
+	if (olen == 2) {
+		return (codepoint_t)SVAL(buf, 0);
+	}
+	if (olen == 4) {
+		/* decode a 4 byte UTF16 character manually */
+		return (codepoint_t)0x10000 + 
+			(buf[2] | ((buf[3] & 0x3)<<8) | 
+			 (buf[0]<<10) | ((buf[1] & 0x3)<<18));
+	}
+
+	/* no other length is valid */
+	return INVALID_CODEPOINT;
+}
+
+/*
+  push a single codepoint into a CH_UNIX string the target string must
+  be able to hold the full character, which is guaranteed if it is at
+  least 5 bytes in size. The caller may pass less than 5 bytes if they
+  are sure the character will fit (for example, you can assume that
+  uppercase/lowercase of a character will not add more than 1 byte)
+
+  return the number of bytes occupied by the CH_UNIX character, or
+  -1 on failure
+*/
+ssize_t push_codepoint(char *str, codepoint_t c)
+{
+	smb_iconv_t descriptor;
+	uint8_t buf[4];
+	size_t ilen, olen;
+	const char *inbuf;
+	
+	if (c < 128) {
+		*str = c;
+		return 1;
+	}
+
+	descriptor = get_conv_handle(CH_UTF16, CH_UNIX);
+	if (descriptor == (smb_iconv_t)-1) {
+		return -1;
+	}
+
+	if (c < 0x10000) {
+		ilen = 2;
+		olen = 5;
+		inbuf = buf;
+		SSVAL(buf, 0, c);
+		smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
+		if (ilen != 0) {
+			return -1;
+		}
+		return 5 - olen;
+	}
+
+	c -= 0x10000;
+
+	buf[0] = (c>>10) & 0xFF;
+	buf[1] = (c>>18) | 0xd8;
+	buf[2] = c & 0xFF;
+	buf[3] = ((c>>8) & 0x3) | 0xdc;
+
+	ilen = 4;
+	olen = 5;
+	inbuf = buf;
+
+	smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
+	if (ilen != 0) {
+		return -1;
+	}
+	return 5 - olen;
+}
diff --git a/source4/lib/cmdline/popt_common.c b/source4/lib/cmdline/popt_common.c
index 725a5060c0..554b46a940 100644
--- a/source4/lib/cmdline/popt_common.c
+++ b/source4/lib/cmdline/popt_common.c
@@ -391,8 +391,8 @@ static void popt_common_credentials_callback(poptContext con,
 				d_printf("ERROR: Unable to fetch machine password\n");
 				exit(1);
 			}
-			pstr_sprintf(cmdline_auth_info.username, "%s$", 
-				     lp_netbios_name());
+			snprintf(cmdline_auth_info.username, sizeof(cmdline_auth_info.username), 
+				 "%s$", lp_netbios_name());
 			pstrcpy(cmdline_auth_info.password,opt_password);
 			SAFE_FREE(opt_password);
 
@@ -451,17 +451,21 @@ const char *cmdline_get_userdomain(void)
 
 const char *cmdline_get_userpassword(void)
 {
-	pstring prompt;
+	char *prompt;
+	char *ret;
 
 	if (cmdline_auth_info.got_pass) {
 		return cmdline_auth_info.password;
 	}
 
-	pstr_sprintf(prompt, "Password for [%s\\%s]:", 
-			cmdline_get_userdomain(),
-			cmdline_get_username());
+	prompt = talloc_asprintf(NULL, "Password for [%s\\%s]:", 
+				 cmdline_get_userdomain(),
+				 cmdline_get_username());
 
-	return getpass(prompt);
+	ret = getpass(prompt);
+
+	talloc_free(prompt);
+	return ret;
 }
 
 void cmdline_set_userpassword(const char *pass)
diff --git a/source4/lib/iconv.c b/source4/lib/iconv.c
index 567f5b5902..f4f7660bcd 100644
--- a/source4/lib/iconv.c
+++ b/source4/lib/iconv.c
@@ -113,7 +113,7 @@ static size_t sys_iconv(void *cd,
 			char **outbuf, size_t *outbytesleft)
 {
 	size_t ret = iconv((iconv_t)cd, 
-			   inbuf, inbytesleft, 
+			   discard_const_p(char *, inbuf), inbytesleft, 
 			   outbuf, outbytesleft);
 	if (ret == (size_t)-1) iconv(cd, NULL, NULL, NULL, NULL);
 	return ret;
diff --git a/source4/lib/ms_fnmatch.c b/source4/lib/ms_fnmatch.c
index 507d2aea4a..386dac4ea3 100644
--- a/source4/lib/ms_fnmatch.c
+++ b/source4/lib/ms_fnmatch.c
@@ -27,13 +27,13 @@
 
 #include "includes.h"
 
-static int null_match(const smb_ucs2_t *p)
+static int null_match(const char *p)
 {
 	for (;*p;p++) {
-		if (*p != UCS2_CHAR('*') &&
-		    *p != UCS2_CHAR('<') &&
-		    *p != UCS2_CHAR('"') &&
-		    *p != UCS2_CHAR('>')) return -1;
+		if (*p != '*' &&
+		    *p != '<' &&
+		    *p != '"' &&
+		    *p != '>') return -1;
 	}
 	return 0;
 }
@@ -44,8 +44,8 @@ static int null_match(const smb_ucs2_t *p)
   not grow exponentially
 */
 struct max_n {
-	const smb_ucs2_t *predot;
-	const smb_ucs2_t *postdot;
+	const char *predot;
+	const char *postdot;
 };
 
 
@@ -54,20 +54,24 @@ struct max_n {
   an optimisation only. The ldot pointer is NULL if the string does
   not contain a '.', otherwise it points at the last dot in 'n'.
 */
-static int ms_fnmatch_core(const smb_ucs2_t *p, const smb_ucs2_t *n, 
-			   struct max_n *max_n, const smb_ucs2_t *ldot)
+static int ms_fnmatch_core(const char *p, const char *n, 
+			   struct max_n *max_n, const char *ldot)
 {
-	smb_ucs2_t c;
+	codepoint_t c, c2;
 	int i;
+	size_t size, size_n;
+
+	while ((c = next_codepoint(p, &size))) {
+		p += size;
 
-	while ((c = *p++)) {
 		switch (c) {
+		case '*':
 			/* a '*' matches zero or more characters of any type */
-		case UCS2_CHAR('*'):
 			if (max_n->predot && max_n->predot <= n) {
 				return null_match(p);
 			}
-			for (i=0; n[i]; i++) {
+			for (i=0; n[i]; i += size_n) {
+				next_codepoint(n+i, &size_n);
 				if (ms_fnmatch_core(p, n+i, max_n+1, ldot) == 0) {
 					return 0;
 				}
@@ -75,20 +79,21 @@ static int ms_fnmatch_core(const smb_ucs2_t *p, const smb_ucs2_t *n,
 			if (!max_n->predot || max_n->predot > n) max_n->predot = n;
 			return null_match(p);
 
+		case '<':
 			/* a '<' matches zero or more characters of
 			   any type, but stops matching at the last
 			   '.' in the string. */
-		case UCS2_CHAR('<'):
 			if (max_n->predot && max_n->predot <= n) {
 				return null_match(p);
 			}
 			if (max_n->postdot && max_n->postdot <= n && n <= ldot) {
 				return -1;
 			}
-			for (i=0; n[i]; i++) {
+			for (i=0; n[i]; i += size_n) {
+				next_codepoint(n+i, &size_n);
 				if (ms_fnmatch_core(p, n+i, max_n+1, ldot) == 0) return 0;
 				if (n+i == ldot) {
-					if (ms_fnmatch_core(p, n+i+1, max_n+1, ldot) == 0) return 0;
+					if (ms_fnmatch_core(p, n+i+size_n, max_n+1, ldot) == 0) return 0;
 					if (!max_n->postdot || max_n->postdot > n) max_n->postdot = n;
 					return -1;
 				}
@@ -96,39 +101,45 @@ static int ms_fnmatch_core(const smb_ucs2_t *p, const smb_ucs2_t *n,
 			if (!max_n->predot || max_n->predot > n) max_n->predot = n;
 			return null_match(p);
 
+		case '?':
 			/* a '?' matches any single character */
-		case UCS2_CHAR('?'):
 			if (! *n) {
 				return -1;
 			}
-			n++;
+			next_codepoint(n, &size_n);
+			n += size_n;
 			break;
 
-			/* a '?' matches any single character */
-		case UCS2_CHAR('>'):
-			if (n[0] == UCS2_CHAR('.')) {
+		case '>':
+			/* a '?' matches any single character, but
+			   treats '.' specially */
+			if (n[0] == '.') {
 				if (! n[1] && null_match(p) == 0) {
 					return 0;
 				}
 				break;
 			}
 			if (! *n) return null_match(p);
-			n++;
+			next_codepoint(n, &size_n);
+			n += size_n;
 			break;
 
-		case UCS2_CHAR('"'):
+		case '"':
+			/* a bit like a soft '.' */
 			if (*n == 0 && null_match(p) == 0) {
 				return 0;
 			}
-			if (*n != UCS2_CHAR('.')) return -1;
-			n++;
+			if (*n != '.') return -1;
+			next_codepoint(n, &size_n);
+			n += size_n;
 			break;
 
 		default:
-			if (c != *n && toupper_w(c) != toupper_w(*n)) {
+			c2 = next_codepoint(n, &size_n);
+			if (c != c2 && codepoint_cmpi(c, c2) != 0) {
 				return -1;
 			}
-			n++;
+			n += size_n;
 			break;
 		}
 	}
@@ -142,7 +153,6 @@ static int ms_fnmatch_core(const smb_ucs2_t *p, const smb_ucs2_t *n,
 
 int ms_fnmatch(const char *pattern, const char *string, enum protocol_types protocol)
 {
-	wpstring p, s;
 	int ret, count, i;
 	struct max_n *max_n = NULL;
 
@@ -156,31 +166,36 @@ int ms_fnmatch(const char *pattern, const char *string, enum protocol_types prot
 		return StrCaseCmp(pattern, string);
 	}
 
-	pstrcpy_wa(p, pattern);
-	pstrcpy_wa(s, string);
-
 	if (protocol <= PROTOCOL_LANMAN2) {
+		char *p = talloc_strdup(NULL, pattern);
+		if (p == NULL) {
+			return -1;
+		}
 		/*
 		  for older negotiated protocols it is possible to
 		  translate the pattern to produce a "new style"
 		  pattern that exactly matches w2k behaviour
 		*/
 		for (i=0;p[i];i++) {
-			if (p[i] == UCS2_CHAR('?')) {
-				p[i] = UCS2_CHAR('>');
-			} else if (p[i] == UCS2_CHAR('.') && 
-				   (p[i+1] == UCS2_CHAR('?') || 
-				    p[i+1] == UCS2_CHAR('*') ||
+			if (p[i] == '?') {
+				p[i] = '>';
+			} else if (p[i] == '.' && 
+				   (p[i+1] == '?' || 
+				    p[i+1] == '*' ||
 				    p[i+1] == 0)) {
-				p[i] = UCS2_CHAR('"');
-			} else if (p[i] == UCS2_CHAR('*') && p[i+1] == UCS2_CHAR('.')) {
-				p[i] = UCS2_CHAR('<');
+				p[i] = '"';
+			} else if (p[i] == '*' && 
+				   p[i+1] == '.') {
+				p[i] = '<';
 			}
 		}
+		ret = ms_fnmatch(p, string, PROTOCOL_NT1);
+		talloc_free(p);
+		return ret;
 	}
 
-	for (count=i=0;p[i];i++) {
-		if (p[i] == UCS2_CHAR('*') || p[i] == UCS2_CHAR('<')) count++;
+	for (count=i=0;pattern[i];i++) {
+		if (pattern[i] == '*' || pattern[i] == '<') count++;
 	}
 
 	max_n = talloc_array_p(NULL, struct max_n, count);
@@ -189,7 +204,7 @@ int ms_fnmatch(const char *pattern, const char *string, enum protocol_types prot
 	}
 	memset(max_n, 0, sizeof(struct max_n) * count);
 
-	ret = ms_fnmatch_core(p, s, max_n, strrchr_w(s, UCS2_CHAR('.')));
+	ret = ms_fnmatch_core(pattern, string, max_n, strrchr(string, '.'));
 
 	talloc_free(max_n);
 
diff --git a/source4/lib/registry/reg_backend_nt4/reg_backend_nt4.c b/source4/lib/registry/reg_backend_nt4/reg_backend_nt4.c
index c271c55991..14fff5b60d 100644
--- a/source4/lib/registry/reg_backend_nt4/reg_backend_nt4.c
+++ b/source4/lib/registry/reg_backend_nt4/reg_backend_nt4.c
@@ -1029,7 +1029,7 @@ static WERROR nk_to_key(TALLOC_CTX *mem_ctx, struct registry_hive *h, NK_HDR *nk
 	uint_t nk_id;
 	SK_HDR *sk_hdr;
 	int type;
-	char key_name[1024], cls_name[1024];
+	char key_name[1024];
 
 	if (!nk_hdr) return WERR_INVALID_PARAM;
 
@@ -1086,16 +1086,16 @@ static WERROR nk_to_key(TALLOC_CTX *mem_ctx, struct registry_hive *h, NK_HDR *nk
 	 */
 
 	if (clsname_len) { /* Just print in Ascii for now */
-		smb_ucs2_t *clsnamep;
+		void *clsnamep;
 		int clsnam_off;
 
 		clsnam_off = IVAL(&nk_hdr->clsnam_off,0);
-		clsnamep = (smb_ucs2_t *)LOCN(regf->base, clsnam_off);
+		clsnamep = LOCN(regf->base, clsnam_off);
 		DEBUG(2, ("Class Name Offset: %0X\n", clsnam_off));
 
 		pull_ucs2_talloc(mem_ctx, &tmp->class_name, clsnamep);
 
-		DEBUGADD(2,("  Class Name: %s\n", cls_name));
+		DEBUGADD(2,("  Class Name: %s\n", tmp->class_name));
 
 	}
 
diff --git a/source4/lib/util_str.c b/source4/lib/util_str.c
index 0e58face16..c4c68a3dcd 100644
--- a/source4/lib/util_str.c
+++ b/source4/lib/util_str.c
@@ -74,54 +74,37 @@ BOOL next_token(const char **ptr,char *buff, const char *sep, size_t bufsize)
 	return(True);
 }
 
-static uint16_t tmpbuf[sizeof(pstring)];
-
-
 /**
- Case insensitive string compararison.
+ Case insensitive string compararison
 **/
-static int StrCaseCmp_slow(const char *s1, const char *s2)
+int StrCaseCmp(const char *s1, const char *s2)
 {
-	smb_ucs2_t *u1 = NULL;
-	smb_ucs2_t *u2;
-	int ret;
-
-	if (convert_string_talloc(NULL, CH_UNIX, CH_UTF16, s1, strlen(s1)+1, (void **)&u1) == -1 ||
-	    convert_string_talloc(u1, CH_UNIX, CH_UTF16, s2, strlen(s2)+1, (void **)&u2) == -1) {
-		talloc_free(u1);
-		/* fallback to a simple comparison */
-		return strcasecmp(s1, s2);
-	}
+	codepoint_t c1=0, c2=0;
+	size_t size1, size2;
 
-	ret = strcasecmp_w(u1, u2);
+	while (*s1 && *s2) {
+		c1 = next_codepoint(s1, &size1);
+		c2 = next_codepoint(s2, &size2);
 
-	talloc_free(u1);
+		s1 += size1;
+		s2 += size2;
 
-	return ret;
-}
+		if (c1 == c2) {
+			continue;
+		}
 
-/**
- Case insensitive string compararison, accelerated version
-**/
-int StrCaseCmp(const char *s1, const char *s2)
-{
-	while (*s1 && *s2 &&
-	       (*s1 & 0x80) == 0 && 
-	       (*s2 & 0x80) == 0) {
-		char u1 = toupper(*s1);
-		char u2 = toupper(*s2);
-		if (u1 != u2) {
-			return u1 - u2;
+		if (c1 == INVALID_CODEPOINT ||
+		    c2 == INVALID_CODEPOINT) {
+			/* what else can we do?? */
+			return c1 - c2;
 		}
-		s1++;
-		s2++;
-	}
 
-	if (*s1 == 0 || *s2 == 0) {
-		return *s1 - *s2;
+		if (toupper_w(c1) != toupper_w(c2)) {
+			return c1 - c2;
+		}
 	}
 
-	return StrCaseCmp_slow(s1, s2);
+	return *s1 - *s2;
 }
 
 /**
@@ -136,27 +119,26 @@ BOOL strequal(const char *s1, const char *s2)
 	if (!s1 || !s2)
 		return(False);
   
-	return(StrCaseCmp(s1,s2)==0);
+	return StrCaseCmp(s1,s2) == 0;
 }
 
 /**
  Compare 2 strings (case sensitive).
 **/
-
 BOOL strcsequal(const char *s1,const char *s2)
 {
-  if (s1 == s2)
-	  return(True);
-  if (!s1 || !s2)
-	  return(False);
-  
-  return(strcmp(s1,s2)==0);
+	if (s1 == s2)
+		return(True);
+	if (!s1 || !s2)
+		return(False);
+	
+	return strcmp(s1,s2) == 0;
 }
 
+
 /**
 Do a case-insensitive, whitespace-ignoring string compare.
 **/
-
 int strwicmp(const char *psz1, const char *psz2)
 {
 	/* if BOTH strings are NULL, return TRUE, if ONE is NULL return */
@@ -187,20 +169,21 @@ int strwicmp(const char *psz1, const char *psz2)
  String replace.
  NOTE: oldc and newc must be 7 bit characters
 **/
-
-void string_replace(char *s,char oldc,char newc)
+void string_replace(char *s, char oldc, char newc)
 {
-	if (strchr(s, oldc)) {
-		push_ucs2(tmpbuf,s, sizeof(tmpbuf), STR_TERMINATE);
-		string_replace_w(tmpbuf, UCS2_CHAR(oldc), UCS2_CHAR(newc));
-		pull_ucs2(s, tmpbuf, strlen(s)+1, sizeof(tmpbuf), STR_TERMINATE);
+	while (*s) {
+		size_t size;
+		codepoint_t c = next_codepoint(s, &size);
+		if (c == oldc) {
+			*s = newc;
+		}
+		s += size;
 	}
 }
 
 /**
  Trim the specified elements off the front and back of a string.
 **/
-
 BOOL trim_string(char *s,const char *front,const char *back)
 {
 	BOOL ret = False;
@@ -238,60 +221,26 @@ BOOL trim_string(char *s,const char *front,const char *back)
 }
 
 /**
- Does a string have any uppercase chars in it?
-**/
-
-BOOL strhasupper(const char *s)
-{
-	smb_ucs2_t *ptr;
-	push_ucs2(tmpbuf,s, sizeof(tmpbuf), STR_TERMINATE);
-	for(ptr=tmpbuf;*ptr;ptr++)
-		if(isupper_w(*ptr))
-			return True;
-	return(False);
-}
-
-/**
- Does a string have any lowercase chars in it?
-**/
-
-BOOL strhaslower(const char *s)
-{
-	smb_ucs2_t *ptr;
-	push_ucs2(tmpbuf,s, sizeof(tmpbuf), STR_TERMINATE);
-	for(ptr=tmpbuf;*ptr;ptr++)
-		if(islower_w(*ptr))
-			return True;
-	return(False);
-}
-
-/**
  Find the number of 'c' chars in a string
 **/
-
-size_t count_chars(const char *s,char c)
+size_t count_chars(const char *s, char c)
 {
-	smb_ucs2_t *ptr;
-	int count;
-	smb_ucs2_t *alloc_tmpbuf = NULL;
+	size_t count = 0;
 
-	if (push_ucs2_talloc(NULL, &alloc_tmpbuf, s) == (size_t)-1) {
-		return 0;
+	while (*s) {
+		size_t size;
+		codepoint_t c2 = next_codepoint(s, &size);
+		if (c2 == c) count++;
+		s += size;
 	}
 
-	for(count=0,ptr=alloc_tmpbuf;*ptr;ptr++)
-		if(*ptr==UCS2_CHAR(c))
-			count++;
-
-	talloc_free(alloc_tmpbuf);
-	return(count);
+	return count;
 }
 
 /**
  Safe string copy into a known length string. maxlength does not
  include the terminating zero.
 **/
-
 char *safe_strcpy(char *dest,const char *src, size_t maxlength)
 {
 	size_t len;
@@ -334,7 +283,6 @@ char *safe_strcpy(char *dest,const char *src, size_t maxlength)
  Safe string cat into a string. maxlength does not
  include the terminating zero.
 **/
-
 char *safe_strcat(char *dest, const char *src, size_t maxlength)
 {
 	size_t src_len, dest_len;
@@ -446,7 +394,6 @@ char *StrnCpy(char *dest,const char *src,size_t n)
  valid examples: "0A5D15"; "0x15, 0x49, 0xa2"; "59\ta9\te3\n"
 
 **/
-
 size_t strhex_to_str(char *p, size_t len, const char *strhex)
 {
 	size_t i;
@@ -493,10 +440,10 @@ DATA_BLOB strhex_to_data_blob(const char *strhex)
 	return ret_blob;
 }
 
+
 /**
  * Routine to print a buffer as HEX digits, into an allocated string.
  */
-
 void hex_encode(const unsigned char *buff_in, size_t len, char **out_hex_buffer)
 {
 	int i;
@@ -512,7 +459,6 @@ void hex_encode(const unsigned char *buff_in, size_t len, char **out_hex_buffer)
 /**
  Check if a string is part of a list.
 **/
-
 BOOL in_list(const char *s, const char *list, BOOL casesensitive)
 {
 	pstring tok;
@@ -681,36 +627,31 @@ const char *octal_string(int i)
 
 
 /**
- Strchr and strrchr_m are very hard to do on general multi-byte strings. 
- We convert via ucs2 for now.
+ Strchr and strrchr_m are a bit complex on general multi-byte strings. 
 **/
-
 char *strchr_m(const char *s, char c)
 {
-	wpstring ws;
-	pstring s2;
-	smb_ucs2_t *p;
-
 	/* characters below 0x3F are guaranteed to not appear in
 	   non-initial position in multi-byte charsets */
 	if ((c & 0xC0) == 0) {
 		return strchr(s, c);
 	}
 
-	push_ucs2(ws, s, sizeof(ws), STR_TERMINATE);
-	p = strchr_w(ws, UCS2_CHAR(c));
-	if (!p)
-		return NULL;
-	*p = 0;
-	pull_ucs2_pstring(s2, ws);
-	return discard_const_p(char, s+strlen(s2));
+	while (*s) {
+		size_t size;
+		codepoint_t c2 = next_codepoint(s, &size);
+		if (c2 == c) {
+			return discard_const(s);
+		}
+		s += size;
+	}
+
+	return NULL;
 }
 
 char *strrchr_m(const char *s, char c)
 {
-	wpstring ws;
-	pstring s2;
-	smb_ucs2_t *p;
+	char *ret = NULL;
 
 	/* characters below 0x3F are guaranteed to not appear in
 	   non-initial position in multi-byte charsets */
@@ -718,69 +659,99 @@ char *strrchr_m(const char *s, char c)
 		return strrchr(s, c);
 	}
 
-	push_ucs2(ws, s, sizeof(ws), STR_TERMINATE);
-	p = strrchr_w(ws, UCS2_CHAR(c));
-	if (!p)
-		return NULL;
-	*p = 0;
-	pull_ucs2_pstring(s2, ws);
-	return discard_const_p(char, s+strlen(s2));
+	while (*s) {
+		size_t size;
+		codepoint_t c2 = next_codepoint(s, &size);
+		if (c2 == c) {
+			ret = discard_const(s);
+		}
+		s += size;
+	}
+
+	return ret;
 }
 
 /**
  Convert a string to lower case, allocated with talloc
 **/
-
 char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
 {
-	size_t size;
-	smb_ucs2_t *buffer;
+	size_t size=0;
 	char *dest;
 
-	size = push_ucs2_talloc(ctx, &buffer, src);
-	if (size == -1) {
+	/* this takes advantage of the fact that upper/lower can't
+	   change the length of a character by more than 1 byte */
+	dest = talloc(ctx, 2*(strlen(src))+1);
+	if (dest == NULL) {
 		return NULL;
 	}
-	strlower_w(buffer);
 
-	size = pull_ucs2_talloc(ctx, &dest, buffer);
-	talloc_free(buffer);
+	while (*src) {
+		size_t c_size;
+		codepoint_t c = next_codepoint(src, &c_size);
+		src += c_size;
+
+		c = tolower_w(c);
+
+		c_size = push_codepoint(dest+size, c);
+		if (c_size == -1) {
+			talloc_free(dest);
+			return NULL;
+		}
+		size += c_size;
+	}
+
+	dest[size] = 0;
+
 	return dest;
 }
 
 /**
  Convert a string to UPPER case, allocated with talloc
 **/
-
 char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
 {
-	size_t size;
-	smb_ucs2_t *buffer;
+	size_t size=0;
 	char *dest;
 
-	size = push_ucs2_talloc(ctx, &buffer, src);
-	if (size == -1) {
+	/* this takes advantage of the fact that upper/lower can't
+	   change the length of a character by more than 1 byte */
+	dest = talloc(ctx, 2*(strlen(src))+1);
+	if (dest == NULL) {
 		return NULL;
 	}
-	strupper_w(buffer);
 
-	size = pull_ucs2_talloc(ctx, &dest, buffer);
-	talloc_free(buffer);
+	while (*src) {
+		size_t c_size;
+		codepoint_t c = next_codepoint(src, &c_size);
+		src += c_size;
+
+		c = toupper_w(c);
+
+		c_size = push_codepoint(dest+size, c);
+		if (c_size == -1) {
+			talloc_free(dest);
+			return NULL;
+		}
+		size += c_size;
+	}
+
+	dest[size] = 0;
+
 	return dest;
 }
 
 /**
  Convert a string to lower case.
 **/
-
 void strlower_m(char *s)
 {
-	char *lower;
+	char *d;
+
 	/* this is quite a common operation, so we want it to be
 	   fast. We optimise for the ascii case, knowing that all our
 	   supported multi-byte character sets are ascii-compatible
 	   (ie. they match for the first 128 chars) */
-
 	while (*s && !(((uint8_t)s[0]) & 0x7F)) {
 		*s = tolower((uint8_t)*s);
 		s++;
@@ -789,27 +760,32 @@ void strlower_m(char *s)
 	if (!*s)
 		return;
 
-	/* I assume that lowercased string takes the same number of bytes
-	 * as source string even in UTF-8 encoding. (VIV) */
-	lower = strlower_talloc(NULL, s);
-	if (lower) {
-		safe_strcpy(s, lower, strlen(s));
+	d = s;
+
+	while (*s) {
+		size_t c_size, c_size2;
+		codepoint_t c = next_codepoint(s, &c_size);
+		c_size2 = push_codepoint(d, tolower_w(c));
+		if (c_size2 > c_size) {
+			smb_panic("codepoint expansion in strlower_m\n");
+		}
+		s += c_size;
+		d += c_size2;
 	}
-	talloc_free(lower);
+	*d = 0;
 }
 
 /**
  Convert a string to UPPER case.
 **/
-
 void strupper_m(char *s)
 {
-	char *upper;
+	char *d;
+
 	/* this is quite a common operation, so we want it to be
 	   fast. We optimise for the ascii case, knowing that all our
 	   supported multi-byte character sets are ascii-compatible
 	   (ie. they match for the first 128 chars) */
-
 	while (*s && !(((uint8_t)s[0]) & 0x7F)) {
 		*s = toupper((uint8_t)*s);
 		s++;
@@ -818,13 +794,19 @@ void strupper_m(char *s)
 	if (!*s)
 		return;
 
-	/* I assume that uppercased string takes the same number of bytes
-	 * as source string even in UTF-8 encoding. (VIV) */
-	upper = strupper_talloc(NULL, s);
-	if (upper) {
-		safe_strcpy(s, upper, strlen(s));
+	d = s;
+
+	while (*s) {
+		size_t c_size, c_size2;
+		codepoint_t c = next_codepoint(s, &c_size);
+		c_size2 = push_codepoint(d, toupper_w(c));
+		if (c_size2 > c_size) {
+			smb_panic("codepoint expansion in strupper_m\n");
+		}
+		s += c_size;
+		d += c_size2;
 	}
-	talloc_free(upper);
+	*d = 0;
 }
 
 /**
@@ -832,13 +814,9 @@ void strupper_m(char *s)
  be the same as the number of bytes in a string for single byte strings,
  but will be different for multibyte.
 **/
-
 size_t strlen_m(const char *s)
 {
 	size_t count = 0;
-	smb_ucs2_t *tmp;
-
-	size_t len;
 
 	if (!s) {
 		return 0;
@@ -853,12 +831,18 @@ size_t strlen_m(const char *s)
 		return count;
 	}
 
-	SMB_ASSERT(push_ucs2_talloc(NULL, &tmp, s) != -1);
-
-	len = count + strlen_w(tmp);
-	talloc_free(tmp);
+	while (*s) {
+		size_t c_size;
+		codepoint_t c = next_codepoint(s, &c_size);
+		if (c < 0x10000) {
+			count += 1;
+		} else {
+			count += 2;
+		}
+		s += c_size;
+	}
 
-	return len;
+	return count;
 }
 
 /**
@@ -879,7 +863,6 @@ size_t strlen_m_term(const char *s)
  Used in LDAP filters.
  Caller must free.
 **/
-
 char *binary_string(char *buf, int len)
 {
 	char *s;
@@ -898,21 +881,6 @@ char *binary_string(char *buf, int len)
 	return s;
 }
 
-/**
- Just a typesafety wrapper for snprintf into a pstring.
-**/
-
- int pstr_sprintf(pstring s, const char *fmt, ...)
-{
-	va_list ap;
-	int ret;
-
-	va_start(ap, fmt);
-	ret = vsnprintf(s, PSTRING_LEN, fmt, ap);
-	va_end(ap);
-	return ret;
-}
-
 #ifndef HAVE_STRNDUP
 /**
  Some platforms don't have strndup.
@@ -945,305 +913,6 @@ char *binary_string(char *buf, int len)
 }
 #endif
 
-/**
- List of Strings manipulation functions
-**/
-
-#define S_LIST_ABS 16 /* List Allocation Block Size */
-
-char **str_list_make(const char *string, const char *sep)
-{
-	char **list, **rlist;
-	const char *str;
-	char *s;
-	int num, lsize;
-	pstring tok;
-	
-	if (!string || !*string)
-		return NULL;
-	s = strdup(string);
-	if (!s) {
-		DEBUG(0,("str_list_make: Unable to allocate memory"));
-		return NULL;
-	}
-	if (!sep) sep = LIST_SEP;
-	
-	num = lsize = 0;
-	list = NULL;
-	
-	str = s;
-	while (next_token(&str, tok, sep, sizeof(tok))) {		
-		if (num == lsize) {
-			lsize += S_LIST_ABS;
-			rlist = (char **)Realloc(list, ((sizeof(char **)) * (lsize +1)));
-			if (!rlist) {
-				DEBUG(0,("str_list_make: Unable to allocate memory"));
-				str_list_free(&list);
-				SAFE_FREE(s);
-				return NULL;
-			} else
-				list = rlist;
-			memset (&list[num], 0, ((sizeof(char**)) * (S_LIST_ABS +1)));
-		}
-		
-		list[num] = strdup(tok);
-		if (!list[num]) {
-			DEBUG(0,("str_list_make: Unable to allocate memory"));
-			str_list_free(&list);
-			SAFE_FREE(s);
-			return NULL;
-		}
-	
-		num++;	
-	}
-	
-	SAFE_FREE(s);
-	return list;
-}
-
-BOOL str_list_copy(char ***dest, const char **src)
-{
-	char **list, **rlist;
-	int num, lsize;
-	
-	*dest = NULL;
-	if (!src)
-		return False;
-	
-	num = lsize = 0;
-	list = NULL;
-		
-	while (src[num]) {
-		if (num == lsize) {
-			lsize += S_LIST_ABS;
-			rlist = (char **)Realloc(list, ((sizeof(char **)) * (lsize +1)));
-			if (!rlist) {
-				DEBUG(0,("str_list_copy: Unable to re-allocate memory"));
-				str_list_free(&list);
-				return False;
-			} else
-				list = rlist;
-			memset (&list[num], 0, ((sizeof(char **)) * (S_LIST_ABS +1)));
-		}
-		
-		list[num] = strdup(src[num]);
-		if (!list[num]) {
-			DEBUG(0,("str_list_copy: Unable to allocate memory"));
-			str_list_free(&list);
-			return False;
-		}
-
-		num++;
-	}
-	
-	*dest = list;
-	return True;	
-}
-
-/**
- * Return true if all the elements of the list match exactly.
- **/
-BOOL str_list_compare(char **list1, char **list2)
-{
-	int num;
-	
-	if (!list1 || !list2)
-		return (list1 == list2); 
-	
-	for (num = 0; list1[num]; num++) {
-		if (!list2[num])
-			return False;
-		if (!strcsequal(list1[num], list2[num]))
-			return False;
-	}
-	if (list2[num])
-		return False; /* if list2 has more elements than list1 fail */
-	
-	return True;
-}
-
-void str_list_free(char ***list)
-{
-	char **tlist;
-	
-	if (!list || !*list)
-		return;
-	tlist = *list;
-	for(; *tlist; tlist++)
-		SAFE_FREE(*tlist);
-	SAFE_FREE(*list);
-}
-
-BOOL str_list_substitute(char **list, const char *pattern, const char *insert)
-{
-	char *p, *s, *t;
-	ssize_t ls, lp, li, ld, i, d;
-
-	if (!list)
-		return False;
-	if (!pattern)
-		return False;
-	if (!insert)
-		return False;
-
-	lp = (ssize_t)strlen(pattern);
-	li = (ssize_t)strlen(insert);
-	ld = li -lp;
-			
-	while (*list) {
-		s = *list;
-		ls = (ssize_t)strlen(s);
-
-		while ((p = strstr(s, pattern))) {
-			t = *list;
-			d = p -t;
-			if (ld) {
-				t = (char *) malloc(ls +ld +1);
-				if (!t) {
-					DEBUG(0,("str_list_substitute: Unable to allocate memory"));
-					return False;
-				}
-				memcpy(t, *list, d);
-				memcpy(t +d +li, p +lp, ls -d -lp +1);
-				SAFE_FREE(*list);
-				*list = t;
-				ls += ld;
-				s = t +d +li;
-			}
-			
-			for (i = 0; i < li; i++) {
-				switch (insert[i]) {
-					case '`':
-					case '"':
-					case '\'':
-					case ';':
-					case '$':
-					case '%':
-					case '\r':
-					case '\n':
-						t[d +i] = '_';
-						break;
-					default:
-						t[d +i] = insert[i];
-				}
-			}	
-		}
-		
-		list++;
-	}
-	
-	return True;
-}
-
-
-#define IPSTR_LIST_SEP	","
-
-/**
- * Add ip string representation to ipstr list. Used also
- * as part of @function ipstr_list_make
- *
- * @param ipstr_list pointer to string containing ip list;
- *        MUST BE already allocated and IS reallocated if necessary
- * @param ipstr_size pointer to current size of ipstr_list (might be changed
- *        as a result of reallocation)
- * @param ip IP address which is to be added to list
- * @return pointer to string appended with new ip and possibly
- *         reallocated to new length
- **/
-
-char* ipstr_list_add(char** ipstr_list, const struct in_addr *ip)
-{
-	char* new_ipstr = NULL;
-	
-	/* arguments checking */
-	if (!ipstr_list || !ip) return NULL;
-
-	/* attempt to convert ip to a string and append colon separator to it */
-	if (*ipstr_list) {
-		asprintf(&new_ipstr, "%s%s%s", *ipstr_list, IPSTR_LIST_SEP,inet_ntoa(*ip));
-		SAFE_FREE(*ipstr_list);
-	} else {
-		asprintf(&new_ipstr, "%s", inet_ntoa(*ip));
-	}
-	*ipstr_list = new_ipstr;
-	return *ipstr_list;
-}
-
-/**
- * Allocate and initialise an ipstr list using ip adresses
- * passed as arguments.
- *
- * @param ipstr_list pointer to string meant to be allocated and set
- * @param ip_list array of ip addresses to place in the list
- * @param ip_count number of addresses stored in ip_list
- * @return pointer to allocated ip string
- **/
- 
-char* ipstr_list_make(char** ipstr_list, const struct in_addr* ip_list, int ip_count)
-{
-	int i;
-	
-	/* arguments checking */
-	if (!ip_list && !ipstr_list) return 0;
-
-	*ipstr_list = NULL;
-	
-	/* process ip addresses given as arguments */
-	for (i = 0; i < ip_count; i++)
-		*ipstr_list = ipstr_list_add(ipstr_list, &ip_list[i]);
-	
-	return (*ipstr_list);
-}
-
-
-/**
- * Parse given ip string list into array of ip addresses
- * (as in_addr structures)
- *
- * @param ipstr ip string list to be parsed 
- * @param ip_list pointer to array of ip addresses which is
- *        allocated by this function and must be freed by caller
- * @return number of succesfully parsed addresses
- **/
- 
-int ipstr_list_parse(const char* ipstr_list, struct in_addr** ip_list)
-{
-	fstring token_str;
-	int count;
-
-	if (!ipstr_list || !ip_list) return 0;
-	
-	for (*ip_list = NULL, count = 0;
-	     next_token(&ipstr_list, token_str, IPSTR_LIST_SEP, FSTRING_LEN);
-	     count++) {
-	     
-		struct in_addr addr;
-
-		/* convert single token to ip address */
-		if ( (addr.s_addr = inet_addr(token_str)) == INADDR_NONE )
-			break;
-		
-		/* prepare place for another in_addr structure */
-		*ip_list = Realloc(*ip_list, (count + 1) * sizeof(struct in_addr));
-		if (!*ip_list) return -1;
-		
-		(*ip_list)[count] = addr;
-	}
-	
-	return count;
-}
-
-
-/**
- * Safely free ip string list
- *
- * @param ipstr_list ip string list to be freed
- **/
-
-void ipstr_list_free(char* ipstr_list)
-{
-	SAFE_FREE(ipstr_list);
-}
 
 /**
  Unescape a URL encoded string, in place.
diff --git a/source4/lib/util_strlist.c b/source4/lib/util_strlist.c
new file mode 100644
index 0000000000..12fb0946e2
--- /dev/null
+++ b/source4/lib/util_strlist.c
@@ -0,0 +1,323 @@
+/* 
+   Unix SMB/CIFS implementation.
+   
+   Copyright (C) Andrew Tridgell 1992-2004
+   Copyright (C) Simo Sorce      2001-2002
+   Copyright (C) Martin Pool     2003
+   
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "includes.h"
+
+/**
+ List of Strings manipulation functions
+**/
+
+#define S_LIST_ABS 16 /* List Allocation Block Size */
+/* Split a copy of string into tokens with next_token() (sep, or LIST_SEP if sep is NULL) and return them as a NULL-terminated array of strdup()ed strings; NULL if string is NULL/empty, no token is produced, or allocation fails. */
+char **str_list_make(const char *string, const char *sep)
+{
+	char **list, **rlist;
+	const char *str;
+	char *s;
+	int num, lsize;
+	pstring tok;
+	
+	if (!string || !*string)
+		return NULL;
+	s = strdup(string);
+	if (!s) {
+		DEBUG(0,("str_list_make: Unable to allocate memory"));
+		return NULL;
+	}
+	if (!sep) sep = LIST_SEP;
+	
+	num = lsize = 0;
+	list = NULL;
+	
+	str = s;
+	while (next_token(&str, tok, sep, sizeof(tok))) {		
+		if (num == lsize) { /* array full: grow by S_LIST_ABS entries (+1 for the terminator) */
+			lsize += S_LIST_ABS;
+			rlist = (char **)Realloc(list, ((sizeof(char **)) * (lsize +1)));
+			if (!rlist) {
+				DEBUG(0,("str_list_make: Unable to allocate memory"));
+				str_list_free(&list);
+				SAFE_FREE(s);
+				return NULL;
+			} else
+				list = rlist;
+			memset (&list[num], 0, ((sizeof(char**)) * (S_LIST_ABS +1))); /* zero the new slots so the list stays NULL-terminated */
+		}
+		
+		list[num] = strdup(tok);
+		if (!list[num]) {
+			DEBUG(0,("str_list_make: Unable to allocate memory"));
+			str_list_free(&list);
+			SAFE_FREE(s);
+			return NULL;
+		}
+	
+		num++;	
+	}
+	
+	SAFE_FREE(s);
+	return list;
+}
+/* Copy the NULL-terminated list src into a new array of strdup()ed strings stored in *dest; returns False with *dest == NULL if src is NULL or allocation fails. An empty src yields True with *dest == NULL. */
+BOOL str_list_copy(char ***dest, const char **src)
+{
+	char **list, **rlist;
+	int num, lsize;
+	
+	*dest = NULL;
+	if (!src)
+		return False;
+	
+	num = lsize = 0;
+	list = NULL;
+		
+	while (src[num]) {
+		if (num == lsize) { /* array full: grow by S_LIST_ABS entries (+1 for the terminator) */
+			lsize += S_LIST_ABS;
+			rlist = (char **)Realloc(list, ((sizeof(char **)) * (lsize +1)));
+			if (!rlist) {
+				DEBUG(0,("str_list_copy: Unable to re-allocate memory"));
+				str_list_free(&list);
+				return False;
+			} else
+				list = rlist;
+			memset (&list[num], 0, ((sizeof(char **)) * (S_LIST_ABS +1))); /* zero the new slots so the list stays NULL-terminated */
+		}
+		
+		list[num] = strdup(src[num]);
+		if (!list[num]) {
+			DEBUG(0,("str_list_copy: Unable to allocate memory"));
+			str_list_free(&list);
+			return False;
+		}
+
+		num++;
+	}
+	
+	*dest = list;
+	return True;	
+}
+
+/**
+   Return True if both lists have the same number of elements and every pair of elements satisfies strcsequal().
+ **/
+BOOL str_list_compare(char **list1, char **list2)
+{
+	int num;
+	/* a NULL list is only equal to another NULL list */
+	if (!list1 || !list2)
+		return (list1 == list2); 
+	
+	for (num = 0; list1[num]; num++) {
+		if (!list2[num])
+			return False;
+		if (!strcsequal(list1[num], list2[num]))
+			return False;
+	}
+	if (list2[num])
+		return False; /* if list2 has more elements than list1 fail */
+	
+	return True;
+}
+/* Release every string of the NULL-terminated array *list and then the array itself, all via SAFE_FREE; does nothing if list or *list is NULL. */
+void str_list_free(char ***list)
+{
+	char **tlist;
+	
+	if (!list || !*list)
+		return;
+	tlist = *list;
+	for(; *tlist; tlist++)
+		SAFE_FREE(*tlist);
+	SAFE_FREE(*list);
+}
+
+/**
+ Replace every occurrence of pattern in each element of list with insert;
+ the characters ` " ' ; $ % CR and LF of insert are written as '_'.
+ Returns False on a NULL argument, an empty pattern or allocation failure.
+**/
+BOOL str_list_substitute(char **list, const char *pattern, const char *insert)
+{
+	char *p, *s, *t;
+	ssize_t ls, lp, li, ld, i, d;
+
+	if (!list || !pattern || !*pattern || !insert)
+		return False;
+
+	lp = (ssize_t)strlen(pattern);
+	li = (ssize_t)strlen(insert);
+	ld = li -lp;
+
+	while (*list) {
+		s = *list;
+		ls = (ssize_t)strlen(s);
+
+		while ((p = strstr(s, pattern))) {
+			t = *list;
+			d = p -t;
+			if (ld) {
+				t = (char *) malloc(ls +ld +1);
+				if (!t) {
+					DEBUG(0,("str_list_substitute: Unable to allocate memory"));
+					return False;
+				}
+				memcpy(t, *list, d);
+				memcpy(t +d +li, p +lp, ls -d -lp +1);
+				SAFE_FREE(*list);
+				*list = t;
+				ls += ld;
+			}
+			s = t +d +li; /* always resume after the inserted text, never rescan it */
+			for (i = 0; i < li; i++) {
+				switch (insert[i]) {
+					case '`':
+					case '"':
+					case '\'':
+					case ';':
+					case '$':
+					case '%':
+					case '\r':
+					case '\n':
+						t[d +i] = '_';
+						break;
+					default:
+						t[d +i] = insert[i];
+				}
+			}	
+		}
+		
+		list++;
+	}
+	
+	return True;
+}
+
+
+#define IPSTR_LIST_SEP	","
+
+/**
+ * Add ip string representation to ipstr list. Used also
+ * as part of @function ipstr_list_make
+ *
+ * @param ipstr_list pointer to the list string, or to NULL for an empty
+ *        list; the old string is freed and replaced by a new allocation
+ * @param ip IP address which is to be added to list
+ * @return pointer to the new list string (also stored in *ipstr_list),
+ *         or NULL on bad arguments or allocation failure
+ **/
+
+char* ipstr_list_add(char** ipstr_list, const struct in_addr *ip)
+{
+	char* new_ipstr = NULL;
+	int ret;
+	
+	/* arguments checking */
+	if (!ipstr_list || !ip) return NULL;
+
+	/* format the address, preceded by the old list and IPSTR_LIST_SEP if any */
+	if (*ipstr_list) {
+		ret = asprintf(&new_ipstr, "%s%s%s", *ipstr_list, IPSTR_LIST_SEP,inet_ntoa(*ip));
+		SAFE_FREE(*ipstr_list);
+	} else {
+		ret = asprintf(&new_ipstr, "%s", inet_ntoa(*ip));
+	}
+	if (ret < 0) new_ipstr = NULL; /* asprintf may leave the pointer undefined on failure */
+	*ipstr_list = new_ipstr;
+	return *ipstr_list;
+}
+
+/**
+ * Allocate and initialise an ipstr list using ip addresses
+ * passed as arguments.
+ *
+ * @param ipstr_list pointer to string meant to be allocated and set
+ * @param ip_list array of ip addresses to place in the list
+ * @param ip_count number of addresses stored in ip_list
+ * @return pointer to allocated ip string, NULL if nothing was built
+ **/
+ 
+char* ipstr_list_make(char** ipstr_list, const struct in_addr* ip_list, int ip_count)
+{
+	int i;
+	
+	/* arguments checking: ipstr_list is written through below */
+	if (!ipstr_list) return NULL;
+
+	*ipstr_list = NULL;
+	if (!ip_list) return NULL; /* nothing to add; never index a NULL array */
+	/* process ip addresses given as arguments */
+	for (i = 0; i < ip_count; i++)
+		*ipstr_list = ipstr_list_add(ipstr_list, &ip_list[i]);
+	
+	return (*ipstr_list);
+}
+
+
+/**
+ * Parse given ip string list into array of ip addresses
+ * (as in_addr structures); stops at the first token inet_addr() rejects
+ *
+ * @param ipstr_list ip string list to be parsed
+ * @param ip_list pointer to array of ip addresses which is
+ *        allocated by this function and must be freed by caller
+ * @return number of successfully parsed addresses (0 for NULL arguments), or -1 if Realloc fails
+ **/
+ 
+int ipstr_list_parse(const char* ipstr_list, struct in_addr** ip_list)
+{
+	fstring token_str;
+	int count;
+
+	if (!ipstr_list || !ip_list) return 0;
+	
+	for (*ip_list = NULL, count = 0;
+	     next_token(&ipstr_list, token_str, IPSTR_LIST_SEP, FSTRING_LEN);
+	     count++) {
+	     
+		struct in_addr addr;
+
+		/* convert single token to ip address */
+		if ( (addr.s_addr = inet_addr(token_str)) == INADDR_NONE )
+			break;
+		
+		/* prepare place for another in_addr structure */
+		*ip_list = Realloc(*ip_list, (count + 1) * sizeof(struct in_addr));
+		if (!*ip_list) return -1; /* NOTE(review): the old array pointer was overwritten above - confirm Realloc frees it on failure */
+		
+		(*ip_list)[count] = addr;
+	}
+	
+	return count;
+}
+
+
+/**
+ * Safely free ip string list
+ * (the pointer is passed by value, so the caller's own copy is not reset)
+ * @param ipstr_list ip string list to be freed
+ **/
+
+void ipstr_list_free(char* ipstr_list)
+{
+	SAFE_FREE(ipstr_list);
+}
diff --git a/source4/lib/util_unistr.c b/source4/lib/util_unistr.c
index 713f50708b..8b7504986f 100644
--- a/source4/lib/util_unistr.c
+++ b/source4/lib/util_unistr.c
@@ -21,16 +21,16 @@
 
 #include "includes.h"
 
-/* these 3 tables define the unicode case handling.  They are loaded
+/* these 2 tables define the unicode case handling.  They are loaded
    at startup either via mmap() or read() from the lib directory */
-static smb_ucs2_t *upcase_table;
-static smb_ucs2_t *lowcase_table;
+static void *upcase_table;
+static void *lowcase_table;
 
 
 /*******************************************************************
 load the case handling tables
 ********************************************************************/
-void load_case_tables(void)
+static void load_case_tables(void)
 {
 	static int initialised;
 	int i;
@@ -56,14 +56,10 @@ void load_case_tables(void)
 			smb_panic("No memory for upcase tables");
 		}
 		for (i=0;i<0x10000;i++) {
-			smb_ucs2_t v;
-			SSVAL(&v, 0, i);
-			upcase_table[v] = i;
+			SSVAL(upcase_table, i*2, i);
 		}
 		for (i=0;i<256;i++) {
-			smb_ucs2_t v;
-			SSVAL(&v, 0, UCS2_CHAR(i));
-			upcase_table[v] = UCS2_CHAR(islower(i)?toupper(i):i);
+			SSVAL(upcase_table, i*2, islower(i)?toupper(i):i);
 		}
 	}
 
@@ -74,199 +70,79 @@ void load_case_tables(void)
 			smb_panic("No memory for lowcase tables");
 		}
 		for (i=0;i<0x10000;i++) {
-			smb_ucs2_t v;
-			SSVAL(&v, 0, i);
-			lowcase_table[v] = i;
+			SSVAL(lowcase_table, i*2, i);
 		}
 		for (i=0;i<256;i++) {
-			smb_ucs2_t v;
-			SSVAL(&v, 0, UCS2_CHAR(i));
-			lowcase_table[v] = UCS2_CHAR(isupper(i)?tolower(i):i);
+			SSVAL(lowcase_table, i*2, isupper(i)?tolower(i):i);
 		}
 	}
 }
 
 /*******************************************************************
- Convert a wchar to upper case.
+ Convert a codepoint_t to upper case.
 ********************************************************************/
-smb_ucs2_t toupper_w(smb_ucs2_t val)
+codepoint_t toupper_w(codepoint_t val)
 {
-	return upcase_table[SVAL(&val,0)];
-}
-
-/*******************************************************************
- Convert a wchar to lower case.
-********************************************************************/
-static smb_ucs2_t tolower_w( smb_ucs2_t val )
-{
-	return lowcase_table[SVAL(&val,0)];
-
-}
-
-/*******************************************************************
-determine if a character is lowercase
-********************************************************************/
-BOOL islower_w(smb_ucs2_t c)
-{
-	return upcase_table[SVAL(&c,0)] != c;
+	if (val & 0xFFFF0000) {
+		return val;
+	}
+	if (val < 128) {
+		return toupper(val);
+	}
+	if (upcase_table == NULL) {
+		load_case_tables();
+	}
+	return SVAL(upcase_table, val*2);
 }
 
 /*******************************************************************
-determine if a character is uppercase
+ Convert a codepoint_t to lower case.
 ********************************************************************/
-BOOL isupper_w(smb_ucs2_t c)
+codepoint_t tolower_w(codepoint_t val)
 {
-	return lowcase_table[SVAL(&c,0)] != c;
+	if (val & 0xFFFF0000) {
+		return val;
+	}
+	if (val < 128) {
+		return tolower(val);
+	}
+	if (lowcase_table == NULL) {
+		load_case_tables();
+	}
+	return SVAL(lowcase_table, val*2);
 }
 
-
 /*******************************************************************
- Count the number of characters in a smb_ucs2_t string.
+return the number of bytes occupied by a buffer in CH_UTF16 format
+the result includes the null termination
 ********************************************************************/
-size_t strlen_w(const smb_ucs2_t *src)
+size_t utf16_len(const void *buf)
 {
 	size_t len;
 
-	for (len = 0; SVAL(src,0); len++, src++) ;
+	for (len = 0; SVAL(buf,len); len += 2) ;
 
-	return len;
+	return len + 2;
 }
 
 /*******************************************************************
- Count up to max number of characters in a smb_ucs2_t string.
+return the number of bytes occupied by a buffer in CH_UTF16 format
+the result includes the null termination
+limited by 'n' bytes
 ********************************************************************/
-size_t strnlen_w(const smb_ucs2_t *src, size_t max)
+size_t utf16_len_n(const void *src, size_t n)
 {
 	size_t len;
 
-	for (len = 0; (len < max) && SVAL(src, 0); len++, src++) ;
-
-	return len;
-}
-
-/*******************************************************************
-wide strchr()
-********************************************************************/
-smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
-{
-	while (*s != 0) {
-		if (c == *s) return discard_const_p(smb_ucs2_t, s);
-		s++;
-	}
-	if (c == *s) return discard_const_p(smb_ucs2_t, s);
-
-	return NULL;
-}
-
-smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c)
-{
-	return strchr_w(s, UCS2_CHAR(c));
-}
-
-smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
-{
-	const smb_ucs2_t *p = s;
-	int len = strlen_w(s);
-	if (len == 0) return NULL;
-	p += (len - 1);
-	do {
-		if (c == *p) return discard_const_p(smb_ucs2_t, p);
-	} while (p-- != s);
-	return NULL;
-}
-
-/*******************************************************************
- Convert a string to lower case.
- return True if any char is converted
-********************************************************************/
-BOOL strlower_w(smb_ucs2_t *s)
-{
-	BOOL ret = False;
-	while (*s) {
-		smb_ucs2_t v = tolower_w(*s);
-		if (v != *s) {
-			*s = v;
-			ret = True;
-		}
-		s++;
-	}
-	return ret;
-}
-
-/*******************************************************************
- Convert a string to upper case.
- return True if any char is converted
-********************************************************************/
-BOOL strupper_w(smb_ucs2_t *s)
-{
-	BOOL ret = False;
-	while (*s) {
-		smb_ucs2_t v = toupper_w(*s);
-		if (v != *s) {
-			*s = v;
-			ret = True;
-		}
-		s++;
-	}
-	return ret;
-}
-
-/*******************************************************************
-case insensitive string comparison
-********************************************************************/
-int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
-{
-	while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; }
-	return (tolower_w(*a) - tolower_w(*b));
-}
-
-/*******************************************************************
-replace any occurence of oldc with newc in unicode string
-********************************************************************/
-
-void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
-{
-	for(;*s;s++) {
-		if(*s==oldc) *s=newc;
-	}
-}
-
-
-/*
-  The *_wa() functions take a combination of 7 bit ascii
-  and wide characters They are used so that you can use string
-  functions combining C string constants with ucs2 strings
-
-  The char* arguments must NOT be multibyte - to be completely sure
-  of this only pass string constants */
+	for (len = 0; (len+2 < n) && SVAL(src, len); len += 2) ;
 
-
-void pstrcpy_wa(smb_ucs2_t *dest, const char *src)
-{
-	int i;
-	for (i=0;i<PSTRING_LEN;i++) {
-		dest[i] = UCS2_CHAR(src[i]);
-		if (src[i] == 0) return;
+	if (len+2 <= n) {
+		len += 2;
 	}
-}
 
-int strcmp_wa(const smb_ucs2_t *a, const char *b)
-{
-	while (*b && *a == UCS2_CHAR(*b)) { a++; b++; }
-	return (*a - UCS2_CHAR(*b));
+	return len;
 }
 
-const smb_ucs2_t *strpbrk_wa(const smb_ucs2_t *s, const char *p)
-{
-	while (*s != 0) {
-		int i;
-		for (i=0; p[i] && *s != UCS2_CHAR(p[i]); i++) 
-			;
-		if (p[i]) return s;
-		s++;
-	}
-	return NULL;
-}
 
 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 {
@@ -275,3 +151,14 @@ size_t ucs2_align(const void *base_ptr, const void *p, int flags)
 	return PTR_DIFF(p, base_ptr) & 1;
 }
 
+/*
+  compare two codepoints case insensitively: 0 if they are equal or equal after toupper_w(), otherwise c1 - c2
+*/
+int codepoint_cmpi(codepoint_t c1, codepoint_t c2)
+{
+	if (c1 == c2 ||
+	    toupper_w(c1) == toupper_w(c2)) {
+		return 0;
+	}
+	return c1 - c2;
+}
author	Andrew Tridgell <tridge@samba.org>	2004-10-08 08:13:00 +0000
committer	Gerald (Jerry) Carter <jerry@samba.org>	2007-10-10 12:59:39 -0500
commit	7d32679e9683c81aca538f0267684332a28a286f (patch)
tree	445aecfad24e8dab1fe7a200904a712212fa7091 /source4/lib
parent	48f960ab47707ca24898834da4da440d1f7fb0d9 (diff)
download	samba-7d32679e9683c81aca538f0267684332a28a286f.tar.gz samba-7d32679e9683c81aca538f0267684332a28a286f.tar.bz2 samba-7d32679e9683c81aca538f0267684332a28a286f.zip