lib/util: Merge basic string length and comparison functions

These functions now use codepoints for more accurate string handling and now form common code.

Andrew Bartlett

Autobuild-User: Andrew Bartlett <abartlet@samba.org>
Autobuild-Date: Wed Mar 23 08:21:54 CET 2011 on sn-devel-104
author: Andrew Bartlett <abartlet@samba.org> 2011-03-18 19:10:23 +1100
committer: Andrew Bartlett <abartlet@samba.org> 2011-03-23 08:21:54 +0100
commit: 41051fd3d3ac7450771518aa12b660867ed7e819 (patch)
tree: 59c94244c7437d95e53a3227d18214f14a542d10 /lib/util/charset/util_unistr.c
parent: 89fd1cb767c966d5ba60323bdb04a6baf973e4a3 (diff)
download: samba-41051fd3d3ac7450771518aa12b660867ed7e819.tar.gz
samba-41051fd3d3ac7450771518aa12b660867ed7e819.tar.bz2
samba-41051fd3d3ac7450771518aa12b660867ed7e819.zip
1 file changed, 1 insertion, 333 deletions
diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c
index b6bfb29e7d..ad2ba687f4 100644
--- a/lib/util/charset/util_unistr.c
+++ b/lib/util/charset/util_unistr.c
@@ -22,45 +22,6 @@
 #include "system/locale.h"
 
 /**
- Case insensitive string compararison
-**/
-_PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
-{
-	codepoint_t c1=0, c2=0;
-	size_t size1, size2;
-	struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
-
-	/* handle null ptr comparisons to simplify the use in qsort */
-	if (s1 == s2) return 0;
-	if (s1 == NULL) return -1;
-	if (s2 == NULL) return 1;
-
-	while (*s1 && *s2) {
-		c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
-		c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
-
-		s1 += size1;
-		s2 += size2;
-
-		if (c1 == c2) {
-			continue;
-		}
-
-		if (c1 == INVALID_CODEPOINT ||
-		    c2 == INVALID_CODEPOINT) {
-			/* what else can we do?? */
-			return strcasecmp(s1, s2);
-		}
-
-		if (toupper_m(c1) != toupper_m(c2)) {
-			return c1 - c2;
-		}
-	}
-
-	return *s1 - *s2;
-}
-
-/**
  * Get the next token from a string, return False if none found.
  * Handles double-quotes.
  * 
@@ -106,74 +67,6 @@ _PUBLIC_ bool next_token(const char **ptr,char *buff, const char *sep, size_t bu
 	return true;
 }
 
-/**
- Case insensitive string compararison, length limited
-**/
-_PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
-{
-	codepoint_t c1=0, c2=0;
-	size_t size1, size2;
-	struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
-
-	/* handle null ptr comparisons to simplify the use in qsort */
-	if (s1 == s2) return 0;
-	if (s1 == NULL) return -1;
-	if (s2 == NULL) return 1;
-
-	while (*s1 && *s2 && n) {
-		n--;
-
-		c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
-		c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
-
-		s1 += size1;
-		s2 += size2;
-
-		if (c1 == c2) {
-			continue;
-		}
-
-		if (c1 == INVALID_CODEPOINT ||
-		    c2 == INVALID_CODEPOINT) {
-			/* what else can we do?? */
-			return strcasecmp(s1, s2);
-		}
-
-		if (toupper_m(c1) != toupper_m(c2)) {
-			return c1 - c2;
-		}
-	}
-
-	if (n == 0) {
-		return 0;
-	}
-
-	return *s1 - *s2;
-}
-
-/**
- * Compare 2 strings.
- *
- * @note The comparison is case-insensitive.
- **/
-_PUBLIC_ bool strequal_m(const char *s1, const char *s2)
-{
-	return strcasecmp_m(s1,s2) == 0;
-}
-
-/**
- Compare 2 strings (case sensitive).
-**/
-_PUBLIC_ bool strcsequal_m(const char *s1,const char *s2)
-{
-	if (s1 == s2)
-		return true;
-	if (!s1 || !s2)
-		return false;
-	
-	return strcmp(s1,s2) == 0;
-}
-
 
 /**
  String replace.
@@ -239,231 +132,6 @@ _PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_
 }
 
 /**
- * Calculate the number of units (8 or 16-bit, depending on the
- * destination charset), that would be needed to convert the input
- * string which is expected to be in in src_charset encoding to the
- * destination charset (which should be a unicode charset).
- */
-_PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
-{
-	size_t count = 0;
-	struct smb_iconv_convenience *ic = get_iconv_convenience();
-
-	if (!s) {
-		return 0;
-	}
-
-	while (*s && !(((uint8_t)*s) & 0x80)) {
-		s++;
-		count++;
-	}
-
-	if (!*s) {
-		return count;
-	}
-
-	while (*s) {
-		size_t c_size;
-		codepoint_t c = next_codepoint_convenience_ext(ic, s, src_charset, &c_size);
-		s += c_size;
-
-		switch (dst_charset) {
-		case CH_UTF16LE:
-		case CH_UTF16BE:
-		case CH_UTF16MUNGED:
-			if (c < 0x10000) {
-				count += 1;
-			} else {
-				count += 2;
-			}
-			break;
-		case CH_UTF8:
-			/*
-			 * this only checks ranges, and does not
-			 * check for invalid codepoints
-			 */
-			if (c < 0x80) {
-				count += 1;
-			} else if (c < 0x800) {
-				count += 2;
-			} else if (c < 0x1000) {
-				count += 3;
-			} else {
-				count += 4;
-			}
-			break;
-		default:
-			/*
-			 * non-unicode encoding:
-			 * assume that each codepoint fits into
-			 * one unit in the destination encoding.
-			 */
-			count += 1;
-		}
-	}
-
-	return count;
-}
-
-_PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
-				  const charset_t dst_charset)
-{
-	if (!s) {
-		return 0;
-	}
-	return strlen_m_ext(s, src_charset, dst_charset) + 1;
-}
-
-/**
- * Calculate the number of 16-bit units that would be needed to convert
- * the input string which is expected to be in CH_UNIX encoding to UTF16.
- *
- * This will be the same as the number of bytes in a string for single
- * byte strings, but will be different for multibyte.
- */
-_PUBLIC_ size_t strlen_m(const char *s)
-{
-	return strlen_m_ext(s, CH_UNIX, CH_UTF16LE);
-}
-
-/**
-   Work out the number of multibyte chars in a string, including the NULL
-   terminator.
-**/
-_PUBLIC_ size_t strlen_m_term(const char *s)
-{
-	if (!s) {
-		return 0;
-	}
-
-	return strlen_m(s) + 1;
-}
-
-/*
- * Weird helper routine for the winreg pipe: If nothing is around, return 0,
- * if a string is there, include the terminator.
- */
-
-_PUBLIC_ size_t strlen_m_term_null(const char *s)
-{
-	size_t len;
-	if (!s) {
-		return 0;
-	}
-	len = strlen_m(s);
-	if (len == 0) {
-		return 0;
-	}
-
-	return len+1;
-}
-
-/**
- Strchr and strrchr_m are a bit complex on general multi-byte strings. 
-**/
-_PUBLIC_ char *strchr_m(const char *s, char c)
-{
-	struct smb_iconv_convenience *ic = get_iconv_convenience();
-	if (s == NULL) {
-		return NULL;
-	}
-	/* characters below 0x3F are guaranteed to not appear in
-	   non-initial position in multi-byte charsets */
-	if ((c & 0xC0) == 0) {
-		return strchr(s, c);
-	}
-
-	while (*s) {
-		size_t size;
-		codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
-		if (c2 == c) {
-			return discard_const_p(char, s);
-		}
-		s += size;
-	}
-
-	return NULL;
-}
-
-/**
- * Multibyte-character version of strrchr
- */
-_PUBLIC_ char *strrchr_m(const char *s, char c)
-{
-	struct smb_iconv_convenience *ic = get_iconv_convenience();
-	char *ret = NULL;
-
-	if (s == NULL) {
-		return NULL;
-	}
-
-	/* characters below 0x3F are guaranteed to not appear in
-	   non-initial position in multi-byte charsets */
-	if ((c & 0xC0) == 0) {
-		return strrchr(s, c);
-	}
-
-	while (*s) {
-		size_t size;
-		codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
-		if (c2 == c) {
-			ret = discard_const_p(char, s);
-		}
-		s += size;
-	}
-
-	return ret;
-}
-
-/**
-  return True if any (multi-byte) character is lower case
-*/
-_PUBLIC_ bool strhaslower(const char *string)
-{
-	struct smb_iconv_convenience *ic = get_iconv_convenience();
-	while (*string) {
-		size_t c_size;
-		codepoint_t s;
-		codepoint_t t;
-
-		s = next_codepoint_convenience(ic, string, &c_size);
-		string += c_size;
-
-		t = toupper_m(s);
-
-		if (s != t) {
-			return true; /* that means it has lower case chars */
-		}
-	}
-
-	return false;
-} 
-
-/**
-  return True if any (multi-byte) character is upper case
-*/
-_PUBLIC_ bool strhasupper(const char *string)
-{
-	struct smb_iconv_convenience *ic = get_iconv_convenience();
-	while (*string) {
-		size_t c_size;
-		codepoint_t s;
-		codepoint_t t;
-
-		s = next_codepoint_convenience(ic, string, &c_size);
-		string += c_size;
-
-		t = tolower_m(s);
-
-		if (s != t) {
-			return true; /* that means it has upper case chars */
-		}
-	}
-
-	return false;
-} 
-
-/**
  Convert a string to lower case, allocated with talloc
 **/
 _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
@@ -517,7 +185,7 @@ _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
 	size_t size=0;
 	char *dest;
 	struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
-	
+
 	if (!src) {
 		return NULL;
 	}
author	Andrew Bartlett <abartlet@samba.org>	2011-03-18 19:10:23 +1100
committer	Andrew Bartlett <abartlet@samba.org>	2011-03-23 08:21:54 +0100
commit	41051fd3d3ac7450771518aa12b660867ed7e819 (patch)
tree	59c94244c7437d95e53a3227d18214f14a542d10 /lib/util/charset/util_unistr.c
parent	89fd1cb767c966d5ba60323bdb04a6baf973e4a3 (diff)
download	samba-41051fd3d3ac7450771518aa12b660867ed7e819.tar.gz samba-41051fd3d3ac7450771518aa12b660867ed7e819.tar.bz2 samba-41051fd3d3ac7450771518aa12b660867ed7e819.zip