lib/util: Merge basic string length and comparison functions

These functions now use the codepoints for more accurate string handling and now form common code.

Andrew Bartlett

Autobuild-User: Andrew Bartlett <abartlet@samba.org>
Autobuild-Date: Wed Mar 23 08:21:54 CET 2011 on sn-devel-104
author: Andrew Bartlett <abartlet@samba.org> 2011-03-18 19:10:23 +1100
committer: Andrew Bartlett <abartlet@samba.org> 2011-03-23 08:21:54 +0100
commit: 41051fd3d3ac7450771518aa12b660867ed7e819 (patch)
tree: 59c94244c7437d95e53a3227d18214f14a542d10 /source3
parent: 89fd1cb767c966d5ba60323bdb04a6baf973e4a3 (diff)
download: samba-41051fd3d3ac7450771518aa12b660867ed7e819.tar.gz
samba-41051fd3d3ac7450771518aa12b660867ed7e819.tar.bz2
samba-41051fd3d3ac7450771518aa12b660867ed7e819.zip
2 files changed, 1 insertions, 293 deletions
diff --git a/source3/Makefile.in b/source3/Makefile.in
index 01245c1c12..2bc65c7285 100644
--- a/source3/Makefile.in
+++ b/source3/Makefile.in
@@ -451,7 +451,7 @@ LIB_OBJ = $(LIBSAMBAUTIL_OBJ) $(UTIL_OBJ) $(CRYPTO_OBJ) \
 	  lib/bitmap.o lib/dprintf.o $(UTIL_REG_OBJ) \
 	  lib/wins_srv.o \
 	  lib/util_str.o lib/util_sid.o \
-	  lib/util_unistr.o ../lib/util/charset/codepoints.o lib/util_file.o \
+	  lib/util_unistr.o ../lib/util/charset/codepoints.o ../lib/util/charset/util_str.o lib/util_file.o \
 	  lib/util.o lib/util_names.o \
 	  lib/util_sock.o lib/sock_exec.o lib/util_sec.o \
 	  lib/substitute.o lib/dbwrap_util.o \
diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c
index 84b280005e..1996174261 100644
--- a/source3/lib/util_str.c
+++ b/source3/lib/util_str.c
@@ -209,19 +209,6 @@ bool strnequal(const char *s1,const char *s2,size_t n)
 }
 
 /**
- Compare 2 strings (case sensitive).
-**/
-_PUBLIC_ bool strcsequal(const char *s1,const char *s2)
-{
-	if (s1 == s2)
-		return true;
-	if (!s1 || !s2)
-		return false;
-
-	return strcmp(s1,s2) == 0;
-}
-
-/**
 Do a case-insensitive, whitespace-ignoring string compare.
 **/
 
@@ -445,54 +432,6 @@ bool trim_char(char *s,char cfront,char cback)
 }
 
 /**
-  return True if any (multi-byte) character is upper case
-*/
-_PUBLIC_ bool strhasupper(const char *string)
-{
-	struct smb_iconv_convenience *ic = get_iconv_convenience();
-	while (*string) {
-		size_t c_size;
-		codepoint_t s;
-		codepoint_t t;
-
-		s = next_codepoint_convenience(ic, string, &c_size);
-		string += c_size;
-
-		t = tolower_m(s);
-
-		if (s != t) {
-			return true; /* that means it has upper case chars */
-		}
-	}
-
-	return false;
-}
-
-/**
-  return True if any (multi-byte) character is lower case
-*/
-_PUBLIC_ bool strhaslower(const char *string)
-{
-	struct smb_iconv_convenience *ic = get_iconv_convenience();
-	while (*string) {
-		size_t c_size;
-		codepoint_t s;
-		codepoint_t t;
-
-		s = next_codepoint_convenience(ic, string, &c_size);
-		string += c_size;
-
-		t = toupper_m(s);
-
-		if (s != t) {
-			return true; /* that means it has lower case chars */
-		}
-	}
-
-	return false;
-}
-
-/**
  Safe string copy into a known length string. maxlength does not
  include the terminating zero.
 **/
@@ -1103,115 +1042,6 @@ char *string_truncate(char *s, unsigned int length)
 	return s;
 }
 
-/**
- Strchr and strrchr_m are a bit complex on general multi-byte strings.
-**/
-_PUBLIC_ char *strchr_m(const char *src, char c)
-{
-	const char *s;
-	struct smb_iconv_convenience *ic = get_iconv_convenience();
-	if (src == NULL) {
-		return NULL;
-	}
-	/* characters below 0x3F are guaranteed to not appear in
-	   non-initial position in multi-byte charsets */
-	if ((c & 0xC0) == 0) {
-		return strchr(src, c);
-	}
-
-	/* this is quite a common operation, so we want it to be
-	   fast. We optimise for the ascii case, knowing that all our
-	   supported multi-byte character sets are ascii-compatible
-	   (ie. they match for the first 128 chars) */
-
-	for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
-		if (*s == c)
-			return (char *)s;
-	}
-
-	if (!*s)
-		return NULL;
-
-#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
-	/* With compose characters we must restart from the beginning. JRA. */
-	s = src;
-#endif
-
-	while (*s) {
-		size_t size;
-		codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
-		if (c2 == c) {
-			return discard_const_p(char, s);
-		}
-		s += size;
-	}
-
-	return NULL;
-}
-
-/**
- * Multibyte-character version of strrchr
- */
-_PUBLIC_ char *strrchr_m(const char *s, char c)
-{
-	struct smb_iconv_convenience *ic = get_iconv_convenience();
-	char *ret = NULL;
-
-	if (s == NULL) {
-		return NULL;
-	}
-
-	/* characters below 0x3F are guaranteed to not appear in
-	   non-initial position in multi-byte charsets */
-	if ((c & 0xC0) == 0) {
-		return strrchr(s, c);
-	}
-
-	/* this is quite a common operation, so we want it to be
-	   fast. We optimise for the ascii case, knowing that all our
-	   supported multi-byte character sets are ascii-compatible
-	   (ie. they match for the first 128 chars). Also, in Samba
-	   we only search for ascii characters in 'c' and that
-	   in all mb character sets with a compound character
-	   containing c, if 'c' is not a match at position
-	   p, then p[-1] > 0x7f. JRA. */
-
-	{
-		size_t len = strlen(s);
-		const char *cp = s;
-		bool got_mb = false;
-
-		if (len == 0)
-			return NULL;
-		cp += (len - 1);
-		do {
-			if (c == *cp) {
-				/* Could be a match. Part of a multibyte ? */
-				if ((cp > s) &&
-					(((unsigned char)cp[-1]) & 0x80)) {
-					/* Yep - go slow :-( */
-					got_mb = true;
-					break;
-				}
-				/* No - we have a match ! */
-				return (char *)cp;
-			}
-		} while (cp-- != s);
-		if (!got_mb)
-			return NULL;
-	}
-
-	while (*s) {
-		size_t size;
-		codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
-		if (c2 == c) {
-			ret = discard_const_p(char, s);
-		}
-		s += size;
-	}
-
-	return ret;
-}
 
 /***********************************************************************
  Return the equivalent of doing strrchr 'n' times - always going
@@ -1397,128 +1227,6 @@ void strupper_m(char *s)
 }
 
 /**
- * Calculate the number of units (8 or 16-bit, depending on the
- * destination charset), that would be needed to convert the input
- * string which is expected to be in in src_charset encoding to the
- * destination charset (which should be a unicode charset).
- */
-_PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
-{
-	size_t count = 0;
-	struct smb_iconv_convenience *ic = get_iconv_convenience();
-
-	if (!s) {
-		return 0;
-	}
-
-	while (*s && !(((uint8_t)*s) & 0x80)) {
-		s++;
-		count++;
-	}
-
-	if (!*s) {
-		return count;
-	}
-
-	while (*s) {
-		size_t c_size;
-		codepoint_t c = next_codepoint_convenience_ext(ic, s, src_charset, &c_size);
-		s += c_size;
-
-		switch (dst_charset) {
-		case CH_UTF16LE:
-		case CH_UTF16BE:
-		case CH_UTF16MUNGED:
-			if (c < 0x10000) {
-				/* Unicode char fits into 16 bits. */
-				count += 1;
-			} else {
-				/* Double-width unicode char - 32 bits. */
-				count += 2;
-			}
-			break;
-		case CH_UTF8:
-			/*
-			 * this only checks ranges, and does not
-			 * check for invalid codepoints
-			 */
-			if (c < 0x80) {
-				count += 1;
-			} else if (c < 0x800) {
-				count += 2;
-			} else if (c < 0x1000) {
-				count += 3;
-			} else {
-				count += 4;
-			}
-			break;
-		default:
-			/*
-			 * non-unicode encoding:
-			 * assume that each codepoint fits into
-			 * one unit in the destination encoding.
-			 */
-			count += 1;
-		}
-	}
-
-	return count;
-}
-
-_PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
-				  const charset_t dst_charset)
-{
-	if (!s) {
-		return 0;
-	}
-	return strlen_m_ext(s, src_charset, dst_charset) + 1;
-}
-
-/**
- * Calculate the number of 16-bit units that would be needed to convert
- * the input string which is expected to be in CH_UNIX encoding to UTF16.
- *
- * This will be the same as the number of bytes in a string for single
- * byte strings, but will be different for multibyte.
- */
-_PUBLIC_ size_t strlen_m(const char *s)
-{
-	return strlen_m_ext(s, CH_UNIX, CH_UTF16LE);
-}
-
-/**
-   Work out the number of multibyte chars in a string, including the NULL
-   terminator.
-**/
-_PUBLIC_ size_t strlen_m_term(const char *s)
-{
-	if (!s) {
-		return 0;
-	}
-
-	return strlen_m(s) + 1;
-}
-
-/*
- * Weird helper routine for the winreg pipe: If nothing is around, return 0,
- * if a string is there, include the terminator.
- */
-
-_PUBLIC_ size_t strlen_m_term_null(const char *s)
-{
-	size_t len;
-	if (!s) {
-		return 0;
-	}
-	len = strlen_m(s);
-	if (len == 0) {
-		return 0;
-	}
-
-	return len+1;
-}
-
-/**
  Just a typesafety wrapper for snprintf into a fstring.
 **/
author	Andrew Bartlett <abartlet@samba.org>	2011-03-18 19:10:23 +1100
committer	Andrew Bartlett <abartlet@samba.org>	2011-03-23 08:21:54 +0100
commit	41051fd3d3ac7450771518aa12b660867ed7e819 (patch)
tree	59c94244c7437d95e53a3227d18214f14a542d10 /source3
parent	89fd1cb767c966d5ba60323bdb04a6baf973e4a3 (diff)
download	samba-41051fd3d3ac7450771518aa12b660867ed7e819.tar.gz samba-41051fd3d3ac7450771518aa12b660867ed7e819.tar.bz2 samba-41051fd3d3ac7450771518aa12b660867ed7e819.zip