From 41051fd3d3ac7450771518aa12b660867ed7e819 Mon Sep 17 00:00:00 2001 From: Andrew Bartlett Date: Fri, 18 Mar 2011 19:10:23 +1100 Subject: lib/util: Merge basic string length and comparison functions These functions now use the codepoints for more accurate string handling and now form common code. Andrew Bartlett Autobuild-User: Andrew Bartlett Autobuild-Date: Wed Mar 23 08:21:54 CET 2011 on sn-devel-104 --- source3/Makefile.in | 2 +- source3/lib/util_str.c | 292 ------------------------------------------------- 2 files changed, 1 insertion(+), 293 deletions(-) (limited to 'source3') diff --git a/source3/Makefile.in b/source3/Makefile.in index 01245c1c12..2bc65c7285 100644 --- a/source3/Makefile.in +++ b/source3/Makefile.in @@ -451,7 +451,7 @@ LIB_OBJ = $(LIBSAMBAUTIL_OBJ) $(UTIL_OBJ) $(CRYPTO_OBJ) \ lib/bitmap.o lib/dprintf.o $(UTIL_REG_OBJ) \ lib/wins_srv.o \ lib/util_str.o lib/util_sid.o \ - lib/util_unistr.o ../lib/util/charset/codepoints.o lib/util_file.o \ + lib/util_unistr.o ../lib/util/charset/codepoints.o ../lib/util/charset/util_str.o lib/util_file.o \ lib/util.o lib/util_names.o \ lib/util_sock.o lib/sock_exec.o lib/util_sec.o \ lib/substitute.o lib/dbwrap_util.o \ diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c index 84b280005e..1996174261 100644 --- a/source3/lib/util_str.c +++ b/source3/lib/util_str.c @@ -208,19 +208,6 @@ bool strnequal(const char *s1,const char *s2,size_t n) return(StrnCaseCmp(s1,s2,n)==0); } -/** - Compare 2 strings (case sensitive). -**/ -_PUBLIC_ bool strcsequal(const char *s1,const char *s2) -{ - if (s1 == s2) - return true; - if (!s1 || !s2) - return false; - - return strcmp(s1,s2) == 0; -} - /** Do a case-insensitive, whitespace-ignoring string compare. **/ @@ -444,54 +431,6 @@ bool trim_char(char *s,char cfront,char cback) return ret; } -/** - return True if any (multi-byte) character is upper case -*/ -_PUBLIC_ bool strhasupper(const char *string) -{ - struct smb_iconv_convenience *ic = get_iconv_convenience(); - while (*string) { - size_t c_size; - codepoint_t s; - codepoint_t t; - - s = next_codepoint_convenience(ic, string, &c_size); - string += c_size; - - t = tolower_m(s); - - if (s != t) { - return true; /* that means it has upper case chars */ - } - } - - return false; -} - -/** - return True if any (multi-byte) character is lower case -*/ -_PUBLIC_ bool strhaslower(const char *string) -{ - struct smb_iconv_convenience *ic = get_iconv_convenience(); - while (*string) { - size_t c_size; - codepoint_t s; - codepoint_t t; - - s = next_codepoint_convenience(ic, string, &c_size); - string += c_size; - - t = toupper_m(s); - - if (s != t) { - return true; /* that means it has lower case chars */ - } - } - - return false; -} - /** Safe string copy into a known length string. maxlength does not include the terminating zero. @@ -1103,115 +1042,6 @@ char *string_truncate(char *s, unsigned int length) return s; } -/** - Strchr and strrchr_m are a bit complex on general multi-byte strings. -**/ -_PUBLIC_ char *strchr_m(const char *src, char c) -{ - const char *s; - struct smb_iconv_convenience *ic = get_iconv_convenience(); - if (src == NULL) { - return NULL; - } - /* characters below 0x3F are guaranteed to not appear in - non-initial position in multi-byte charsets */ - if ((c & 0xC0) == 0) { - return strchr(src, c); - } - - /* this is quite a common operation, so we want it to be - fast. We optimise for the ascii case, knowing that all our - supported multi-byte character sets are ascii-compatible - (ie. they match for the first 128 chars) */ - - for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) { - if (*s == c) - return (char *)s; - } - - if (!*s) - return NULL; - -#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS - /* With compose characters we must restart from the beginning. JRA. */ - s = src; -#endif - - while (*s) { - size_t size; - codepoint_t c2 = next_codepoint_convenience(ic, s, &size); - if (c2 == c) { - return discard_const_p(char, s); - } - s += size; - } - - return NULL; -} - -/** - * Multibyte-character version of strrchr - */ -_PUBLIC_ char *strrchr_m(const char *s, char c) -{ - struct smb_iconv_convenience *ic = get_iconv_convenience(); - char *ret = NULL; - - if (s == NULL) { - return NULL; - } - - /* characters below 0x3F are guaranteed to not appear in - non-initial position in multi-byte charsets */ - if ((c & 0xC0) == 0) { - return strrchr(s, c); - } - - /* this is quite a common operation, so we want it to be - fast. We optimise for the ascii case, knowing that all our - supported multi-byte character sets are ascii-compatible - (ie. they match for the first 128 chars). Also, in Samba - we only search for ascii characters in 'c' and that - in all mb character sets with a compound character - containing c, if 'c' is not a match at position - p, then p[-1] > 0x7f. JRA. */ - - { - size_t len = strlen(s); - const char *cp = s; - bool got_mb = false; - - if (len == 0) - return NULL; - cp += (len - 1); - do { - if (c == *cp) { - /* Could be a match. Part of a multibyte ? */ - if ((cp > s) && - (((unsigned char)cp[-1]) & 0x80)) { - /* Yep - go slow :-( */ - got_mb = true; - break; - } - /* No - we have a match ! */ - return (char *)cp; - } - } while (cp-- != s); - if (!got_mb) - return NULL; - } - - while (*s) { - size_t size; - codepoint_t c2 = next_codepoint_convenience(ic, s, &size); - if (c2 == c) { - ret = discard_const_p(char, s); - } - s += size; - } - - return ret; -} /*********************************************************************** Return the equivalent of doing strrchr 'n' times - always going @@ -1396,128 +1226,6 @@ void strupper_m(char *s) errno = errno_save; } -/** - * Calculate the number of units (8 or 16-bit, depending on the - * destination charset), that would be needed to convert the input - * string which is expected to be in in src_charset encoding to the - * destination charset (which should be a unicode charset). - */ -_PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset) -{ - size_t count = 0; - struct smb_iconv_convenience *ic = get_iconv_convenience(); - - if (!s) { - return 0; - } - - while (*s && !(((uint8_t)*s) & 0x80)) { - s++; - count++; - } - - if (!*s) { - return count; - } - - while (*s) { - size_t c_size; - codepoint_t c = next_codepoint_convenience_ext(ic, s, src_charset, &c_size); - s += c_size; - - switch (dst_charset) { - case CH_UTF16LE: - case CH_UTF16BE: - case CH_UTF16MUNGED: - if (c < 0x10000) { - /* Unicode char fits into 16 bits. */ - count += 1; - } else { - /* Double-width unicode char - 32 bits. */ - count += 2; - } - break; - case CH_UTF8: - /* - * this only checks ranges, and does not - * check for invalid codepoints - */ - if (c < 0x80) { - count += 1; - } else if (c < 0x800) { - count += 2; - } else if (c < 0x1000) { - count += 3; - } else { - count += 4; - } - break; - default: - /* - * non-unicode encoding: - * assume that each codepoint fits into - * one unit in the destination encoding. - */ - count += 1; - } - } - - return count; -} - -_PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset, - const charset_t dst_charset) -{ - if (!s) { - return 0; - } - return strlen_m_ext(s, src_charset, dst_charset) + 1; -} - -/** - * Calculate the number of 16-bit units that would be needed to convert - * the input string which is expected to be in CH_UNIX encoding to UTF16. - * - * This will be the same as the number of bytes in a string for single - * byte strings, but will be different for multibyte. - */ -_PUBLIC_ size_t strlen_m(const char *s) -{ - return strlen_m_ext(s, CH_UNIX, CH_UTF16LE); -} - -/** - Work out the number of multibyte chars in a string, including the NULL - terminator. -**/ -_PUBLIC_ size_t strlen_m_term(const char *s) -{ - if (!s) { - return 0; - } - - return strlen_m(s) + 1; -} - -/* - * Weird helper routine for the winreg pipe: If nothing is around, return 0, - * if a string is there, include the terminator. - */ - -_PUBLIC_ size_t strlen_m_term_null(const char *s) -{ - size_t len; - if (!s) { - return 0; - } - len = strlen_m(s); - if (len == 0) { - return 0; - } - - return len+1; -} - /** Just a typesafety wrapper for snprintf into a fstring. **/ -- cgit