diff options
author | Andrew Bartlett <abartlet@samba.org> | 2011-03-18 19:10:23 +1100 |
---|---|---|
committer | Andrew Bartlett <abartlet@samba.org> | 2011-03-23 08:21:54 +0100 |
commit | 41051fd3d3ac7450771518aa12b660867ed7e819 (patch) | |
tree | 59c94244c7437d95e53a3227d18214f14a542d10 /lib/util/charset/util_unistr.c | |
parent | 89fd1cb767c966d5ba60323bdb04a6baf973e4a3 (diff) | |
download | samba-41051fd3d3ac7450771518aa12b660867ed7e819.tar.gz samba-41051fd3d3ac7450771518aa12b660867ed7e819.tar.bz2 samba-41051fd3d3ac7450771518aa12b660867ed7e819.zip |
lib/util: Merge basic string length and comparison functions
These functions now operate on codepoints for more accurate string
handling, and are now shared as common code.
Andrew Bartlett
Autobuild-User: Andrew Bartlett <abartlet@samba.org>
Autobuild-Date: Wed Mar 23 08:21:54 CET 2011 on sn-devel-104
Diffstat (limited to 'lib/util/charset/util_unistr.c')
-rw-r--r-- | lib/util/charset/util_unistr.c | 334 |
1 file changed, 1 insertion, 333 deletions
diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c index b6bfb29e7d..ad2ba687f4 100644 --- a/lib/util/charset/util_unistr.c +++ b/lib/util/charset/util_unistr.c @@ -22,45 +22,6 @@ #include "system/locale.h" /** - Case insensitive string compararison -**/ -_PUBLIC_ int strcasecmp_m(const char *s1, const char *s2) -{ - codepoint_t c1=0, c2=0; - size_t size1, size2; - struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience(); - - /* handle null ptr comparisons to simplify the use in qsort */ - if (s1 == s2) return 0; - if (s1 == NULL) return -1; - if (s2 == NULL) return 1; - - while (*s1 && *s2) { - c1 = next_codepoint_convenience(iconv_convenience, s1, &size1); - c2 = next_codepoint_convenience(iconv_convenience, s2, &size2); - - s1 += size1; - s2 += size2; - - if (c1 == c2) { - continue; - } - - if (c1 == INVALID_CODEPOINT || - c2 == INVALID_CODEPOINT) { - /* what else can we do?? */ - return strcasecmp(s1, s2); - } - - if (toupper_m(c1) != toupper_m(c2)) { - return c1 - c2; - } - } - - return *s1 - *s2; -} - -/** * Get the next token from a string, return False if none found. * Handles double-quotes. 
* @@ -106,74 +67,6 @@ _PUBLIC_ bool next_token(const char **ptr,char *buff, const char *sep, size_t bu return true; } -/** - Case insensitive string compararison, length limited -**/ -_PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n) -{ - codepoint_t c1=0, c2=0; - size_t size1, size2; - struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience(); - - /* handle null ptr comparisons to simplify the use in qsort */ - if (s1 == s2) return 0; - if (s1 == NULL) return -1; - if (s2 == NULL) return 1; - - while (*s1 && *s2 && n) { - n--; - - c1 = next_codepoint_convenience(iconv_convenience, s1, &size1); - c2 = next_codepoint_convenience(iconv_convenience, s2, &size2); - - s1 += size1; - s2 += size2; - - if (c1 == c2) { - continue; - } - - if (c1 == INVALID_CODEPOINT || - c2 == INVALID_CODEPOINT) { - /* what else can we do?? */ - return strcasecmp(s1, s2); - } - - if (toupper_m(c1) != toupper_m(c2)) { - return c1 - c2; - } - } - - if (n == 0) { - return 0; - } - - return *s1 - *s2; -} - -/** - * Compare 2 strings. - * - * @note The comparison is case-insensitive. - **/ -_PUBLIC_ bool strequal_m(const char *s1, const char *s2) -{ - return strcasecmp_m(s1,s2) == 0; -} - -/** - Compare 2 strings (case sensitive). -**/ -_PUBLIC_ bool strcsequal_m(const char *s1,const char *s2) -{ - if (s1 == s2) - return true; - if (!s1 || !s2) - return false; - - return strcmp(s1,s2) == 0; -} - /** String replace. @@ -239,231 +132,6 @@ _PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_ } /** - * Calculate the number of units (8 or 16-bit, depending on the - * destination charset), that would be needed to convert the input - * string which is expected to be in in src_charset encoding to the - * destination charset (which should be a unicode charset). 
- */ -_PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset) -{ - size_t count = 0; - struct smb_iconv_convenience *ic = get_iconv_convenience(); - - if (!s) { - return 0; - } - - while (*s && !(((uint8_t)*s) & 0x80)) { - s++; - count++; - } - - if (!*s) { - return count; - } - - while (*s) { - size_t c_size; - codepoint_t c = next_codepoint_convenience_ext(ic, s, src_charset, &c_size); - s += c_size; - - switch (dst_charset) { - case CH_UTF16LE: - case CH_UTF16BE: - case CH_UTF16MUNGED: - if (c < 0x10000) { - count += 1; - } else { - count += 2; - } - break; - case CH_UTF8: - /* - * this only checks ranges, and does not - * check for invalid codepoints - */ - if (c < 0x80) { - count += 1; - } else if (c < 0x800) { - count += 2; - } else if (c < 0x1000) { - count += 3; - } else { - count += 4; - } - break; - default: - /* - * non-unicode encoding: - * assume that each codepoint fits into - * one unit in the destination encoding. - */ - count += 1; - } - } - - return count; -} - -_PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset, - const charset_t dst_charset) -{ - if (!s) { - return 0; - } - return strlen_m_ext(s, src_charset, dst_charset) + 1; -} - -/** - * Calculate the number of 16-bit units that would be needed to convert - * the input string which is expected to be in CH_UNIX encoding to UTF16. - * - * This will be the same as the number of bytes in a string for single - * byte strings, but will be different for multibyte. - */ -_PUBLIC_ size_t strlen_m(const char *s) -{ - return strlen_m_ext(s, CH_UNIX, CH_UTF16LE); -} - -/** - Work out the number of multibyte chars in a string, including the NULL - terminator. -**/ -_PUBLIC_ size_t strlen_m_term(const char *s) -{ - if (!s) { - return 0; - } - - return strlen_m(s) + 1; -} - -/* - * Weird helper routine for the winreg pipe: If nothing is around, return 0, - * if a string is there, include the terminator. 
- */ - -_PUBLIC_ size_t strlen_m_term_null(const char *s) -{ - size_t len; - if (!s) { - return 0; - } - len = strlen_m(s); - if (len == 0) { - return 0; - } - - return len+1; -} - -/** - Strchr and strrchr_m are a bit complex on general multi-byte strings. -**/ -_PUBLIC_ char *strchr_m(const char *s, char c) -{ - struct smb_iconv_convenience *ic = get_iconv_convenience(); - if (s == NULL) { - return NULL; - } - /* characters below 0x3F are guaranteed to not appear in - non-initial position in multi-byte charsets */ - if ((c & 0xC0) == 0) { - return strchr(s, c); - } - - while (*s) { - size_t size; - codepoint_t c2 = next_codepoint_convenience(ic, s, &size); - if (c2 == c) { - return discard_const_p(char, s); - } - s += size; - } - - return NULL; -} - -/** - * Multibyte-character version of strrchr - */ -_PUBLIC_ char *strrchr_m(const char *s, char c) -{ - struct smb_iconv_convenience *ic = get_iconv_convenience(); - char *ret = NULL; - - if (s == NULL) { - return NULL; - } - - /* characters below 0x3F are guaranteed to not appear in - non-initial position in multi-byte charsets */ - if ((c & 0xC0) == 0) { - return strrchr(s, c); - } - - while (*s) { - size_t size; - codepoint_t c2 = next_codepoint_convenience(ic, s, &size); - if (c2 == c) { - ret = discard_const_p(char, s); - } - s += size; - } - - return ret; -} - -/** - return True if any (multi-byte) character is lower case -*/ -_PUBLIC_ bool strhaslower(const char *string) -{ - struct smb_iconv_convenience *ic = get_iconv_convenience(); - while (*string) { - size_t c_size; - codepoint_t s; - codepoint_t t; - - s = next_codepoint_convenience(ic, string, &c_size); - string += c_size; - - t = toupper_m(s); - - if (s != t) { - return true; /* that means it has lower case chars */ - } - } - - return false; -} - -/** - return True if any (multi-byte) character is upper case -*/ -_PUBLIC_ bool strhasupper(const char *string) -{ - struct smb_iconv_convenience *ic = get_iconv_convenience(); - while (*string) { - size_t 
c_size; - codepoint_t s; - codepoint_t t; - - s = next_codepoint_convenience(ic, string, &c_size); - string += c_size; - - t = tolower_m(s); - - if (s != t) { - return true; /* that means it has upper case chars */ - } - } - - return false; -} - -/** Convert a string to lower case, allocated with talloc **/ _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src) @@ -517,7 +185,7 @@ _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n) size_t size=0; char *dest; struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience(); - + if (!src) { return NULL; } |