summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Bartlett <abartlet@samba.org>2011-03-18 19:10:23 +1100
committerAndrew Bartlett <abartlet@samba.org>2011-03-23 08:21:54 +0100
commit41051fd3d3ac7450771518aa12b660867ed7e819 (patch)
tree59c94244c7437d95e53a3227d18214f14a542d10
parent89fd1cb767c966d5ba60323bdb04a6baf973e4a3 (diff)
downloadsamba-41051fd3d3ac7450771518aa12b660867ed7e819.tar.gz
samba-41051fd3d3ac7450771518aa12b660867ed7e819.tar.bz2
samba-41051fd3d3ac7450771518aa12b660867ed7e819.zip
lib/util: Merge basic string length and comparison functions
These functions now use the codepoints for more accurate string handling and now form common code. Andrew Bartlett Autobuild-User: Andrew Bartlett <abartlet@samba.org> Autobuild-Date: Wed Mar 23 08:21:54 CET 2011 on sn-devel-104
-rw-r--r--lib/util/charset/charset.h2
-rw-r--r--lib/util/charset/tests/charset.c16
-rw-r--r--lib/util/charset/util_str.c416
-rw-r--r--lib/util/charset/util_unistr.c334
-rw-r--r--lib/util/charset/wscript_build2
-rw-r--r--source3/Makefile.in2
-rw-r--r--source3/lib/util_str.c292
7 files changed, 428 insertions, 636 deletions
diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h
index 474d77e54e..943bfa4695 100644
--- a/lib/util/charset/charset.h
+++ b/lib/util/charset/charset.h
@@ -128,7 +128,7 @@ size_t strlen_m_term_null(const char *s);
size_t strlen_m(const char *s);
char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength);
void string_replace_m(char *s, char oldc, char newc);
-bool strcsequal_m(const char *s1,const char *s2);
+bool strcsequal(const char *s1,const char *s2);
bool strequal_m(const char *s1, const char *s2);
int strncasecmp_m(const char *s1, const char *s2, size_t n);
bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize);
diff --git a/lib/util/charset/tests/charset.c b/lib/util/charset/tests/charset.c
index 72fd11b128..351b91c7b7 100644
--- a/lib/util/charset/tests/charset.c
+++ b/lib/util/charset/tests/charset.c
@@ -69,14 +69,14 @@ static bool test_strequal_m(struct torture_context *tctx)
return true;
}
-static bool test_strcsequal_m(struct torture_context *tctx)
+static bool test_strcsequal(struct torture_context *tctx)
{
- torture_assert(tctx, !strcsequal_m("foo", "bar"), "different strings");
- torture_assert(tctx, strcsequal_m("foo", "foo"), "same case strings");
- torture_assert(tctx, !strcsequal_m("foo", "Foo"), "different case strings");
- torture_assert(tctx, !strcsequal_m(NULL, "Foo"), "one NULL");
- torture_assert(tctx, !strcsequal_m("foo", NULL), "other NULL");
- torture_assert(tctx, strcsequal_m(NULL, NULL), "both NULL");
+ torture_assert(tctx, !strcsequal("foo", "bar"), "different strings");
+ torture_assert(tctx, strcsequal("foo", "foo"), "same case strings");
+ torture_assert(tctx, !strcsequal("foo", "Foo"), "different case strings");
+ torture_assert(tctx, !strcsequal(NULL, "Foo"), "one NULL");
+ torture_assert(tctx, !strcsequal("foo", NULL), "other NULL");
+ torture_assert(tctx, strcsequal(NULL, NULL), "both NULL");
return true;
}
@@ -253,7 +253,7 @@ struct torture_suite *torture_local_charset(TALLOC_CTX *mem_ctx)
torture_suite_add_simple_test(suite, "codepoint_cmpi", test_codepoint_cmpi);
torture_suite_add_simple_test(suite, "strcasecmp_m", test_strcasecmp_m);
torture_suite_add_simple_test(suite, "strequal_m", test_strequal_m);
- torture_suite_add_simple_test(suite, "strcsequal_m", test_strcsequal_m);
+ torture_suite_add_simple_test(suite, "strcsequal", test_strcsequal);
torture_suite_add_simple_test(suite, "string_replace_m", test_string_replace_m);
torture_suite_add_simple_test(suite, "strncasecmp_m", test_strncasecmp_m);
torture_suite_add_simple_test(suite, "next_token", test_next_token);
diff --git a/lib/util/charset/util_str.c b/lib/util/charset/util_str.c
new file mode 100644
index 0000000000..597b031675
--- /dev/null
+++ b/lib/util/charset/util_str.c
@@ -0,0 +1,416 @@
+/*
+ Unix SMB/CIFS implementation.
+ Samba utility functions
+ Copyright (C) Andrew Tridgell 1992-2001
+ Copyright (C) Simo Sorce 2001
+ Copyright (C) Andrew Bartlett 2011
+ Copyright (C) Jeremy Allison 1992-2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "includes.h"
+#include "system/locale.h"
+
+#ifdef strcasecmp
+#undef strcasecmp
+#endif
+
+/**
+ * Case insensitive comparison of two multibyte strings, codepoint by codepoint.
+ * NULL sorts before any string; returns 0 on a match, non-zero otherwise.
+ **/
+_PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
+{
+	codepoint_t c1=0, c2=0;
+	size_t size1, size2;
+	struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
+
+	/* handle null ptr comparisons to simplify the use in qsort */
+	if (s1 == s2) return 0;
+	if (s1 == NULL) return -1;
+	if (s2 == NULL) return 1;
+
+	while (*s1 && *s2) {
+		c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
+		c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
+
+		if (c1 == INVALID_CODEPOINT ||
+		    c2 == INVALID_CODEPOINT) {
+			/* undecodable input: byte-compare from the bad character itself, not from after it */
+			return strcasecmp(s1, s2);
+		}
+
+		s1 += size1;
+		s2 += size2;
+		if (c1 == c2) {
+			continue;
+		}
+
+		if (toupper_m(c1) != toupper_m(c2)) {
+			return c1 - c2;
+		}
+	}
+
+	return *s1 - *s2;
+}
+
+/**
+ * Case insensitive comparison of at most n codepoints of two multibyte strings.
+ * NULL sorts before any string; returns 0 when the compared prefixes match.
+ **/
+_PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
+{
+	codepoint_t c1=0, c2=0;
+	size_t size1, size2;
+	struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
+
+	/* handle null ptr comparisons to simplify the use in qsort */
+	if (s1 == s2) return 0;
+	if (s1 == NULL) return -1;
+	if (s2 == NULL) return 1;
+
+	while (*s1 && *s2 && n) {
+		n--;
+
+		c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
+		c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
+
+		if (c1 == INVALID_CODEPOINT ||
+		    c2 == INVALID_CODEPOINT) {
+			/* undecodable input: byte-compare from the bad character itself; NOTE: this fallback is not limited by n */
+			return strcasecmp(s1, s2);
+		}
+
+		s1 += size1;
+		s2 += size2;
+		if (c1 == c2) {
+			continue;
+		}
+
+		if (toupper_m(c1) != toupper_m(c2)) {
+			return c1 - c2;
+		}
+	}
+
+	if (n == 0) {
+		return 0;
+	}
+
+	return *s1 - *s2;
+}
+
+/**
+ * Test two strings for equality; true when strcasecmp_m() returns 0.
+ *
+ * @note The comparison is case-insensitive.
+ **/
+_PUBLIC_ bool strequal_m(const char *s1, const char *s2)
+{
+ return strcasecmp_m(s1,s2) == 0;
+}
+
+/**
+ Compare 2 strings (case sensitive, bytewise via strcmp); two NULL pointers are equal, a single NULL is not.
+**/
+_PUBLIC_ bool strcsequal(const char *s1,const char *s2)
+{
+ if (s1 == s2)
+ return true;
+ if (!s1 || !s2)
+ return false;
+
+ return strcmp(s1,s2) == 0;
+}
+
+/**
+ * Calculate the number of units (8 or 16-bit, depending on the
+ * destination charset), that would be needed to convert the input
+ * string which is expected to be in src_charset encoding to the
+ * destination charset (which should be a unicode charset); 0 for NULL.
+ */
+_PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
+{
+	size_t count = 0;
+	struct smb_iconv_convenience *ic = get_iconv_convenience();
+
+	if (!s) {
+		return 0;
+	}
+	/* fast path: leading bytes below 0x80 are counted as one unit each */
+	while (*s && !(((uint8_t)*s) & 0x80)) {
+		s++;
+		count++;
+	}
+
+	if (!*s) {
+		return count;
+	}
+	/* slow path: decode the remainder codepoint by codepoint */
+	while (*s) {
+		size_t c_size;
+		codepoint_t c = next_codepoint_convenience_ext(ic, s, src_charset, &c_size);
+		s += c_size;
+
+		switch (dst_charset) {
+		case CH_UTF16LE:
+		case CH_UTF16BE:
+		case CH_UTF16MUNGED:
+			if (c < 0x10000) {
+				/* Unicode char fits into 16 bits. */
+				count += 1;
+			} else {
+				/* Double-width unicode char - 32 bits. */
+				count += 2;
+			}
+			break;
+		case CH_UTF8:
+			/*
+			 * this only checks ranges, and does not check for
+			 * invalid codepoints; 3 bytes reach up to U+FFFF
+			 */
+			if (c < 0x80) {
+				count += 1;
+			} else if (c < 0x800) {
+				count += 2;
+			} else if (c < 0x10000) {
+				count += 3;
+			} else {
+				count += 4;
+			}
+			break;
+		default:
+			/*
+			 * non-unicode encoding:
+			 * assume that each codepoint fits into
+			 * one unit in the destination encoding.
+			 */
+			count += 1;
+		}
+	}
+
+	return count;
+}
+
+_PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
+ const charset_t dst_charset)
+{
+ if (!s) { /* NULL input: report 0, no terminator is counted */
+ return 0;
+ }
+ return strlen_m_ext(s, src_charset, dst_charset) + 1; /* converted length plus one unit for the terminator */
+}
+
+/**
+ * Calculate the number of 16-bit units that would be needed to convert
+ * the input string, which is expected to be in CH_UNIX encoding, to UTF16.
+ *
+ * This will be the same as the number of bytes in a string for single
+ * byte strings, but will be different for multibyte. Thin wrapper: strlen_m_ext(s, CH_UNIX, CH_UTF16LE).
+ */
+_PUBLIC_ size_t strlen_m(const char *s)
+{
+ return strlen_m_ext(s, CH_UNIX, CH_UTF16LE);
+}
+
+/**
+ Work out the strlen_m() length of a string (UTF16 units, not characters) plus
+ one for the NUL terminator; a NULL pointer yields 0.
+**/
+_PUBLIC_ size_t strlen_m_term(const char *s)
+{
+ if (!s) {
+ return 0;
+ }
+
+ return strlen_m(s) + 1;
+}
+
+/*
+ * Weird helper routine for the winreg pipe: for a NULL pointer or a string whose
+ * strlen_m() is 0, return 0; otherwise return strlen_m() plus one for the terminator.
+ */
+
+_PUBLIC_ size_t strlen_m_term_null(const char *s)
+{
+ size_t len;
+ if (!s) {
+ return 0;
+ }
+ len = strlen_m(s);
+ if (len == 0) {
+ return 0;
+ }
+
+ return len+1;
+}
+
+/**
+ Multibyte-aware strchr: pointer to the first occurrence of c in src, or NULL when src is NULL or c is not found.
+**/
+_PUBLIC_ char *strchr_m(const char *src, char c)
+{
+ const char *s;
+ struct smb_iconv_convenience *ic = get_iconv_convenience();
+ if (src == NULL) {
+ return NULL;
+ }
+ /* characters up to and including 0x3F (both top bits clear) are guaranteed
+ to not appear in non-initial position in multi-byte charsets */
+ if ((c & 0xC0) == 0) {
+ return strchr(src, c);
+ }
+
+ /* this is quite a common operation, so we want it to be
+ fast. We optimise for the ascii case, knowing that all our
+ supported multi-byte character sets are ascii-compatible
+ (ie. they match for the first 128 chars) */
+
+ for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
+ if (*s == c)
+ return (char *)s;
+ }
+
+ if (!*s)
+ return NULL;
+
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+ /* With compose characters we must restart from the beginning. JRA. */
+ s = src;
+#endif
+
+ while (*s) {
+ size_t size;
+ codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
+ if (c2 == c) {
+ return discard_const_p(char, s);
+ }
+ s += size;
+ }
+
+ return NULL;
+}
+
+/**
+ * Multibyte-character version of strrchr: pointer to the last occurrence of c in s, or NULL when s is NULL or c is not found.
+ */
+_PUBLIC_ char *strrchr_m(const char *s, char c)
+{
+ struct smb_iconv_convenience *ic = get_iconv_convenience();
+ char *ret = NULL;
+
+ if (s == NULL) {
+ return NULL;
+ }
+
+ /* characters up to and including 0x3F (both top bits clear) are guaranteed
+ to not appear in non-initial position in multi-byte charsets */
+ if ((c & 0xC0) == 0) {
+ return strrchr(s, c);
+ }
+
+ /* this is quite a common operation, so we want it to be
+ fast. We optimise for the ascii case, knowing that all our
+ supported multi-byte character sets are ascii-compatible
+ (ie. they match for the first 128 chars). Also, in Samba
+ we only search for ascii characters in 'c' and that
+ in all mb character sets with a compound character
+ containing c, if 'c' is not a match at position
+ p, then p[-1] > 0x7f. JRA. */
+
+ {
+ size_t len = strlen(s);
+ const char *cp = s;
+ bool got_mb = false;
+
+ if (len == 0)
+ return NULL;
+ cp += (len - 1);
+ do {
+ if (c == *cp) {
+ /* Could be a match. Part of a multibyte ? */
+ if ((cp > s) &&
+ (((unsigned char)cp[-1]) & 0x80)) {
+ /* Yep - go slow :-( */
+ got_mb = true;
+ break;
+ }
+ /* No - we have a match ! */
+ return (char *)cp;
+ }
+ } while (cp-- != s);
+ if (!got_mb)
+ return NULL;
+ }
+
+ while (*s) {
+ size_t size;
+ codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
+ if (c2 == c) {
+ ret = discard_const_p(char, s);
+ }
+ s += size;
+ }
+
+ return ret;
+}
+
+/**
+ return True if any (multi-byte) character is lower case, i.e. toupper_m() changes it; string is dereferenced unchecked and must not be NULL
+*/
+_PUBLIC_ bool strhaslower(const char *string)
+{
+ struct smb_iconv_convenience *ic = get_iconv_convenience();
+ while (*string) {
+ size_t c_size;
+ codepoint_t s;
+ codepoint_t t;
+
+ s = next_codepoint_convenience(ic, string, &c_size);
+ string += c_size;
+
+ t = toupper_m(s);
+
+ if (s != t) {
+ return true; /* that means it has lower case chars */
+ }
+ }
+
+ return false;
+}
+
+/**
+ return True if any (multi-byte) character is upper case, i.e. tolower_m() changes it; string is dereferenced unchecked and must not be NULL
+*/
+_PUBLIC_ bool strhasupper(const char *string)
+{
+ struct smb_iconv_convenience *ic = get_iconv_convenience();
+ while (*string) {
+ size_t c_size;
+ codepoint_t s;
+ codepoint_t t;
+
+ s = next_codepoint_convenience(ic, string, &c_size);
+ string += c_size;
+
+ t = tolower_m(s);
+
+ if (s != t) {
+ return true; /* that means it has upper case chars */
+ }
+ }
+
+ return false;
+}
+
diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c
index b6bfb29e7d..ad2ba687f4 100644
--- a/lib/util/charset/util_unistr.c
+++ b/lib/util/charset/util_unistr.c
@@ -22,45 +22,6 @@
#include "system/locale.h"
/**
- Case insensitive string compararison
-**/
-_PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
-{
- codepoint_t c1=0, c2=0;
- size_t size1, size2;
- struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
-
- /* handle null ptr comparisons to simplify the use in qsort */
- if (s1 == s2) return 0;
- if (s1 == NULL) return -1;
- if (s2 == NULL) return 1;
-
- while (*s1 && *s2) {
- c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
- c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
-
- s1 += size1;
- s2 += size2;
-
- if (c1 == c2) {
- continue;
- }
-
- if (c1 == INVALID_CODEPOINT ||
- c2 == INVALID_CODEPOINT) {
- /* what else can we do?? */
- return strcasecmp(s1, s2);
- }
-
- if (toupper_m(c1) != toupper_m(c2)) {
- return c1 - c2;
- }
- }
-
- return *s1 - *s2;
-}
-
-/**
* Get the next token from a string, return False if none found.
* Handles double-quotes.
*
@@ -106,74 +67,6 @@ _PUBLIC_ bool next_token(const char **ptr,char *buff, const char *sep, size_t bu
return true;
}
-/**
- Case insensitive string compararison, length limited
-**/
-_PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
-{
- codepoint_t c1=0, c2=0;
- size_t size1, size2;
- struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
-
- /* handle null ptr comparisons to simplify the use in qsort */
- if (s1 == s2) return 0;
- if (s1 == NULL) return -1;
- if (s2 == NULL) return 1;
-
- while (*s1 && *s2 && n) {
- n--;
-
- c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
- c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
-
- s1 += size1;
- s2 += size2;
-
- if (c1 == c2) {
- continue;
- }
-
- if (c1 == INVALID_CODEPOINT ||
- c2 == INVALID_CODEPOINT) {
- /* what else can we do?? */
- return strcasecmp(s1, s2);
- }
-
- if (toupper_m(c1) != toupper_m(c2)) {
- return c1 - c2;
- }
- }
-
- if (n == 0) {
- return 0;
- }
-
- return *s1 - *s2;
-}
-
-/**
- * Compare 2 strings.
- *
- * @note The comparison is case-insensitive.
- **/
-_PUBLIC_ bool strequal_m(const char *s1, const char *s2)
-{
- return strcasecmp_m(s1,s2) == 0;
-}
-
-/**
- Compare 2 strings (case sensitive).
-**/
-_PUBLIC_ bool strcsequal_m(const char *s1,const char *s2)
-{
- if (s1 == s2)
- return true;
- if (!s1 || !s2)
- return false;
-
- return strcmp(s1,s2) == 0;
-}
-
/**
String replace.
@@ -239,231 +132,6 @@ _PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_
}
/**
- * Calculate the number of units (8 or 16-bit, depending on the
- * destination charset), that would be needed to convert the input
- * string which is expected to be in in src_charset encoding to the
- * destination charset (which should be a unicode charset).
- */
-_PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
-{
- size_t count = 0;
- struct smb_iconv_convenience *ic = get_iconv_convenience();
-
- if (!s) {
- return 0;
- }
-
- while (*s && !(((uint8_t)*s) & 0x80)) {
- s++;
- count++;
- }
-
- if (!*s) {
- return count;
- }
-
- while (*s) {
- size_t c_size;
- codepoint_t c = next_codepoint_convenience_ext(ic, s, src_charset, &c_size);
- s += c_size;
-
- switch (dst_charset) {
- case CH_UTF16LE:
- case CH_UTF16BE:
- case CH_UTF16MUNGED:
- if (c < 0x10000) {
- count += 1;
- } else {
- count += 2;
- }
- break;
- case CH_UTF8:
- /*
- * this only checks ranges, and does not
- * check for invalid codepoints
- */
- if (c < 0x80) {
- count += 1;
- } else if (c < 0x800) {
- count += 2;
- } else if (c < 0x1000) {
- count += 3;
- } else {
- count += 4;
- }
- break;
- default:
- /*
- * non-unicode encoding:
- * assume that each codepoint fits into
- * one unit in the destination encoding.
- */
- count += 1;
- }
- }
-
- return count;
-}
-
-_PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
- const charset_t dst_charset)
-{
- if (!s) {
- return 0;
- }
- return strlen_m_ext(s, src_charset, dst_charset) + 1;
-}
-
-/**
- * Calculate the number of 16-bit units that would be needed to convert
- * the input string which is expected to be in CH_UNIX encoding to UTF16.
- *
- * This will be the same as the number of bytes in a string for single
- * byte strings, but will be different for multibyte.
- */
-_PUBLIC_ size_t strlen_m(const char *s)
-{
- return strlen_m_ext(s, CH_UNIX, CH_UTF16LE);
-}
-
-/**
- Work out the number of multibyte chars in a string, including the NULL
- terminator.
-**/
-_PUBLIC_ size_t strlen_m_term(const char *s)
-{
- if (!s) {
- return 0;
- }
-
- return strlen_m(s) + 1;
-}
-
-/*
- * Weird helper routine for the winreg pipe: If nothing is around, return 0,
- * if a string is there, include the terminator.
- */
-
-_PUBLIC_ size_t strlen_m_term_null(const char *s)
-{
- size_t len;
- if (!s) {
- return 0;
- }
- len = strlen_m(s);
- if (len == 0) {
- return 0;
- }
-
- return len+1;
-}
-
-/**
- Strchr and strrchr_m are a bit complex on general multi-byte strings.
-**/
-_PUBLIC_ char *strchr_m(const char *s, char c)
-{
- struct smb_iconv_convenience *ic = get_iconv_convenience();
- if (s == NULL) {
- return NULL;
- }
- /* characters below 0x3F are guaranteed to not appear in
- non-initial position in multi-byte charsets */
- if ((c & 0xC0) == 0) {
- return strchr(s, c);
- }
-
- while (*s) {
- size_t size;
- codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
- if (c2 == c) {
- return discard_const_p(char, s);
- }
- s += size;
- }
-
- return NULL;
-}
-
-/**
- * Multibyte-character version of strrchr
- */
-_PUBLIC_ char *strrchr_m(const char *s, char c)
-{
- struct smb_iconv_convenience *ic = get_iconv_convenience();
- char *ret = NULL;
-
- if (s == NULL) {
- return NULL;
- }
-
- /* characters below 0x3F are guaranteed to not appear in
- non-initial position in multi-byte charsets */
- if ((c & 0xC0) == 0) {
- return strrchr(s, c);
- }
-
- while (*s) {
- size_t size;
- codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
- if (c2 == c) {
- ret = discard_const_p(char, s);
- }
- s += size;
- }
-
- return ret;
-}
-
-/**
- return True if any (multi-byte) character is lower case
-*/
-_PUBLIC_ bool strhaslower(const char *string)
-{
- struct smb_iconv_convenience *ic = get_iconv_convenience();
- while (*string) {
- size_t c_size;
- codepoint_t s;
- codepoint_t t;
-
- s = next_codepoint_convenience(ic, string, &c_size);
- string += c_size;
-
- t = toupper_m(s);
-
- if (s != t) {
- return true; /* that means it has lower case chars */
- }
- }
-
- return false;
-}
-
-/**
- return True if any (multi-byte) character is upper case
-*/
-_PUBLIC_ bool strhasupper(const char *string)
-{
- struct smb_iconv_convenience *ic = get_iconv_convenience();
- while (*string) {
- size_t c_size;
- codepoint_t s;
- codepoint_t t;
-
- s = next_codepoint_convenience(ic, string, &c_size);
- string += c_size;
-
- t = tolower_m(s);
-
- if (s != t) {
- return true; /* that means it has upper case chars */
- }
- }
-
- return false;
-}
-
-/**
Convert a string to lower case, allocated with talloc
**/
_PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
@@ -517,7 +185,7 @@ _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
size_t size=0;
char *dest;
struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
-
+
if (!src) {
return NULL;
}
diff --git a/lib/util/charset/wscript_build b/lib/util/charset/wscript_build
index ab7cfc412d..a245ef1b0c 100644
--- a/lib/util/charset/wscript_build
+++ b/lib/util/charset/wscript_build
@@ -13,6 +13,6 @@ bld.SAMBA_SUBSYSTEM('ICONV_WRAPPER',
public_deps='iconv replace talloc')
bld.SAMBA_SUBSYSTEM('CODEPOINTS',
- source='codepoints.c',
+ source='codepoints.c util_str.c',
deps='DYNCONFIG ICONV_WRAPPER'
)
diff --git a/source3/Makefile.in b/source3/Makefile.in
index 01245c1c12..2bc65c7285 100644
--- a/source3/Makefile.in
+++ b/source3/Makefile.in
@@ -451,7 +451,7 @@ LIB_OBJ = $(LIBSAMBAUTIL_OBJ) $(UTIL_OBJ) $(CRYPTO_OBJ) \
lib/bitmap.o lib/dprintf.o $(UTIL_REG_OBJ) \
lib/wins_srv.o \
lib/util_str.o lib/util_sid.o \
- lib/util_unistr.o ../lib/util/charset/codepoints.o lib/util_file.o \
+ lib/util_unistr.o ../lib/util/charset/codepoints.o ../lib/util/charset/util_str.o lib/util_file.o \
lib/util.o lib/util_names.o \
lib/util_sock.o lib/sock_exec.o lib/util_sec.o \
lib/substitute.o lib/dbwrap_util.o \
diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c
index 84b280005e..1996174261 100644
--- a/source3/lib/util_str.c
+++ b/source3/lib/util_str.c
@@ -209,19 +209,6 @@ bool strnequal(const char *s1,const char *s2,size_t n)
}
/**
- Compare 2 strings (case sensitive).
-**/
-_PUBLIC_ bool strcsequal(const char *s1,const char *s2)
-{
- if (s1 == s2)
- return true;
- if (!s1 || !s2)
- return false;
-
- return strcmp(s1,s2) == 0;
-}
-
-/**
Do a case-insensitive, whitespace-ignoring string compare.
**/
@@ -445,54 +432,6 @@ bool trim_char(char *s,char cfront,char cback)
}
/**
- return True if any (multi-byte) character is upper case
-*/
-_PUBLIC_ bool strhasupper(const char *string)
-{
- struct smb_iconv_convenience *ic = get_iconv_convenience();
- while (*string) {
- size_t c_size;
- codepoint_t s;
- codepoint_t t;
-
- s = next_codepoint_convenience(ic, string, &c_size);
- string += c_size;
-
- t = tolower_m(s);
-
- if (s != t) {
- return true; /* that means it has upper case chars */
- }
- }
-
- return false;
-}
-
-/**
- return True if any (multi-byte) character is lower case
-*/
-_PUBLIC_ bool strhaslower(const char *string)
-{
- struct smb_iconv_convenience *ic = get_iconv_convenience();
- while (*string) {
- size_t c_size;
- codepoint_t s;
- codepoint_t t;
-
- s = next_codepoint_convenience(ic, string, &c_size);
- string += c_size;
-
- t = toupper_m(s);
-
- if (s != t) {
- return true; /* that means it has lower case chars */
- }
- }
-
- return false;
-}
-
-/**
Safe string copy into a known length string. maxlength does not
include the terminating zero.
**/
@@ -1103,115 +1042,6 @@ char *string_truncate(char *s, unsigned int length)
return s;
}
-/**
- Strchr and strrchr_m are a bit complex on general multi-byte strings.
-**/
-_PUBLIC_ char *strchr_m(const char *src, char c)
-{
- const char *s;
- struct smb_iconv_convenience *ic = get_iconv_convenience();
- if (src == NULL) {
- return NULL;
- }
- /* characters below 0x3F are guaranteed to not appear in
- non-initial position in multi-byte charsets */
- if ((c & 0xC0) == 0) {
- return strchr(src, c);
- }
-
- /* this is quite a common operation, so we want it to be
- fast. We optimise for the ascii case, knowing that all our
- supported multi-byte character sets are ascii-compatible
- (ie. they match for the first 128 chars) */
-
- for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
- if (*s == c)
- return (char *)s;
- }
-
- if (!*s)
- return NULL;
-
-#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
- /* With compose characters we must restart from the beginning. JRA. */
- s = src;
-#endif
-
- while (*s) {
- size_t size;
- codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
- if (c2 == c) {
- return discard_const_p(char, s);
- }
- s += size;
- }
-
- return NULL;
-}
-
-/**
- * Multibyte-character version of strrchr
- */
-_PUBLIC_ char *strrchr_m(const char *s, char c)
-{
- struct smb_iconv_convenience *ic = get_iconv_convenience();
- char *ret = NULL;
-
- if (s == NULL) {
- return NULL;
- }
-
- /* characters below 0x3F are guaranteed to not appear in
- non-initial position in multi-byte charsets */
- if ((c & 0xC0) == 0) {
- return strrchr(s, c);
- }
-
- /* this is quite a common operation, so we want it to be
- fast. We optimise for the ascii case, knowing that all our
- supported multi-byte character sets are ascii-compatible
- (ie. they match for the first 128 chars). Also, in Samba
- we only search for ascii characters in 'c' and that
- in all mb character sets with a compound character
- containing c, if 'c' is not a match at position
- p, then p[-1] > 0x7f. JRA. */
-
- {
- size_t len = strlen(s);
- const char *cp = s;
- bool got_mb = false;
-
- if (len == 0)
- return NULL;
- cp += (len - 1);
- do {
- if (c == *cp) {
- /* Could be a match. Part of a multibyte ? */
- if ((cp > s) &&
- (((unsigned char)cp[-1]) & 0x80)) {
- /* Yep - go slow :-( */
- got_mb = true;
- break;
- }
- /* No - we have a match ! */
- return (char *)cp;
- }
- } while (cp-- != s);
- if (!got_mb)
- return NULL;
- }
-
- while (*s) {
- size_t size;
- codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
- if (c2 == c) {
- ret = discard_const_p(char, s);
- }
- s += size;
- }
-
- return ret;
-}
/***********************************************************************
Return the equivalent of doing strrchr 'n' times - always going
@@ -1397,128 +1227,6 @@ void strupper_m(char *s)
}
/**
- * Calculate the number of units (8 or 16-bit, depending on the
- * destination charset), that would be needed to convert the input
- * string which is expected to be in in src_charset encoding to the
- * destination charset (which should be a unicode charset).
- */
-_PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
-{
- size_t count = 0;
- struct smb_iconv_convenience *ic = get_iconv_convenience();
-
- if (!s) {
- return 0;
- }
-
- while (*s && !(((uint8_t)*s) & 0x80)) {
- s++;
- count++;
- }
-
- if (!*s) {
- return count;
- }
-
- while (*s) {
- size_t c_size;
- codepoint_t c = next_codepoint_convenience_ext(ic, s, src_charset, &c_size);
- s += c_size;
-
- switch (dst_charset) {
- case CH_UTF16LE:
- case CH_UTF16BE:
- case CH_UTF16MUNGED:
- if (c < 0x10000) {
- /* Unicode char fits into 16 bits. */
- count += 1;
- } else {
- /* Double-width unicode char - 32 bits. */
- count += 2;
- }
- break;
- case CH_UTF8:
- /*
- * this only checks ranges, and does not
- * check for invalid codepoints
- */
- if (c < 0x80) {
- count += 1;
- } else if (c < 0x800) {
- count += 2;
- } else if (c < 0x1000) {
- count += 3;
- } else {
- count += 4;
- }
- break;
- default:
- /*
- * non-unicode encoding:
- * assume that each codepoint fits into
- * one unit in the destination encoding.
- */
- count += 1;
- }
- }
-
- return count;
-}
-
-_PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
- const charset_t dst_charset)
-{
- if (!s) {
- return 0;
- }
- return strlen_m_ext(s, src_charset, dst_charset) + 1;
-}
-
-/**
- * Calculate the number of 16-bit units that would be needed to convert
- * the input string which is expected to be in CH_UNIX encoding to UTF16.
- *
- * This will be the same as the number of bytes in a string for single
- * byte strings, but will be different for multibyte.
- */
-_PUBLIC_ size_t strlen_m(const char *s)
-{
- return strlen_m_ext(s, CH_UNIX, CH_UTF16LE);
-}
-
-/**
- Work out the number of multibyte chars in a string, including the NULL
- terminator.
-**/
-_PUBLIC_ size_t strlen_m_term(const char *s)
-{
- if (!s) {
- return 0;
- }
-
- return strlen_m(s) + 1;
-}
-
-/*
- * Weird helper routine for the winreg pipe: If nothing is around, return 0,
- * if a string is there, include the terminator.
- */
-
-_PUBLIC_ size_t strlen_m_term_null(const char *s)
-{
- size_t len;
- if (!s) {
- return 0;
- }
- len = strlen_m(s);
- if (len == 0) {
- return 0;
- }
-
- return len+1;
-}
-
-/**
Just a typesafety wrapper for snprintf into a fstring.
**/