summaryrefslogtreecommitdiff
path: root/source4/lib/util_str.c
diff options
context:
space:
mode:
authorAndrew Tridgell <tridge@samba.org>2004-10-08 08:13:00 +0000
committerGerald (Jerry) Carter <jerry@samba.org>2007-10-10 12:59:39 -0500
commit7d32679e9683c81aca538f0267684332a28a286f (patch)
tree445aecfad24e8dab1fe7a200904a712212fa7091 /source4/lib/util_str.c
parent48f960ab47707ca24898834da4da440d1f7fb0d9 (diff)
downloadsamba-7d32679e9683c81aca538f0267684332a28a286f.tar.gz
samba-7d32679e9683c81aca538f0267684332a28a286f.tar.bz2
samba-7d32679e9683c81aca538f0267684332a28a286f.zip
r2857: this commit gets rid of smb_ucs2_t, wpstring and fpstring, plus lots of associated functions.
The motivation for this change was to avoid having to convert to/from ucs2 strings for so many operations. Doing that was slow, used many static buffers, and was also incorrect as it didn't cope properly with unicode codepoints above 65536 (which could not be represented correctly as smb_ucs2_t chars) The two core functions that allowed this change are next_codepoint() and push_codepoint(). These functions allow you to correctly walk a arbitrary multi-byte string a character at a time without converting the whole string to ucs2. While doing this cleanup I also fixed several ucs2 string handling bugs. See the commit for details. The following code (which counts the number of occuraces of 'c' in a string) shows how to use the new interface: size_t count_chars(const char *s, char c) { size_t count = 0; while (*s) { size_t size; codepoint_t c2 = next_codepoint(s, &size); if (c2 == c) count++; s += size; } return count; } (This used to be commit 814881f0e50019196b3aa9fbe4aeadbb98172040)
Diffstat (limited to 'source4/lib/util_str.c')
-rw-r--r--source4/lib/util_str.c631
1 files changed, 150 insertions, 481 deletions
diff --git a/source4/lib/util_str.c b/source4/lib/util_str.c
index 0e58face16..c4c68a3dcd 100644
--- a/source4/lib/util_str.c
+++ b/source4/lib/util_str.c
@@ -74,54 +74,37 @@ BOOL next_token(const char **ptr,char *buff, const char *sep, size_t bufsize)
return(True);
}
-static uint16_t tmpbuf[sizeof(pstring)];
-
-
/**
- Case insensitive string compararison.
+ Case insensitive string compararison
**/
-static int StrCaseCmp_slow(const char *s1, const char *s2)
+int StrCaseCmp(const char *s1, const char *s2)
{
- smb_ucs2_t *u1 = NULL;
- smb_ucs2_t *u2;
- int ret;
-
- if (convert_string_talloc(NULL, CH_UNIX, CH_UTF16, s1, strlen(s1)+1, (void **)&u1) == -1 ||
- convert_string_talloc(u1, CH_UNIX, CH_UTF16, s2, strlen(s2)+1, (void **)&u2) == -1) {
- talloc_free(u1);
- /* fallback to a simple comparison */
- return strcasecmp(s1, s2);
- }
+ codepoint_t c1=0, c2=0;
+ size_t size1, size2;
- ret = strcasecmp_w(u1, u2);
+ while (*s1 && *s2) {
+ c1 = next_codepoint(s1, &size1);
+ c2 = next_codepoint(s2, &size2);
- talloc_free(u1);
+ s1 += size1;
+ s2 += size2;
- return ret;
-}
+ if (c1 == c2) {
+ continue;
+ }
-/**
- Case insensitive string compararison, accelerated version
-**/
-int StrCaseCmp(const char *s1, const char *s2)
-{
- while (*s1 && *s2 &&
- (*s1 & 0x80) == 0 &&
- (*s2 & 0x80) == 0) {
- char u1 = toupper(*s1);
- char u2 = toupper(*s2);
- if (u1 != u2) {
- return u1 - u2;
+ if (c1 == INVALID_CODEPOINT ||
+ c2 == INVALID_CODEPOINT) {
+ /* what else can we do?? */
+ return c1 - c2;
}
- s1++;
- s2++;
- }
- if (*s1 == 0 || *s2 == 0) {
- return *s1 - *s2;
+ if (toupper_w(c1) != toupper_w(c2)) {
+ return c1 - c2;
+ }
}
- return StrCaseCmp_slow(s1, s2);
+ return *s1 - *s2;
}
/**
@@ -136,27 +119,26 @@ BOOL strequal(const char *s1, const char *s2)
if (!s1 || !s2)
return(False);
- return(StrCaseCmp(s1,s2)==0);
+ return StrCaseCmp(s1,s2) == 0;
}
/**
Compare 2 strings (case sensitive).
**/
-
BOOL strcsequal(const char *s1,const char *s2)
{
- if (s1 == s2)
- return(True);
- if (!s1 || !s2)
- return(False);
-
- return(strcmp(s1,s2)==0);
+ if (s1 == s2)
+ return(True);
+ if (!s1 || !s2)
+ return(False);
+
+ return strcmp(s1,s2) == 0;
}
+
/**
Do a case-insensitive, whitespace-ignoring string compare.
**/
-
int strwicmp(const char *psz1, const char *psz2)
{
/* if BOTH strings are NULL, return TRUE, if ONE is NULL return */
@@ -187,20 +169,21 @@ int strwicmp(const char *psz1, const char *psz2)
String replace.
NOTE: oldc and newc must be 7 bit characters
**/
-
-void string_replace(char *s,char oldc,char newc)
+void string_replace(char *s, char oldc, char newc)
{
- if (strchr(s, oldc)) {
- push_ucs2(tmpbuf,s, sizeof(tmpbuf), STR_TERMINATE);
- string_replace_w(tmpbuf, UCS2_CHAR(oldc), UCS2_CHAR(newc));
- pull_ucs2(s, tmpbuf, strlen(s)+1, sizeof(tmpbuf), STR_TERMINATE);
+ while (*s) {
+ size_t size;
+ codepoint_t c = next_codepoint(s, &size);
+ if (c == oldc) {
+ *s = newc;
+ }
+ s += size;
}
}
/**
Trim the specified elements off the front and back of a string.
**/
-
BOOL trim_string(char *s,const char *front,const char *back)
{
BOOL ret = False;
@@ -238,60 +221,26 @@ BOOL trim_string(char *s,const char *front,const char *back)
}
/**
- Does a string have any uppercase chars in it?
-**/
-
-BOOL strhasupper(const char *s)
-{
- smb_ucs2_t *ptr;
- push_ucs2(tmpbuf,s, sizeof(tmpbuf), STR_TERMINATE);
- for(ptr=tmpbuf;*ptr;ptr++)
- if(isupper_w(*ptr))
- return True;
- return(False);
-}
-
-/**
- Does a string have any lowercase chars in it?
-**/
-
-BOOL strhaslower(const char *s)
-{
- smb_ucs2_t *ptr;
- push_ucs2(tmpbuf,s, sizeof(tmpbuf), STR_TERMINATE);
- for(ptr=tmpbuf;*ptr;ptr++)
- if(islower_w(*ptr))
- return True;
- return(False);
-}
-
-/**
Find the number of 'c' chars in a string
**/
-
-size_t count_chars(const char *s,char c)
+size_t count_chars(const char *s, char c)
{
- smb_ucs2_t *ptr;
- int count;
- smb_ucs2_t *alloc_tmpbuf = NULL;
+ size_t count = 0;
- if (push_ucs2_talloc(NULL, &alloc_tmpbuf, s) == (size_t)-1) {
- return 0;
+ while (*s) {
+ size_t size;
+ codepoint_t c2 = next_codepoint(s, &size);
+ if (c2 == c) count++;
+ s += size;
}
- for(count=0,ptr=alloc_tmpbuf;*ptr;ptr++)
- if(*ptr==UCS2_CHAR(c))
- count++;
-
- talloc_free(alloc_tmpbuf);
- return(count);
+ return count;
}
/**
Safe string copy into a known length string. maxlength does not
include the terminating zero.
**/
-
char *safe_strcpy(char *dest,const char *src, size_t maxlength)
{
size_t len;
@@ -334,7 +283,6 @@ char *safe_strcpy(char *dest,const char *src, size_t maxlength)
Safe string cat into a string. maxlength does not
include the terminating zero.
**/
-
char *safe_strcat(char *dest, const char *src, size_t maxlength)
{
size_t src_len, dest_len;
@@ -446,7 +394,6 @@ char *StrnCpy(char *dest,const char *src,size_t n)
valid examples: "0A5D15"; "0x15, 0x49, 0xa2"; "59\ta9\te3\n"
**/
-
size_t strhex_to_str(char *p, size_t len, const char *strhex)
{
size_t i;
@@ -493,10 +440,10 @@ DATA_BLOB strhex_to_data_blob(const char *strhex)
return ret_blob;
}
+
/**
* Routine to print a buffer as HEX digits, into an allocated string.
*/
-
void hex_encode(const unsigned char *buff_in, size_t len, char **out_hex_buffer)
{
int i;
@@ -512,7 +459,6 @@ void hex_encode(const unsigned char *buff_in, size_t len, char **out_hex_buffer)
/**
Check if a string is part of a list.
**/
-
BOOL in_list(const char *s, const char *list, BOOL casesensitive)
{
pstring tok;
@@ -681,36 +627,31 @@ const char *octal_string(int i)
/**
- Strchr and strrchr_m are very hard to do on general multi-byte strings.
- We convert via ucs2 for now.
+ Strchr and strrchr_m are a bit complex on general multi-byte strings.
**/
-
char *strchr_m(const char *s, char c)
{
- wpstring ws;
- pstring s2;
- smb_ucs2_t *p;
-
/* characters below 0x3F are guaranteed to not appear in
non-initial position in multi-byte charsets */
if ((c & 0xC0) == 0) {
return strchr(s, c);
}
- push_ucs2(ws, s, sizeof(ws), STR_TERMINATE);
- p = strchr_w(ws, UCS2_CHAR(c));
- if (!p)
- return NULL;
- *p = 0;
- pull_ucs2_pstring(s2, ws);
- return discard_const_p(char, s+strlen(s2));
+ while (*s) {
+ size_t size;
+ codepoint_t c2 = next_codepoint(s, &size);
+ if (c2 == c) {
+ return discard_const(s);
+ }
+ s += size;
+ }
+
+ return NULL;
}
char *strrchr_m(const char *s, char c)
{
- wpstring ws;
- pstring s2;
- smb_ucs2_t *p;
+ char *ret = NULL;
/* characters below 0x3F are guaranteed to not appear in
non-initial position in multi-byte charsets */
@@ -718,69 +659,99 @@ char *strrchr_m(const char *s, char c)
return strrchr(s, c);
}
- push_ucs2(ws, s, sizeof(ws), STR_TERMINATE);
- p = strrchr_w(ws, UCS2_CHAR(c));
- if (!p)
- return NULL;
- *p = 0;
- pull_ucs2_pstring(s2, ws);
- return discard_const_p(char, s+strlen(s2));
+ while (*s) {
+ size_t size;
+ codepoint_t c2 = next_codepoint(s, &size);
+ if (c2 == c) {
+ ret = discard_const(s);
+ }
+ s += size;
+ }
+
+ return ret;
}
/**
Convert a string to lower case, allocated with talloc
**/
-
char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
{
- size_t size;
- smb_ucs2_t *buffer;
+ size_t size=0;
char *dest;
- size = push_ucs2_talloc(ctx, &buffer, src);
- if (size == -1) {
+ /* this takes advantage of the fact that upper/lower can't
+ change the length of a character by more than 1 byte */
+ dest = talloc(ctx, 2*(strlen(src))+1);
+ if (dest == NULL) {
return NULL;
}
- strlower_w(buffer);
- size = pull_ucs2_talloc(ctx, &dest, buffer);
- talloc_free(buffer);
+ while (*src) {
+ size_t c_size;
+ codepoint_t c = next_codepoint(src, &c_size);
+ src += c_size;
+
+ c = tolower_w(c);
+
+ c_size = push_codepoint(dest+size, c);
+ if (c_size == -1) {
+ talloc_free(dest);
+ return NULL;
+ }
+ size += c_size;
+ }
+
+ dest[size] = 0;
+
return dest;
}
/**
Convert a string to UPPER case, allocated with talloc
**/
-
char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
{
- size_t size;
- smb_ucs2_t *buffer;
+ size_t size=0;
char *dest;
- size = push_ucs2_talloc(ctx, &buffer, src);
- if (size == -1) {
+ /* this takes advantage of the fact that upper/lower can't
+ change the length of a character by more than 1 byte */
+ dest = talloc(ctx, 2*(strlen(src))+1);
+ if (dest == NULL) {
return NULL;
}
- strupper_w(buffer);
- size = pull_ucs2_talloc(ctx, &dest, buffer);
- talloc_free(buffer);
+ while (*src) {
+ size_t c_size;
+ codepoint_t c = next_codepoint(src, &c_size);
+ src += c_size;
+
+ c = toupper_w(c);
+
+ c_size = push_codepoint(dest+size, c);
+ if (c_size == -1) {
+ talloc_free(dest);
+ return NULL;
+ }
+ size += c_size;
+ }
+
+ dest[size] = 0;
+
return dest;
}
/**
Convert a string to lower case.
**/
-
void strlower_m(char *s)
{
- char *lower;
+ char *d;
+
/* this is quite a common operation, so we want it to be
fast. We optimise for the ascii case, knowing that all our
supported multi-byte character sets are ascii-compatible
(ie. they match for the first 128 chars) */
-
while (*s && !(((uint8_t)s[0]) & 0x7F)) {
*s = tolower((uint8_t)*s);
s++;
@@ -789,27 +760,32 @@ void strlower_m(char *s)
if (!*s)
return;
- /* I assume that lowercased string takes the same number of bytes
- * as source string even in UTF-8 encoding. (VIV) */
- lower = strlower_talloc(NULL, s);
- if (lower) {
- safe_strcpy(s, lower, strlen(s));
+ d = s;
+
+ while (*s) {
+ size_t c_size, c_size2;
+ codepoint_t c = next_codepoint(s, &c_size);
+ c_size2 = push_codepoint(d, tolower_w(c));
+ if (c_size2 > c_size) {
+ smb_panic("codepoint expansion in strlower_m\n");
+ }
+ s += c_size;
+ d += c_size2;
}
- talloc_free(lower);
+ *d = 0;
}
/**
Convert a string to UPPER case.
**/
-
void strupper_m(char *s)
{
- char *upper;
+ char *d;
+
/* this is quite a common operation, so we want it to be
fast. We optimise for the ascii case, knowing that all our
supported multi-byte character sets are ascii-compatible
(ie. they match for the first 128 chars) */
-
while (*s && !(((uint8_t)s[0]) & 0x7F)) {
*s = toupper((uint8_t)*s);
s++;
@@ -818,13 +794,19 @@ void strupper_m(char *s)
if (!*s)
return;
- /* I assume that uppercased string takes the same number of bytes
- * as source string even in UTF-8 encoding. (VIV) */
- upper = strupper_talloc(NULL, s);
- if (upper) {
- safe_strcpy(s, upper, strlen(s));
+ d = s;
+
+ while (*s) {
+ size_t c_size, c_size2;
+ codepoint_t c = next_codepoint(s, &c_size);
+ c_size2 = push_codepoint(d, toupper_w(c));
+ if (c_size2 > c_size) {
+ smb_panic("codepoint expansion in strupper_m\n");
+ }
+ s += c_size;
+ d += c_size2;
}
- talloc_free(upper);
+ *d = 0;
}
/**
@@ -832,13 +814,9 @@ void strupper_m(char *s)
be the same as the number of bytes in a string for single byte strings,
but will be different for multibyte.
**/
-
size_t strlen_m(const char *s)
{
size_t count = 0;
- smb_ucs2_t *tmp;
-
- size_t len;
if (!s) {
return 0;
@@ -853,12 +831,18 @@ size_t strlen_m(const char *s)
return count;
}
- SMB_ASSERT(push_ucs2_talloc(NULL, &tmp, s) != -1);
-
- len = count + strlen_w(tmp);
- talloc_free(tmp);
+ while (*s) {
+ size_t c_size;
+ codepoint_t c = next_codepoint(s, &c_size);
+ if (c < 0x10000) {
+ count += 1;
+ } else {
+ count += 2;
+ }
+ s += c_size;
+ }
- return len;
+ return count;
}
/**
@@ -879,7 +863,6 @@ size_t strlen_m_term(const char *s)
Used in LDAP filters.
Caller must free.
**/
-
char *binary_string(char *buf, int len)
{
char *s;
@@ -898,21 +881,6 @@ char *binary_string(char *buf, int len)
return s;
}
-/**
- Just a typesafety wrapper for snprintf into a pstring.
-**/
-
- int pstr_sprintf(pstring s, const char *fmt, ...)
-{
- va_list ap;
- int ret;
-
- va_start(ap, fmt);
- ret = vsnprintf(s, PSTRING_LEN, fmt, ap);
- va_end(ap);
- return ret;
-}
-
#ifndef HAVE_STRNDUP
/**
Some platforms don't have strndup.
@@ -945,305 +913,6 @@ char *binary_string(char *buf, int len)
}
#endif
-/**
- List of Strings manipulation functions
-**/
-
-#define S_LIST_ABS 16 /* List Allocation Block Size */
-
-char **str_list_make(const char *string, const char *sep)
-{
- char **list, **rlist;
- const char *str;
- char *s;
- int num, lsize;
- pstring tok;
-
- if (!string || !*string)
- return NULL;
- s = strdup(string);
- if (!s) {
- DEBUG(0,("str_list_make: Unable to allocate memory"));
- return NULL;
- }
- if (!sep) sep = LIST_SEP;
-
- num = lsize = 0;
- list = NULL;
-
- str = s;
- while (next_token(&str, tok, sep, sizeof(tok))) {
- if (num == lsize) {
- lsize += S_LIST_ABS;
- rlist = (char **)Realloc(list, ((sizeof(char **)) * (lsize +1)));
- if (!rlist) {
- DEBUG(0,("str_list_make: Unable to allocate memory"));
- str_list_free(&list);
- SAFE_FREE(s);
- return NULL;
- } else
- list = rlist;
- memset (&list[num], 0, ((sizeof(char**)) * (S_LIST_ABS +1)));
- }
-
- list[num] = strdup(tok);
- if (!list[num]) {
- DEBUG(0,("str_list_make: Unable to allocate memory"));
- str_list_free(&list);
- SAFE_FREE(s);
- return NULL;
- }
-
- num++;
- }
-
- SAFE_FREE(s);
- return list;
-}
-
-BOOL str_list_copy(char ***dest, const char **src)
-{
- char **list, **rlist;
- int num, lsize;
-
- *dest = NULL;
- if (!src)
- return False;
-
- num = lsize = 0;
- list = NULL;
-
- while (src[num]) {
- if (num == lsize) {
- lsize += S_LIST_ABS;
- rlist = (char **)Realloc(list, ((sizeof(char **)) * (lsize +1)));
- if (!rlist) {
- DEBUG(0,("str_list_copy: Unable to re-allocate memory"));
- str_list_free(&list);
- return False;
- } else
- list = rlist;
- memset (&list[num], 0, ((sizeof(char **)) * (S_LIST_ABS +1)));
- }
-
- list[num] = strdup(src[num]);
- if (!list[num]) {
- DEBUG(0,("str_list_copy: Unable to allocate memory"));
- str_list_free(&list);
- return False;
- }
-
- num++;
- }
-
- *dest = list;
- return True;
-}
-
-/**
- * Return true if all the elements of the list match exactly.
- **/
-BOOL str_list_compare(char **list1, char **list2)
-{
- int num;
-
- if (!list1 || !list2)
- return (list1 == list2);
-
- for (num = 0; list1[num]; num++) {
- if (!list2[num])
- return False;
- if (!strcsequal(list1[num], list2[num]))
- return False;
- }
- if (list2[num])
- return False; /* if list2 has more elements than list1 fail */
-
- return True;
-}
-
-void str_list_free(char ***list)
-{
- char **tlist;
-
- if (!list || !*list)
- return;
- tlist = *list;
- for(; *tlist; tlist++)
- SAFE_FREE(*tlist);
- SAFE_FREE(*list);
-}
-
-BOOL str_list_substitute(char **list, const char *pattern, const char *insert)
-{
- char *p, *s, *t;
- ssize_t ls, lp, li, ld, i, d;
-
- if (!list)
- return False;
- if (!pattern)
- return False;
- if (!insert)
- return False;
-
- lp = (ssize_t)strlen(pattern);
- li = (ssize_t)strlen(insert);
- ld = li -lp;
-
- while (*list) {
- s = *list;
- ls = (ssize_t)strlen(s);
-
- while ((p = strstr(s, pattern))) {
- t = *list;
- d = p -t;
- if (ld) {
- t = (char *) malloc(ls +ld +1);
- if (!t) {
- DEBUG(0,("str_list_substitute: Unable to allocate memory"));
- return False;
- }
- memcpy(t, *list, d);
- memcpy(t +d +li, p +lp, ls -d -lp +1);
- SAFE_FREE(*list);
- *list = t;
- ls += ld;
- s = t +d +li;
- }
-
- for (i = 0; i < li; i++) {
- switch (insert[i]) {
- case '`':
- case '"':
- case '\'':
- case ';':
- case '$':
- case '%':
- case '\r':
- case '\n':
- t[d +i] = '_';
- break;
- default:
- t[d +i] = insert[i];
- }
- }
- }
-
- list++;
- }
-
- return True;
-}
-
-
-#define IPSTR_LIST_SEP ","
-
-/**
- * Add ip string representation to ipstr list. Used also
- * as part of @function ipstr_list_make
- *
- * @param ipstr_list pointer to string containing ip list;
- * MUST BE already allocated and IS reallocated if necessary
- * @param ipstr_size pointer to current size of ipstr_list (might be changed
- * as a result of reallocation)
- * @param ip IP address which is to be added to list
- * @return pointer to string appended with new ip and possibly
- * reallocated to new length
- **/
-
-char* ipstr_list_add(char** ipstr_list, const struct in_addr *ip)
-{
- char* new_ipstr = NULL;
-
- /* arguments checking */
- if (!ipstr_list || !ip) return NULL;
-
- /* attempt to convert ip to a string and append colon separator to it */
- if (*ipstr_list) {
- asprintf(&new_ipstr, "%s%s%s", *ipstr_list, IPSTR_LIST_SEP,inet_ntoa(*ip));
- SAFE_FREE(*ipstr_list);
- } else {
- asprintf(&new_ipstr, "%s", inet_ntoa(*ip));
- }
- *ipstr_list = new_ipstr;
- return *ipstr_list;
-}
-
-/**
- * Allocate and initialise an ipstr list using ip adresses
- * passed as arguments.
- *
- * @param ipstr_list pointer to string meant to be allocated and set
- * @param ip_list array of ip addresses to place in the list
- * @param ip_count number of addresses stored in ip_list
- * @return pointer to allocated ip string
- **/
-
-char* ipstr_list_make(char** ipstr_list, const struct in_addr* ip_list, int ip_count)
-{
- int i;
-
- /* arguments checking */
- if (!ip_list && !ipstr_list) return 0;
-
- *ipstr_list = NULL;
-
- /* process ip addresses given as arguments */
- for (i = 0; i < ip_count; i++)
- *ipstr_list = ipstr_list_add(ipstr_list, &ip_list[i]);
-
- return (*ipstr_list);
-}
-
-
-/**
- * Parse given ip string list into array of ip addresses
- * (as in_addr structures)
- *
- * @param ipstr ip string list to be parsed
- * @param ip_list pointer to array of ip addresses which is
- * allocated by this function and must be freed by caller
- * @return number of succesfully parsed addresses
- **/
-
-int ipstr_list_parse(const char* ipstr_list, struct in_addr** ip_list)
-{
- fstring token_str;
- int count;
-
- if (!ipstr_list || !ip_list) return 0;
-
- for (*ip_list = NULL, count = 0;
- next_token(&ipstr_list, token_str, IPSTR_LIST_SEP, FSTRING_LEN);
- count++) {
-
- struct in_addr addr;
-
- /* convert single token to ip address */
- if ( (addr.s_addr = inet_addr(token_str)) == INADDR_NONE )
- break;
-
- /* prepare place for another in_addr structure */
- *ip_list = Realloc(*ip_list, (count + 1) * sizeof(struct in_addr));
- if (!*ip_list) return -1;
-
- (*ip_list)[count] = addr;
- }
-
- return count;
-}
-
-
-/**
- * Safely free ip string list
- *
- * @param ipstr_list ip string list to be freed
- **/
-
-void ipstr_list_free(char* ipstr_list)
-{
- SAFE_FREE(ipstr_list);
-}
/**
Unescape a URL encoded string, in place.