From d8231592bc4497fbec0a9ced20cd8cc84782bb9d Mon Sep 17 00:00:00 2001 From: Tim Potter Date: Mon, 15 Sep 2003 05:02:12 +0000 Subject: Merge from Samba 3.0: >Fix for MacOS/X which uses STUPID BROKEN UNICODE COMPOSE CHARACTERS ! >(rant off :-). Inspired by work from Benjamin Riefenstahl . >Also add MacOSX/Darwin configure fixes. >Jerry - can we put this in 3.0 release ? :-). >Jeremy. (This used to be commit 67acde75d3569b612f87646ff0740f8020e8fdcf) --- source3/lib/charcnv.c | 79 +++++++++++++++++++++++++++++++++----------------- source3/lib/util_str.c | 14 +++++++-- 2 files changed, 65 insertions(+), 28 deletions(-) diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c index 5f3cf64a68..dafc88fb77 100644 --- a/source3/lib/charcnv.c +++ b/source3/lib/charcnv.c @@ -176,6 +176,14 @@ static size_t convert_string_internal(charset_t from, charset_t to, descriptor = conv_handles[from][to]; + if (srclen == (size_t)-1) { + if (from == CH_UCS2) { + srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2; + } else { + srclen = strlen((const char *)src)+1; + } + } + if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { if (!conv_silent) DEBUG(0,("convert_string_internal: Conversion not supported.\n")); @@ -248,31 +256,40 @@ size_t convert_string(charset_t from, charset_t to, void const *src, size_t srclen, void *dest, size_t destlen) { + /* + * NB. We deliberately don't do a strlen here is srclen == -1. + * This is very expensive over millions of calls and is taken + * care of in the slow path in convert_string_internal. JRA. + */ + if (srclen == 0) return 0; if (from != CH_UCS2 && to != CH_UCS2) { const unsigned char *p = (const unsigned char *)src; unsigned char *q = (unsigned char *)dest; + size_t slen = srclen; + size_t dlen = destlen; unsigned char lastp; size_t retval = 0; /* If all characters are ascii, fast path here. */ - while (srclen && destlen) { + while (slen && dlen) { if ((lastp = *p) <= 0x7f) { *q++ = *p++; - if (srclen != (size_t)-1) { - srclen--; + if (slen != (size_t)-1) { + slen--; } - destlen--; + dlen--; retval++; if (!lastp) break; } else { - if (srclen == (size_t)-1) { - srclen = strlen(p)+1; - } - return retval + convert_string_internal(from, to, p, srclen, q, destlen); +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + goto general_case; +#else + return retval + convert_string_internal(from, to, p, slen, q, dlen); +#endif } } return retval; @@ -280,25 +297,28 @@ size_t convert_string(charset_t from, charset_t to, const unsigned char *p = (const unsigned char *)src; unsigned char *q = (unsigned char *)dest; size_t retval = 0; + size_t slen = srclen; + size_t dlen = destlen; unsigned char lastp; /* If all characters are ascii, fast path here. */ - while ((srclen >= 2) && destlen) { - if ((lastp = *p) <= 0x7f && p[1] == 0) { + while ((slen >= 2) && dlen) { + if (((lastp = *p) <= 0x7f) && (p[1] == 0)) { *q++ = *p; - if (srclen != (size_t)-1) { - srclen -= 2; + if (slen != (size_t)-1) { + slen -= 2; } p += 2; - destlen--; + dlen--; retval++; if (!lastp) break; } else { - if (srclen == (size_t)-1) { - srclen = strlen_w((const void *)p)+2; - } - return retval + convert_string_internal(from, to, p, srclen, q, destlen); +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + goto general_case; +#else + return retval + convert_string_internal(from, to, p, slen, q, dlen); +#endif } } return retval; @@ -306,29 +326,36 @@ size_t convert_string(charset_t from, charset_t to, const unsigned char *p = (const unsigned char *)src; unsigned char *q = (unsigned char *)dest; size_t retval = 0; + size_t slen = srclen; + size_t dlen = destlen; unsigned char lastp; /* If all characters are ascii, fast path here. */ - while (srclen && (destlen >= 2)) { + while (slen && (dlen >= 2)) { if ((lastp = *p) <= 0x7F) { *q++ = *p++; *q++ = '\0'; - if (srclen != (size_t)-1) { - srclen--; + if (slen != (size_t)-1) { + slen--; } - destlen -= 2; + dlen -= 2; retval += 2; if (!lastp) break; } else { - if (srclen == (size_t)-1) { - srclen = strlen(p)+1; - } - return retval + convert_string_internal(from, to, p, srclen, q, destlen); +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + goto general_case; +#else + return retval + convert_string_internal(from, to, p, slen, q, dlen); +#endif } } return retval; } + +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + general_case: +#endif return convert_string_internal(from, to, src, srclen, dest, destlen); } @@ -542,7 +569,7 @@ char *strdup_upper(const char *s) strupper_w(buffer); - size = convert_string(CH_UCS2, CH_UNIX, buffer, sizeof(buffer), out_buffer, sizeof(out_buffer)); + size = convert_string(CH_UCS2, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer)); if (size == -1) { return NULL; } diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c index 82b312e241..15ac1639a9 100644 --- a/source3/lib/util_str.c +++ b/source3/lib/util_str.c @@ -382,6 +382,10 @@ void string_replace(pstring s,char oldc,char newc) return; /* Slow (mb) path. */ +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + /* With compose characters we must restart from the beginning. JRA. */ + p = s; +#endif push_ucs2(NULL, tmpbuf, p, sizeof(tmpbuf), STR_TERMINATE); string_replace_w(tmpbuf, UCS2_CHAR(oldc), UCS2_CHAR(newc)); pull_ucs2(NULL, p, tmpbuf, -1, sizeof(tmpbuf), STR_TERMINATE); @@ -1175,18 +1179,19 @@ char *string_truncate(char *s, unsigned int length) We convert via ucs2 for now. **/ -char *strchr_m(const char *s, char c) +char *strchr_m(const char *src, char c) { wpstring ws; pstring s2; smb_ucs2_t *p; + const char *s; /* this is quite a common operation, so we want it to be fast. We optimise for the ascii case, knowing that all our supported multi-byte character sets are ascii-compatible (ie. they match for the first 128 chars) */ - while (*s && (((unsigned char)s[0]) & 0x80)) { + for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) { if (*s == c) return s; } @@ -1194,6 +1199,11 @@ char *strchr_m(const char *s, char c) if (!*s) return NULL; +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + /* With compose characters we must restart from the beginning. JRA. */ + s = src; +#endif + push_ucs2(NULL, ws, s, sizeof(ws), STR_TERMINATE); p = strchr_w(ws, UCS2_CHAR(c)); if (!p) -- cgit