diff options
Diffstat (limited to 'source4/lib/util_unistr.c')
-rw-r--r-- | source4/lib/util_unistr.c | 225 |
1 files changed, 56 insertions, 169 deletions
diff --git a/source4/lib/util_unistr.c b/source4/lib/util_unistr.c index 713f50708b..8b7504986f 100644 --- a/source4/lib/util_unistr.c +++ b/source4/lib/util_unistr.c @@ -21,16 +21,16 @@ #include "includes.h" -/* these 3 tables define the unicode case handling. They are loaded +/* these 2 tables define the unicode case handling. They are loaded at startup either via mmap() or read() from the lib directory */ -static smb_ucs2_t *upcase_table; -static smb_ucs2_t *lowcase_table; +static void *upcase_table; +static void *lowcase_table; /******************************************************************* load the case handling tables ********************************************************************/ -void load_case_tables(void) +static void load_case_tables(void) { static int initialised; int i; @@ -56,14 +56,10 @@ void load_case_tables(void) smb_panic("No memory for upcase tables"); } for (i=0;i<0x10000;i++) { - smb_ucs2_t v; - SSVAL(&v, 0, i); - upcase_table[v] = i; + SSVAL(upcase_table, i*2, i); } for (i=0;i<256;i++) { - smb_ucs2_t v; - SSVAL(&v, 0, UCS2_CHAR(i)); - upcase_table[v] = UCS2_CHAR(islower(i)?toupper(i):i); + SSVAL(upcase_table, i*2, islower(i)?toupper(i):i); } } @@ -74,199 +70,79 @@ void load_case_tables(void) smb_panic("No memory for lowcase tables"); } for (i=0;i<0x10000;i++) { - smb_ucs2_t v; - SSVAL(&v, 0, i); - lowcase_table[v] = i; + SSVAL(lowcase_table, i*2, i); } for (i=0;i<256;i++) { - smb_ucs2_t v; - SSVAL(&v, 0, UCS2_CHAR(i)); - lowcase_table[v] = UCS2_CHAR(isupper(i)?tolower(i):i); + SSVAL(lowcase_table, i*2, isupper(i)?tolower(i):i); } } } /******************************************************************* - Convert a wchar to upper case. + Convert a codepoint_t to upper case. ********************************************************************/ -smb_ucs2_t toupper_w(smb_ucs2_t val) +codepoint_t toupper_w(codepoint_t val) { - return upcase_table[SVAL(&val,0)]; -} - -/******************************************************************* - Convert a wchar to lower case. -********************************************************************/ -static smb_ucs2_t tolower_w( smb_ucs2_t val ) -{ - return lowcase_table[SVAL(&val,0)]; - -} - -/******************************************************************* -determine if a character is lowercase -********************************************************************/ -BOOL islower_w(smb_ucs2_t c) -{ - return upcase_table[SVAL(&c,0)] != c; + if (val & 0xFFFF0000) { + return val; + } + if (val < 128) { + return toupper(val); + } + if (upcase_table == NULL) { + load_case_tables(); + } + return SVAL(upcase_table, val*2); } /******************************************************************* -determine if a character is uppercase + Convert a codepoint_t to lower case. ********************************************************************/ -BOOL isupper_w(smb_ucs2_t c) +codepoint_t tolower_w(codepoint_t val) { - return lowcase_table[SVAL(&c,0)] != c; + if (val & 0xFFFF0000) { + return val; + } + if (val < 128) { + return tolower(val); + } + if (lowcase_table == NULL) { + load_case_tables(); + } + return SVAL(lowcase_table, val*2); } - /******************************************************************* - Count the number of characters in a smb_ucs2_t string. +return the number of bytes occupied by a buffer in CH_UTF16 format +the result includes the null termination ********************************************************************/ -size_t strlen_w(const smb_ucs2_t *src) +size_t utf16_len(const void *buf) { size_t len; - for (len = 0; SVAL(src,0); len++, src++) ; + for (len = 0; SVAL(buf,len); len += 2) ; - return len; + return len + 2; } /******************************************************************* - Count up to max number of characters in a smb_ucs2_t string. +return the number of bytes occupied by a buffer in CH_UTF16 format +the result includes the null termination +limited by 'n' bytes ********************************************************************/ -size_t strnlen_w(const smb_ucs2_t *src, size_t max) +size_t utf16_len_n(const void *src, size_t n) { size_t len; - for (len = 0; (len < max) && SVAL(src, 0); len++, src++) ; - - return len; -} - -/******************************************************************* -wide strchr() -********************************************************************/ -smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c) -{ - while (*s != 0) { - if (c == *s) return discard_const_p(smb_ucs2_t, s); - s++; - } - if (c == *s) return discard_const_p(smb_ucs2_t, s); - - return NULL; -} - -smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c) -{ - return strchr_w(s, UCS2_CHAR(c)); -} - -smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c) -{ - const smb_ucs2_t *p = s; - int len = strlen_w(s); - if (len == 0) return NULL; - p += (len - 1); - do { - if (c == *p) return discard_const_p(smb_ucs2_t, p); - } while (p-- != s); - return NULL; -} - -/******************************************************************* - Convert a string to lower case. - return True if any char is converted -********************************************************************/ -BOOL strlower_w(smb_ucs2_t *s) -{ - BOOL ret = False; - while (*s) { - smb_ucs2_t v = tolower_w(*s); - if (v != *s) { - *s = v; - ret = True; - } - s++; - } - return ret; -} - -/******************************************************************* - Convert a string to upper case. - return True if any char is converted -********************************************************************/ -BOOL strupper_w(smb_ucs2_t *s) -{ - BOOL ret = False; - while (*s) { - smb_ucs2_t v = toupper_w(*s); - if (v != *s) { - *s = v; - ret = True; - } - s++; - } - return ret; -} - -/******************************************************************* -case insensitive string comparison -********************************************************************/ -int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b) -{ - while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; } - return (tolower_w(*a) - tolower_w(*b)); -} - -/******************************************************************* -replace any occurence of oldc with newc in unicode string -********************************************************************/ - -void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc) -{ - for(;*s;s++) { - if(*s==oldc) *s=newc; - } -} - - -/* - The *_wa() functions take a combination of 7 bit ascii - and wide characters They are used so that you can use string - functions combining C string constants with ucs2 strings - - The char* arguments must NOT be multibyte - to be completely sure - of this only pass string constants */ + for (len = 0; (len+2 < n) && SVAL(src, len); len += 2) ; - -void pstrcpy_wa(smb_ucs2_t *dest, const char *src) -{ - int i; - for (i=0;i<PSTRING_LEN;i++) { - dest[i] = UCS2_CHAR(src[i]); - if (src[i] == 0) return; + if (len+2 <= n) { + len += 2; } -} -int strcmp_wa(const smb_ucs2_t *a, const char *b) -{ - while (*b && *a == UCS2_CHAR(*b)) { a++; b++; } - return (*a - UCS2_CHAR(*b)); + return len; } -const smb_ucs2_t *strpbrk_wa(const smb_ucs2_t *s, const char *p) -{ - while (*s != 0) { - int i; - for (i=0; p[i] && *s != UCS2_CHAR(p[i]); i++) - ; - if (p[i]) return s; - s++; - } - return NULL; -} size_t ucs2_align(const void *base_ptr, const void *p, int flags) { @@ -275,3 +151,14 @@ size_t ucs2_align(const void *base_ptr, const void *p, int flags) return PTR_DIFF(p, base_ptr) & 1; } +/* + compare two codepoints case insensitively +*/ +int codepoint_cmpi(codepoint_t c1, codepoint_t c2) +{ + if (c1 == c2 || + toupper_w(c1) == toupper_w(c2)) { + return 0; + } + return c1 - c2; +} |