summaryrefslogtreecommitdiff
path: root/source4/lib/util_unistr.c
diff options
context:
space:
mode:
Diffstat (limited to 'source4/lib/util_unistr.c')
-rw-r--r--source4/lib/util_unistr.c225
1 files changed, 56 insertions, 169 deletions
diff --git a/source4/lib/util_unistr.c b/source4/lib/util_unistr.c
index 713f50708b..8b7504986f 100644
--- a/source4/lib/util_unistr.c
+++ b/source4/lib/util_unistr.c
@@ -21,16 +21,16 @@
#include "includes.h"
-/* these 3 tables define the unicode case handling. They are loaded
+/* these 2 tables define the unicode case handling. They are loaded
at startup either via mmap() or read() from the lib directory */
-static smb_ucs2_t *upcase_table;
-static smb_ucs2_t *lowcase_table;
+static void *upcase_table;
+static void *lowcase_table;
/*******************************************************************
load the case handling tables
********************************************************************/
-void load_case_tables(void)
+static void load_case_tables(void)
{
static int initialised;
int i;
@@ -56,14 +56,10 @@ void load_case_tables(void)
smb_panic("No memory for upcase tables");
}
for (i=0;i<0x10000;i++) {
- smb_ucs2_t v;
- SSVAL(&v, 0, i);
- upcase_table[v] = i;
+ SSVAL(upcase_table, i*2, i);
}
for (i=0;i<256;i++) {
- smb_ucs2_t v;
- SSVAL(&v, 0, UCS2_CHAR(i));
- upcase_table[v] = UCS2_CHAR(islower(i)?toupper(i):i);
+ SSVAL(upcase_table, i*2, islower(i)?toupper(i):i);
}
}
@@ -74,199 +70,79 @@ void load_case_tables(void)
smb_panic("No memory for lowcase tables");
}
for (i=0;i<0x10000;i++) {
- smb_ucs2_t v;
- SSVAL(&v, 0, i);
- lowcase_table[v] = i;
+ SSVAL(lowcase_table, i*2, i);
}
for (i=0;i<256;i++) {
- smb_ucs2_t v;
- SSVAL(&v, 0, UCS2_CHAR(i));
- lowcase_table[v] = UCS2_CHAR(isupper(i)?tolower(i):i);
+ SSVAL(lowcase_table, i*2, isupper(i)?tolower(i):i);
}
}
}
/*******************************************************************
- Convert a wchar to upper case.
+ Convert a codepoint_t to upper case.
********************************************************************/
-smb_ucs2_t toupper_w(smb_ucs2_t val)
+codepoint_t toupper_w(codepoint_t val)
{
- return upcase_table[SVAL(&val,0)];
-}
-
-/*******************************************************************
- Convert a wchar to lower case.
-********************************************************************/
-static smb_ucs2_t tolower_w( smb_ucs2_t val )
-{
- return lowcase_table[SVAL(&val,0)];
-
-}
-
-/*******************************************************************
-determine if a character is lowercase
-********************************************************************/
-BOOL islower_w(smb_ucs2_t c)
-{
- return upcase_table[SVAL(&c,0)] != c;
+ if (val & 0xFFFF0000) {
+ return val;
+ }
+ if (val < 128) {
+ return toupper(val);
+ }
+ if (upcase_table == NULL) {
+ load_case_tables();
+ }
+ return SVAL(upcase_table, val*2);
}
/*******************************************************************
-determine if a character is uppercase
+ Convert a codepoint_t to lower case.
********************************************************************/
-BOOL isupper_w(smb_ucs2_t c)
+codepoint_t tolower_w(codepoint_t val)
{
- return lowcase_table[SVAL(&c,0)] != c;
+ if (val & 0xFFFF0000) {
+ return val;
+ }
+ if (val < 128) {
+ return tolower(val);
+ }
+ if (lowcase_table == NULL) {
+ load_case_tables();
+ }
+ return SVAL(lowcase_table, val*2);
}
-
/*******************************************************************
- Count the number of characters in a smb_ucs2_t string.
+return the number of bytes occupied by a buffer in CH_UTF16 format
+the result includes the null termination
********************************************************************/
-size_t strlen_w(const smb_ucs2_t *src)
+size_t utf16_len(const void *buf)
{
size_t len;
- for (len = 0; SVAL(src,0); len++, src++) ;
+ for (len = 0; SVAL(buf,len); len += 2) ;
- return len;
+ return len + 2;
}
/*******************************************************************
- Count up to max number of characters in a smb_ucs2_t string.
+return the number of bytes occupied by a buffer in CH_UTF16 format
+the result includes the null termination
+limited by 'n' bytes
********************************************************************/
-size_t strnlen_w(const smb_ucs2_t *src, size_t max)
+size_t utf16_len_n(const void *src, size_t n)
{
size_t len;
- for (len = 0; (len < max) && SVAL(src, 0); len++, src++) ;
-
- return len;
-}
-
-/*******************************************************************
-wide strchr()
-********************************************************************/
-smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
-{
- while (*s != 0) {
- if (c == *s) return discard_const_p(smb_ucs2_t, s);
- s++;
- }
- if (c == *s) return discard_const_p(smb_ucs2_t, s);
-
- return NULL;
-}
-
-smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c)
-{
- return strchr_w(s, UCS2_CHAR(c));
-}
-
-smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
-{
- const smb_ucs2_t *p = s;
- int len = strlen_w(s);
- if (len == 0) return NULL;
- p += (len - 1);
- do {
- if (c == *p) return discard_const_p(smb_ucs2_t, p);
- } while (p-- != s);
- return NULL;
-}
-
-/*******************************************************************
- Convert a string to lower case.
- return True if any char is converted
-********************************************************************/
-BOOL strlower_w(smb_ucs2_t *s)
-{
- BOOL ret = False;
- while (*s) {
- smb_ucs2_t v = tolower_w(*s);
- if (v != *s) {
- *s = v;
- ret = True;
- }
- s++;
- }
- return ret;
-}
-
-/*******************************************************************
- Convert a string to upper case.
- return True if any char is converted
-********************************************************************/
-BOOL strupper_w(smb_ucs2_t *s)
-{
- BOOL ret = False;
- while (*s) {
- smb_ucs2_t v = toupper_w(*s);
- if (v != *s) {
- *s = v;
- ret = True;
- }
- s++;
- }
- return ret;
-}
-
-/*******************************************************************
-case insensitive string comparison
-********************************************************************/
-int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
-{
- while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; }
- return (tolower_w(*a) - tolower_w(*b));
-}
-
-/*******************************************************************
-replace any occurence of oldc with newc in unicode string
-********************************************************************/
-
-void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
-{
- for(;*s;s++) {
- if(*s==oldc) *s=newc;
- }
-}
-
-
-/*
- The *_wa() functions take a combination of 7 bit ascii
- and wide characters They are used so that you can use string
- functions combining C string constants with ucs2 strings
-
- The char* arguments must NOT be multibyte - to be completely sure
- of this only pass string constants */
+ for (len = 0; (len+2 < n) && SVAL(src, len); len += 2) ;
-
-void pstrcpy_wa(smb_ucs2_t *dest, const char *src)
-{
- int i;
- for (i=0;i<PSTRING_LEN;i++) {
- dest[i] = UCS2_CHAR(src[i]);
- if (src[i] == 0) return;
+ if (len+2 <= n) {
+ len += 2;
}
-}
-int strcmp_wa(const smb_ucs2_t *a, const char *b)
-{
- while (*b && *a == UCS2_CHAR(*b)) { a++; b++; }
- return (*a - UCS2_CHAR(*b));
+ return len;
}
-const smb_ucs2_t *strpbrk_wa(const smb_ucs2_t *s, const char *p)
-{
- while (*s != 0) {
- int i;
- for (i=0; p[i] && *s != UCS2_CHAR(p[i]); i++)
- ;
- if (p[i]) return s;
- s++;
- }
- return NULL;
-}
size_t ucs2_align(const void *base_ptr, const void *p, int flags)
{
@@ -275,3 +151,14 @@ size_t ucs2_align(const void *base_ptr, const void *p, int flags)
return PTR_DIFF(p, base_ptr) & 1;
}
+/*
+ compare two codepoints case insensitively
+*/
+int codepoint_cmpi(codepoint_t c1, codepoint_t c2)
+{
+ if (c1 == c2 ||
+ toupper_w(c1) == toupper_w(c2)) {
+ return 0;
+ }
+ return c1 - c2;
+}