From 9941dfe9f6532ecbc317685046d74e6f90c41695 Mon Sep 17 00:00:00 2001 From: Andrew Bartlett Date: Tue, 12 Apr 2011 16:31:08 +1000 Subject: lib/util/charset Move source3/lib/util_unistr.c to the common code. This file (largely) contains functions to deal with UTF16 strings. Andrew Bartlett Signed-off-by: Andrew Tridgell --- lib/util/charset/charset.h | 20 +++ lib/util/charset/util_unistr_w.c | 324 +++++++++++++++++++++++++++++++++++++++ lib/util/charset/wscript_build | 2 +- 3 files changed, 345 insertions(+), 1 deletion(-) create mode 100644 lib/util/charset/util_unistr_w.c (limited to 'lib/util/charset') diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h index 16bb9c62fb..3a6e6a3216 100644 --- a/lib/util/charset/charset.h +++ b/lib/util/charset/charset.h @@ -240,6 +240,26 @@ void load_case_tables(void); void load_case_tables_library(void); bool smb_register_charset(const struct charset_functions *funcs_in); +/* The following definitions come from util_unistr_w.c */ + +size_t strlen_w(const smb_ucs2_t *src); +size_t strnlen_w(const smb_ucs2_t *src, size_t max); +smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c); +smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c); +smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c); +smb_ucs2_t *strnrchr_w(const smb_ucs2_t *s, smb_ucs2_t c, unsigned int n); +smb_ucs2_t *strstr_w(const smb_ucs2_t *s, const smb_ucs2_t *ins); +bool strlower_w(smb_ucs2_t *s); +bool strupper_w(smb_ucs2_t *s); +int strcmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b); +int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b); +int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len); +int strcmp_wa(const smb_ucs2_t *a, const char *b); +int toupper_ascii(int c); +int tolower_ascii(int c); +int isupper_ascii(int c); +int islower_ascii(int c); + /* * Define stub for charset module which implements 8-bit encoding with gaps. 
* Encoding tables for such module should be produced from glibc's CHARMAPs diff --git a/lib/util/charset/util_unistr_w.c b/lib/util/charset/util_unistr_w.c new file mode 100644 index 0000000000..a550e52776 --- /dev/null +++ b/lib/util/charset/util_unistr_w.c @@ -0,0 +1,324 @@ +/* + Unix SMB/CIFS implementation. + Samba utility functions + Copyright (C) Andrew Tridgell 1992-2001 + Copyright (C) Simo Sorce 2001 + Copyright (C) Jeremy Allison 2005 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "includes.h" + +/* Copy into a smb_ucs2_t from a possibly unaligned buffer. Evaluates to dest, a pointer to the copied smb_ucs2_t */ +#define COPY_UCS2_CHAR(dest,src) (((unsigned char *)(dest))[0] = ((unsigned char *)(src))[0],\ + ((unsigned char *)(dest))[1] = ((unsigned char *)(src))[1], (dest)) + + +/* return an ascii version of a ucs2 character */ +#define UCS2_TO_CHAR(c) (((c) >> UCS2_SHIFT) & 0xff) + +static int strncmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len); + +/******************************************************************* + Count the number of two-byte (16-bit) units in a UTF16 string, not including the terminating null. 
+********************************************************************/ + +size_t strlen_w(const smb_ucs2_t *src) +{ + size_t len; + smb_ucs2_t c; + + for(len = 0; *(COPY_UCS2_CHAR(&c,src)); src++, len++) { + ; + } + + return len; +} + +/******************************************************************* + Count up to max number of characters in a smb_ucs2_t string. +********************************************************************/ + +size_t strnlen_w(const smb_ucs2_t *src, size_t max) +{ + size_t len; + smb_ucs2_t c; + + for(len = 0; (len < max) && *(COPY_UCS2_CHAR(&c,src)); src++, len++) { + ; + } + + return len; +} + +/******************************************************************* + Wide strchr(). +********************************************************************/ + +smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c) +{ + smb_ucs2_t cp; + while (*(COPY_UCS2_CHAR(&cp,s))) { + if (c == cp) { + return (smb_ucs2_t *)s; + } + s++; + } + if (c == cp) { + return (smb_ucs2_t *)s; + } + + return NULL; +} + +smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c) +{ + return strchr_w(s, UCS2_CHAR(c)); +} + +/******************************************************************* + Wide strrchr(). +********************************************************************/ + +smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c) +{ + smb_ucs2_t cp; + const smb_ucs2_t *p = s; + int len = strlen_w(s); + + if (len == 0) { + return NULL; + } + p += (len - 1); + do { + if (c == *(COPY_UCS2_CHAR(&cp,p))) { + return (smb_ucs2_t *)p; + } + } while (p-- != s); + return NULL; +} + +/******************************************************************* + Wide version of strrchr that returns after doing strrchr 'n' times. 
+********************************************************************/ + +smb_ucs2_t *strnrchr_w(const smb_ucs2_t *s, smb_ucs2_t c, unsigned int n) +{ + smb_ucs2_t cp; + const smb_ucs2_t *p = s; + int len = strlen_w(s); + + if (len == 0 || !n) { + return NULL; + } + p += (len - 1); + do { + if (c == *(COPY_UCS2_CHAR(&cp,p))) { + n--; + } + + if (!n) { + return (smb_ucs2_t *)p; + } + } while (p-- != s); + return NULL; +} + +/******************************************************************* + Wide strstr(). +********************************************************************/ + +smb_ucs2_t *strstr_w(const smb_ucs2_t *s, const smb_ucs2_t *ins) +{ + smb_ucs2_t *r; + size_t inslen; + + if (!s || !*s || !ins || !*ins) { + return NULL; + } + + inslen = strlen_w(ins); + r = (smb_ucs2_t *)s; + + while ((r = strchr_w(r, *ins))) { + if (strncmp_w(r, ins, inslen) == 0) { + return r; + } + r++; + } + + return NULL; +} + +/******************************************************************* + Convert a string to lower case. + Returns true if any character was converted. + + This is unsafe for any string containing UTF16 surrogate pairs: each 16-bit unit is converted on its own. +********************************************************************/ + +bool strlower_w(smb_ucs2_t *s) +{ + smb_ucs2_t cp; + bool ret = false; + + while (*(COPY_UCS2_CHAR(&cp,s))) { + smb_ucs2_t v = tolower_m(cp); + if (v != cp) { + COPY_UCS2_CHAR(s,&v); + ret = true; + } + s++; + } + return ret; +} + +/******************************************************************* + Convert a string to upper case. 
+ Returns true if any character was converted. + + This is unsafe for any string containing UTF16 surrogate pairs: each 16-bit unit is converted on its own. +********************************************************************/ + +bool strupper_w(smb_ucs2_t *s) +{ + smb_ucs2_t cp; + bool ret = false; + while (*(COPY_UCS2_CHAR(&cp,s))) { + smb_ucs2_t v = toupper_m(cp); + if (v != cp) { + COPY_UCS2_CHAR(s,&v); + ret = true; + } + s++; + } + return ret; +} + +int strcmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b) +{ + smb_ucs2_t cpa, cpb; + + while ((*(COPY_UCS2_CHAR(&cpb,b))) && (*(COPY_UCS2_CHAR(&cpa,a)) == cpb)) { + a++; + b++; + } + return (*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b))); + /* warning: if *a != *b and both are not 0 we return a random + greater or lesser than 0 number not related to which + string is longer */ +} + +static int strncmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len) +{ + smb_ucs2_t cpa, cpb; + size_t n = 0; + + while ((n < len) && (*(COPY_UCS2_CHAR(&cpb,b))) && (*(COPY_UCS2_CHAR(&cpa,a)) == cpb)) { + a++; + b++; + n++; + } + return (len - n)?(*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b))):0; +} + +/******************************************************************* + Case insensitive string comparison. +********************************************************************/ + +int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b) +{ + smb_ucs2_t cpa, cpb; + + while ((*COPY_UCS2_CHAR(&cpb,b)) && toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb)) { + a++; + b++; + } + return (tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))); +} + +/******************************************************************* + Case insensitive string comparison, length limited. 
+********************************************************************/ + +int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len) +{ + smb_ucs2_t cpa, cpb; + size_t n = 0; + + while ((n < len) && *COPY_UCS2_CHAR(&cpb,b) && (toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb))) { + a++; + b++; + n++; + } + return (len - n)?(tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))):0; +} + +/* + The *_wa() functions take a combination of 7 bit ascii + and wide characters. They are used so that you can use string + functions combining C string constants with ucs2 strings. + + The char* arguments must NOT be multibyte - to be completely sure + of this, only pass string constants. */ + +int strcmp_wa(const smb_ucs2_t *a, const char *b) +{ + smb_ucs2_t cp = 0; + + while (*b && *(COPY_UCS2_CHAR(&cp,a)) == UCS2_CHAR(*b)) { + a++; + b++; + } + return (*(COPY_UCS2_CHAR(&cp,a)) - UCS2_CHAR(*b)); +} + +/************************************************************* + ascii only toupper - saves the need for smbd to be in C locale. +*************************************************************/ + +int toupper_ascii(int c) +{ + smb_ucs2_t uc = toupper_m(UCS2_CHAR(c)); + return UCS2_TO_CHAR(uc); +} + +/************************************************************* + ascii only tolower - saves the need for smbd to be in C locale. +*************************************************************/ + +int tolower_ascii(int c) +{ + smb_ucs2_t uc = tolower_m(UCS2_CHAR(c)); + return UCS2_TO_CHAR(uc); +} + +/************************************************************* + ascii only isupper - saves the need for smbd to be in C locale. +*************************************************************/ + +int isupper_ascii(int c) +{ + return isupper_m(UCS2_CHAR(c)); +} + +/************************************************************* + ascii only islower - saves the need for smbd to be in C locale. 
+*************************************************************/ + +int islower_ascii(int c) +{ + return islower_m(UCS2_CHAR(c)); +} diff --git a/lib/util/charset/wscript_build b/lib/util/charset/wscript_build index a245ef1b0c..29e168dce1 100644 --- a/lib/util/charset/wscript_build +++ b/lib/util/charset/wscript_build @@ -13,6 +13,6 @@ bld.SAMBA_SUBSYSTEM('ICONV_WRAPPER', public_deps='iconv replace talloc') bld.SAMBA_SUBSYSTEM('CODEPOINTS', - source='codepoints.c util_str.c', + source='codepoints.c util_str.c util_unistr_w.c', deps='DYNCONFIG ICONV_WRAPPER' ) -- cgit