From 9941dfe9f6532ecbc317685046d74e6f90c41695 Mon Sep 17 00:00:00 2001 From: Andrew Bartlett Date: Tue, 12 Apr 2011 16:31:08 +1000 Subject: lib/util/charset Move source3/lib/util_unistr.c to the common code. This file (largely) contains functions to deal with UTF16 strings. Andrew Bartlett Signed-off-by: Andrew Tridgell --- source3/Makefile.in | 2 +- source3/include/proto.h | 26 +--- source3/lib/charcnv.c | 39 +++++ source3/lib/util_unistr.c | 364 ---------------------------------------------- source3/wscript_build | 2 +- 5 files changed, 44 insertions(+), 389 deletions(-) delete mode 100644 source3/lib/util_unistr.c (limited to 'source3') diff --git a/source3/Makefile.in b/source3/Makefile.in index b3b1de48c5..a0503278b9 100644 --- a/source3/Makefile.in +++ b/source3/Makefile.in @@ -451,7 +451,7 @@ LIB_OBJ = $(LIBSAMBAUTIL_OBJ) $(UTIL_OBJ) $(CRYPTO_OBJ) \ lib/bitmap.o lib/dprintf.o $(UTIL_REG_OBJ) \ lib/wins_srv.o \ lib/util_str.o ../lib/util/base64.o lib/util_sid.o \ - lib/util_unistr.o ../lib/util/charset/codepoints.o ../lib/util/charset/util_str.o lib/util_file.o \ + ../lib/util/charset/util_unistr_w.o ../lib/util/charset/codepoints.o ../lib/util/charset/util_str.o lib/util_file.o \ lib/util.o lib/util_cmdline.o lib/util_names.o \ lib/util_sock.o lib/sock_exec.o lib/util_sec.o \ lib/substitute.o lib/dbwrap_util.o \ diff --git a/source3/include/proto.h b/source3/include/proto.h index 48b94aacce..3f44b949f2 100644 --- a/source3/include/proto.h +++ b/source3/include/proto.h @@ -126,6 +126,9 @@ size_t pull_string_talloc(TALLOC_CTX *ctx, size_t src_len, int flags); size_t align_string(const void *base_ptr, const char *p, int flags); +size_t dos_PutUniCode(char *dst,const char *src, size_t len, bool null_terminate); +int rpcstr_push(void *dest, const char *src, size_t dest_len, int flags); +int rpcstr_push_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src); /* The following definitions come from lib/conn_tdb.c */ @@ -1046,29 +1049,6 @@ char 
*escape_shell_string(const char *src); char **str_list_make_v3(TALLOC_CTX *mem_ctx, const char *string, const char *sep); char *sanitize_username(TALLOC_CTX *mem_ctx, const char *username); -/* The following definitions come from lib/util_unistr.c */ - -size_t dos_PutUniCode(char *dst,const char *src, size_t len, bool null_terminate); -int rpcstr_push(void *dest, const char *src, size_t dest_len, int flags); -int rpcstr_push_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src); -size_t strlen_w(const smb_ucs2_t *src); -size_t strnlen_w(const smb_ucs2_t *src, size_t max); -smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c); -smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c); -smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c); -smb_ucs2_t *strnrchr_w(const smb_ucs2_t *s, smb_ucs2_t c, unsigned int n); -smb_ucs2_t *strstr_w(const smb_ucs2_t *s, const smb_ucs2_t *ins); -bool strlower_w(smb_ucs2_t *s); -bool strupper_w(smb_ucs2_t *s); -int strcmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b); -int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b); -int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len); -int strcmp_wa(const smb_ucs2_t *a, const char *b); -int toupper_ascii(int c); -int tolower_ascii(int c); -int isupper_ascii(int c); -int islower_ascii(int c); - /* The following definitions come from lib/version.c */ const char *samba_version_string(void); diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c index 6e5b606e64..edcccc25e7 100644 --- a/source3/lib/charcnv.c +++ b/source3/lib/charcnv.c @@ -1366,3 +1366,42 @@ size_t align_string(const void *base_ptr, const char *p, int flags) return 0; } +/******************************************************************* + Write a string in (little-endian) unicode format. src is in + the current DOS codepage. len is the length in bytes of the + string pointed to by dst. 
+ + if null_terminate is True then null terminate the packet (adds 2 bytes) + + the return value is the length in bytes consumed by the string, including the + null termination if applied +********************************************************************/ + +size_t dos_PutUniCode(char *dst,const char *src, size_t len, bool null_terminate) +{ + int flags = null_terminate ? STR_UNICODE|STR_NOALIGN|STR_TERMINATE + : STR_UNICODE|STR_NOALIGN; + return push_ucs2(NULL, dst, src, len, flags); +} + + +/* Converts a string from internal samba format to unicode + */ + +int rpcstr_push(void *dest, const char *src, size_t dest_len, int flags) +{ + return push_ucs2(NULL, dest, src, dest_len, flags|STR_UNICODE|STR_NOALIGN); +} + +/* Converts a string from internal samba format to unicode. Always terminates. + * Actually just a wrapper round push_ucs2_talloc(). + */ + +int rpcstr_push_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src) +{ + size_t size; + if (push_ucs2_talloc(ctx, dest, src, &size)) + return size; + else + return -1; +} diff --git a/source3/lib/util_unistr.c b/source3/lib/util_unistr.c deleted file mode 100644 index 26450e319b..0000000000 --- a/source3/lib/util_unistr.c +++ /dev/null @@ -1,364 +0,0 @@ -/* - Unix SMB/CIFS implementation. - Samba utility functions - Copyright (C) Andrew Tridgell 1992-2001 - Copyright (C) Simo Sorce 2001 - Copyright (C) Jeremy Allison 2005 - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. 
If not, see <http://www.gnu.org/licenses/>. -*/ - -#include "includes.h" - -/* Copy into a smb_ucs2_t from a possibly unaligned buffer. Return the copied smb_ucs2_t */ -#define COPY_UCS2_CHAR(dest,src) (((unsigned char *)(dest))[0] = ((unsigned char *)(src))[0],\ - ((unsigned char *)(dest))[1] = ((unsigned char *)(src))[1], (dest)) - - -/* return an ascii version of a ucs2 character */ -#define UCS2_TO_CHAR(c) (((c) >> UCS2_SHIFT) & 0xff) - -static int strncmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len); - -/******************************************************************* - Write a string in (little-endian) unicode format. src is in - the current DOS codepage. len is the length in bytes of the - string pointed to by dst. - - if null_terminate is True then null terminate the packet (adds 2 bytes) - - the return value is the length in bytes consumed by the string, including the - null termination if applied -********************************************************************/ - -size_t dos_PutUniCode(char *dst,const char *src, size_t len, bool null_terminate) -{ - int flags = null_terminate ? STR_UNICODE|STR_NOALIGN|STR_TERMINATE - : STR_UNICODE|STR_NOALIGN; - return push_ucs2(NULL, dst, src, len, flags); -} - - -/* Converts a string from internal samba format to unicode - */ - -int rpcstr_push(void *dest, const char *src, size_t dest_len, int flags) -{ - return push_ucs2(NULL, dest, src, dest_len, flags|STR_UNICODE|STR_NOALIGN); -} - -/* Converts a string from internal samba format to unicode. Always terminates. - * Actually just a wrapper round push_ucs2_talloc(). - */ - -int rpcstr_push_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src) -{ - size_t size; - if (push_ucs2_talloc(ctx, dest, src, &size)) - return size; - else - return -1; -} - -/******************************************************************* - Count the number of two-byte pairs in a UTF16 string. 
-********************************************************************/ - -size_t strlen_w(const smb_ucs2_t *src) -{ - size_t len; - smb_ucs2_t c; - - for(len = 0; *(COPY_UCS2_CHAR(&c,src)); src++, len++) { - ; - } - - return len; -} - -/******************************************************************* - Count up to max number of characters in a smb_ucs2_t string. -********************************************************************/ - -size_t strnlen_w(const smb_ucs2_t *src, size_t max) -{ - size_t len; - smb_ucs2_t c; - - for(len = 0; (len < max) && *(COPY_UCS2_CHAR(&c,src)); src++, len++) { - ; - } - - return len; -} - -/******************************************************************* - Wide strchr(). -********************************************************************/ - -smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c) -{ - smb_ucs2_t cp; - while (*(COPY_UCS2_CHAR(&cp,s))) { - if (c == cp) { - return (smb_ucs2_t *)s; - } - s++; - } - if (c == cp) { - return (smb_ucs2_t *)s; - } - - return NULL; -} - -smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c) -{ - return strchr_w(s, UCS2_CHAR(c)); -} - -/******************************************************************* - Wide strrchr(). -********************************************************************/ - -smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c) -{ - smb_ucs2_t cp; - const smb_ucs2_t *p = s; - int len = strlen_w(s); - - if (len == 0) { - return NULL; - } - p += (len - 1); - do { - if (c == *(COPY_UCS2_CHAR(&cp,p))) { - return (smb_ucs2_t *)p; - } - } while (p-- != s); - return NULL; -} - -/******************************************************************* - Wide version of strrchr that returns after doing strrchr 'n' times. 
-********************************************************************/ - -smb_ucs2_t *strnrchr_w(const smb_ucs2_t *s, smb_ucs2_t c, unsigned int n) -{ - smb_ucs2_t cp; - const smb_ucs2_t *p = s; - int len = strlen_w(s); - - if (len == 0 || !n) { - return NULL; - } - p += (len - 1); - do { - if (c == *(COPY_UCS2_CHAR(&cp,p))) { - n--; - } - - if (!n) { - return (smb_ucs2_t *)p; - } - } while (p-- != s); - return NULL; -} - -/******************************************************************* - Wide strstr(). -********************************************************************/ - -smb_ucs2_t *strstr_w(const smb_ucs2_t *s, const smb_ucs2_t *ins) -{ - smb_ucs2_t *r; - size_t inslen; - - if (!s || !*s || !ins || !*ins) { - return NULL; - } - - inslen = strlen_w(ins); - r = (smb_ucs2_t *)s; - - while ((r = strchr_w(r, *ins))) { - if (strncmp_w(r, ins, inslen) == 0) { - return r; - } - r++; - } - - return NULL; -} - -/******************************************************************* - Convert a string to lower case. - return True if any char is converted - - This is unsafe for any string involving a UTF16 character -********************************************************************/ - -bool strlower_w(smb_ucs2_t *s) -{ - smb_ucs2_t cp; - bool ret = False; - - while (*(COPY_UCS2_CHAR(&cp,s))) { - smb_ucs2_t v = tolower_m(cp); - if (v != cp) { - COPY_UCS2_CHAR(s,&v); - ret = True; - } - s++; - } - return ret; -} - -/******************************************************************* - Convert a string to upper case. 
- return True if any char is converted - - This is unsafe for any string involving a UTF16 character -********************************************************************/ - -bool strupper_w(smb_ucs2_t *s) -{ - smb_ucs2_t cp; - bool ret = False; - while (*(COPY_UCS2_CHAR(&cp,s))) { - smb_ucs2_t v = toupper_m(cp); - if (v != cp) { - COPY_UCS2_CHAR(s,&v); - ret = True; - } - s++; - } - return ret; -} - -int strcmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b) -{ - smb_ucs2_t cpa, cpb; - - while ((*(COPY_UCS2_CHAR(&cpb,b))) && (*(COPY_UCS2_CHAR(&cpa,a)) == cpb)) { - a++; - b++; - } - return (*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b))); - /* warning: if *a != *b and both are not 0 we return a random - greater or lesser than 0 number not realted to which - string is longer */ -} - -static int strncmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len) -{ - smb_ucs2_t cpa, cpb; - size_t n = 0; - - while ((n < len) && (*(COPY_UCS2_CHAR(&cpb,b))) && (*(COPY_UCS2_CHAR(&cpa,a)) == cpb)) { - a++; - b++; - n++; - } - return (len - n)?(*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b))):0; -} - -/******************************************************************* - Case insensitive string comparison. -********************************************************************/ - -int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b) -{ - smb_ucs2_t cpa, cpb; - - while ((*COPY_UCS2_CHAR(&cpb,b)) && toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb)) { - a++; - b++; - } - return (tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))); -} - -/******************************************************************* - Case insensitive string comparison, length limited. 
-********************************************************************/ - -int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len) -{ - smb_ucs2_t cpa, cpb; - size_t n = 0; - - while ((n < len) && *COPY_UCS2_CHAR(&cpb,b) && (toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb))) { - a++; - b++; - n++; - } - return (len - n)?(tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))):0; -} - -/* - The *_wa() functions take a combination of 7 bit ascii - and wide characters They are used so that you can use string - functions combining C string constants with ucs2 strings - - The char* arguments must NOT be multibyte - to be completely sure - of this only pass string constants */ - -int strcmp_wa(const smb_ucs2_t *a, const char *b) -{ - smb_ucs2_t cp = 0; - - while (*b && *(COPY_UCS2_CHAR(&cp,a)) == UCS2_CHAR(*b)) { - a++; - b++; - } - return (*(COPY_UCS2_CHAR(&cp,a)) - UCS2_CHAR(*b)); -} - -/************************************************************* - ascii only toupper - saves the need for smbd to be in C locale. -*************************************************************/ - -int toupper_ascii(int c) -{ - smb_ucs2_t uc = toupper_m(UCS2_CHAR(c)); - return UCS2_TO_CHAR(uc); -} - -/************************************************************* - ascii only tolower - saves the need for smbd to be in C locale. -*************************************************************/ - -int tolower_ascii(int c) -{ - smb_ucs2_t uc = tolower_m(UCS2_CHAR(c)); - return UCS2_TO_CHAR(uc); -} - -/************************************************************* - ascii only isupper - saves the need for smbd to be in C locale. -*************************************************************/ - -int isupper_ascii(int c) -{ - return isupper_m(UCS2_CHAR(c)); -} - -/************************************************************* - ascii only islower - saves the need for smbd to be in C locale. 
-*************************************************************/ - -int islower_ascii(int c) -{ - return islower_m(UCS2_CHAR(c)); -} diff --git a/source3/wscript_build b/source3/wscript_build index 300f5b3863..71d91388dd 100755 --- a/source3/wscript_build +++ b/source3/wscript_build @@ -963,7 +963,7 @@ bld.SAMBA3_SUBSYSTEM('tdb-wrap3', vars=locals()) bld.SAMBA3_SUBSYSTEM('CHARSET3', - source='''lib/util_str.c lib/util_unistr.c lib/charcnv.c lib/fstring.c''', + source='''lib/util_str.c lib/charcnv.c lib/fstring.c''', public_deps='ICONV_WRAPPER CODEPOINTS', deps='DYNCONFIG') -- cgit