diff options
author | Andrew Bartlett <abartlet@samba.org> | 2011-02-18 13:47:28 +1100 |
---|---|---|
committer | Andrew Bartlett <abartlet@samba.org> | 2011-02-18 18:41:01 +1100 |
commit | 2a3a86a86f3d1ab97adda563beda7ee35f6a2414 (patch) | |
tree | 38b9f9b9818482cdfe6ecdb9ee797fe2f3531e7d | |
parent | 5155a5f5c130ff5b71ce4e37877378a6967046b4 (diff) | |
download | samba-2a3a86a86f3d1ab97adda563beda7ee35f6a2414.tar.gz samba-2a3a86a86f3d1ab97adda563beda7ee35f6a2414.tar.bz2 samba-2a3a86a86f3d1ab97adda563beda7ee35f6a2414.zip |
lib/util/charcnv: Move iconv handle setup into common code
We now use struct smb_iconv_convenience at the core of all our
iconv code, and use global_iconv_convenience for callers that
don't specify one.
Andrew Bartlett
-rw-r--r-- | lib/util/charset/charcnv.c | 289 | ||||
-rw-r--r-- | lib/util/charset/charset.h | 5 | ||||
-rw-r--r-- | lib/util/charset/codepoints.c | 362 | ||||
-rw-r--r-- | lib/util/charset/util_unistr.c | 26 | ||||
-rw-r--r-- | lib/util/charset/wscript_build | 4 | ||||
-rw-r--r-- | source3/lib/charcnv.c | 302 | ||||
-rw-r--r-- | source4/param/loadparm.c | 6 | ||||
-rw-r--r-- | source4/param/util.c | 1 |
8 files changed, 380 insertions, 615 deletions
diff --git a/lib/util/charset/charcnv.c b/lib/util/charset/charcnv.c index 59b36e3062..dd2c725125 100644 --- a/lib/util/charset/charcnv.c +++ b/lib/util/charset/charcnv.c @@ -38,137 +38,6 @@ * @sa lib/iconv.c */ -struct smb_iconv_convenience { - TALLOC_CTX *child_ctx; - const char *unix_charset; - const char *dos_charset; - bool native_iconv; - smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS]; -}; - - -/** - * Return the name of a charset to give to iconv(). - **/ -static const char *charset_name(struct smb_iconv_convenience *ic, charset_t ch) -{ - switch (ch) { - case CH_UTF16: return "UTF-16LE"; - case CH_UNIX: return ic->unix_charset; - case CH_DOS: return ic->dos_charset; - case CH_UTF8: return "UTF8"; - case CH_UTF16BE: return "UTF-16BE"; - case CH_UTF16MUNGED: return "UTF16_MUNGED"; - default: - return "ASCII"; - } -} - -/** - re-initialize iconv conversion descriptors -**/ -static int close_iconv_convenience(struct smb_iconv_convenience *data) -{ - unsigned c1, c2; - for (c1=0;c1<NUM_CHARSETS;c1++) { - for (c2=0;c2<NUM_CHARSETS;c2++) { - if (data->conv_handles[c1][c2] != NULL) { - if (data->conv_handles[c1][c2] != (smb_iconv_t)-1) { - smb_iconv_close(data->conv_handles[c1][c2]); - } - data->conv_handles[c1][c2] = NULL; - } - } - } - - return 0; -} - -/* - the old_ic is passed in here as the smb_iconv_convenience structure - is used as a global pointer in some places (eg. python modules). We - don't want to invalidate those global pointers, but we do want to - update them with the right charset information when loadparm - runs. 
To do that we need to re-use the structure pointer, but - re-fill the elements in the structure with the updated values - */ -_PUBLIC_ struct smb_iconv_convenience *smb_iconv_convenience_reinit(TALLOC_CTX *mem_ctx, - const char *dos_charset, - const char *unix_charset, - bool native_iconv, - struct smb_iconv_convenience *old_ic) -{ - struct smb_iconv_convenience *ret; - - if (old_ic != NULL) { - ret = old_ic; - close_iconv_convenience(ret); - talloc_free(ret->child_ctx); - ZERO_STRUCTP(ret); - } else { - ret = talloc_zero(mem_ctx, struct smb_iconv_convenience); - } - if (ret == NULL) { - return NULL; - } - - /* we use a child context to allow us to free all ptrs without - freeing the structure itself */ - ret->child_ctx = talloc_new(ret); - if (ret->child_ctx == NULL) { - return NULL; - } - - talloc_set_destructor(ret, close_iconv_convenience); - - ret->dos_charset = talloc_strdup(ret->child_ctx, dos_charset); - ret->unix_charset = talloc_strdup(ret->child_ctx, unix_charset); - ret->native_iconv = native_iconv; - - return ret; -} - -/* - on-demand initialisation of conversion handles -*/ -static smb_iconv_t get_conv_handle(struct smb_iconv_convenience *ic, - charset_t from, charset_t to) -{ - const char *n1, *n2; - static bool initialised; - - if (initialised == false) { - initialised = true; - } - - if (ic->conv_handles[from][to]) { - return ic->conv_handles[from][to]; - } - - n1 = charset_name(ic, from); - n2 = charset_name(ic, to); - - ic->conv_handles[from][to] = smb_iconv_open_ex(ic, n2, n1, - ic->native_iconv); - - if (ic->conv_handles[from][to] == (smb_iconv_t)-1) { - if ((from == CH_DOS || to == CH_DOS) && - strcasecmp(charset_name(ic, CH_DOS), "ASCII") != 0) { - DEBUG(0,("dos charset '%s' unavailable - using ASCII\n", - charset_name(ic, CH_DOS))); - ic->dos_charset = "ASCII"; - - n1 = charset_name(ic, from); - n2 = charset_name(ic, to); - - ic->conv_handles[from][to] = - smb_iconv_open_ex(ic, n2, n1, ic->native_iconv); - } - } - - return 
ic->conv_handles[from][to]; -} - /** * Convert string from one encoding to another, making error checking etc * @@ -363,161 +232,3 @@ _PUBLIC_ bool convert_string_talloc_convenience(TALLOC_CTX *ctx, return true; } - -/** - * Return the unicode codepoint for the next character in the input - * string in the given src_charset. - * The unicode codepoint (codepoint_t) is an unsinged 32 bit value. - * - * Also return the number of bytes consumed (which tells the caller - * how many bytes to skip to get to the next src_charset-character). - * - * This is implemented (in the non-ascii-case) by first converting the - * next character in the input string to UTF16_LE and then calculating - * the unicode codepoint from that. - * - * Return INVALID_CODEPOINT if the next character cannot be converted. - */ -_PUBLIC_ codepoint_t next_codepoint_convenience_ext( - struct smb_iconv_convenience *ic, - const char *str, charset_t src_charset, - size_t *bytes_consumed) -{ - /* it cannot occupy more than 4 bytes in UTF16 format */ - uint8_t buf[4]; - smb_iconv_t descriptor; - size_t ilen_orig; - size_t ilen; - size_t olen; - char *outbuf; - - if ((str[0] & 0x80) == 0) { - *bytes_consumed = 1; - return (codepoint_t)str[0]; - } - - /* - * we assume that no multi-byte character can take more than 5 bytes. - * This is OK as we only support codepoints up to 1M (U+100000) - */ - ilen_orig = strnlen(str, 5); - ilen = ilen_orig; - - descriptor = get_conv_handle(ic, src_charset, CH_UTF16); - if (descriptor == (smb_iconv_t)-1) { - *bytes_consumed = 1; - return INVALID_CODEPOINT; - } - - /* - * this looks a little strange, but it is needed to cope with - * codepoints above 64k (U+1000) which are encoded as per RFC2781. 
- */ - olen = 2; - outbuf = (char *)buf; - smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); - if (olen == 2) { - olen = 4; - outbuf = (char *)buf; - smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); - if (olen == 4) { - /* we didn't convert any bytes */ - *bytes_consumed = 1; - return INVALID_CODEPOINT; - } - olen = 4 - olen; - } else { - olen = 2 - olen; - } - - *bytes_consumed = ilen_orig - ilen; - - if (olen == 2) { - return (codepoint_t)SVAL(buf, 0); - } - if (olen == 4) { - /* decode a 4 byte UTF16 character manually */ - return (codepoint_t)0x10000 + - (buf[2] | ((buf[3] & 0x3)<<8) | - (buf[0]<<10) | ((buf[1] & 0x3)<<18)); - } - - /* no other length is valid */ - return INVALID_CODEPOINT; -} - -/* - return the unicode codepoint for the next multi-byte CH_UNIX character - in the string - - also return the number of bytes consumed (which tells the caller - how many bytes to skip to get to the next CH_UNIX character) - - return INVALID_CODEPOINT if the next character cannot be converted -*/ -_PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic, - const char *str, size_t *size) -{ - return next_codepoint_convenience_ext(ic, str, CH_UNIX, size); -} - -/* - push a single codepoint into a CH_UNIX string the target string must - be able to hold the full character, which is guaranteed if it is at - least 5 bytes in size. 
The caller may pass less than 5 bytes if they - are sure the character will fit (for example, you can assume that - uppercase/lowercase of a character will not add more than 1 byte) - - return the number of bytes occupied by the CH_UNIX character, or - -1 on failure -*/ -_PUBLIC_ ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic, - char *str, codepoint_t c) -{ - smb_iconv_t descriptor; - uint8_t buf[4]; - size_t ilen, olen; - const char *inbuf; - - if (c < 128) { - *str = c; - return 1; - } - - descriptor = get_conv_handle(ic, - CH_UTF16, CH_UNIX); - if (descriptor == (smb_iconv_t)-1) { - return -1; - } - - if (c < 0x10000) { - ilen = 2; - olen = 5; - inbuf = (char *)buf; - SSVAL(buf, 0, c); - smb_iconv(descriptor, &inbuf, &ilen, &str, &olen); - if (ilen != 0) { - return -1; - } - return 5 - olen; - } - - c -= 0x10000; - - buf[0] = (c>>10) & 0xFF; - buf[1] = (c>>18) | 0xd8; - buf[2] = c & 0xFF; - buf[3] = ((c>>8) & 0x3) | 0xdc; - - ilen = 4; - olen = 5; - inbuf = (char *)buf; - - smb_iconv(descriptor, &inbuf, &ilen, &str, &olen); - if (ilen != 0) { - return -1; - } - return 5 - olen; -} - - diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h index 28d762578b..b4a5a55461 100644 --- a/lib/util/charset/charset.h +++ b/lib/util/charset/charset.h @@ -170,6 +170,10 @@ ssize_t iconv_talloc(TALLOC_CTX *mem_ctx, void *dest); extern struct smb_iconv_convenience *global_iconv_convenience; +struct smb_iconv_convenience *get_iconv_convenience(void); +smb_iconv_t get_conv_handle(struct smb_iconv_convenience *ic, + charset_t from, charset_t to); +const char *charset_name(struct smb_iconv_convenience *ic, charset_t ch); codepoint_t next_codepoint_ext(const char *str, charset_t src_charset, size_t *size); @@ -195,6 +199,7 @@ int codepoint_cmpi(codepoint_t c1, codepoint_t c2); struct smb_iconv_convenience *smb_iconv_convenience_reinit(TALLOC_CTX *mem_ctx, const char *dos_charset, const char *unix_charset, + const char *display_charset, bool 
native_iconv, struct smb_iconv_convenience *old_ic); diff --git a/lib/util/charset/codepoints.c b/lib/util/charset/codepoints.c index 53febb8b5e..01183e4ad4 100644 --- a/lib/util/charset/codepoints.c +++ b/lib/util/charset/codepoints.c @@ -1,8 +1,10 @@ /* Unix SMB/CIFS implementation. - Samba utility functions - Copyright (C) Andrew Tridgell 1992-2001 + Character set conversion Extensions + Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001 + Copyright (C) Andrew Tridgell 2001 Copyright (C) Simo Sorce 2001 + Copyright (C) Jelmer Vernooij 2007 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,12 +18,17 @@ You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ +*/ #include "includes.h" +#include "lib/util/charset/charset.h" #include "system/locale.h" #include "dynconfig.h" +#ifdef strcasecmp +#undef strcasecmp +#endif + /** * @file * @brief Unicode string manipulation @@ -126,3 +133,352 @@ _PUBLIC_ int codepoint_cmpi(codepoint_t c1, codepoint_t c2) } +struct smb_iconv_convenience { + TALLOC_CTX *child_ctx; + const char *unix_charset; + const char *dos_charset; + const char *display_charset; + bool native_iconv; + smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS]; +}; + +struct smb_iconv_convenience *global_iconv_convenience = NULL; + +struct smb_iconv_convenience *get_iconv_convenience(void) +{ + if (global_iconv_convenience == NULL) + global_iconv_convenience = smb_iconv_convenience_reinit(talloc_autofree_context(), + "ASCII", "UTF-8", "ASCII", true, NULL); + return global_iconv_convenience; +} + +/** + * Return the name of a charset to give to iconv(). 
+ **/ +const char *charset_name(struct smb_iconv_convenience *ic, charset_t ch) +{ + switch (ch) { + case CH_UTF16: return "UTF-16LE"; + case CH_UNIX: return ic->unix_charset; + case CH_DOS: return ic->dos_charset; + case CH_DISPLAY: return ic->display_charset; + case CH_UTF8: return "UTF8"; + case CH_UTF16BE: return "UTF-16BE"; + case CH_UTF16MUNGED: return "UTF16_MUNGED"; + default: + return "ASCII"; + } +} + +/** + re-initialize iconv conversion descriptors +**/ +static int close_iconv_convenience(struct smb_iconv_convenience *data) +{ + unsigned c1, c2; + for (c1=0;c1<NUM_CHARSETS;c1++) { + for (c2=0;c2<NUM_CHARSETS;c2++) { + if (data->conv_handles[c1][c2] != NULL) { + if (data->conv_handles[c1][c2] != (smb_iconv_t)-1) { + smb_iconv_close(data->conv_handles[c1][c2]); + } + data->conv_handles[c1][c2] = NULL; + } + } + } + + return 0; +} + +static const char *map_locale(const char *charset) +{ + if (strcmp(charset, "LOCALE") != 0) { + return charset; + } +#if defined(HAVE_NL_LANGINFO) && defined(CODESET) + { + const char *ln; + smb_iconv_t handle; + + ln = nl_langinfo(CODESET); + if (ln == NULL) { + DEBUG(1,("Unable to determine charset for LOCALE - using ASCII\n")); + return "ASCII"; + } + /* Check whether the charset name is supported + by iconv */ + handle = smb_iconv_open(ln, "UCS-2LE"); + if (handle == (smb_iconv_t) -1) { + DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln)); + return "ASCII"; + } else { + DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln)); + smb_iconv_close(handle); + } + return ln; + } +#endif + return "ASCII"; +} + +/* + the old_ic is passed in here as the smb_iconv_convenience structure + is used as a global pointer in some places (eg. python modules). We + don't want to invalidate those global pointers, but we do want to + update them with the right charset information when loadparm + runs. 
To do that we need to re-use the structure pointer, but + re-fill the elements in the structure with the updated values + */ +_PUBLIC_ struct smb_iconv_convenience *smb_iconv_convenience_reinit(TALLOC_CTX *mem_ctx, + const char *dos_charset, + const char *unix_charset, + const char *display_charset, + bool native_iconv, + struct smb_iconv_convenience *old_ic) +{ + struct smb_iconv_convenience *ret; + + display_charset = map_locale(display_charset); + + if (old_ic != NULL) { + ret = old_ic; + close_iconv_convenience(ret); + talloc_free(ret->child_ctx); + ZERO_STRUCTP(ret); + } else { + ret = talloc_zero(mem_ctx, struct smb_iconv_convenience); + } + if (ret == NULL) { + return NULL; + } + + /* we use a child context to allow us to free all ptrs without + freeing the structure itself */ + ret->child_ctx = talloc_new(ret); + if (ret->child_ctx == NULL) { + return NULL; + } + + talloc_set_destructor(ret, close_iconv_convenience); + + ret->dos_charset = talloc_strdup(ret->child_ctx, dos_charset); + ret->unix_charset = talloc_strdup(ret->child_ctx, unix_charset); + ret->display_charset = talloc_strdup(ret->child_ctx, display_charset); + ret->native_iconv = native_iconv; + + return ret; +} + +/* + on-demand initialisation of conversion handles +*/ +smb_iconv_t get_conv_handle(struct smb_iconv_convenience *ic, + charset_t from, charset_t to) +{ + const char *n1, *n2; + static bool initialised; + + if (initialised == false) { + initialised = true; + } + + if (ic->conv_handles[from][to]) { + return ic->conv_handles[from][to]; + } + + n1 = charset_name(ic, from); + n2 = charset_name(ic, to); + + ic->conv_handles[from][to] = smb_iconv_open_ex(ic, n2, n1, + ic->native_iconv); + + if (ic->conv_handles[from][to] == (smb_iconv_t)-1) { + if ((from == CH_DOS || to == CH_DOS) && + strcasecmp(charset_name(ic, CH_DOS), "ASCII") != 0) { + DEBUG(0,("dos charset '%s' unavailable - using ASCII\n", + charset_name(ic, CH_DOS))); + ic->dos_charset = "ASCII"; + + n1 = charset_name(ic, from); + 
n2 = charset_name(ic, to); + + ic->conv_handles[from][to] = + smb_iconv_open_ex(ic, n2, n1, ic->native_iconv); + } + } + + return ic->conv_handles[from][to]; +} + +/** + * Return the unicode codepoint for the next character in the input + * string in the given src_charset. + * The unicode codepoint (codepoint_t) is an unsinged 32 bit value. + * + * Also return the number of bytes consumed (which tells the caller + * how many bytes to skip to get to the next src_charset-character). + * + * This is implemented (in the non-ascii-case) by first converting the + * next character in the input string to UTF16_LE and then calculating + * the unicode codepoint from that. + * + * Return INVALID_CODEPOINT if the next character cannot be converted. + */ +_PUBLIC_ codepoint_t next_codepoint_convenience_ext( + struct smb_iconv_convenience *ic, + const char *str, charset_t src_charset, + size_t *bytes_consumed) +{ + /* it cannot occupy more than 4 bytes in UTF16 format */ + uint8_t buf[4]; + smb_iconv_t descriptor; + size_t ilen_orig; + size_t ilen; + size_t olen; + char *outbuf; + + if ((str[0] & 0x80) == 0) { + *bytes_consumed = 1; + return (codepoint_t)str[0]; + } + + /* + * we assume that no multi-byte character can take more than 5 bytes. + * This is OK as we only support codepoints up to 1M (U+100000) + */ + ilen_orig = strnlen(str, 5); + ilen = ilen_orig; + + descriptor = get_conv_handle(ic, src_charset, CH_UTF16); + if (descriptor == (smb_iconv_t)-1) { + *bytes_consumed = 1; + return INVALID_CODEPOINT; + } + + /* + * this looks a little strange, but it is needed to cope with + * codepoints above 64k (U+1000) which are encoded as per RFC2781. 
+ */ + olen = 2; + outbuf = (char *)buf; + smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); + if (olen == 2) { + olen = 4; + outbuf = (char *)buf; + smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); + if (olen == 4) { + /* we didn't convert any bytes */ + *bytes_consumed = 1; + return INVALID_CODEPOINT; + } + olen = 4 - olen; + } else { + olen = 2 - olen; + } + + *bytes_consumed = ilen_orig - ilen; + + if (olen == 2) { + return (codepoint_t)SVAL(buf, 0); + } + if (olen == 4) { + /* decode a 4 byte UTF16 character manually */ + return (codepoint_t)0x10000 + + (buf[2] | ((buf[3] & 0x3)<<8) | + (buf[0]<<10) | ((buf[1] & 0x3)<<18)); + } + + /* no other length is valid */ + return INVALID_CODEPOINT; +} + +/* + return the unicode codepoint for the next multi-byte CH_UNIX character + in the string + + also return the number of bytes consumed (which tells the caller + how many bytes to skip to get to the next CH_UNIX character) + + return INVALID_CODEPOINT if the next character cannot be converted +*/ +_PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic, + const char *str, size_t *size) +{ + return next_codepoint_convenience_ext(ic, str, CH_UNIX, size); +} + +/* + push a single codepoint into a CH_UNIX string the target string must + be able to hold the full character, which is guaranteed if it is at + least 5 bytes in size. 
The caller may pass less than 5 bytes if they + are sure the character will fit (for example, you can assume that + uppercase/lowercase of a character will not add more than 1 byte) + + return the number of bytes occupied by the CH_UNIX character, or + -1 on failure +*/ +_PUBLIC_ ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic, + char *str, codepoint_t c) +{ + smb_iconv_t descriptor; + uint8_t buf[4]; + size_t ilen, olen; + const char *inbuf; + + if (c < 128) { + *str = c; + return 1; + } + + descriptor = get_conv_handle(ic, + CH_UTF16, CH_UNIX); + if (descriptor == (smb_iconv_t)-1) { + return -1; + } + + if (c < 0x10000) { + ilen = 2; + olen = 5; + inbuf = (char *)buf; + SSVAL(buf, 0, c); + smb_iconv(descriptor, &inbuf, &ilen, &str, &olen); + if (ilen != 0) { + return -1; + } + return 5 - olen; + } + + c -= 0x10000; + + buf[0] = (c>>10) & 0xFF; + buf[1] = (c>>18) | 0xd8; + buf[2] = c & 0xFF; + buf[3] = ((c>>8) & 0x3) | 0xdc; + + ilen = 4; + olen = 5; + inbuf = (char *)buf; + + smb_iconv(descriptor, &inbuf, &ilen, &str, &olen); + if (ilen != 0) { + return -1; + } + return 5 - olen; +} + +_PUBLIC_ codepoint_t next_codepoint_ext(const char *str, charset_t src_charset, + size_t *size) +{ + return next_codepoint_convenience_ext(get_iconv_convenience(), str, + src_charset, size); +} + +_PUBLIC_ codepoint_t next_codepoint(const char *str, size_t *size) +{ + return next_codepoint_convenience(get_iconv_convenience(), str, size); +} + +_PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c) +{ + return push_codepoint_convenience(get_iconv_convenience(), str, c); +} diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c index 410547400d..b6bfb29e7d 100644 --- a/lib/util/charset/util_unistr.c +++ b/lib/util/charset/util_unistr.c @@ -21,16 +21,6 @@ #include "includes.h" #include "system/locale.h" -struct smb_iconv_convenience *global_iconv_convenience = NULL; - -static inline struct smb_iconv_convenience *get_iconv_convenience(void) 
-{ - if (global_iconv_convenience == NULL) - global_iconv_convenience = smb_iconv_convenience_reinit(talloc_autofree_context(), - "ASCII", "UTF-8", true, NULL); - return global_iconv_convenience; -} - /** Case insensitive string compararison **/ @@ -1043,19 +1033,3 @@ _PUBLIC_ bool convert_string_talloc(TALLOC_CTX *ctx, allow_badcharcnv); } -_PUBLIC_ codepoint_t next_codepoint_ext(const char *str, charset_t src_charset, - size_t *size) -{ - return next_codepoint_convenience_ext(get_iconv_convenience(), str, - src_charset, size); -} - -_PUBLIC_ codepoint_t next_codepoint(const char *str, size_t *size) -{ - return next_codepoint_convenience(get_iconv_convenience(), str, size); -} - -_PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c) -{ - return push_codepoint_convenience(get_iconv_convenience(), str, c); -} diff --git a/lib/util/charset/wscript_build b/lib/util/charset/wscript_build index 18479a9978..7dcd189036 100644 --- a/lib/util/charset/wscript_build +++ b/lib/util/charset/wscript_build @@ -4,7 +4,7 @@ if bld.env._SAMBA_BUILD_ == 4: bld.SAMBA_SUBSYSTEM('CHARSET', source='charcnv.c util_unistr.c', - public_deps='ICONV_WRAPPER CODEPOINTS', + public_deps='CODEPOINTS', public_headers='charset.h', ) @@ -14,5 +14,5 @@ bld.SAMBA_SUBSYSTEM('ICONV_WRAPPER', bld.SAMBA_SUBSYSTEM('CODEPOINTS', source='codepoints.c', - deps='DYNCONFIG' + deps='DYNCONFIG ICONV_WRAPPER' ) diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c index 4c98f8f339..2723599599 100644 --- a/source3/lib/charcnv.c +++ b/source3/lib/charcnv.c @@ -45,68 +45,9 @@ char lp_failed_convert_char(void) */ -static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS]; static bool conv_silent; /* Should we do a debug if the conversion fails ? */ static bool initialized; -/** - * Return the name of a charset to give to iconv(). 
- **/ -static const char *charset_name(charset_t ch) -{ - const char *ret; - - switch (ch) { - case CH_UTF16LE: - ret = "UTF-16LE"; - break; - case CH_UTF16BE: - ret = "UTF-16BE"; - break; - case CH_UNIX: - ret = lp_unix_charset(); - break; - case CH_DOS: - ret = lp_dos_charset(); - break; - case CH_DISPLAY: - ret = lp_display_charset(); - break; - case CH_UTF8: - ret = "UTF8"; - break; - default: - ret = NULL; - } - -#if defined(HAVE_NL_LANGINFO) && defined(CODESET) - if (ret && !strcmp(ret, "LOCALE")) { - const char *ln = NULL; - -#ifdef HAVE_SETLOCALE - setlocale(LC_ALL, ""); -#endif - ln = nl_langinfo(CODESET); - if (ln) { - /* Check whether the charset name is supported - by iconv */ - smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE"); - if (handle == (smb_iconv_t) -1) { - DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln)); - ln = NULL; - } else { - DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln)); - smb_iconv_close(handle); - } - } - ret = ln; - } -#endif - - if (!ret || !*ret) ret = "ASCII"; - return ret; -} - void lazy_initialize_conv(void) { if (!initialized) { @@ -121,16 +62,7 @@ void lazy_initialize_conv(void) **/ void gfree_charcnv(void) { - int c1, c2; - - for (c1=0;c1<NUM_CHARSETS;c1++) { - for (c2=0;c2<NUM_CHARSETS;c2++) { - if ( conv_handles[c1][c2] ) { - smb_iconv_close( conv_handles[c1][c2] ); - conv_handles[c1][c2] = 0; - } - } - } + TALLOC_FREE(global_iconv_convenience); initialized = false; } @@ -143,51 +75,9 @@ void gfree_charcnv(void) **/ void init_iconv(void) { - int c1, c2; - bool did_reload = False; - - /* so that charset_name() works we need to get the UNIX<->UCS2 going - first */ - if (!conv_handles[CH_UNIX][CH_UTF16LE]) - conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII"); - - if (!conv_handles[CH_UTF16LE][CH_UNIX]) - conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE)); - - for (c1=0;c1<NUM_CHARSETS;c1++) { - for (c2=0;c2<NUM_CHARSETS;c2++) 
{ - const char *n1 = charset_name((charset_t)c1); - const char *n2 = charset_name((charset_t)c2); - if (conv_handles[c1][c2] && - strcmp(n1, conv_handles[c1][c2]->from_name) == 0 && - strcmp(n2, conv_handles[c1][c2]->to_name) == 0) - continue; - - did_reload = True; - - if (conv_handles[c1][c2]) - smb_iconv_close(conv_handles[c1][c2]); - - conv_handles[c1][c2] = smb_iconv_open(n2,n1); - if (conv_handles[c1][c2] == (smb_iconv_t)-1) { - DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n", - charset_name((charset_t)c1), charset_name((charset_t)c2))); - if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) { - n1 = "ASCII"; - } - if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) { - n2 = "ASCII"; - } - DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n", - n1, n2 )); - conv_handles[c1][c2] = smb_iconv_open(n2,n1); - if (!conv_handles[c1][c2]) { - DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2)); - smb_panic("init_iconv: conv_handle initialization failed"); - } - } - } - } + global_iconv_convenience = smb_iconv_convenience_reinit(NULL, lp_dos_charset(), + lp_unix_charset(), lp_display_charset(), + true, global_iconv_convenience); } /** @@ -214,10 +104,11 @@ static size_t convert_string_internal(charset_t from, charset_t to, const char* inbuf = (const char*)src; char* outbuf = (char*)dest; smb_iconv_t descriptor; + struct smb_iconv_convenience *ic; lazy_initialize_conv(); - - descriptor = conv_handles[from][to]; + ic = get_iconv_convenience(); + descriptor = get_conv_handle(ic, from, to); if (srclen == (size_t)-1) { if (from == CH_UTF16LE || from == CH_UTF16BE) { @@ -255,11 +146,11 @@ static size_t convert_string_internal(charset_t from, charset_t to, if (!conv_silent) { if (from == CH_UNIX) { DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n", - charset_name(from), charset_name(to), + charset_name(ic, from), charset_name(ic, to), (unsigned int)srclen, (unsigned int)destlen, (const char *)src)); } else { 
DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n", - charset_name(from), charset_name(to), + charset_name(ic, from), charset_name(ic, to), (unsigned int)srclen, (unsigned int)destlen)); } } @@ -552,6 +443,7 @@ bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to, char *outbuf = NULL, *ob = NULL; smb_iconv_t descriptor; void **dest = (void **)dst; + struct smb_iconv_convenience *ic; *dest = NULL; @@ -576,8 +468,8 @@ bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to, } lazy_initialize_conv(); - - descriptor = conv_handles[from][to]; + ic = get_iconv_convenience(); + descriptor = get_conv_handle(ic, from, to); if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { if (!conv_silent) @@ -1784,173 +1676,3 @@ size_t align_string(const void *base_ptr, const char *p, int flags) return 0; } -/** - * Return the unicode codepoint for the next character in the input - * string in the given src_charset. - * The unicode codepoint (codepoint_t) is an unsinged 32 bit value. - * - * Also return the number of bytes consumed (which tells the caller - * how many bytes to skip to get to the next src_charset-character). - * - * This is implemented (in the non-ascii-case) by first converting the - * next character in the input string to UTF16_LE and then calculating - * the unicode codepoint from that. - * - * Return INVALID_CODEPOINT if the next character cannot be converted. - */ - -codepoint_t next_codepoint_ext(const char *str, charset_t src_charset, - size_t *bytes_consumed) -{ - /* It cannot occupy more than 4 bytes in UTF16 format */ - uint8_t buf[4]; - smb_iconv_t descriptor; - size_t ilen_orig; - size_t ilen; - size_t olen; - char *outbuf; - - /* fastpath if the character is ASCII */ - if ((str[0] & 0x80) == 0) { - *bytes_consumed = 1; - return (codepoint_t)str[0]; - } - - /* - * We assume that no multi-byte character can take more than - * 5 bytes. 
This is OK as we only support codepoints up to 1M (U+100000) - */ - - ilen_orig = strnlen(str, 5); - ilen = ilen_orig; - - lazy_initialize_conv(); - - descriptor = conv_handles[src_charset][CH_UTF16LE]; - if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { - *bytes_consumed = 1; - return INVALID_CODEPOINT; - } - - /* - * This looks a little strange, but it is needed to cope - * with codepoints above 64k (U+10000) which are encoded as per RFC2781. - */ - olen = 2; - outbuf = (char *)buf; - smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); - if (olen == 2) { - /* - * We failed to convert to a 2 byte character. - * See if we can convert to a 4 UTF16-LE byte char encoding. - */ - olen = 4; - outbuf = (char *)buf; - smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); - if (olen == 4) { - /* We didn't convert any bytes */ - *bytes_consumed = 1; - return INVALID_CODEPOINT; - } - olen = 4 - olen; - } else { - olen = 2 - olen; - } - - *bytes_consumed = ilen_orig - ilen; - - if (olen == 2) { - /* 2 byte, UTF16-LE encoded value. */ - return (codepoint_t)SVAL(buf, 0); - } - if (olen == 4) { - /* - * Decode a 4 byte UTF16-LE character manually. - * See RFC2871 for the encoding machanism. - */ - codepoint_t w1 = SVAL(buf,0) & ~0xD800; - codepoint_t w2 = SVAL(buf,2) & ~0xDC00; - - return (codepoint_t)0x10000 + - (w1 << 10) + w2; - } - - /* no other length is valid */ - return INVALID_CODEPOINT; -} - -/* - Return the unicode codepoint for the next multi-byte CH_UNIX character - in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value. - - Also return the number of bytes consumed (which tells the caller - how many bytes to skip to get to the next CH_UNIX character). - - Return INVALID_CODEPOINT if the next character cannot be converted. 
-*/ - -codepoint_t next_codepoint(const char *str, size_t *size) -{ - return next_codepoint_ext(str, CH_UNIX, size); -} - -/* - push a single codepoint into a CH_UNIX string the target string must - be able to hold the full character, which is guaranteed if it is at - least 5 bytes in size. The caller may pass less than 5 bytes if they - are sure the character will fit (for example, you can assume that - uppercase/lowercase of a character will not add more than 1 byte) - - return the number of bytes occupied by the CH_UNIX character, or - -1 on failure -*/ -_PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c) -{ - smb_iconv_t descriptor; - uint8_t buf[4]; - size_t ilen, olen; - const char *inbuf; - - if (c < 128) { - *str = c; - return 1; - } - - lazy_initialize_conv(); - - descriptor = conv_handles[CH_UNIX][CH_UTF16LE]; - if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { - return -1; - } - - if (c < 0x10000) { - ilen = 2; - olen = 5; - inbuf = (char *)buf; - SSVAL(buf, 0, c); - smb_iconv(descriptor, &inbuf, &ilen, &str, &olen); - if (ilen != 0) { - return -1; - } - return 5 - olen; - } - - c -= 0x10000; - - buf[0] = (c>>10) & 0xFF; - buf[1] = (c>>18) | 0xd8; - buf[2] = c & 0xFF; - buf[3] = ((c>>8) & 0x3) | 0xdc; - - ilen = 4; - olen = 5; - inbuf = (char *)buf; - - smb_iconv(descriptor, &inbuf, &ilen, &str, &olen); - if (ilen != 0) { - return -1; - } - return 5 - olen; -} - - diff --git a/source4/param/loadparm.c b/source4/param/loadparm.c index 3d87d6fb12..31157b2833 100644 --- a/source4/param/loadparm.c +++ b/source4/param/loadparm.c @@ -2776,11 +2776,7 @@ int lpcfg_maxprintjobs(struct loadparm_service *service, struct loadparm_service struct smb_iconv_convenience *lpcfg_iconv_convenience(struct loadparm_context *lp_ctx) { if (lp_ctx == NULL) { - static struct smb_iconv_convenience *fallback_ic = NULL; - if (fallback_ic == NULL) - fallback_ic = smb_iconv_convenience_reinit(talloc_autofree_context(), - "CP850", "UTF8", true, NULL); - return 
fallback_ic; + return get_iconv_convenience(); } return lp_ctx->iconv_convenience; } diff --git a/source4/param/util.c b/source4/param/util.c index fd12bb1eca..c6dca6076e 100644 --- a/source4/param/util.c +++ b/source4/param/util.c @@ -304,6 +304,7 @@ struct smb_iconv_convenience *smb_iconv_convenience_reinit_lp(TALLOC_CTX *mem_ct { return smb_iconv_convenience_reinit(mem_ctx, lpcfg_dos_charset(lp_ctx), lpcfg_unix_charset(lp_ctx), + lpcfg_display_charset(lp_ctx), lpcfg_parm_bool(lp_ctx, NULL, "iconv", "native", true), old_ic); } |