summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- lib/util/charset/charcnv.c 289
-rw-r--r-- lib/util/charset/charset.h 5
-rw-r--r-- lib/util/charset/codepoints.c 362
-rw-r--r-- lib/util/charset/util_unistr.c 26
-rw-r--r-- lib/util/charset/wscript_build 4
-rw-r--r-- source3/lib/charcnv.c 302
-rw-r--r-- source4/param/loadparm.c 6
-rw-r--r-- source4/param/util.c 1
8 files changed, 380 insertions, 615 deletions
diff --git a/lib/util/charset/charcnv.c b/lib/util/charset/charcnv.c
index 59b36e3062..dd2c725125 100644
--- a/lib/util/charset/charcnv.c
+++ b/lib/util/charset/charcnv.c
@@ -38,137 +38,6 @@
* @sa lib/iconv.c
*/
-struct smb_iconv_convenience {
- TALLOC_CTX *child_ctx;
- const char *unix_charset;
- const char *dos_charset;
- bool native_iconv;
- smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
-};
-
-
-/**
- * Return the name of a charset to give to iconv().
- **/
-static const char *charset_name(struct smb_iconv_convenience *ic, charset_t ch)
-{
- switch (ch) {
- case CH_UTF16: return "UTF-16LE";
- case CH_UNIX: return ic->unix_charset;
- case CH_DOS: return ic->dos_charset;
- case CH_UTF8: return "UTF8";
- case CH_UTF16BE: return "UTF-16BE";
- case CH_UTF16MUNGED: return "UTF16_MUNGED";
- default:
- return "ASCII";
- }
-}
-
-/**
- re-initialize iconv conversion descriptors
-**/
-static int close_iconv_convenience(struct smb_iconv_convenience *data)
-{
- unsigned c1, c2;
- for (c1=0;c1<NUM_CHARSETS;c1++) {
- for (c2=0;c2<NUM_CHARSETS;c2++) {
- if (data->conv_handles[c1][c2] != NULL) {
- if (data->conv_handles[c1][c2] != (smb_iconv_t)-1) {
- smb_iconv_close(data->conv_handles[c1][c2]);
- }
- data->conv_handles[c1][c2] = NULL;
- }
- }
- }
-
- return 0;
-}
-
-/*
- the old_ic is passed in here as the smb_iconv_convenience structure
- is used as a global pointer in some places (eg. python modules). We
- don't want to invalidate those global pointers, but we do want to
- update them with the right charset information when loadparm
- runs. To do that we need to re-use the structure pointer, but
- re-fill the elements in the structure with the updated values
- */
-_PUBLIC_ struct smb_iconv_convenience *smb_iconv_convenience_reinit(TALLOC_CTX *mem_ctx,
- const char *dos_charset,
- const char *unix_charset,
- bool native_iconv,
- struct smb_iconv_convenience *old_ic)
-{
- struct smb_iconv_convenience *ret;
-
- if (old_ic != NULL) {
- ret = old_ic;
- close_iconv_convenience(ret);
- talloc_free(ret->child_ctx);
- ZERO_STRUCTP(ret);
- } else {
- ret = talloc_zero(mem_ctx, struct smb_iconv_convenience);
- }
- if (ret == NULL) {
- return NULL;
- }
-
- /* we use a child context to allow us to free all ptrs without
- freeing the structure itself */
- ret->child_ctx = talloc_new(ret);
- if (ret->child_ctx == NULL) {
- return NULL;
- }
-
- talloc_set_destructor(ret, close_iconv_convenience);
-
- ret->dos_charset = talloc_strdup(ret->child_ctx, dos_charset);
- ret->unix_charset = talloc_strdup(ret->child_ctx, unix_charset);
- ret->native_iconv = native_iconv;
-
- return ret;
-}
-
-/*
- on-demand initialisation of conversion handles
-*/
-static smb_iconv_t get_conv_handle(struct smb_iconv_convenience *ic,
- charset_t from, charset_t to)
-{
- const char *n1, *n2;
- static bool initialised;
-
- if (initialised == false) {
- initialised = true;
- }
-
- if (ic->conv_handles[from][to]) {
- return ic->conv_handles[from][to];
- }
-
- n1 = charset_name(ic, from);
- n2 = charset_name(ic, to);
-
- ic->conv_handles[from][to] = smb_iconv_open_ex(ic, n2, n1,
- ic->native_iconv);
-
- if (ic->conv_handles[from][to] == (smb_iconv_t)-1) {
- if ((from == CH_DOS || to == CH_DOS) &&
- strcasecmp(charset_name(ic, CH_DOS), "ASCII") != 0) {
- DEBUG(0,("dos charset '%s' unavailable - using ASCII\n",
- charset_name(ic, CH_DOS)));
- ic->dos_charset = "ASCII";
-
- n1 = charset_name(ic, from);
- n2 = charset_name(ic, to);
-
- ic->conv_handles[from][to] =
- smb_iconv_open_ex(ic, n2, n1, ic->native_iconv);
- }
- }
-
- return ic->conv_handles[from][to];
-}
-
/**
* Convert string from one encoding to another, making error checking etc
*
@@ -363,161 +232,3 @@ _PUBLIC_ bool convert_string_talloc_convenience(TALLOC_CTX *ctx,
return true;
}
-
-/**
- * Return the unicode codepoint for the next character in the input
- * string in the given src_charset.
- * The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
- *
- * Also return the number of bytes consumed (which tells the caller
- * how many bytes to skip to get to the next src_charset-character).
- *
- * This is implemented (in the non-ascii-case) by first converting the
- * next character in the input string to UTF16_LE and then calculating
- * the unicode codepoint from that.
- *
- * Return INVALID_CODEPOINT if the next character cannot be converted.
- */
-_PUBLIC_ codepoint_t next_codepoint_convenience_ext(
- struct smb_iconv_convenience *ic,
- const char *str, charset_t src_charset,
- size_t *bytes_consumed)
-{
- /* it cannot occupy more than 4 bytes in UTF16 format */
- uint8_t buf[4];
- smb_iconv_t descriptor;
- size_t ilen_orig;
- size_t ilen;
- size_t olen;
- char *outbuf;
-
- if ((str[0] & 0x80) == 0) {
- *bytes_consumed = 1;
- return (codepoint_t)str[0];
- }
-
- /*
- * we assume that no multi-byte character can take more than 5 bytes.
- * This is OK as we only support codepoints up to 1M (U+100000)
- */
- ilen_orig = strnlen(str, 5);
- ilen = ilen_orig;
-
- descriptor = get_conv_handle(ic, src_charset, CH_UTF16);
- if (descriptor == (smb_iconv_t)-1) {
- *bytes_consumed = 1;
- return INVALID_CODEPOINT;
- }
-
- /*
- * this looks a little strange, but it is needed to cope with
- * codepoints above 64k (U+1000) which are encoded as per RFC2781.
- */
- olen = 2;
- outbuf = (char *)buf;
- smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
- if (olen == 2) {
- olen = 4;
- outbuf = (char *)buf;
- smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
- if (olen == 4) {
- /* we didn't convert any bytes */
- *bytes_consumed = 1;
- return INVALID_CODEPOINT;
- }
- olen = 4 - olen;
- } else {
- olen = 2 - olen;
- }
-
- *bytes_consumed = ilen_orig - ilen;
-
- if (olen == 2) {
- return (codepoint_t)SVAL(buf, 0);
- }
- if (olen == 4) {
- /* decode a 4 byte UTF16 character manually */
- return (codepoint_t)0x10000 +
- (buf[2] | ((buf[3] & 0x3)<<8) |
- (buf[0]<<10) | ((buf[1] & 0x3)<<18));
- }
-
- /* no other length is valid */
- return INVALID_CODEPOINT;
-}
-
-/*
- return the unicode codepoint for the next multi-byte CH_UNIX character
- in the string
-
- also return the number of bytes consumed (which tells the caller
- how many bytes to skip to get to the next CH_UNIX character)
-
- return INVALID_CODEPOINT if the next character cannot be converted
-*/
-_PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic,
- const char *str, size_t *size)
-{
- return next_codepoint_convenience_ext(ic, str, CH_UNIX, size);
-}
-
-/*
- push a single codepoint into a CH_UNIX string the target string must
- be able to hold the full character, which is guaranteed if it is at
- least 5 bytes in size. The caller may pass less than 5 bytes if they
- are sure the character will fit (for example, you can assume that
- uppercase/lowercase of a character will not add more than 1 byte)
-
- return the number of bytes occupied by the CH_UNIX character, or
- -1 on failure
-*/
-_PUBLIC_ ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic,
- char *str, codepoint_t c)
-{
- smb_iconv_t descriptor;
- uint8_t buf[4];
- size_t ilen, olen;
- const char *inbuf;
-
- if (c < 128) {
- *str = c;
- return 1;
- }
-
- descriptor = get_conv_handle(ic,
- CH_UTF16, CH_UNIX);
- if (descriptor == (smb_iconv_t)-1) {
- return -1;
- }
-
- if (c < 0x10000) {
- ilen = 2;
- olen = 5;
- inbuf = (char *)buf;
- SSVAL(buf, 0, c);
- smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
- if (ilen != 0) {
- return -1;
- }
- return 5 - olen;
- }
-
- c -= 0x10000;
-
- buf[0] = (c>>10) & 0xFF;
- buf[1] = (c>>18) | 0xd8;
- buf[2] = c & 0xFF;
- buf[3] = ((c>>8) & 0x3) | 0xdc;
-
- ilen = 4;
- olen = 5;
- inbuf = (char *)buf;
-
- smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
- if (ilen != 0) {
- return -1;
- }
- return 5 - olen;
-}
-
-
diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h
index 28d762578b..b4a5a55461 100644
--- a/lib/util/charset/charset.h
+++ b/lib/util/charset/charset.h
@@ -170,6 +170,10 @@ ssize_t iconv_talloc(TALLOC_CTX *mem_ctx,
void *dest);
extern struct smb_iconv_convenience *global_iconv_convenience;
+struct smb_iconv_convenience *get_iconv_convenience(void);
+smb_iconv_t get_conv_handle(struct smb_iconv_convenience *ic,
+ charset_t from, charset_t to);
+const char *charset_name(struct smb_iconv_convenience *ic, charset_t ch);
codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
size_t *size);
@@ -195,6 +199,7 @@ int codepoint_cmpi(codepoint_t c1, codepoint_t c2);
struct smb_iconv_convenience *smb_iconv_convenience_reinit(TALLOC_CTX *mem_ctx,
const char *dos_charset,
const char *unix_charset,
+ const char *display_charset,
bool native_iconv,
struct smb_iconv_convenience *old_ic);
diff --git a/lib/util/charset/codepoints.c b/lib/util/charset/codepoints.c
index 53febb8b5e..01183e4ad4 100644
--- a/lib/util/charset/codepoints.c
+++ b/lib/util/charset/codepoints.c
@@ -1,8 +1,10 @@
/*
Unix SMB/CIFS implementation.
- Samba utility functions
- Copyright (C) Andrew Tridgell 1992-2001
+ Character set conversion Extensions
+ Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
+ Copyright (C) Andrew Tridgell 2001
Copyright (C) Simo Sorce 2001
+ Copyright (C) Jelmer Vernooij 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -16,12 +18,17 @@
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+*/
#include "includes.h"
+#include "lib/util/charset/charset.h"
#include "system/locale.h"
#include "dynconfig.h"
+#ifdef strcasecmp
+#undef strcasecmp
+#endif
+
/**
* @file
* @brief Unicode string manipulation
@@ -126,3 +133,352 @@ _PUBLIC_ int codepoint_cmpi(codepoint_t c1, codepoint_t c2)
}
+struct smb_iconv_convenience { /* configured charset names plus a lazily filled table of conversion handles */
+ TALLOC_CTX *child_ctx; /* talloc parent of the name strings below; freed and recreated by smb_iconv_convenience_reinit() */
+ const char *unix_charset; /* name charset_name() returns for CH_UNIX */
+ const char *dos_charset; /* name returned for CH_DOS; get_conv_handle() may replace it with "ASCII" */
+ const char *display_charset; /* name returned for CH_DISPLAY, stored after map_locale() has been applied */
+ bool native_iconv; /* passed unchanged to smb_iconv_open_ex() */
+ smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS]; /* indexed [from][to]; NULL = not opened yet, (smb_iconv_t)-1 = open failed */
+};
+
+struct smb_iconv_convenience *global_iconv_convenience = NULL; /* shared instance; get_iconv_convenience() fills it in on demand */
+
+struct smb_iconv_convenience *get_iconv_convenience(void) /* return the shared instance, creating a default one if it is still NULL */
+{
+ if (global_iconv_convenience == NULL) /* nothing has assigned the global yet */
+ global_iconv_convenience = smb_iconv_convenience_reinit(talloc_autofree_context(),
+ "ASCII", "UTF-8", "ASCII", true, NULL); /* defaults: dos=ASCII, unix=UTF-8, display=ASCII, native_iconv=true, no old struct to reuse */
+ return global_iconv_convenience; /* still NULL if the reinit call above returned NULL */
+}
+
+/**
+ * Return the name of charset ch to give to iconv(): fixed strings for the UTF variants, the names stored in ic for CH_UNIX, CH_DOS and CH_DISPLAY.
+ **/
+const char *charset_name(struct smb_iconv_convenience *ic, charset_t ch)
+{
+ switch (ch) {
+ case CH_UTF16: return "UTF-16LE";
+ case CH_UNIX: return ic->unix_charset; /* returned as stored: no fallback here for a NULL or empty name */
+ case CH_DOS: return ic->dos_charset; /* same: returned as stored */
+ case CH_DISPLAY: return ic->display_charset; /* same: returned as stored */
+ case CH_UTF8: return "UTF8";
+ case CH_UTF16BE: return "UTF-16BE";
+ case CH_UTF16MUNGED: return "UTF16_MUNGED";
+ default:
+ return "ASCII"; /* any charset_t value not listed above */
+ }
+}
+
+/**
+ close every open iconv conversion descriptor held in data and reset each table slot to NULL; always returns 0
+**/
+static int close_iconv_convenience(struct smb_iconv_convenience *data)
+{
+ unsigned c1, c2;
+ for (c1=0;c1<NUM_CHARSETS;c1++) {
+ for (c2=0;c2<NUM_CHARSETS;c2++) {
+ if (data->conv_handles[c1][c2] != NULL) { /* slot has been used */
+ if (data->conv_handles[c1][c2] != (smb_iconv_t)-1) { /* -1 marks a failed open: there is nothing to close */
+ smb_iconv_close(data->conv_handles[c1][c2]);
+ }
+ data->conv_handles[c1][c2] = NULL; /* back to "not opened yet", so a later lookup reopens it */
+ }
+ }
+ }
+
+ return 0;
+}
+
+static const char *map_locale(const char *charset) /* resolve the pseudo-charset "LOCALE" to a concrete name; any other name is returned unchanged */
+{
+ if (strcmp(charset, "LOCALE") != 0) { /* NOTE(review): charset is passed to strcmp() unchecked - assumes callers never pass NULL */
+ return charset;
+ }
+#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
+ {
+ const char *ln;
+ smb_iconv_t handle;
+
+ ln = nl_langinfo(CODESET); /* NOTE(review): no setlocale() call is made in this function, so the answer depends on the locale already in effect */
+ if (ln == NULL) {
+ DEBUG(1,("Unable to determine charset for LOCALE - using ASCII\n"));
+ return "ASCII";
+ }
+ /* Check whether the charset name is supported
+ by iconv */
+ handle = smb_iconv_open(ln, "UCS-2LE"); /* probe only: on success the handle is closed again below */
+ if (handle == (smb_iconv_t) -1) {
+ DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
+ return "ASCII";
+ } else {
+ DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
+ smb_iconv_close(handle);
+ }
+ return ln; /* the pointer obtained from nl_langinfo(), not a copy */
+ }
+#endif
+ return "ASCII"; /* reached only when the #if block above is compiled out */
+}
+
+/*
+ the old_ic is passed in here as the smb_iconv_convenience structure
+ is used as a global pointer in some places (eg. python modules). We
+ don't want to invalidate those global pointers, but we do want to
+ update them with the right charset information when loadparm
+ runs. To do that we need to re-use the structure pointer, but
+ re-fill the elements in the structure with the updated values.
+ Returns old_ic (refilled) or a new structure allocated on mem_ctx; NULL on allocation failure. */
+_PUBLIC_ struct smb_iconv_convenience *smb_iconv_convenience_reinit(TALLOC_CTX *mem_ctx,
+ const char *dos_charset,
+ const char *unix_charset,
+ const char *display_charset,
+ bool native_iconv,
+ struct smb_iconv_convenience *old_ic)
+{
+ struct smb_iconv_convenience *ret;
+
+ display_charset = map_locale(display_charset); /* turn "LOCALE" into a concrete charset name before it is stored */
+
+ if (old_ic != NULL) {
+ ret = old_ic; /* reuse the caller's structure so existing pointers to it stay valid */
+ close_iconv_convenience(ret); /* close the handles opened under the old charset names */
+ talloc_free(ret->child_ctx); /* releases the old name strings allocated on child_ctx */
+ ZERO_STRUCTP(ret); /* clear names and handle table; the structure allocation itself is kept */
+ } else {
+ ret = talloc_zero(mem_ctx, struct smb_iconv_convenience);
+ }
+ if (ret == NULL) {
+ return NULL;
+ }
+
+ /* we use a child context to allow us to free all ptrs without
+ freeing the structure itself */
+ ret->child_ctx = talloc_new(ret);
+ if (ret->child_ctx == NULL) {
+ return NULL; /* NOTE(review): ret is not freed on this path; when old_ic was passed in, it is left zeroed */
+ }
+
+ talloc_set_destructor(ret, close_iconv_convenience); /* handles are closed when ret itself is freed */
+
+ ret->dos_charset = talloc_strdup(ret->child_ctx, dos_charset); /* NOTE(review): none of these three strdup results is checked for NULL */
+ ret->unix_charset = talloc_strdup(ret->child_ctx, unix_charset);
+ ret->display_charset = talloc_strdup(ret->child_ctx, display_charset);
+ ret->native_iconv = native_iconv;
+
+ return ret;
+}
+
+/*
+ on-demand initialisation of conversion handles: return the cached handle for from -> to, opening it on the first request
+*/
+smb_iconv_t get_conv_handle(struct smb_iconv_convenience *ic,
+ charset_t from, charset_t to)
+{
+ const char *n1, *n2;
+ static bool initialised; /* NOTE(review): only tested and set just below; nothing else in this function reads it */
+
+ if (initialised == false) {
+ initialised = true;
+ }
+
+ if (ic->conv_handles[from][to]) { /* non-NULL: opened earlier, or a cached (smb_iconv_t)-1 failure */
+ return ic->conv_handles[from][to];
+ }
+
+ n1 = charset_name(ic, from);
+ n2 = charset_name(ic, to);
+
+ ic->conv_handles[from][to] = smb_iconv_open_ex(ic, n2, n1,
+ ic->native_iconv); /* argument order: the "to" name first, then the "from" name */
+
+ if (ic->conv_handles[from][to] == (smb_iconv_t)-1) {
+ if ((from == CH_DOS || to == CH_DOS) && /* a second attempt is made only when the DOS charset is involved... */
+ strcasecmp(charset_name(ic, CH_DOS), "ASCII") != 0) { /* ...and it is not already ASCII */
+ DEBUG(0,("dos charset '%s' unavailable - using ASCII\n",
+ charset_name(ic, CH_DOS)));
+ ic->dos_charset = "ASCII"; /* stored in ic, so later CH_DOS lookups on this ic use ASCII too */
+
+ n1 = charset_name(ic, from); /* re-read both names now that the DOS charset has changed */
+ n2 = charset_name(ic, to);
+
+ ic->conv_handles[from][to] =
+ smb_iconv_open_ex(ic, n2, n1, ic->native_iconv);
+ }
+ }
+
+ return ic->conv_handles[from][to]; /* may be (smb_iconv_t)-1; that value stays in the table and is returned again next time */
+}
+
+/**
+ * Return the unicode codepoint for the next character in the input
+ * string in the given src_charset.
+ * The unicode codepoint (codepoint_t) is an unsigned 32 bit value.
+ *
+ * Also return the number of bytes consumed (which tells the caller
+ * how many bytes to skip to get to the next src_charset-character).
+ *
+ * This is implemented (in the non-ascii-case) by first converting the
+ * next character in the input string to UTF16_LE and then calculating
+ * the unicode codepoint from that.
+ *
+ * Return INVALID_CODEPOINT if the next character cannot be converted.
+ */
+_PUBLIC_ codepoint_t next_codepoint_convenience_ext(
+ struct smb_iconv_convenience *ic,
+ const char *str, charset_t src_charset,
+ size_t *bytes_consumed)
+{
+ /* it cannot occupy more than 4 bytes in UTF16 format */
+ uint8_t buf[4];
+ smb_iconv_t descriptor;
+ size_t ilen_orig;
+ size_t ilen;
+ size_t olen;
+ char *outbuf;
+
+ if ((str[0] & 0x80) == 0) { /* fast path: high bit clear, the byte is returned as the codepoint without using iconv */
+ *bytes_consumed = 1;
+ return (codepoint_t)str[0];
+ }
+
+ /*
+ * we assume that no multi-byte character can take more than 5 bytes.
+ * This is OK as we only support codepoints up to 1M (U+100000)
+ */
+ ilen_orig = strnlen(str, 5); /* offer iconv at most 5 input bytes, fewer if the string ends sooner */
+ ilen = ilen_orig;
+
+ descriptor = get_conv_handle(ic, src_charset, CH_UTF16);
+ if (descriptor == (smb_iconv_t)-1) {
+ *bytes_consumed = 1; /* report one byte so the caller can still step forward */
+ return INVALID_CODEPOINT;
+ }
+
+ /*
+ * this looks a little strange, but it is needed to cope with
+ * codepoints above 64k (U+10000) which are encoded as per RFC2781.
+ */
+ olen = 2; /* first attempt: output room for 2 bytes only */
+ outbuf = (char *)buf;
+ smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); /* return value ignored: success is judged from how olen changed */
+ if (olen == 2) { /* nothing was written: retry with room for 4 bytes */
+ olen = 4;
+ outbuf = (char *)buf;
+ smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
+ if (olen == 4) {
+ /* we didn't convert any bytes */
+ *bytes_consumed = 1;
+ return INVALID_CODEPOINT;
+ }
+ olen = 4 - olen; /* from here on olen is the number of bytes written to buf */
+ } else {
+ olen = 2 - olen; /* same conversion: bytes written */
+ }
+
+ *bytes_consumed = ilen_orig - ilen; /* input bytes iconv used up */
+
+ if (olen == 2) {
+ return (codepoint_t)SVAL(buf, 0); /* a single 16-bit value read from buf */
+ }
+ if (olen == 4) {
+ /* decode a 4 byte UTF16 character manually */
+ return (codepoint_t)0x10000 +
+ (buf[2] | ((buf[3] & 0x3)<<8) | /* low 10 bits, taken from the second 16-bit unit */
+ (buf[0]<<10) | ((buf[1] & 0x3)<<18)); /* high 10 bits, taken from the first 16-bit unit */
+ }
+
+ /* no other length is valid */
+ return INVALID_CODEPOINT;
+}
+
+/*
+ return the unicode codepoint for the next multi-byte CH_UNIX character
+ in the string
+
+ also return the number of bytes consumed (which tells the caller
+ how many bytes to skip to get to the next CH_UNIX character)
+
+ return INVALID_CODEPOINT if the next character cannot be converted
+*/
+_PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic,
+ const char *str, size_t *size)
+{
+ return next_codepoint_convenience_ext(ic, str, CH_UNIX, size); /* the general routine with the source charset fixed to CH_UNIX */
+}
+
+/*
+ push a single codepoint into a CH_UNIX string. The target string must
+ be able to hold the full character, which is guaranteed if it is at
+ least 5 bytes in size. The caller may pass less than 5 bytes if they
+ are sure the character will fit (for example, you can assume that
+ uppercase/lowercase of a character will not add more than 1 byte)
+
+ return the number of bytes occupied by the CH_UNIX character, or
+ -1 on failure
+*/
+_PUBLIC_ ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic,
+ char *str, codepoint_t c)
+{
+ smb_iconv_t descriptor;
+ uint8_t buf[4];
+ size_t ilen, olen;
+ const char *inbuf;
+
+ if (c < 128) { /* fast path: values below 128 are written as one byte without using iconv */
+ *str = c;
+ return 1;
+ }
+
+ descriptor = get_conv_handle(ic,
+ CH_UTF16, CH_UNIX);
+ if (descriptor == (smb_iconv_t)-1) {
+ return -1;
+ }
+
+ if (c < 0x10000) { /* fits in a single 16-bit unit */
+ ilen = 2;
+ olen = 5; /* iconv is always told 5 output bytes are available, whatever the caller actually passed */
+ inbuf = (char *)buf;
+ SSVAL(buf, 0, c); /* store c as a 16-bit value in buf - presumably little-endian to match "UTF-16LE"; confirm against the SSVAL definition */
+ smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
+ if (ilen != 0) { /* input left over: the conversion did not complete */
+ return -1;
+ }
+ return 5 - olen; /* bytes written to str */
+ }
+
+ c -= 0x10000; /* the remaining value is split across two 16-bit units below */
+
+ buf[0] = (c>>10) & 0xFF; /* first unit, low byte */
+ buf[1] = (c>>18) | 0xd8; /* first unit, high byte. NOTE(review): no mask or range check - assumes the original c was at most 0x10FFFF */
+ buf[2] = c & 0xFF; /* second unit, low byte */
+ buf[3] = ((c>>8) & 0x3) | 0xdc; /* second unit, high byte */
+
+ ilen = 4;
+ olen = 5;
+ inbuf = (char *)buf;
+
+ smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
+ if (ilen != 0) {
+ return -1;
+ }
+ return 5 - olen; /* bytes written to str */
+}
+
+_PUBLIC_ codepoint_t next_codepoint_ext(const char *str, charset_t src_charset, /* global-instance form of next_codepoint_convenience_ext() */
+ size_t *size)
+{
+ return next_codepoint_convenience_ext(get_iconv_convenience(), str, /* NOTE(review): get_iconv_convenience() can return NULL; the result is passed on unchecked */
+ src_charset, size);
+}
+
+_PUBLIC_ codepoint_t next_codepoint(const char *str, size_t *size) /* global-instance form of next_codepoint_convenience() */
+{
+ return next_codepoint_convenience(get_iconv_convenience(), str, size); /* uses the shared instance from get_iconv_convenience() */
+}
+
+_PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c) /* global-instance form of push_codepoint_convenience() */
+{
+ return push_codepoint_convenience(get_iconv_convenience(), str, c); /* uses the shared instance from get_iconv_convenience() */
+}
diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c
index 410547400d..b6bfb29e7d 100644
--- a/lib/util/charset/util_unistr.c
+++ b/lib/util/charset/util_unistr.c
@@ -21,16 +21,6 @@
#include "includes.h"
#include "system/locale.h"
-struct smb_iconv_convenience *global_iconv_convenience = NULL;
-
-static inline struct smb_iconv_convenience *get_iconv_convenience(void)
-{
- if (global_iconv_convenience == NULL)
- global_iconv_convenience = smb_iconv_convenience_reinit(talloc_autofree_context(),
- "ASCII", "UTF-8", true, NULL);
- return global_iconv_convenience;
-}
-
/**
Case insensitive string compararison
**/
@@ -1043,19 +1033,3 @@ _PUBLIC_ bool convert_string_talloc(TALLOC_CTX *ctx,
allow_badcharcnv);
}
-_PUBLIC_ codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
- size_t *size)
-{
- return next_codepoint_convenience_ext(get_iconv_convenience(), str,
- src_charset, size);
-}
-
-_PUBLIC_ codepoint_t next_codepoint(const char *str, size_t *size)
-{
- return next_codepoint_convenience(get_iconv_convenience(), str, size);
-}
-
-_PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
-{
- return push_codepoint_convenience(get_iconv_convenience(), str, c);
-}
diff --git a/lib/util/charset/wscript_build b/lib/util/charset/wscript_build
index 18479a9978..7dcd189036 100644
--- a/lib/util/charset/wscript_build
+++ b/lib/util/charset/wscript_build
@@ -4,7 +4,7 @@
if bld.env._SAMBA_BUILD_ == 4:
bld.SAMBA_SUBSYSTEM('CHARSET',
source='charcnv.c util_unistr.c',
- public_deps='ICONV_WRAPPER CODEPOINTS',
+ public_deps='CODEPOINTS',
public_headers='charset.h',
)
@@ -14,5 +14,5 @@ bld.SAMBA_SUBSYSTEM('ICONV_WRAPPER',
bld.SAMBA_SUBSYSTEM('CODEPOINTS',
source='codepoints.c',
- deps='DYNCONFIG'
+ deps='DYNCONFIG ICONV_WRAPPER'
)
diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c
index 4c98f8f339..2723599599 100644
--- a/source3/lib/charcnv.c
+++ b/source3/lib/charcnv.c
@@ -45,68 +45,9 @@ char lp_failed_convert_char(void)
*/
-static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
static bool conv_silent; /* Should we do a debug if the conversion fails ? */
static bool initialized;
-/**
- * Return the name of a charset to give to iconv().
- **/
-static const char *charset_name(charset_t ch)
-{
- const char *ret;
-
- switch (ch) {
- case CH_UTF16LE:
- ret = "UTF-16LE";
- break;
- case CH_UTF16BE:
- ret = "UTF-16BE";
- break;
- case CH_UNIX:
- ret = lp_unix_charset();
- break;
- case CH_DOS:
- ret = lp_dos_charset();
- break;
- case CH_DISPLAY:
- ret = lp_display_charset();
- break;
- case CH_UTF8:
- ret = "UTF8";
- break;
- default:
- ret = NULL;
- }
-
-#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
- if (ret && !strcmp(ret, "LOCALE")) {
- const char *ln = NULL;
-
-#ifdef HAVE_SETLOCALE
- setlocale(LC_ALL, "");
-#endif
- ln = nl_langinfo(CODESET);
- if (ln) {
- /* Check whether the charset name is supported
- by iconv */
- smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
- if (handle == (smb_iconv_t) -1) {
- DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
- ln = NULL;
- } else {
- DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
- smb_iconv_close(handle);
- }
- }
- ret = ln;
- }
-#endif
-
- if (!ret || !*ret) ret = "ASCII";
- return ret;
-}
-
void lazy_initialize_conv(void)
{
if (!initialized) {
@@ -121,16 +62,7 @@ void lazy_initialize_conv(void)
**/
void gfree_charcnv(void)
{
- int c1, c2;
-
- for (c1=0;c1<NUM_CHARSETS;c1++) {
- for (c2=0;c2<NUM_CHARSETS;c2++) {
- if ( conv_handles[c1][c2] ) {
- smb_iconv_close( conv_handles[c1][c2] );
- conv_handles[c1][c2] = 0;
- }
- }
- }
+ TALLOC_FREE(global_iconv_convenience);
initialized = false;
}
@@ -143,51 +75,9 @@ void gfree_charcnv(void)
**/
void init_iconv(void)
{
- int c1, c2;
- bool did_reload = False;
-
- /* so that charset_name() works we need to get the UNIX<->UCS2 going
- first */
- if (!conv_handles[CH_UNIX][CH_UTF16LE])
- conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
-
- if (!conv_handles[CH_UTF16LE][CH_UNIX])
- conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
-
- for (c1=0;c1<NUM_CHARSETS;c1++) {
- for (c2=0;c2<NUM_CHARSETS;c2++) {
- const char *n1 = charset_name((charset_t)c1);
- const char *n2 = charset_name((charset_t)c2);
- if (conv_handles[c1][c2] &&
- strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
- strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
- continue;
-
- did_reload = True;
-
- if (conv_handles[c1][c2])
- smb_iconv_close(conv_handles[c1][c2]);
-
- conv_handles[c1][c2] = smb_iconv_open(n2,n1);
- if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
- DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
- charset_name((charset_t)c1), charset_name((charset_t)c2)));
- if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
- n1 = "ASCII";
- }
- if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
- n2 = "ASCII";
- }
- DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
- n1, n2 ));
- conv_handles[c1][c2] = smb_iconv_open(n2,n1);
- if (!conv_handles[c1][c2]) {
- DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
- smb_panic("init_iconv: conv_handle initialization failed");
- }
- }
- }
- }
+ global_iconv_convenience = smb_iconv_convenience_reinit(NULL, lp_dos_charset(),
+ lp_unix_charset(), lp_display_charset(),
+ true, global_iconv_convenience);
}
/**
@@ -214,10 +104,11 @@ static size_t convert_string_internal(charset_t from, charset_t to,
const char* inbuf = (const char*)src;
char* outbuf = (char*)dest;
smb_iconv_t descriptor;
+ struct smb_iconv_convenience *ic;
lazy_initialize_conv();
-
- descriptor = conv_handles[from][to];
+ ic = get_iconv_convenience();
+ descriptor = get_conv_handle(ic, from, to);
if (srclen == (size_t)-1) {
if (from == CH_UTF16LE || from == CH_UTF16BE) {
@@ -255,11 +146,11 @@ static size_t convert_string_internal(charset_t from, charset_t to,
if (!conv_silent) {
if (from == CH_UNIX) {
DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
- charset_name(from), charset_name(to),
+ charset_name(ic, from), charset_name(ic, to),
(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
} else {
DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
- charset_name(from), charset_name(to),
+ charset_name(ic, from), charset_name(ic, to),
(unsigned int)srclen, (unsigned int)destlen));
}
}
@@ -552,6 +443,7 @@ bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
char *outbuf = NULL, *ob = NULL;
smb_iconv_t descriptor;
void **dest = (void **)dst;
+ struct smb_iconv_convenience *ic;
*dest = NULL;
@@ -576,8 +468,8 @@ bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
}
lazy_initialize_conv();
-
- descriptor = conv_handles[from][to];
+ ic = get_iconv_convenience();
+ descriptor = get_conv_handle(ic, from, to);
if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
if (!conv_silent)
@@ -1784,173 +1676,3 @@ size_t align_string(const void *base_ptr, const char *p, int flags)
return 0;
}
-/**
- * Return the unicode codepoint for the next character in the input
- * string in the given src_charset.
- * The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
- *
- * Also return the number of bytes consumed (which tells the caller
- * how many bytes to skip to get to the next src_charset-character).
- *
- * This is implemented (in the non-ascii-case) by first converting the
- * next character in the input string to UTF16_LE and then calculating
- * the unicode codepoint from that.
- *
- * Return INVALID_CODEPOINT if the next character cannot be converted.
- */
-
-codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
- size_t *bytes_consumed)
-{
- /* It cannot occupy more than 4 bytes in UTF16 format */
- uint8_t buf[4];
- smb_iconv_t descriptor;
- size_t ilen_orig;
- size_t ilen;
- size_t olen;
- char *outbuf;
-
- /* fastpath if the character is ASCII */
- if ((str[0] & 0x80) == 0) {
- *bytes_consumed = 1;
- return (codepoint_t)str[0];
- }
-
- /*
- * We assume that no multi-byte character can take more than
- * 5 bytes. This is OK as we only support codepoints up to 1M (U+100000)
- */
-
- ilen_orig = strnlen(str, 5);
- ilen = ilen_orig;
-
- lazy_initialize_conv();
-
- descriptor = conv_handles[src_charset][CH_UTF16LE];
- if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
- *bytes_consumed = 1;
- return INVALID_CODEPOINT;
- }
-
- /*
- * This looks a little strange, but it is needed to cope
- * with codepoints above 64k (U+10000) which are encoded as per RFC2781.
- */
- olen = 2;
- outbuf = (char *)buf;
- smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
- if (olen == 2) {
- /*
- * We failed to convert to a 2 byte character.
- * See if we can convert to a 4 UTF16-LE byte char encoding.
- */
- olen = 4;
- outbuf = (char *)buf;
- smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
- if (olen == 4) {
- /* We didn't convert any bytes */
- *bytes_consumed = 1;
- return INVALID_CODEPOINT;
- }
- olen = 4 - olen;
- } else {
- olen = 2 - olen;
- }
-
- *bytes_consumed = ilen_orig - ilen;
-
- if (olen == 2) {
- /* 2 byte, UTF16-LE encoded value. */
- return (codepoint_t)SVAL(buf, 0);
- }
- if (olen == 4) {
- /*
- * Decode a 4 byte UTF16-LE character manually.
- * See RFC2871 for the encoding machanism.
- */
- codepoint_t w1 = SVAL(buf,0) & ~0xD800;
- codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
-
- return (codepoint_t)0x10000 +
- (w1 << 10) + w2;
- }
-
- /* no other length is valid */
- return INVALID_CODEPOINT;
-}
-
-/*
- Return the unicode codepoint for the next multi-byte CH_UNIX character
- in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
-
- Also return the number of bytes consumed (which tells the caller
- how many bytes to skip to get to the next CH_UNIX character).
-
- Return INVALID_CODEPOINT if the next character cannot be converted.
-*/
-
-codepoint_t next_codepoint(const char *str, size_t *size)
-{
- return next_codepoint_ext(str, CH_UNIX, size);
-}
-
-/*
- push a single codepoint into a CH_UNIX string the target string must
- be able to hold the full character, which is guaranteed if it is at
- least 5 bytes in size. The caller may pass less than 5 bytes if they
- are sure the character will fit (for example, you can assume that
- uppercase/lowercase of a character will not add more than 1 byte)
-
- return the number of bytes occupied by the CH_UNIX character, or
- -1 on failure
-*/
-_PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
-{
- smb_iconv_t descriptor;
- uint8_t buf[4];
- size_t ilen, olen;
- const char *inbuf;
-
- if (c < 128) {
- *str = c;
- return 1;
- }
-
- lazy_initialize_conv();
-
- descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
- if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
- return -1;
- }
-
- if (c < 0x10000) {
- ilen = 2;
- olen = 5;
- inbuf = (char *)buf;
- SSVAL(buf, 0, c);
- smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
- if (ilen != 0) {
- return -1;
- }
- return 5 - olen;
- }
-
- c -= 0x10000;
-
- buf[0] = (c>>10) & 0xFF;
- buf[1] = (c>>18) | 0xd8;
- buf[2] = c & 0xFF;
- buf[3] = ((c>>8) & 0x3) | 0xdc;
-
- ilen = 4;
- olen = 5;
- inbuf = (char *)buf;
-
- smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
- if (ilen != 0) {
- return -1;
- }
- return 5 - olen;
-}
-
-
diff --git a/source4/param/loadparm.c b/source4/param/loadparm.c
index 3d87d6fb12..31157b2833 100644
--- a/source4/param/loadparm.c
+++ b/source4/param/loadparm.c
@@ -2776,11 +2776,7 @@ int lpcfg_maxprintjobs(struct loadparm_service *service, struct loadparm_service
struct smb_iconv_convenience *lpcfg_iconv_convenience(struct loadparm_context *lp_ctx)
{
if (lp_ctx == NULL) {
- static struct smb_iconv_convenience *fallback_ic = NULL;
- if (fallback_ic == NULL)
- fallback_ic = smb_iconv_convenience_reinit(talloc_autofree_context(),
- "CP850", "UTF8", true, NULL);
- return fallback_ic;
+ return get_iconv_convenience();
}
return lp_ctx->iconv_convenience;
}
diff --git a/source4/param/util.c b/source4/param/util.c
index fd12bb1eca..c6dca6076e 100644
--- a/source4/param/util.c
+++ b/source4/param/util.c
@@ -304,6 +304,7 @@ struct smb_iconv_convenience *smb_iconv_convenience_reinit_lp(TALLOC_CTX *mem_ct
{
return smb_iconv_convenience_reinit(mem_ctx, lpcfg_dos_charset(lp_ctx),
lpcfg_unix_charset(lp_ctx),
+ lpcfg_display_charset(lp_ctx),
lpcfg_parm_bool(lp_ctx, NULL, "iconv", "native", true),
old_ic);
}