From 2a3a86a86f3d1ab97adda563beda7ee35f6a2414 Mon Sep 17 00:00:00 2001
From: Andrew Bartlett <abartlet@samba.org>
Date: Fri, 18 Feb 2011 13:47:28 +1100
Subject: lib/util/charcnv Move iconv handle setup in common

We now use the struct smb_iconv_convenience at the core of all our
iconv code, and use global_iconv_convenience for the callers that
don't specify one.

Andrew Bartlett
---
 source3/lib/charcnv.c | 302 ++------------------------------------------------
 1 file changed, 12 insertions(+), 290 deletions(-)

(limited to 'source3/lib/charcnv.c')

diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c
index 4c98f8f339..2723599599 100644
--- a/source3/lib/charcnv.c
+++ b/source3/lib/charcnv.c
@@ -45,68 +45,9 @@ char lp_failed_convert_char(void)
  */
 
 
-static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
 static bool conv_silent; /* Should we do a debug if the conversion fails ? */
 static bool initialized;
 
-/**
- * Return the name of a charset to give to iconv().
- **/
-static const char *charset_name(charset_t ch)
-{
-	const char *ret;
-
-	switch (ch) {
-	case CH_UTF16LE:
-		ret = "UTF-16LE";
-		break;
-	case CH_UTF16BE:
-		ret = "UTF-16BE";
-		break;
-	case CH_UNIX:
-		ret = lp_unix_charset();
-		break;
-	case CH_DOS:
-		ret = lp_dos_charset();
-		break;
-	case CH_DISPLAY:
-		ret = lp_display_charset();
-		break;
-	case CH_UTF8:
-		ret = "UTF8";
-		break;
-	default:
-		ret = NULL;
-	}
-
-#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
-	if (ret && !strcmp(ret, "LOCALE")) {
-		const char *ln = NULL;
-
-#ifdef HAVE_SETLOCALE
-		setlocale(LC_ALL, "");
-#endif
-		ln = nl_langinfo(CODESET);
-		if (ln) {
-			/* Check whether the charset name is supported
-			   by iconv */
-			smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
-			if (handle == (smb_iconv_t) -1) {
-				DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
-				ln = NULL;
-			} else {
-				DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
-				smb_iconv_close(handle);
-			}
-		}
-		ret = ln;
-	}
-#endif
-
-	if (!ret || !*ret) ret = "ASCII";
-	return ret;
-}
-
 void lazy_initialize_conv(void)
 {
 	if (!initialized) {
@@ -121,16 +62,7 @@ void lazy_initialize_conv(void)
  **/
 void gfree_charcnv(void)
 {
-	int c1, c2;
-
-	for (c1=0;c1<NUM_CHARSETS;c1++) {
-		for (c2=0;c2<NUM_CHARSETS;c2++) {
-			if ( conv_handles[c1][c2] ) {
-				smb_iconv_close( conv_handles[c1][c2] );
-				conv_handles[c1][c2] = 0;
-			}
-		}
-	}
+	TALLOC_FREE(global_iconv_convenience);
 	initialized = false;
 }
 
@@ -143,51 +75,9 @@ void gfree_charcnv(void)
  **/
 void init_iconv(void)
 {
-	int c1, c2;
-	bool did_reload = False;
-
-	/* so that charset_name() works we need to get the UNIX<->UCS2 going
-	   first */
-	if (!conv_handles[CH_UNIX][CH_UTF16LE])
-		conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
-
-	if (!conv_handles[CH_UTF16LE][CH_UNIX])
-		conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
-
-	for (c1=0;c1<NUM_CHARSETS;c1++) {
-		for (c2=0;c2<NUM_CHARSETS;c2++) {
-			const char *n1 = charset_name((charset_t)c1);
-			const char *n2 = charset_name((charset_t)c2);
-			if (conv_handles[c1][c2] &&
-			    strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
-			    strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
-				continue;
-
-			did_reload = True;
-
-			if (conv_handles[c1][c2])
-				smb_iconv_close(conv_handles[c1][c2]);
-
-			conv_handles[c1][c2] = smb_iconv_open(n2,n1);
-			if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
-				DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
-					 charset_name((charset_t)c1), charset_name((charset_t)c2)));
-				if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
-					n1 = "ASCII";
-				}
-				if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
-					n2 = "ASCII";
-				}
-				DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
-					n1, n2 ));
-				conv_handles[c1][c2] = smb_iconv_open(n2,n1);
-				if (!conv_handles[c1][c2]) {
-					DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
-					smb_panic("init_iconv: conv_handle initialization failed");
-				}
-			}
-		}
-	}
+	global_iconv_convenience = smb_iconv_convenience_reinit(NULL, lp_dos_charset(),
+								lp_unix_charset(), lp_display_charset(),
+								true, global_iconv_convenience);
 }
 
 /**
@@ -214,10 +104,11 @@ static size_t convert_string_internal(charset_t from, charset_t to,
 	const char* inbuf = (const char*)src;
 	char* outbuf = (char*)dest;
 	smb_iconv_t descriptor;
+	struct smb_iconv_convenience *ic;
 
 	lazy_initialize_conv();
-
-	descriptor = conv_handles[from][to];
+	ic = get_iconv_convenience();
+	descriptor = get_conv_handle(ic, from, to);
 
 	if (srclen == (size_t)-1) {
 		if (from == CH_UTF16LE || from == CH_UTF16BE) {
@@ -255,11 +146,11 @@ static size_t convert_string_internal(charset_t from, charset_t to,
 				if (!conv_silent) {
 					if (from == CH_UNIX) {
 						DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
-							charset_name(from), charset_name(to),
+							 charset_name(ic, from), charset_name(ic, to),
 							(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
 					} else {
 						DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
-							charset_name(from), charset_name(to),
+							 charset_name(ic, from), charset_name(ic, to),
 							(unsigned int)srclen, (unsigned int)destlen));
 					}
 				}
@@ -552,6 +443,7 @@ bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 	char *outbuf = NULL, *ob = NULL;
 	smb_iconv_t descriptor;
 	void **dest = (void **)dst;
+	struct smb_iconv_convenience *ic;
 
 	*dest = NULL;
 
@@ -576,8 +468,8 @@ bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
 	}
 
 	lazy_initialize_conv();
-
-	descriptor = conv_handles[from][to];
+	ic = get_iconv_convenience();
+	descriptor = get_conv_handle(ic, from, to);
 
 	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
 		if (!conv_silent)
@@ -1784,173 +1676,3 @@ size_t align_string(const void *base_ptr, const char *p, int flags)
 	return 0;
 }
 
-/**
- * Return the unicode codepoint for the next character in the input
- * string in the given src_charset.
- * The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
- *
- * Also return the number of bytes consumed (which tells the caller
- * how many bytes to skip to get to the next src_charset-character).
- *
- * This is implemented (in the non-ascii-case) by first converting the
- * next character in the input string to UTF16_LE and then calculating
- * the unicode codepoint from that.
- *
- * Return INVALID_CODEPOINT if the next character cannot be converted.
- */
-
-codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
-			       size_t *bytes_consumed)
-{
-	/* It cannot occupy more than 4 bytes in UTF16 format */
-	uint8_t buf[4];
-	smb_iconv_t descriptor;
-	size_t ilen_orig;
-	size_t ilen;
-	size_t olen;
-	char *outbuf;
-
-	/* fastpath if the character is ASCII */
-	if ((str[0] & 0x80) == 0) {
-		*bytes_consumed = 1;
-		return (codepoint_t)str[0];
-	}
-
-	/*
-	 * We assume that no multi-byte character can take more than
-	 * 5 bytes. This is OK as we only support codepoints up to 1M (U+100000)
-	 */
-
-	ilen_orig = strnlen(str, 5);
-	ilen = ilen_orig;
-
-	lazy_initialize_conv();
-
-	descriptor = conv_handles[src_charset][CH_UTF16LE];
-	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
-		*bytes_consumed = 1;
-		return INVALID_CODEPOINT;
-	}
-
-	/*
-	 * This looks a little strange, but it is needed to cope
-	 * with codepoints above 64k (U+10000) which are encoded as per RFC2781.
-	 */
-	olen = 2;
-	outbuf = (char *)buf;
-	smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
-	if (olen == 2) {
-		/*
-		 * We failed to convert to a 2 byte character.
-		 * See if we can convert to a 4 UTF16-LE byte char encoding.
-		 */
-		olen = 4;
-		outbuf = (char *)buf;
-		smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
-		if (olen == 4) {
-			/* We didn't convert any bytes */
-			*bytes_consumed = 1;
-			return INVALID_CODEPOINT;
-		}
-		olen = 4 - olen;
-	} else {
-		olen = 2 - olen;
-	}
-
-	*bytes_consumed = ilen_orig - ilen;
-
-	if (olen == 2) {
-		/* 2 byte, UTF16-LE encoded value. */
-		return (codepoint_t)SVAL(buf, 0);
-	}
-	if (olen == 4) {
-		/*
-		 * Decode a 4 byte UTF16-LE character manually.
-		 * See RFC2871 for the encoding machanism.
-		 */
-		codepoint_t w1 = SVAL(buf,0) & ~0xD800;
-		codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
-
-		return (codepoint_t)0x10000 +
-				(w1 << 10) + w2;
-	}
-
-	/* no other length is valid */
-	return INVALID_CODEPOINT;
-}
-
-/*
-  Return the unicode codepoint for the next multi-byte CH_UNIX character
-  in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
-
-  Also return the number of bytes consumed (which tells the caller
-  how many bytes to skip to get to the next CH_UNIX character).
-
-  Return INVALID_CODEPOINT if the next character cannot be converted.
-*/
-
-codepoint_t next_codepoint(const char *str, size_t *size)
-{
-	return next_codepoint_ext(str, CH_UNIX, size);
-}
-
-/*
-  push a single codepoint into a CH_UNIX string the target string must
-  be able to hold the full character, which is guaranteed if it is at
-  least 5 bytes in size. The caller may pass less than 5 bytes if they
-  are sure the character will fit (for example, you can assume that
-  uppercase/lowercase of a character will not add more than 1 byte)
-
-  return the number of bytes occupied by the CH_UNIX character, or
-  -1 on failure
-*/
-_PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
-{
-	smb_iconv_t descriptor;
-	uint8_t buf[4];
-	size_t ilen, olen;
-	const char *inbuf;
-	
-	if (c < 128) {
-		*str = c;
-		return 1;
-	}
-
-	lazy_initialize_conv();
-
-	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
-	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
-		return -1;
-	}
-
-	if (c < 0x10000) {
-		ilen = 2;
-		olen = 5;
-		inbuf = (char *)buf;
-		SSVAL(buf, 0, c);
-		smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
-		if (ilen != 0) {
-			return -1;
-		}
-		return 5 - olen;
-	}
-
-	c -= 0x10000;
-
-	buf[0] = (c>>10) & 0xFF;
-	buf[1] = (c>>18) | 0xd8;
-	buf[2] = c & 0xFF;
-	buf[3] = ((c>>8) & 0x3) | 0xdc;
-
-	ilen = 4;
-	olen = 5;
-	inbuf = (char *)buf;
-
-	smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
-	if (ilen != 0) {
-		return -1;
-	}
-	return 5 - olen;
-}
-
-
-- 
cgit