summaryrefslogtreecommitdiff
path: root/source3
diff options
context:
space:
mode:
author Andrew Bartlett <abartlet@samba.org> 2011-04-12 14:01:41 +1000
committer Andrew Tridgell <tridge@samba.org> 2011-04-13 14:47:08 +1000
commit 4158e9a7e59c489c90097ac10d44640ccdd4470d (patch)
tree 5038be4eab3286800320ec24bb3b355d69ab1432 /source3
parent bf431fbedb8119b392b071f903b63e0f9671ee49 (diff)
download samba-4158e9a7e59c489c90097ac10d44640ccdd4470d.tar.gz
samba-4158e9a7e59c489c90097ac10d44640ccdd4470d.tar.bz2
samba-4158e9a7e59c489c90097ac10d44640ccdd4470d.zip
s3-charcnv: Move convert_string() et al to lib/util/charset
This is the first step to this being the common convert_string implementation.

Andrew Bartlett

Signed-off-by: Andrew Tridgell <tridge@samba.org>
Diffstat (limited to 'source3')
-rw-r--r-- source3/Makefile.in 2
-rw-r--r-- source3/include/proto.h 1
-rw-r--r-- source3/lib/charcnv.c 446
-rwxr-xr-x source3/wscript_build 2
4 files changed, 4 insertions, 447 deletions
diff --git a/source3/Makefile.in b/source3/Makefile.in
index 2c74ff8cf4..c20323c078 100644
--- a/source3/Makefile.in
+++ b/source3/Makefile.in
@@ -442,7 +442,7 @@ LIB_OBJ = $(LIBSAMBAUTIL_OBJ) $(UTIL_OBJ) $(CRYPTO_OBJ) \
lib/util_transfer_file.o ../lib/async_req/async_sock.o \
lib/addrchange.o \
$(TDB_LIB_OBJ) \
- $(VERSION_OBJ) lib/charcnv.o lib/fstring.o ../lib/util/debug.o ../lib/util/debug_s3.o ../lib/util/fault.o \
+ $(VERSION_OBJ) lib/charcnv.o ../lib/util/charset/convert_string.o lib/fstring.o ../lib/util/debug.o ../lib/util/debug_s3.o ../lib/util/fault.o \
lib/interface.o lib/pidfile.o lib/dumpcore.o \
lib/system.o lib/sendfile.o lib/recvfile.o lib/time.o \
lib/username.o \
diff --git a/source3/include/proto.h b/source3/include/proto.h
index 3f44b949f2..5b96bdfbeb 100644
--- a/source3/include/proto.h
+++ b/source3/include/proto.h
@@ -72,6 +72,7 @@ int bitmap_find(struct bitmap *bm, unsigned ofs);
/* The following definitions come from lib/charcnv.c */
+void lazy_initialize_conv(void);
void gfree_charcnv(void);
void init_iconv(void);
bool convert_string(charset_t from, charset_t to,
diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c
index a8719070c3..5c01052d21 100644
--- a/source3/lib/charcnv.c
+++ b/source3/lib/charcnv.c
@@ -22,25 +22,9 @@
*/
#include "includes.h"
-/**
- * @file
- *
- * @brief Character-set conversion routines built on our iconv.
- *
- * @note Samba's internal character set (at least in the 3.0 series)
- * is always the same as the one for the Unix filesystem. It is
- * <b>not</b> necessarily UTF-8 and may be different on machines that
- * need i18n filenames to be compatible with Unix software. It does
- * have to be a superset of ASCII. All multibyte sequences must start
- * with a byte with the high bit set.
- *
- * @sa lib/iconv.c
- */
-
-
static bool initialized;
-static void lazy_initialize_conv(void)
+void lazy_initialize_conv(void)
{
if (!initialized) {
load_case_tables_library();
@@ -72,434 +56,6 @@ void init_iconv(void)
true, global_iconv_handle);
}
-/**
- * Convert string from one encoding to another, making error checking etc
- * Slow path version - uses (slow) iconv.
- *
- * @param src pointer to source string (multibyte or singlebyte)
- * @param srclen length of the source string in bytes
- * @param dest pointer to destination string (multibyte or singlebyte)
- * @param destlen maximal length allowed for string
- * @param converted size is the number of bytes occupied in the destination
- *
- * @returns false and sets errno on fail, true on success.
- *
- * Ensure the srclen contains the terminating zero.
- *
- **/
-
-static bool convert_string_internal(charset_t from, charset_t to,
- void const *src, size_t srclen,
- void *dest, size_t destlen, size_t *converted_size)
-{
- size_t i_len, o_len;
- size_t retval;
- const char* inbuf = (const char*)src;
- char* outbuf = (char*)dest;
- smb_iconv_t descriptor;
- struct smb_iconv_handle *ic;
-
- lazy_initialize_conv();
- ic = get_iconv_handle();
- descriptor = get_conv_handle(ic, from, to);
-
- if (srclen == (size_t)-1) {
- if (from == CH_UTF16LE || from == CH_UTF16BE) {
- srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
- } else {
- srclen = strlen((const char *)src)+1;
- }
- }
-
-
- if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
- errno = EINVAL;
- return false;
- }
-
- i_len=srclen;
- o_len=destlen;
-
- retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
- if (retval == (size_t)-1) {
- return false;
- }
- *converted_size = destlen-o_len;
- return true;
-}
-
-/**
- * Convert string from one encoding to another, making error checking etc
- * Fast path version - handles ASCII first.
- *
- * @param src pointer to source string (multibyte or singlebyte)
- * @param srclen length of the source string in bytes, or -1 for nul terminated.
- * @param dest pointer to destination string (multibyte or singlebyte)
- * @param destlen maximal length allowed for string - *NEVER* -1.
- * @param converted size is the number of bytes occupied in the destination
- *
- * @returns false and sets errno on fail, true on success.
- *
- * Ensure the srclen contains the terminating zero.
- *
- * This function has been hand-tuned to provide a fast path.
- * Don't change unless you really know what you are doing. JRA.
- **/
-
-bool convert_string_error(charset_t from, charset_t to,
- void const *src, size_t srclen,
- void *dest, size_t destlen,
- size_t *converted_size)
-{
- /*
- * NB. We deliberately don't do a strlen here if srclen == -1.
- * This is very expensive over millions of calls and is taken
- * care of in the slow path in convert_string_internal. JRA.
- */
-
-#ifdef DEVELOPER
- SMB_ASSERT(destlen != (size_t)-1);
-#endif
-
- if (srclen == 0) {
- *converted_size = 0;
- return true;
- }
-
- if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
- const unsigned char *p = (const unsigned char *)src;
- unsigned char *q = (unsigned char *)dest;
- size_t slen = srclen;
- size_t dlen = destlen;
- unsigned char lastp = '\0';
- size_t retval = 0;
-
- /* If all characters are ascii, fast path here. */
- while (slen && dlen) {
- if ((lastp = *p) <= 0x7f) {
- *q++ = *p++;
- if (slen != (size_t)-1) {
- slen--;
- }
- dlen--;
- retval++;
- if (!lastp)
- break;
- } else {
-#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
- goto general_case;
-#else
- bool ret = convert_string_internal(from, to, p, slen, q, dlen, converted_size);
- *converted_size += retval;
- return ret;
-#endif
- }
- }
-
- *converted_size = retval;
-
- if (!dlen) {
- /* Even if we fast path we should note if we ran out of room. */
- if (((slen != (size_t)-1) && slen) ||
- ((slen == (size_t)-1) && lastp)) {
- errno = E2BIG;
- return false;
- }
- }
- return true;
- } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
- const unsigned char *p = (const unsigned char *)src;
- unsigned char *q = (unsigned char *)dest;
- size_t retval = 0;
- size_t slen = srclen;
- size_t dlen = destlen;
- unsigned char lastp = '\0';
-
- /* If all characters are ascii, fast path here. */
- while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
- if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
- *q++ = *p;
- if (slen != (size_t)-1) {
- slen -= 2;
- }
- p += 2;
- dlen--;
- retval++;
- if (!lastp)
- break;
- } else {
-#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
- goto general_case;
-#else
- bool ret = convert_string_internal(from, to, p, slen, q, dlen, converted_size);
- *converted_size += retval;
- return ret;
-#endif
- }
- }
-
- *converted_size = retval;
-
- if (!dlen) {
- /* Even if we fast path we should note if we ran out of room. */
- if (((slen != (size_t)-1) && slen) ||
- ((slen == (size_t)-1) && lastp)) {
- errno = E2BIG;
- return false;
- }
- }
- return true;
- } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
- const unsigned char *p = (const unsigned char *)src;
- unsigned char *q = (unsigned char *)dest;
- size_t retval = 0;
- size_t slen = srclen;
- size_t dlen = destlen;
- unsigned char lastp = '\0';
-
- /* If all characters are ascii, fast path here. */
- while (slen && (dlen >= 2)) {
- if ((lastp = *p) <= 0x7F) {
- *q++ = *p++;
- *q++ = '\0';
- if (slen != (size_t)-1) {
- slen--;
- }
- dlen -= 2;
- retval += 2;
- if (!lastp)
- break;
- } else {
-#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
- goto general_case;
-#else
- bool ret = convert_string_internal(from, to, p, slen, q, dlen, converted_size);
- *converted_size += retval;
- return ret;
-#endif
- }
- }
-
- *converted_size = retval;
-
- if (!dlen) {
- /* Even if we fast path we should note if we ran out of room. */
- if (((slen != (size_t)-1) && slen) ||
- ((slen == (size_t)-1) && lastp)) {
- errno = E2BIG;
- return false;
- }
- }
- return true;
- }
-
-#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
- general_case:
-#endif
- return convert_string_internal(from, to, src, srclen, dest, destlen, converted_size);
-}
-
-bool convert_string(charset_t from, charset_t to,
- void const *src, size_t srclen,
- void *dest, size_t destlen,
- size_t *converted_size)
-{
- bool ret = convert_string_error(from, to, src, srclen, dest, destlen, converted_size);
-
- if(ret==false) {
- const char *reason="unknown error";
- switch(errno) {
- case EINVAL:
- reason="Incomplete multibyte sequence";
- DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",
- reason, (const char *)src));
- break;
- case E2BIG:
- {
- struct smb_iconv_handle *ic;
- lazy_initialize_conv();
- ic = get_iconv_handle();
-
- reason="No more room";
- if (from == CH_UNIX) {
- DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
- charset_name(ic, from), charset_name(ic, to),
- (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
- } else {
- DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
- charset_name(ic, from), charset_name(ic, to),
- (unsigned int)srclen, (unsigned int)destlen));
- }
- break;
- }
- case EILSEQ:
- reason="Illegal multibyte sequence";
- DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",
- reason, (const char *)src));
- break;
- default:
- DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",
- reason, (const char *)src));
- break;
- }
- /* smb_panic(reason); */
- }
- return ret;
-}
-
-
-/**
- * Convert between character sets, allocating a new buffer using talloc for the result.
- *
- * @param srclen length of source buffer.
- * @param dest always set at least to NULL
- * @parm converted_size set to the number of bytes occupied by the string in
- * the destination on success.
- * @note -1 is not accepted for srclen.
- *
- * @return true if new buffer was correctly allocated, and string was
- * converted.
- *
- * Ensure the srclen contains the terminating zero.
- *
- * I hate the goto's in this function. It's embarressing.....
- * There has to be a cleaner way to do this. JRA.
- */
-bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
- void const *src, size_t srclen, void *dst,
- size_t *converted_size)
-
-{
- size_t i_len, o_len, destlen = (srclen * 3) / 2;
- size_t retval;
- const char *inbuf = (const char *)src;
- char *outbuf = NULL, *ob = NULL;
- smb_iconv_t descriptor;
- void **dest = (void **)dst;
- struct smb_iconv_handle *ic;
-
- *dest = NULL;
-
- if (src == NULL || srclen == (size_t)-1) {
- errno = EINVAL;
- return false;
- }
-
- if (srclen == 0) {
- /* We really should treat this as an error, but
- there are too many callers that need this to
- return a NULL terminated string in the correct
- character set. */
- if (to == CH_UTF16LE|| to == CH_UTF16BE || to == CH_UTF16MUNGED) {
- destlen = 2;
- } else {
- destlen = 1;
- }
- ob = talloc_zero_array(ctx, char, destlen);
- if (ob == NULL) {
- errno = ENOMEM;
- return false;
- }
- *converted_size = destlen;
- *dest = ob;
- return true;
- }
-
- lazy_initialize_conv();
- ic = get_iconv_handle();
- descriptor = get_conv_handle(ic, from, to);
-
- if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
- DEBUG(0,("convert_string_talloc: Conversion not supported.\n"));
- errno = EOPNOTSUPP;
- return false;
- }
-
- convert:
-
- /* +2 is for ucs2 null termination. */
- if ((destlen*2)+2 < destlen) {
- /* wrapped ! abort. */
- DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
- TALLOC_FREE(outbuf);
- errno = EOPNOTSUPP;
- return false;
- } else {
- destlen = destlen * 2;
- }
-
- /* +2 is for ucs2 null termination. */
- ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
-
- if (!ob) {
- DEBUG(0, ("convert_string_talloc: realloc failed!\n"));
- errno = ENOMEM;
- return false;
- }
- outbuf = ob;
- i_len = srclen;
- o_len = destlen;
-
- retval = smb_iconv(descriptor,
- &inbuf, &i_len,
- &outbuf, &o_len);
- if(retval == (size_t)-1) {
- const char *reason="unknown error";
- switch(errno) {
- case EINVAL:
- reason="Incomplete multibyte sequence";
- DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
- break;
- case E2BIG:
- goto convert;
- case EILSEQ:
- reason="Illegal multibyte sequence";
- DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
- break;
- }
- DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
- /* smb_panic(reason); */
- TALLOC_FREE(ob);
- return false;
- }
-
- destlen = destlen - o_len;
- /* Don't shrink unless we're reclaiming a lot of
- * space. This is in the hot codepath and these
- * reallocs *cost*. JRA.
- */
- if (o_len > 1024) {
- /* We're shrinking here so we know the +2 is safe from wrap. */
- ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
- }
-
- if (destlen && !ob) {
- DEBUG(0, ("convert_string_talloc: out of memory!\n"));
- errno = ENOMEM;
- return false;
- }
-
- *dest = ob;
-
- /* Must ucs2 null terminate in the extra space we allocated. */
- ob[destlen] = '\0';
- ob[destlen+1] = '\0';
-
- /* Ensure we can never return a *converted_size of zero. */
- if (destlen == 0) {
- /* As we're now returning false on a bad smb_iconv call,
- this should never happen. But be safe anyway. */
- if (to == CH_UTF16LE|| to == CH_UTF16BE || to == CH_UTF16MUNGED) {
- destlen = 2;
- } else {
- destlen = 1;
- }
- }
-
- *converted_size = destlen;
- return true;
-}
-
bool unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
{
size_t size;
diff --git a/source3/wscript_build b/source3/wscript_build
index 71d91388dd..6eb0c4c464 100755
--- a/source3/wscript_build
+++ b/source3/wscript_build
@@ -963,7 +963,7 @@ bld.SAMBA3_SUBSYSTEM('tdb-wrap3',
vars=locals())
bld.SAMBA3_SUBSYSTEM('CHARSET3',
- source='''lib/util_str.c lib/charcnv.c lib/fstring.c''',
+ source='''lib/util_str.c lib/charcnv.c ../lib/util/charset/convert_string.c lib/fstring.c''',
public_deps='ICONV_WRAPPER CODEPOINTS',
deps='DYNCONFIG')