diff options
author | Andrew Tridgell <tridge@samba.org> | 2004-09-01 04:39:06 +0000 |
---|---|---|
committer | Gerald (Jerry) Carter <jerry@samba.org> | 2007-10-10 12:58:27 -0500 |
commit | 31c1c7846f6b6e5848bc39a28a65118bfa98e35d (patch) | |
tree | 7387bbf9234320ca2c60011b34c76c08e2b274b0 /source4/lib/charcnv.c | |
parent | 48f3df41bb4c854b1049591e1c315c4c3b980984 (diff) | |
download | samba-31c1c7846f6b6e5848bc39a28a65118bfa98e35d.tar.gz samba-31c1c7846f6b6e5848bc39a28a65118bfa98e35d.tar.bz2 samba-31c1c7846f6b6e5848bc39a28a65118bfa98e35d.zip |
r2159: converted samba4 over to UTF-16.
I had previously thought this was unnecessary, as Windows doesn't use
standards-compliant UTF-16 and for filesystem operations treats bytes
as UCS-2, but Bjoern Jacke has pointed out to me that this means we
don't correctly store extended UTF-16 characters as UTF-8 on
disk. This can be seen with (for example) the Gothic characters, which
have codepoints above 64k.
This commit also adds a LOCAL-ICONV torture test that tests the first
1 million codepoints against the system iconv library, and tests 5
million random UTF-16LE buffers for identical error handling to the
system iconv library.
The lib/iconv.c changes need backporting to samba3.
(This used to be commit 756f28ac95feaa84b42402723d5f7286865c78db)
Diffstat (limited to 'source4/lib/charcnv.c')
-rw-r--r-- | source4/lib/charcnv.c | 34 |
1 files changed, 17 insertions, 17 deletions
diff --git a/source4/lib/charcnv.c b/source4/lib/charcnv.c index 8204a3bcdd..2109e957d9 100644 --- a/source4/lib/charcnv.c +++ b/source4/lib/charcnv.c @@ -47,12 +47,12 @@ static const char *charset_name(charset_t ch) { const char *ret = NULL; - if (ch == CH_UCS2) ret = "UTF-16LE"; + if (ch == CH_UTF16) ret = "UTF-16LE"; else if (ch == CH_UNIX) ret = lp_unix_charset(); else if (ch == CH_DOS) ret = lp_dos_charset(); else if (ch == CH_DISPLAY) ret = lp_display_charset(); else if (ch == CH_UTF8) ret = "UTF8"; - else if (ch == CH_UCS2BE) ret = "UCS-2BE"; + else if (ch == CH_UTF16BE) ret = "UTF-16BE"; if (!ret || !*ret) ret = "ASCII"; return ret; @@ -81,13 +81,13 @@ void init_iconv(void) /* so that charset_name() works we need to get the UNIX<->UCS2 going first */ - if (!conv_handles[CH_UNIX][CH_UCS2]) - conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open(charset_name(CH_UCS2), + if (!conv_handles[CH_UNIX][CH_UTF16]) + conv_handles[CH_UNIX][CH_UTF16] = smb_iconv_open(charset_name(CH_UTF16), "ASCII"); - if (!conv_handles[CH_UCS2][CH_UNIX]) - conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", - charset_name(CH_UCS2)); + if (!conv_handles[CH_UTF16][CH_UNIX]) + conv_handles[CH_UTF16][CH_UNIX] = smb_iconv_open("ASCII", + charset_name(CH_UTF16)); for (c1=0;c1<NUM_CHARSETS;c1++) { for (c2=0;c2<NUM_CHARSETS;c2++) { @@ -293,7 +293,7 @@ size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen) size_t size; smb_ucs2_t *buffer; - size = convert_string_allocate(CH_UNIX, CH_UCS2, src, srclen, + size = convert_string_allocate(CH_UNIX, CH_UTF16, src, srclen, (void **) &buffer); if (size == -1) { smb_panic("failed to create UCS2 buffer"); @@ -303,7 +303,7 @@ size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen) return srclen; } - size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen); + size = convert_string(CH_UTF16, CH_UNIX, buffer, size, dest, destlen); free(buffer); return size; } @@ -313,7 +313,7 @@ size_t 
unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen) size_t size; smb_ucs2_t *buffer; - size = convert_string_allocate(CH_UNIX, CH_UCS2, src, srclen, + size = convert_string_allocate(CH_UNIX, CH_UTF16, src, srclen, (void **) &buffer); if (size == -1) { smb_panic("failed to create UCS2 buffer"); @@ -322,7 +322,7 @@ size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen) free(buffer); return srclen; } - size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen); + size = convert_string(CH_UTF16, CH_UNIX, buffer, size, dest, destlen); free(buffer); return size; } @@ -461,7 +461,7 @@ ssize_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest /* ucs2 is always a multiple of 2 bytes */ dest_len &= ~1; - len += convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len); + len += convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len); return len; } @@ -480,7 +480,7 @@ ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src) size_t src_len = strlen(src)+1; *dest = NULL; - return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (const void **)dest); + return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (const void **)dest); } @@ -498,7 +498,7 @@ ssize_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src) size_t src_len = strlen(src)+1; *dest = NULL; - return convert_string_allocate(CH_UNIX, CH_UCS2, src, src_len, (void **)dest); + return convert_string_allocate(CH_UNIX, CH_UTF16, src, src_len, (void **)dest); } /** @@ -603,7 +603,7 @@ size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_ if (src_len != (size_t)-1) src_len &= ~1; - ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len); + ret = convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len); if (dest_len) dest[MIN(ret, dest_len-1)] = 0; @@ -627,7 +627,7 @@ ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t 
*src) { size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t); *dest = NULL; - return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (const void **)dest); + return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (const void **)dest); } /** @@ -642,7 +642,7 @@ ssize_t pull_ucs2_allocate(void **dest, const smb_ucs2_t *src) { size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t); *dest = NULL; - return convert_string_allocate(CH_UCS2, CH_UNIX, src, src_len, dest); + return convert_string_allocate(CH_UTF16, CH_UNIX, src, src_len, dest); } /** |