diff options
Diffstat (limited to 'source4/lib/charcnv.c')
-rw-r--r-- | source4/lib/charcnv.c | 204 |
1 files changed, 156 insertions, 48 deletions
diff --git a/source4/lib/charcnv.c b/source4/lib/charcnv.c index 7d00c7e78f..392ad3cc72 100644 --- a/source4/lib/charcnv.c +++ b/source4/lib/charcnv.c @@ -55,18 +55,6 @@ static const char *charset_name(charset_t ch) return ret; } -static void lazy_initialize_conv(void) -{ - static int initialized = False; - - if (!initialized) { - initialized = True; - load_case_tables(); - init_iconv(); - } -} - - static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS]; /* @@ -107,6 +95,7 @@ void init_iconv(void) } + /** * Convert string from one encoding to another, making error checking etc * @@ -129,8 +118,6 @@ ssize_t convert_string(charset_t from, charset_t to, if (srclen == (size_t)-1) srclen = strlen(src)+1; - lazy_initialize_conv(); - descriptor = get_conv_handle(from, to); if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { @@ -194,8 +181,6 @@ ssize_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to, if (src == NULL || srclen == (size_t)-1 || srclen == 0) return (size_t)-1; - lazy_initialize_conv(); - descriptor = get_conv_handle(from, to); if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { @@ -271,27 +256,27 @@ ssize_t push_ascii(void *dest, const char *src, size_t dest_len, int flags) { size_t src_len; ssize_t ret; - char *tmpbuf = NULL; - - /* treat a pstring as "unlimited" length */ - if (dest_len == (size_t)-1) - dest_len = sizeof(pstring); if (flags & STR_UPPER) { - tmpbuf = strupper_talloc(NULL, src); - if (!tmpbuf) { + char *tmpbuf = strupper_talloc(NULL, src); + if (tmpbuf == NULL) { return -1; } - src = tmpbuf; + ret = push_ascii(dest, tmpbuf, dest_len, flags & ~STR_UPPER); + talloc_free(tmpbuf); + return ret; } + + /* treat a pstring as "unlimited" length */ + if (dest_len == (size_t)-1) + dest_len = sizeof(pstring); + src_len = strlen(src); if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) src_len++; - ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len); - talloc_free(tmpbuf); - return ret; + return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len); } /** @@ -375,6 +360,16 @@ ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags) size_t src_len = strlen(src); size_t ret; + if (flags & STR_UPPER) { + char *tmpbuf = strupper_talloc(NULL, src); + if (tmpbuf == NULL) { + return -1; + } + ret = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER); + talloc_free(tmpbuf); + return ret; + } + /* treat a pstring as "unlimited" length */ if (dest_len == (size_t)-1) dest_len = sizeof(pstring); @@ -399,17 +394,6 @@ ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags) len += ret; - if (flags & STR_UPPER) { - smb_ucs2_t *dest_ucs2 = dest; - size_t i; - for (i = 0; i < (dest_len / 2) && dest_ucs2[i]; i++) { - smb_ucs2_t v = toupper_w(dest_ucs2[i]); - if (v != dest_ucs2[i]) { - dest_ucs2[i] = v; - } - } - } - return len; } @@ -423,12 +407,11 @@ ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags) * @returns The number of bytes occupied by the string in the destination * or -1 in case of error. **/ -ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src) +ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, void **dest, const char *src) { size_t src_len = strlen(src)+1; - *dest = NULL; - return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (void **)dest); + return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, dest); } @@ -474,12 +457,9 @@ size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, i if (flags & STR_TERMINATE) { if (src_len == (size_t)-1) { - src_len = strlen_w(src)*2 + 2; + src_len = utf16_len(src); } else { - size_t len = strnlen_w(src, src_len/2); - if (len < src_len/2) - len++; - src_len = len*2; + src_len = utf16_len_n(src, src_len); } } @@ -507,9 +487,9 @@ ssize_t pull_ucs2_pstring(char *dest, const void *src) * @returns The number of bytes occupied by the string in the destination **/ -ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src) +ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const void *src) { - size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t); + size_t src_len = utf16_len(src); *dest = NULL; return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest); } @@ -582,3 +562,131 @@ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len } } + +/* + return the unicode codepoint for the next multi-byte CH_UNIX character + in the string + + also return the number of bytes consumed (which tells the caller + how many bytes to skip to get to the next CH_UNIX character) + + return INVALID_CODEPOINT if the next character cannot be converted +*/ +codepoint_t next_codepoint(const char *str, size_t *size) +{ + /* it cannot occupy more than 4 bytes in UTF16 format */ + uint8_t buf[4]; + smb_iconv_t descriptor; + size_t ilen_orig; + size_t ilen; + size_t olen; + char *outbuf; + + if ((str[0] & 0x80) == 0) { + *size = 1; + return (codepoint_t)str[0]; + } + + /* we assume that no multi-byte character can take + more than 5 bytes. This is OK as we only + support codepoints up to 1M */ + ilen_orig = strnlen(str, 5); + ilen = ilen_orig; + + descriptor = get_conv_handle(CH_UNIX, CH_UTF16); + if (descriptor == (smb_iconv_t)-1) { + *size = 1; + return INVALID_CODEPOINT; + } + + /* this looks a little strange, but it is needed to cope + with codepoints above 64k */ + olen = 2; + outbuf = buf; + smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); + if (olen == 2) { + olen = 4; + outbuf = buf; + smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); + if (olen == 4) { + /* we didn't convert any bytes */ + *size = 1; + return INVALID_CODEPOINT; + } + olen = 4 - olen; + } else { + olen = 2 - olen; + } + + *size = ilen_orig - ilen; + + if (olen == 2) { + return (codepoint_t)SVAL(buf, 0); + } + if (olen == 4) { + /* decode a 4 byte UTF16 character manually */ + return (codepoint_t)0x10000 + + (buf[2] | ((buf[3] & 0x3)<<8) | + (buf[0]<<10) | ((buf[1] & 0x3)<<18)); + } + + /* no other length is valid */ + return INVALID_CODEPOINT; +} + +/* + push a single codepoint into a CH_UNIX string the target string must + be able to hold the full character, which is guaranteed if it is at + least 5 bytes in size. The caller may pass less than 5 bytes if they + are sure the character will fit (for example, you can assume that + uppercase/lowercase of a character will not add more than 1 byte) + + return the number of bytes occupied by the CH_UNIX character, or + -1 on failure +*/ +ssize_t push_codepoint(char *str, codepoint_t c) +{ + smb_iconv_t descriptor; + uint8_t buf[4]; + size_t ilen, olen; + const char *inbuf; + + if (c < 128) { + *str = c; + return 1; + } + + descriptor = get_conv_handle(CH_UTF16, CH_UNIX); + if (descriptor == (smb_iconv_t)-1) { + return -1; + } + + if (c < 0x10000) { + ilen = 2; + olen = 5; + inbuf = buf; + SSVAL(buf, 0, c); + smb_iconv(descriptor, &inbuf, &ilen, &str, &olen); + if (ilen != 0) { + return -1; + } + return 5 - olen; + } + + c -= 0x10000; + + buf[0] = (c>>10) & 0xFF; + buf[1] = (c>>18) | 0xd8; + buf[2] = c & 0xFF; + buf[3] = ((c>>8) & 0x3) | 0xdc; + + ilen = 4; + olen = 5; + inbuf = buf; + + smb_iconv(descriptor, &inbuf, &ilen, &str, &olen); + if (ilen != 0) { + return -1; + } + return 5 - olen; +} |