diff options
Diffstat (limited to 'lib/util/charset/charcnv.c')
-rw-r--r-- | lib/util/charset/charcnv.c | 289 |
1 files changed, 0 insertions, 289 deletions
diff --git a/lib/util/charset/charcnv.c b/lib/util/charset/charcnv.c index 59b36e3062..dd2c725125 100644 --- a/lib/util/charset/charcnv.c +++ b/lib/util/charset/charcnv.c @@ -38,137 +38,6 @@ * @sa lib/iconv.c */ -struct smb_iconv_convenience { - TALLOC_CTX *child_ctx; - const char *unix_charset; - const char *dos_charset; - bool native_iconv; - smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS]; -}; - - -/** - * Return the name of a charset to give to iconv(). - **/ -static const char *charset_name(struct smb_iconv_convenience *ic, charset_t ch) -{ - switch (ch) { - case CH_UTF16: return "UTF-16LE"; - case CH_UNIX: return ic->unix_charset; - case CH_DOS: return ic->dos_charset; - case CH_UTF8: return "UTF8"; - case CH_UTF16BE: return "UTF-16BE"; - case CH_UTF16MUNGED: return "UTF16_MUNGED"; - default: - return "ASCII"; - } -} - -/** - re-initialize iconv conversion descriptors -**/ -static int close_iconv_convenience(struct smb_iconv_convenience *data) -{ - unsigned c1, c2; - for (c1=0;c1<NUM_CHARSETS;c1++) { - for (c2=0;c2<NUM_CHARSETS;c2++) { - if (data->conv_handles[c1][c2] != NULL) { - if (data->conv_handles[c1][c2] != (smb_iconv_t)-1) { - smb_iconv_close(data->conv_handles[c1][c2]); - } - data->conv_handles[c1][c2] = NULL; - } - } - } - - return 0; -} - -/* - the old_ic is passed in here as the smb_iconv_convenience structure - is used as a global pointer in some places (eg. python modules). We - don't want to invalidate those global pointers, but we do want to - update them with the right charset information when loadparm - runs. To do that we need to re-use the structure pointer, but - re-fill the elements in the structure with the updated values - */ -_PUBLIC_ struct smb_iconv_convenience *smb_iconv_convenience_reinit(TALLOC_CTX *mem_ctx, - const char *dos_charset, - const char *unix_charset, - bool native_iconv, - struct smb_iconv_convenience *old_ic) -{ - struct smb_iconv_convenience *ret; - - if (old_ic != NULL) { - ret = old_ic; - close_iconv_convenience(ret); - talloc_free(ret->child_ctx); - ZERO_STRUCTP(ret); - } else { - ret = talloc_zero(mem_ctx, struct smb_iconv_convenience); - } - if (ret == NULL) { - return NULL; - } - - /* we use a child context to allow us to free all ptrs without - freeing the structure itself */ - ret->child_ctx = talloc_new(ret); - if (ret->child_ctx == NULL) { - return NULL; - } - - talloc_set_destructor(ret, close_iconv_convenience); - - ret->dos_charset = talloc_strdup(ret->child_ctx, dos_charset); - ret->unix_charset = talloc_strdup(ret->child_ctx, unix_charset); - ret->native_iconv = native_iconv; - - return ret; -} - -/* - on-demand initialisation of conversion handles -*/ -static smb_iconv_t get_conv_handle(struct smb_iconv_convenience *ic, - charset_t from, charset_t to) -{ - const char *n1, *n2; - static bool initialised; - - if (initialised == false) { - initialised = true; - } - - if (ic->conv_handles[from][to]) { - return ic->conv_handles[from][to]; - } - - n1 = charset_name(ic, from); - n2 = charset_name(ic, to); - - ic->conv_handles[from][to] = smb_iconv_open_ex(ic, n2, n1, - ic->native_iconv); - - if (ic->conv_handles[from][to] == (smb_iconv_t)-1) { - if ((from == CH_DOS || to == CH_DOS) && - strcasecmp(charset_name(ic, CH_DOS), "ASCII") != 0) { - DEBUG(0,("dos charset '%s' unavailable - using ASCII\n", - charset_name(ic, CH_DOS))); - ic->dos_charset = "ASCII"; - - n1 = charset_name(ic, from); - n2 = charset_name(ic, to); - - ic->conv_handles[from][to] = - smb_iconv_open_ex(ic, n2, n1, ic->native_iconv); - } - } - - return ic->conv_handles[from][to]; -} - /** * Convert string from one encoding to another, making error checking etc * @@ -363,161 +232,3 @@ _PUBLIC_ bool convert_string_talloc_convenience(TALLOC_CTX *ctx, return true; } - -/** - * Return the unicode codepoint for the next character in the input - * string in the given src_charset. - * The unicode codepoint (codepoint_t) is an unsinged 32 bit value. - * - * Also return the number of bytes consumed (which tells the caller - * how many bytes to skip to get to the next src_charset-character). - * - * This is implemented (in the non-ascii-case) by first converting the - * next character in the input string to UTF16_LE and then calculating - * the unicode codepoint from that. - * - * Return INVALID_CODEPOINT if the next character cannot be converted. - */ -_PUBLIC_ codepoint_t next_codepoint_convenience_ext( - struct smb_iconv_convenience *ic, - const char *str, charset_t src_charset, - size_t *bytes_consumed) -{ - /* it cannot occupy more than 4 bytes in UTF16 format */ - uint8_t buf[4]; - smb_iconv_t descriptor; - size_t ilen_orig; - size_t ilen; - size_t olen; - char *outbuf; - - if ((str[0] & 0x80) == 0) { - *bytes_consumed = 1; - return (codepoint_t)str[0]; - } - - /* - * we assume that no multi-byte character can take more than 5 bytes. - * This is OK as we only support codepoints up to 1M (U+100000) - */ - ilen_orig = strnlen(str, 5); - ilen = ilen_orig; - - descriptor = get_conv_handle(ic, src_charset, CH_UTF16); - if (descriptor == (smb_iconv_t)-1) { - *bytes_consumed = 1; - return INVALID_CODEPOINT; - } - - /* - * this looks a little strange, but it is needed to cope with - * codepoints above 64k (U+1000) which are encoded as per RFC2781. - */ - olen = 2; - outbuf = (char *)buf; - smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); - if (olen == 2) { - olen = 4; - outbuf = (char *)buf; - smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); - if (olen == 4) { - /* we didn't convert any bytes */ - *bytes_consumed = 1; - return INVALID_CODEPOINT; - } - olen = 4 - olen; - } else { - olen = 2 - olen; - } - - *bytes_consumed = ilen_orig - ilen; - - if (olen == 2) { - return (codepoint_t)SVAL(buf, 0); - } - if (olen == 4) { - /* decode a 4 byte UTF16 character manually */ - return (codepoint_t)0x10000 + - (buf[2] | ((buf[3] & 0x3)<<8) | - (buf[0]<<10) | ((buf[1] & 0x3)<<18)); - } - - /* no other length is valid */ - return INVALID_CODEPOINT; -} - -/* - return the unicode codepoint for the next multi-byte CH_UNIX character - in the string - - also return the number of bytes consumed (which tells the caller - how many bytes to skip to get to the next CH_UNIX character) - - return INVALID_CODEPOINT if the next character cannot be converted -*/ -_PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic, - const char *str, size_t *size) -{ - return next_codepoint_convenience_ext(ic, str, CH_UNIX, size); -} - -/* - push a single codepoint into a CH_UNIX string the target string must - be able to hold the full character, which is guaranteed if it is at - least 5 bytes in size. The caller may pass less than 5 bytes if they - are sure the character will fit (for example, you can assume that - uppercase/lowercase of a character will not add more than 1 byte) - - return the number of bytes occupied by the CH_UNIX character, or - -1 on failure -*/ -_PUBLIC_ ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic, - char *str, codepoint_t c) -{ - smb_iconv_t descriptor; - uint8_t buf[4]; - size_t ilen, olen; - const char *inbuf; - - if (c < 128) { - *str = c; - return 1; - } - - descriptor = get_conv_handle(ic, - CH_UTF16, CH_UNIX); - if (descriptor == (smb_iconv_t)-1) { - return -1; - } - - if (c < 0x10000) { - ilen = 2; - olen = 5; - inbuf = (char *)buf; - SSVAL(buf, 0, c); - smb_iconv(descriptor, &inbuf, &ilen, &str, &olen); - if (ilen != 0) { - return -1; - } - return 5 - olen; - } - - c -= 0x10000; - - buf[0] = (c>>10) & 0xFF; - buf[1] = (c>>18) | 0xd8; - buf[2] = c & 0xFF; - buf[3] = ((c>>8) & 0x3) | 0xdc; - - ilen = 4; - olen = 5; - inbuf = (char *)buf; - - smb_iconv(descriptor, &inbuf, &ilen, &str, &olen); - if (ilen != 0) { - return -1; - } - return 5 - olen; -} - - |