diff options
author | Michael Adam <obnox@samba.org> | 2010-10-29 22:06:05 +0200 |
---|---|---|
committer | Michael Adam <obnox@samba.org> | 2010-11-03 22:45:19 +0000 |
commit | 5829cb0a984b6f3ee2ef114a4d2ff85f1a5a592b (patch) | |
tree | e44cbf22f6c4ddfe17e95d8af4950b451db88a16 /lib/util/charset | |
parent | a50d3638a7440e30a0cbfb3bffe4712ff68e350a (diff) | |
download | samba-5829cb0a984b6f3ee2ef114a4d2ff85f1a5a592b.tar.gz samba-5829cb0a984b6f3ee2ef114a4d2ff85f1a5a592b.tar.bz2 samba-5829cb0a984b6f3ee2ef114a4d2ff85f1a5a592b.zip |
lib/charset/charcnv: add next_codepoint_convenience_ext() that accepts input charset.
next_codepoint_convenience() takes a string in CH_UNIX encoding and returns the
unicode codepoint of the next (possibly multibyte) character of the
input string.
The new next_codepoint_convenience_ext() function adds the encoding of the input
string as a parameter. next_codepoint_convenience() now only calls
next_codepoint_convenience_ext() with CH_UNIX as the src_charset argument.
Diffstat (limited to 'lib/util/charset')
-rw-r--r-- | lib/util/charset/charcnv.c | 45 | ||||
-rw-r--r-- | lib/util/charset/charset.h | 3 |
2 files changed, 37 insertions, 11 deletions
diff --git a/lib/util/charset/charcnv.c b/lib/util/charset/charcnv.c index e9f6ab0d94..25a54cca13 100644 --- a/lib/util/charset/charcnv.c +++ b/lib/util/charset/charcnv.c @@ -373,17 +373,25 @@ _PUBLIC_ bool convert_string_talloc_convenience(TALLOC_CTX *ctx, return true; } -/* - return the unicode codepoint for the next multi-byte CH_UNIX character - in the string - also return the number of bytes consumed (which tells the caller - how many bytes to skip to get to the next CH_UNIX character) - - return INVALID_CODEPOINT if the next character cannot be converted -*/ -_PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic, - const char *str, size_t *size) +/** + * Return the unicode codepoint for the next character in the input + * string in the given src_charset. + * The unicode codepoint (codepoint_t) is an unsinged 32 bit value. + * + * Also return the number of bytes consumed (which tells the caller + * how many bytes to skip to get to the next src_charset-character). + * + * This is implemented (in the non-ascii-case) by first converting the + * next character in the input string to UTF16_LE and then calculating + * the unicode codepoint from that. + * + * Return INVALID_CODEPOINT if the next character cannot be converted. 
+ */ +_PUBLIC_ codepoint_t next_codepoint_convenience_ext( + struct smb_iconv_convenience *ic, + const char *str, charset_t src_charset, + size_t *size) { /* it cannot occupy more than 4 bytes in UTF16 format */ uint8_t buf[4]; @@ -404,7 +412,7 @@ _PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic ilen_orig = strnlen(str, 5); ilen = ilen_orig; - descriptor = get_conv_handle(ic, CH_UNIX, CH_UTF16); + descriptor = get_conv_handle(ic, src_charset, CH_UTF16); if (descriptor == (smb_iconv_t)-1) { *size = 1; return INVALID_CODEPOINT; @@ -446,6 +454,21 @@ _PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic } /* + return the unicode codepoint for the next multi-byte CH_UNIX character + in the string + + also return the number of bytes consumed (which tells the caller + how many bytes to skip to get to the next CH_UNIX character) + + return INVALID_CODEPOINT if the next character cannot be converted +*/ +_PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic, + const char *str, size_t *size) +{ + return next_codepoint_convenience_ext(ic, str, CH_UNIX, size); +} + +/* push a single codepoint into a CH_UNIX string the target string must be able to hold the full character, which is guaranteed if it is at least 5 bytes in size. The caller may pass less than 5 bytes if they diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h index a66e24f2b9..7960631484 100644 --- a/lib/util/charset/charset.h +++ b/lib/util/charset/charset.h @@ -172,6 +172,9 @@ codepoint_t next_codepoint(const char *str, size_t *size); ssize_t push_codepoint(char *str, codepoint_t c); /* codepoints */ +codepoint_t next_codepoint_convenience_ext(struct smb_iconv_convenience *ic, + const char *str, charset_t src_charset, + size_t *size); codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic, const char *str, size_t *size); ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic, |