diff options
author | Michael Adam <obnox@samba.org> | 2010-10-29 22:06:05 +0200 |
---|---|---|
committer | Michael Adam <obnox@samba.org> | 2010-11-03 22:45:19 +0000 |
commit | d41d05ec7b5650759b8b6b388d34516daf0eed83 (patch) | |
tree | c021d8bb12e4712437974350a51d4a48393b6043 /source3/lib | |
parent | f14d84e2f233dd337bbd9bb0166f8bf0cc8f7a82 (diff) | |
download | samba-d41d05ec7b5650759b8b6b388d34516daf0eed83.tar.gz samba-d41d05ec7b5650759b8b6b388d34516daf0eed83.tar.bz2 samba-d41d05ec7b5650759b8b6b388d34516daf0eed83.zip |
s3:lib/charcnv: add next_codepoint_ext() that accepts input charset.
next_codepoint() takes a string in CH_UNIX encoding and returns the
unicode codepoint of the next (possibly multibyte) character of the
input string.
The new next_codepoint_ext() function adds the encoding of the input
string as a parameter. next_codepoint() now only calls next_codepoint_ext()
with CH_UNIX as the src_charset argument.
Diffstat (limited to 'source3/lib')
-rw-r--r-- | source3/lib/charcnv.c | 43 |
1 files changed, 32 insertions, 11 deletions
diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c index 9ac9930267..ce01841f7e 100644 --- a/source3/lib/charcnv.c +++ b/source3/lib/charcnv.c @@ -1793,17 +1793,23 @@ size_t align_string(const void *base_ptr, const char *p, int flags) return 0; } -/* - Return the unicode codepoint for the next multi-byte CH_UNIX character - in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value. - - Also return the number of bytes consumed (which tells the caller - how many bytes to skip to get to the next CH_UNIX character). - - Return INVALID_CODEPOINT if the next character cannot be converted. -*/ +/** + * Return the unicode codepoint for the next character in the input + * string in the given src_charset. + * The unicode codepoint (codepoint_t) is an unsinged 32 bit value. + * + * Also return the number of bytes consumed (which tells the caller + * how many bytes to skip to get to the next src_charset-character). + * + * This is implemented (in the non-ascii-case) by first converting the + * next character in the input string to UTF16_LE and then calculating + * the unicode codepoint from that. + * + * Return INVALID_CODEPOINT if the next character cannot be converted. + */ -codepoint_t next_codepoint(const char *str, size_t *size) +codepoint_t next_codepoint_ext(const char *str, charset_t src_charset, + size_t *size) { /* It cannot occupy more than 4 bytes in UTF16 format */ uint8_t buf[4]; @@ -1827,7 +1833,7 @@ codepoint_t next_codepoint(const char *str, size_t *size) lazy_initialize_conv(); - descriptor = conv_handles[CH_UNIX][CH_UTF16LE]; + descriptor = conv_handles[src_charset][CH_UTF16LE]; if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { *size = 1; return INVALID_CODEPOINT; @@ -1877,6 +1883,21 @@ codepoint_t next_codepoint(const char *str, size_t *size) } /* + Return the unicode codepoint for the next multi-byte CH_UNIX character + in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value. 
+ + Also return the number of bytes consumed (which tells the caller + how many bytes to skip to get to the next CH_UNIX character). + + Return INVALID_CODEPOINT if the next character cannot be converted. +*/ + +codepoint_t next_codepoint(const char *str, size_t *size) +{ + return next_codepoint_ext(str, CH_UNIX, size); +} + +/* push a single codepoint into a CH_UNIX string the target string must be able to hold the full character, which is guaranteed if it is at least 5 bytes in size. The caller may pass less than 5 bytes if they |