diff options
author | Andrew Bartlett <abartlet@samba.org> | 2011-04-12 10:35:43 +1000 |
---|---|---|
committer | Andrew Tridgell <tridge@samba.org> | 2011-04-13 14:47:08 +1000 |
commit | 8db1648f6644acca05ca41fd3803468bba98993d (patch) | |
tree | 8a1418b582de2bffcc544368883116898f251f3f | |
parent | 1d4fb073ecd77a8289b064d4eb6bb148ba49c11b (diff) | |
download | samba-8db1648f6644acca05ca41fd3803468bba98993d.tar.gz samba-8db1648f6644acca05ca41fd3803468bba98993d.tar.bz2 samba-8db1648f6644acca05ca41fd3803468bba98993d.zip |
lib/util/charset: Make ASCII conversion validate its input
We should not just strip the high bits off Unicode strings being
converted to ASCII; we need to actually fail the conversion.
Andrew Bartlett
Signed-off-by: Andrew Tridgell <tridge@samba.org>
-rw-r--r-- | lib/util/charset/iconv.c | 73 |
1 files changed, 69 insertions, 4 deletions
diff --git a/lib/util/charset/iconv.c b/lib/util/charset/iconv.c index 16ce92ea82..64e345a7c1 100644 --- a/lib/util/charset/iconv.c +++ b/lib/util/charset/iconv.c @@ -56,6 +56,7 @@ static_decl_charset; static size_t ascii_pull (void *,const char **, size_t *, char **, size_t *); static size_t ascii_push (void *,const char **, size_t *, char **, size_t *); +static size_t latin1_pull(void *,const char **, size_t *, char **, size_t *); static size_t latin1_push(void *,const char **, size_t *, char **, size_t *); static size_t utf8_pull (void *,const char **, size_t *, char **, size_t *); static size_t utf8_push (void *,const char **, size_t *, char **, size_t *); @@ -81,7 +82,7 @@ static const struct charset_functions builtin_functions[] = { {"ASCII", ascii_pull, ascii_push}, {"646", ascii_pull, ascii_push}, - {"ISO-8859-1", ascii_pull, latin1_push}, + {"ISO-8859-1", latin1_pull, latin1_push}, {"UCS2-HEX", ucs2hex_pull, ucs2hex_push} }; @@ -373,10 +374,24 @@ _PUBLIC_ int smb_iconv_close(smb_iconv_t cd) and also the "test" character sets that are designed to test multi-byte character set support for english users ***********************************************************************/ + +/* + this takes an ASCII sequence and produces a UTF16 sequence + + The first 127 codepoints of latin1 matches the first 127 codepoints + of unicode, and so can be put into the first byte of UTF16LE + + */ + static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { while (*inbytesleft >= 1 && *outbytesleft >= 2) { + if (((*inbuf)[0] & 0x7F) != (*inbuf)[0]) { + /* If this is multi-byte, then it isn't legal ASCII */ + errno = EILSEQ; + return -1; + } (*outbuf)[0] = (*inbuf)[0]; (*outbuf)[1] = 0; (*inbytesleft) -= 1; @@ -393,14 +408,26 @@ static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft, return 0; } +/* + this takes a UTF16 sequence and produces an ASCII sequence + + The first 127 codepoints of ASCII 
matches the first 127 codepoints + of unicode, and so can be read directly from the first byte of UTF16LE + + */ static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { int ir_count=0; while (*inbytesleft >= 2 && *outbytesleft >= 1) { - (*outbuf)[0] = (*inbuf)[0] & 0x7F; - if ((*inbuf)[1]) ir_count++; + if (((*inbuf)[0] & 0x7F) != (*inbuf)[0] || + (*inbuf)[1] != 0) { + /* If this is multi-byte, then it isn't legal ASCII */ + errno = EILSEQ; + return -1; + } + (*outbuf)[0] = (*inbuf)[0]; (*inbytesleft) -= 2; (*outbytesleft) -= 1; (*inbuf) += 2; @@ -420,6 +447,40 @@ static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft, return ir_count; } +/* + this takes a latin1/ISO-8859-1 sequence and produces a UTF16 sequence + + The first 256 codepoints of latin1 matches the first 256 codepoints + of unicode, and so can be put into the first byte of UTF16LE + + */ +static size_t latin1_pull(void *cd, const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + while (*inbytesleft >= 1 && *outbytesleft >= 2) { + (*outbuf)[0] = (*inbuf)[0]; + (*outbuf)[1] = 0; + (*inbytesleft) -= 1; + (*outbytesleft) -= 2; + (*inbuf) += 1; + (*outbuf) += 2; + } + + if (*inbytesleft > 0) { + errno = E2BIG; + return -1; + } + + return 0; +} + +/* + this takes a UTF16 sequence and produces a latin1/ISO-8859-1 sequence + + The first 256 codepoints of latin1 matches the first 256 codepoints + of unicode, and so can be read directly from the first byte of UTF16LE + + */ static size_t latin1_push(void *cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { @@ -427,7 +488,11 @@ static size_t latin1_push(void *cd, const char **inbuf, size_t *inbytesleft, while (*inbytesleft >= 2 && *outbytesleft >= 1) { (*outbuf)[0] = (*inbuf)[0]; - if ((*inbuf)[1]) ir_count++; + if ((*inbuf)[1] != 0) { + /* If this is multi-byte, then it isn't legal latin1 */ + errno = EILSEQ; + return -1; + 
} (*inbytesleft) -= 2; (*outbytesleft) -= 1; (*inbuf) += 2; |