From a925f039ee382df0f3be434108416bab0d17e8c0 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 1 Aug 2008 07:08:51 +0200 Subject: heimdal: update to lorikeet-heimdal rev 801 metze (This used to be commit d6c54a66fb23c784ef221a3c1cf766b72bdb5a0b) --- source4/heimdal/lib/wind/utf8.c | 179 ++++++++++++++++++++++++++++------------ 1 file changed, 128 insertions(+), 51 deletions(-) (limited to 'source4/heimdal/lib/wind/utf8.c') diff --git a/source4/heimdal/lib/wind/utf8.c b/source4/heimdal/lib/wind/utf8.c index c49e80522e..544e0fe00d 100644 --- a/source4/heimdal/lib/wind/utf8.c +++ b/source4/heimdal/lib/wind/utf8.c @@ -36,7 +36,68 @@ #endif #include "windlocl.h" -RCSID("$Id: utf8.c 22572 2008-02-05 20:22:39Z lha $"); +RCSID("$Id: utf8.c 23246 2008-06-01 22:29:04Z lha $"); + +static int +utf8toutf32(const unsigned char **pp, uint32_t *out) +{ + const unsigned char *p = *pp; + unsigned c = *p; + + if (c & 0x80) { + if ((c & 0xE0) == 0xC0) { + const unsigned c2 = *++p; + if ((c2 & 0xC0) == 0x80) { + *out = ((c & 0x1F) << 6) + | (c2 & 0x3F); + } else { + return WIND_ERR_INVALID_UTF8; + } + } else if ((c & 0xF0) == 0xE0) { + const unsigned c2 = *++p; + if ((c2 & 0xC0) == 0x80) { + const unsigned c3 = *++p; + if ((c3 & 0xC0) == 0x80) { + *out = ((c & 0x0F) << 12) + | ((c2 & 0x3F) << 6) + | (c3 & 0x3F); + } else { + return WIND_ERR_INVALID_UTF8; + } + } else { + return WIND_ERR_INVALID_UTF8; + } + } else if ((c & 0xF8) == 0xF0) { + const unsigned c2 = *++p; + if ((c2 & 0xC0) == 0x80) { + const unsigned c3 = *++p; + if ((c3 & 0xC0) == 0x80) { + const unsigned c4 = *++p; + if ((c4 & 0xC0) == 0x80) { + *out = ((c & 0x07) << 18) + | ((c2 & 0x3F) << 12) + | ((c3 & 0x3F) << 6) + | (c4 & 0x3F); + } else { + return WIND_ERR_INVALID_UTF8; + } + } else { + return WIND_ERR_INVALID_UTF8; + } + } else { + return WIND_ERR_INVALID_UTF8; + } + } else { + return WIND_ERR_INVALID_UTF8; + } + } else { + *out = c; + } + + *pp = p; + + return 0; +} /** * Convert an UTF-8 string to an UCS4 string. @@ -59,60 +120,15 @@ wind_utf8ucs4(const char *in, uint32_t *out, size_t *out_len) { const unsigned char *p; size_t o = 0; + int ret; for (p = (const unsigned char *)in; *p != '\0'; ++p) { - unsigned c = *p; uint32_t u; - if (c & 0x80) { - if ((c & 0xE0) == 0xC0) { - const unsigned c2 = *++p; - if ((c2 & 0xC0) == 0x80) { - u = ((c & 0x1F) << 6) - | (c2 & 0x3F); - } else { - return WIND_ERR_INVALID_UTF8; - } - } else if ((c & 0xF0) == 0xE0) { - const unsigned c2 = *++p; - if ((c2 & 0xC0) == 0x80) { - const unsigned c3 = *++p; - if ((c3 & 0xC0) == 0x80) { - u = ((c & 0x0F) << 12) - | ((c2 & 0x3F) << 6) - | (c3 & 0x3F); - } else { - return WIND_ERR_INVALID_UTF8; - } - } else { - return WIND_ERR_INVALID_UTF8; - } - } else if ((c & 0xF8) == 0xF0) { - const unsigned c2 = *++p; - if ((c2 & 0xC0) == 0x80) { - const unsigned c3 = *++p; - if ((c3 & 0xC0) == 0x80) { - const unsigned c4 = *++p; - if ((c4 & 0xC0) == 0x80) { - u = ((c & 0x07) << 18) - | ((c2 & 0x3F) << 12) - | ((c3 & 0x3F) << 6) - | (c4 & 0x3F); - } else { - return WIND_ERR_INVALID_UTF8; - } - } else { - return WIND_ERR_INVALID_UTF8; - } - } else { - return WIND_ERR_INVALID_UTF8; - } - } else { - return WIND_ERR_INVALID_UTF8; - } - } else { - u = c; - } + ret = utf8toutf32(&p, &u); + if (ret) + return ret; + if (out) { if (o >= *out_len) return WIND_ERR_OVERRUN; @@ -364,6 +380,67 @@ wind_ucs2write(const uint16_t *in, size_t in_len, unsigned int *flags, } +/** + * Convert an UTF-8 string to an UCS2 string. + * + * @param in an UTF-8 string to convert. + * @param out the resulting UCS2 strint, must be at least + * wind_utf8ucs2_length() long. If out is NULL, the function will + * calculate the needed space for the out variable (just like + * wind_utf8ucs2_length()). + * @param out_len before processing out_len should be the length of + * the out variable, after processing it will be the length of the out + * string. + * + * @return returns 0 on success, an wind error code otherwise + * @ingroup wind + */ + +int +wind_utf8ucs2(const char *in, uint16_t *out, size_t *out_len) +{ + const unsigned char *p; + size_t o = 0; + int ret; + + for (p = (const unsigned char *)in; *p != '\0'; ++p) { + uint32_t u; + + ret = utf8toutf32(&p, &u); + if (ret) + return ret; + + if (u & 0xffff0000) + return WIND_ERR_NOT_UTF16; + + if (out) { + if (o >= *out_len) + return WIND_ERR_OVERRUN; + out[o] = u; + } + o++; + } + *out_len = o; + return 0; +} + +/** + * Calculate the length of from converting a UTF-8 string to a UCS2 + * string. + * + * @param in an UTF-8 string to convert. + * @param out_len the length of the resulting UCS4 string. + * + * @return returns 0 on success, an wind error code otherwise + * @ingroup wind + */ + +int +wind_utf8ucs2_length(const char *in, size_t *out_len) +{ + return wind_utf8ucs2(in, NULL, out_len); +} + /** * Convert an UCS2 string to a UTF-8 string. * -- cgit