diff options
Diffstat (limited to 'source4/heimdal/lib/wind')
-rw-r--r-- | source4/heimdal/lib/wind/stringprep.c | 4 | ||||
-rw-r--r-- | source4/heimdal/lib/wind/utf8.c | 179 | ||||
-rw-r--r-- | source4/heimdal/lib/wind/wind.h | 13 | ||||
-rw-r--r-- | source4/heimdal/lib/wind/wind_err.et | 3 | ||||
-rw-r--r-- | source4/heimdal/lib/wind/windlocl.h | 4 |
5 files changed, 143 insertions, 60 deletions
diff --git a/source4/heimdal/lib/wind/stringprep.c b/source4/heimdal/lib/wind/stringprep.c index 0beba76384..7c28fdae1f 100644 --- a/source4/heimdal/lib/wind/stringprep.c +++ b/source4/heimdal/lib/wind/stringprep.c @@ -36,10 +36,10 @@ #endif #include "windlocl.h" #include <stdlib.h> -#include <strings.h> +#include <string.h> #include <errno.h> -RCSID("$Id: stringprep.c 22593 2008-02-12 11:58:01Z lha $"); +RCSID("$Id: stringprep.c 23063 2008-04-21 11:18:04Z lha $"); /** * Process a input UCS4 string according a string-prep profile. diff --git a/source4/heimdal/lib/wind/utf8.c b/source4/heimdal/lib/wind/utf8.c index c49e80522e..544e0fe00d 100644 --- a/source4/heimdal/lib/wind/utf8.c +++ b/source4/heimdal/lib/wind/utf8.c @@ -36,7 +36,68 @@ #endif #include "windlocl.h" -RCSID("$Id: utf8.c 22572 2008-02-05 20:22:39Z lha $"); +RCSID("$Id: utf8.c 23246 2008-06-01 22:29:04Z lha $"); + +static int +utf8toutf32(const unsigned char **pp, uint32_t *out) +{ + const unsigned char *p = *pp; + unsigned c = *p; + + if (c & 0x80) { + if ((c & 0xE0) == 0xC0) { + const unsigned c2 = *++p; + if ((c2 & 0xC0) == 0x80) { + *out = ((c & 0x1F) << 6) + | (c2 & 0x3F); + } else { + return WIND_ERR_INVALID_UTF8; + } + } else if ((c & 0xF0) == 0xE0) { + const unsigned c2 = *++p; + if ((c2 & 0xC0) == 0x80) { + const unsigned c3 = *++p; + if ((c3 & 0xC0) == 0x80) { + *out = ((c & 0x0F) << 12) + | ((c2 & 0x3F) << 6) + | (c3 & 0x3F); + } else { + return WIND_ERR_INVALID_UTF8; + } + } else { + return WIND_ERR_INVALID_UTF8; + } + } else if ((c & 0xF8) == 0xF0) { + const unsigned c2 = *++p; + if ((c2 & 0xC0) == 0x80) { + const unsigned c3 = *++p; + if ((c3 & 0xC0) == 0x80) { + const unsigned c4 = *++p; + if ((c4 & 0xC0) == 0x80) { + *out = ((c & 0x07) << 18) + | ((c2 & 0x3F) << 12) + | ((c3 & 0x3F) << 6) + | (c4 & 0x3F); + } else { + return WIND_ERR_INVALID_UTF8; + } + } else { + return WIND_ERR_INVALID_UTF8; + } + } else { + return WIND_ERR_INVALID_UTF8; + } + } else { + return WIND_ERR_INVALID_UTF8; + } + } else { + *out = c; + } + + *pp = p; + + return 0; +} /** * Convert an UTF-8 string to an UCS4 string. @@ -59,60 +120,15 @@ wind_utf8ucs4(const char *in, uint32_t *out, size_t *out_len) { const unsigned char *p; size_t o = 0; + int ret; for (p = (const unsigned char *)in; *p != '\0'; ++p) { - unsigned c = *p; uint32_t u; - if (c & 0x80) { - if ((c & 0xE0) == 0xC0) { - const unsigned c2 = *++p; - if ((c2 & 0xC0) == 0x80) { - u = ((c & 0x1F) << 6) - | (c2 & 0x3F); - } else { - return WIND_ERR_INVALID_UTF8; - } - } else if ((c & 0xF0) == 0xE0) { - const unsigned c2 = *++p; - if ((c2 & 0xC0) == 0x80) { - const unsigned c3 = *++p; - if ((c3 & 0xC0) == 0x80) { - u = ((c & 0x0F) << 12) - | ((c2 & 0x3F) << 6) - | (c3 & 0x3F); - } else { - return WIND_ERR_INVALID_UTF8; - } - } else { - return WIND_ERR_INVALID_UTF8; - } - } else if ((c & 0xF8) == 0xF0) { - const unsigned c2 = *++p; - if ((c2 & 0xC0) == 0x80) { - const unsigned c3 = *++p; - if ((c3 & 0xC0) == 0x80) { - const unsigned c4 = *++p; - if ((c4 & 0xC0) == 0x80) { - u = ((c & 0x07) << 18) - | ((c2 & 0x3F) << 12) - | ((c3 & 0x3F) << 6) - | (c4 & 0x3F); - } else { - return WIND_ERR_INVALID_UTF8; - } - } else { - return WIND_ERR_INVALID_UTF8; - } - } else { - return WIND_ERR_INVALID_UTF8; - } - } else { - return WIND_ERR_INVALID_UTF8; - } - } else { - u = c; - } + ret = utf8toutf32(&p, &u); + if (ret) + return ret; + if (out) { if (o >= *out_len) return WIND_ERR_OVERRUN; @@ -365,6 +381,67 @@ wind_ucs2write(const uint16_t *in, size_t in_len, unsigned int *flags, /** + * Convert an UTF-8 string to an UCS2 string. + * + * @param in an UTF-8 string to convert. + * @param out the resulting UCS2 strint, must be at least + * wind_utf8ucs2_length() long. If out is NULL, the function will + * calculate the needed space for the out variable (just like + * wind_utf8ucs2_length()). + * @param out_len before processing out_len should be the length of + * the out variable, after processing it will be the length of the out + * string. + * + * @return returns 0 on success, an wind error code otherwise + * @ingroup wind + */ + +int +wind_utf8ucs2(const char *in, uint16_t *out, size_t *out_len) +{ + const unsigned char *p; + size_t o = 0; + int ret; + + for (p = (const unsigned char *)in; *p != '\0'; ++p) { + uint32_t u; + + ret = utf8toutf32(&p, &u); + if (ret) + return ret; + + if (u & 0xffff0000) + return WIND_ERR_NOT_UTF16; + + if (out) { + if (o >= *out_len) + return WIND_ERR_OVERRUN; + out[o] = u; + } + o++; + } + *out_len = o; + return 0; +} + +/** + * Calculate the length of from converting a UTF-8 string to a UCS2 + * string. + * + * @param in an UTF-8 string to convert. + * @param out_len the length of the resulting UCS4 string. + * + * @return returns 0 on success, an wind error code otherwise + * @ingroup wind + */ + +int +wind_utf8ucs2_length(const char *in, size_t *out_len) +{ + return wind_utf8ucs2(in, NULL, out_len); +} + +/** * Convert an UCS2 string to a UTF-8 string. * * @param in an UCS2 string to convert. diff --git a/source4/heimdal/lib/wind/wind.h b/source4/heimdal/lib/wind/wind.h index 6921b619f5..3120e87da5 100644 --- a/source4/heimdal/lib/wind/wind.h +++ b/source4/heimdal/lib/wind/wind.h @@ -31,13 +31,13 @@ * SUCH DAMAGE. */ -/* $Id: wind.h 22595 2008-02-12 11:59:05Z lha $ */ +/* $Id: wind.h 23233 2008-06-01 22:25:25Z lha $ */ #ifndef _WIND_H_ #define _WIND_H_ #include <stddef.h> -#include <stdint.h> +#include <krb5-types.h> #include <wind_err.h> @@ -58,9 +58,9 @@ typedef unsigned int wind_profile_flags; #define WIND_RW_BE 2 #define WIND_RW_BOM 4 -int wind_stringprep(const unsigned *in, size_t in_len, - unsigned *out, size_t *out_len, - wind_profile_flags flags); +int wind_stringprep(const uint32_t *, size_t, + uint32_t *, size_t *, + wind_profile_flags); int wind_profile(const char *, wind_profile_flags *); int wind_punycode_label_toascii(const uint32_t *, size_t, @@ -72,6 +72,9 @@ int wind_utf8ucs4_length(const char *, size_t *); int wind_ucs4utf8(const uint32_t *, size_t, char *, size_t *); int wind_ucs4utf8_length(const uint32_t *, size_t, size_t *); +int wind_utf8ucs2(const char *, uint16_t *, size_t *); +int wind_utf8ucs2_length(const char *, size_t *); + int wind_ucs2utf8(const uint16_t *, size_t, char *, size_t *); int wind_ucs2utf8_length(const uint16_t *, size_t, size_t *); diff --git a/source4/heimdal/lib/wind/wind_err.et b/source4/heimdal/lib/wind/wind_err.et index 025c402790..65bdff992f 100644 --- a/source4/heimdal/lib/wind/wind_err.et +++ b/source4/heimdal/lib/wind/wind_err.et @@ -3,7 +3,7 @@ # # This might look like a com_err file, but is not # -id "$Id: wind_err.et 22559 2008-02-03 16:35:07Z lha $" +id "$Id: wind_err.et 23233 2008-06-01 22:25:25Z lha $" error_table wind @@ -18,5 +18,6 @@ error_code INVALID_UTF8, "Invalid UTF-8 combination in string" error_code INVALID_UTF16, "Invalid UTF-16 combination in string" error_code INVALID_UTF32, "Invalid UTF-32 combination in string" error_code NO_BOM, "No byte order mark (BOM) in string" +error_code NOT_UTF16, "Code can't be represented as UTF-16" end diff --git a/source4/heimdal/lib/wind/windlocl.h b/source4/heimdal/lib/wind/windlocl.h index 02e8c46481..009a4ae94a 100644 --- a/source4/heimdal/lib/wind/windlocl.h +++ b/source4/heimdal/lib/wind/windlocl.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. */ -/* $Id: windlocl.h 22582 2008-02-11 20:43:50Z lha $ */ +/* $Id: windlocl.h 23187 2008-05-23 15:04:07Z lha $ */ #ifndef _WINDLOCL_H_ #define _WINDLOCL_H_ @@ -40,6 +40,8 @@ #include <config.h> #endif +#include <krb5-types.h> + #include "wind.h" #include "wind_err.h" |