summaryrefslogtreecommitdiff
path: root/source4/heimdal/lib/wind/utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'source4/heimdal/lib/wind/utf8.c')
-rw-r--r--source4/heimdal/lib/wind/utf8.c179
1 files changed, 128 insertions, 51 deletions
diff --git a/source4/heimdal/lib/wind/utf8.c b/source4/heimdal/lib/wind/utf8.c
index c49e80522e..544e0fe00d 100644
--- a/source4/heimdal/lib/wind/utf8.c
+++ b/source4/heimdal/lib/wind/utf8.c
@@ -36,7 +36,68 @@
#endif
#include "windlocl.h"
-RCSID("$Id: utf8.c 22572 2008-02-05 20:22:39Z lha $");
+RCSID("$Id: utf8.c 23246 2008-06-01 22:29:04Z lha $");
+
+static int
+utf8toutf32(const unsigned char **pp, uint32_t *out)
+{
+ const unsigned char *p = *pp;
+ unsigned c = *p;
+
+ if (c & 0x80) {
+ if ((c & 0xE0) == 0xC0) {
+ const unsigned c2 = *++p;
+ if ((c2 & 0xC0) == 0x80) {
+ *out = ((c & 0x1F) << 6)
+ | (c2 & 0x3F);
+ } else {
+ return WIND_ERR_INVALID_UTF8;
+ }
+ } else if ((c & 0xF0) == 0xE0) {
+ const unsigned c2 = *++p;
+ if ((c2 & 0xC0) == 0x80) {
+ const unsigned c3 = *++p;
+ if ((c3 & 0xC0) == 0x80) {
+ *out = ((c & 0x0F) << 12)
+ | ((c2 & 0x3F) << 6)
+ | (c3 & 0x3F);
+ } else {
+ return WIND_ERR_INVALID_UTF8;
+ }
+ } else {
+ return WIND_ERR_INVALID_UTF8;
+ }
+ } else if ((c & 0xF8) == 0xF0) {
+ const unsigned c2 = *++p;
+ if ((c2 & 0xC0) == 0x80) {
+ const unsigned c3 = *++p;
+ if ((c3 & 0xC0) == 0x80) {
+ const unsigned c4 = *++p;
+ if ((c4 & 0xC0) == 0x80) {
+ *out = ((c & 0x07) << 18)
+ | ((c2 & 0x3F) << 12)
+ | ((c3 & 0x3F) << 6)
+ | (c4 & 0x3F);
+ } else {
+ return WIND_ERR_INVALID_UTF8;
+ }
+ } else {
+ return WIND_ERR_INVALID_UTF8;
+ }
+ } else {
+ return WIND_ERR_INVALID_UTF8;
+ }
+ } else {
+ return WIND_ERR_INVALID_UTF8;
+ }
+ } else {
+ *out = c;
+ }
+
+ *pp = p;
+
+ return 0;
+}
/**
* Convert an UTF-8 string to an UCS4 string.
@@ -59,60 +120,15 @@ wind_utf8ucs4(const char *in, uint32_t *out, size_t *out_len)
{
const unsigned char *p;
size_t o = 0;
+ int ret;
for (p = (const unsigned char *)in; *p != '\0'; ++p) {
- unsigned c = *p;
uint32_t u;
- if (c & 0x80) {
- if ((c & 0xE0) == 0xC0) {
- const unsigned c2 = *++p;
- if ((c2 & 0xC0) == 0x80) {
- u = ((c & 0x1F) << 6)
- | (c2 & 0x3F);
- } else {
- return WIND_ERR_INVALID_UTF8;
- }
- } else if ((c & 0xF0) == 0xE0) {
- const unsigned c2 = *++p;
- if ((c2 & 0xC0) == 0x80) {
- const unsigned c3 = *++p;
- if ((c3 & 0xC0) == 0x80) {
- u = ((c & 0x0F) << 12)
- | ((c2 & 0x3F) << 6)
- | (c3 & 0x3F);
- } else {
- return WIND_ERR_INVALID_UTF8;
- }
- } else {
- return WIND_ERR_INVALID_UTF8;
- }
- } else if ((c & 0xF8) == 0xF0) {
- const unsigned c2 = *++p;
- if ((c2 & 0xC0) == 0x80) {
- const unsigned c3 = *++p;
- if ((c3 & 0xC0) == 0x80) {
- const unsigned c4 = *++p;
- if ((c4 & 0xC0) == 0x80) {
- u = ((c & 0x07) << 18)
- | ((c2 & 0x3F) << 12)
- | ((c3 & 0x3F) << 6)
- | (c4 & 0x3F);
- } else {
- return WIND_ERR_INVALID_UTF8;
- }
- } else {
- return WIND_ERR_INVALID_UTF8;
- }
- } else {
- return WIND_ERR_INVALID_UTF8;
- }
- } else {
- return WIND_ERR_INVALID_UTF8;
- }
- } else {
- u = c;
- }
+ ret = utf8toutf32(&p, &u);
+ if (ret)
+ return ret;
+
if (out) {
if (o >= *out_len)
return WIND_ERR_OVERRUN;
@@ -365,6 +381,67 @@ wind_ucs2write(const uint16_t *in, size_t in_len, unsigned int *flags,
/**
+ * Convert an UTF-8 string to an UCS2 string.
+ *
+ * @param in an UTF-8 string to convert.
+ * @param out the resulting UCS2 strint, must be at least
+ * wind_utf8ucs2_length() long. If out is NULL, the function will
+ * calculate the needed space for the out variable (just like
+ * wind_utf8ucs2_length()).
+ * @param out_len before processing out_len should be the length of
+ * the out variable, after processing it will be the length of the out
+ * string.
+ *
+ * @return returns 0 on success, an wind error code otherwise
+ * @ingroup wind
+ */
+
+int
+wind_utf8ucs2(const char *in, uint16_t *out, size_t *out_len)
+{
+ const unsigned char *p;
+ size_t o = 0;
+ int ret;
+
+ for (p = (const unsigned char *)in; *p != '\0'; ++p) {
+ uint32_t u;
+
+ ret = utf8toutf32(&p, &u);
+ if (ret)
+ return ret;
+
+ if (u & 0xffff0000)
+ return WIND_ERR_NOT_UTF16;
+
+ if (out) {
+ if (o >= *out_len)
+ return WIND_ERR_OVERRUN;
+ out[o] = u;
+ }
+ o++;
+ }
+ *out_len = o;
+ return 0;
+}
+
+/**
+ * Calculate the length of from converting a UTF-8 string to a UCS2
+ * string.
+ *
+ * @param in an UTF-8 string to convert.
+ * @param out_len the length of the resulting UCS4 string.
+ *
+ * @return returns 0 on success, an wind error code otherwise
+ * @ingroup wind
+ */
+
+int
+wind_utf8ucs2_length(const char *in, size_t *out_len)
+{
+ return wind_utf8ucs2(in, NULL, out_len);
+}
+
+/**
* Convert an UCS2 string to a UTF-8 string.
*
* @param in an UCS2 string to convert.