summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Adam <obnox@samba.org>2010-10-29 22:06:05 +0200
committerMichael Adam <obnox@samba.org>2010-11-03 22:45:19 +0000
commitd41d05ec7b5650759b8b6b388d34516daf0eed83 (patch)
treec021d8bb12e4712437974350a51d4a48393b6043
parentf14d84e2f233dd337bbd9bb0166f8bf0cc8f7a82 (diff)
downloadsamba-d41d05ec7b5650759b8b6b388d34516daf0eed83.tar.gz
samba-d41d05ec7b5650759b8b6b388d34516daf0eed83.tar.bz2
samba-d41d05ec7b5650759b8b6b388d34516daf0eed83.zip
s3:lib/charcnv: add next_codepoint_ext() that accepts input charset.
next_codepoint() takes a string in CH_UNIX encoding and returns the unicode codepoint of the next (possibly multibyte) character of the input string. The new next_codepoint_ext() function adds the encoding of the input string as a parameter. next_codepoint() now only calls next_codepoint_ext() with CH_UNIX as the src_charset argument.
-rw-r--r--source3/include/proto.h2
-rw-r--r--source3/lib/charcnv.c43
2 files changed, 34 insertions, 11 deletions
diff --git a/source3/include/proto.h b/source3/include/proto.h
index 5095b1fde4..f76bc32aaa 100644
--- a/source3/include/proto.h
+++ b/source3/include/proto.h
@@ -480,6 +480,8 @@ size_t pull_string_talloc_fn(const char *function,
size_t src_len,
int flags);
size_t align_string(const void *base_ptr, const char *p, int flags);
+codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
+ size_t *size);
codepoint_t next_codepoint(const char *str, size_t *size);
/* The following definitions come from lib/clobber.c */
diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c
index 9ac9930267..ce01841f7e 100644
--- a/source3/lib/charcnv.c
+++ b/source3/lib/charcnv.c
@@ -1793,17 +1793,23 @@ size_t align_string(const void *base_ptr, const char *p, int flags)
return 0;
}
-/*
- Return the unicode codepoint for the next multi-byte CH_UNIX character
- in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
-
- Also return the number of bytes consumed (which tells the caller
- how many bytes to skip to get to the next CH_UNIX character).
-
- Return INVALID_CODEPOINT if the next character cannot be converted.
-*/
+/**
+ * Return the unicode codepoint for the next character in the input
+ * string in the given src_charset.
+ * The unicode codepoint (codepoint_t) is an unsigned 32 bit value.
+ *
+ * Also return the number of bytes consumed (which tells the caller
+ * how many bytes to skip to get to the next src_charset-character).
+ *
+ * This is implemented (in the non-ascii-case) by first converting the
+ * next character in the input string to UTF16_LE and then calculating
+ * the unicode codepoint from that.
+ *
+ * Return INVALID_CODEPOINT if the next character cannot be converted.
+ */
-codepoint_t next_codepoint(const char *str, size_t *size)
+codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
+ size_t *size)
{
/* It cannot occupy more than 4 bytes in UTF16 format */
uint8_t buf[4];
@@ -1827,7 +1833,7 @@ codepoint_t next_codepoint(const char *str, size_t *size)
lazy_initialize_conv();
- descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
+ descriptor = conv_handles[src_charset][CH_UTF16LE];
if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
*size = 1;
return INVALID_CODEPOINT;
@@ -1877,6 +1883,21 @@ codepoint_t next_codepoint(const char *str, size_t *size)
}
/*
+ Return the unicode codepoint for the next multi-byte CH_UNIX character
+ in the string. The unicode codepoint (codepoint_t) is an unsigned 32 bit value.
+
+ Also return the number of bytes consumed (which tells the caller
+ how many bytes to skip to get to the next CH_UNIX character).
+
+ Return INVALID_CODEPOINT if the next character cannot be converted.
+*/
+
+codepoint_t next_codepoint(const char *str, size_t *size)
+{
+ return next_codepoint_ext(str, CH_UNIX, size);
+}
+
+/*
push a single codepoint into a CH_UNIX string the target string must
be able to hold the full character, which is guaranteed if it is at
least 5 bytes in size. The caller may pass less than 5 bytes if they