summaryrefslogtreecommitdiff
path: root/lib/util/charset
diff options
context:
space:
mode:
authorMichael Adam <obnox@samba.org>2010-10-29 22:06:05 +0200
committerMichael Adam <obnox@samba.org>2010-11-03 22:45:19 +0000
commit5829cb0a984b6f3ee2ef114a4d2ff85f1a5a592b (patch)
treee44cbf22f6c4ddfe17e95d8af4950b451db88a16 /lib/util/charset
parenta50d3638a7440e30a0cbfb3bffe4712ff68e350a (diff)
downloadsamba-5829cb0a984b6f3ee2ef114a4d2ff85f1a5a592b.tar.gz
samba-5829cb0a984b6f3ee2ef114a4d2ff85f1a5a592b.tar.bz2
samba-5829cb0a984b6f3ee2ef114a4d2ff85f1a5a592b.zip
lib/charset/charcnv: add next_codepoint_convenience_ext() that accepts input charset.
next_codepoint_convenience() takes a string in CH_UNIX encoding and returns the Unicode codepoint of the next (possibly multibyte) character of the input string. The new next_codepoint_convenience_ext() function adds the encoding of the input string as a parameter. next_codepoint_convenience() now simply calls next_codepoint_convenience_ext() with CH_UNIX as the src_charset argument.
Diffstat (limited to 'lib/util/charset')
-rw-r--r--lib/util/charset/charcnv.c45
-rw-r--r--lib/util/charset/charset.h3
2 files changed, 37 insertions, 11 deletions
diff --git a/lib/util/charset/charcnv.c b/lib/util/charset/charcnv.c
index e9f6ab0d94..25a54cca13 100644
--- a/lib/util/charset/charcnv.c
+++ b/lib/util/charset/charcnv.c
@@ -373,17 +373,25 @@ _PUBLIC_ bool convert_string_talloc_convenience(TALLOC_CTX *ctx,
return true;
}
-/*
- return the unicode codepoint for the next multi-byte CH_UNIX character
- in the string
- also return the number of bytes consumed (which tells the caller
- how many bytes to skip to get to the next CH_UNIX character)
-
- return INVALID_CODEPOINT if the next character cannot be converted
-*/
-_PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic,
- const char *str, size_t *size)
+/**
+ * Return the unicode codepoint for the next character in the input
+ * string in the given src_charset.
+ * The unicode codepoint (codepoint_t) is an unsigned 32 bit value.
+ *
+ * Also return the number of bytes consumed (which tells the caller
+ * how many bytes to skip to get to the next src_charset-character).
+ *
+ * This is implemented (in the non-ascii-case) by first converting the
+ * next character in the input string to UTF16_LE and then calculating
+ * the unicode codepoint from that.
+ *
+ * Return INVALID_CODEPOINT if the next character cannot be converted.
+ */
+_PUBLIC_ codepoint_t next_codepoint_convenience_ext(
+ struct smb_iconv_convenience *ic,
+ const char *str, charset_t src_charset,
+ size_t *size)
{
/* it cannot occupy more than 4 bytes in UTF16 format */
uint8_t buf[4];
@@ -404,7 +412,7 @@ _PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic
ilen_orig = strnlen(str, 5);
ilen = ilen_orig;
- descriptor = get_conv_handle(ic, CH_UNIX, CH_UTF16);
+ descriptor = get_conv_handle(ic, src_charset, CH_UTF16);
if (descriptor == (smb_iconv_t)-1) {
*size = 1;
return INVALID_CODEPOINT;
@@ -446,6 +454,21 @@ _PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic
}
/*
+ return the unicode codepoint for the next multi-byte CH_UNIX character
+ in the string
+
+ also return the number of bytes consumed (which tells the caller
+ how many bytes to skip to get to the next CH_UNIX character)
+
+ return INVALID_CODEPOINT if the next character cannot be converted
+*/
+_PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic,
+ const char *str, size_t *size)
+{
+ return next_codepoint_convenience_ext(ic, str, CH_UNIX, size);
+}
+
+/*
push a single codepoint into a CH_UNIX string the target string must
be able to hold the full character, which is guaranteed if it is at
least 5 bytes in size. The caller may pass less than 5 bytes if they
diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h
index a66e24f2b9..7960631484 100644
--- a/lib/util/charset/charset.h
+++ b/lib/util/charset/charset.h
@@ -172,6 +172,9 @@ codepoint_t next_codepoint(const char *str, size_t *size);
ssize_t push_codepoint(char *str, codepoint_t c);
/* codepoints */
+codepoint_t next_codepoint_convenience_ext(struct smb_iconv_convenience *ic,
+ const char *str, charset_t src_charset,
+ size_t *size);
codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic,
const char *str, size_t *size);
ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic,