lib/charset/charcnv: add next_codepoint_convenience_ext() that accepts input charset.

next_codepoint_convenience() takes a string in CH_UNIX encoding and returns the Unicode codepoint of the next (possibly multibyte) character of the input string. The new next_codepoint_convenience_ext() function adds the encoding of the input string as a parameter. next_codepoint_convenience() now simply calls next_codepoint_convenience_ext() with CH_UNIX as the src_charset argument.
author: Michael Adam <obnox@samba.org> 2010-10-29 22:06:05 +0200
committer: Michael Adam <obnox@samba.org> 2010-11-03 22:45:19 +0000
commit: 5829cb0a984b6f3ee2ef114a4d2ff85f1a5a592b (patch)
tree: e44cbf22f6c4ddfe17e95d8af4950b451db88a16 /lib/util/charset
parent: a50d3638a7440e30a0cbfb3bffe4712ff68e350a (diff)
download: samba-5829cb0a984b6f3ee2ef114a4d2ff85f1a5a592b.tar.gz
samba-5829cb0a984b6f3ee2ef114a4d2ff85f1a5a592b.tar.bz2
samba-5829cb0a984b6f3ee2ef114a4d2ff85f1a5a592b.zip
2 files changed, 37 insertions, 11 deletions
diff --git a/lib/util/charset/charcnv.c b/lib/util/charset/charcnv.c
index e9f6ab0d94..25a54cca13 100644
--- a/lib/util/charset/charcnv.c
+++ b/lib/util/charset/charcnv.c
@@ -373,17 +373,25 @@ _PUBLIC_ bool convert_string_talloc_convenience(TALLOC_CTX *ctx,
 	return true;
 }
 
-/*
-  return the unicode codepoint for the next multi-byte CH_UNIX character
-  in the string
 
-  also return the number of bytes consumed (which tells the caller
-  how many bytes to skip to get to the next CH_UNIX character)
-
-  return INVALID_CODEPOINT if the next character cannot be converted
-*/
-_PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic, 
-				    const char *str, size_t *size)
+/**
+ * Return the unicode codepoint for the next character in the input
+ * string in the given src_charset.
+ * The unicode codepoint (codepoint_t) is an unsigned 32 bit value.
+ *
+ * Also return the number of bytes consumed (which tells the caller
+ * how many bytes to skip to get to the next src_charset-character).
+ *
+ * This is implemented (in the non-ascii-case) by first converting the
+ * next character in the input string to UTF16_LE and then calculating
+ * the unicode codepoint from that.
+ *
+ * Return INVALID_CODEPOINT if the next character cannot be converted.
+ */
+_PUBLIC_ codepoint_t next_codepoint_convenience_ext(
+			struct smb_iconv_convenience *ic,
+			const char *str, charset_t src_charset,
+			size_t *size)
 {
 	/* it cannot occupy more than 4 bytes in UTF16 format */
 	uint8_t buf[4];
@@ -404,7 +412,7 @@ _PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic
 	ilen_orig = strnlen(str, 5);
 	ilen = ilen_orig;
 
-	descriptor = get_conv_handle(ic, CH_UNIX, CH_UTF16);
+	descriptor = get_conv_handle(ic, src_charset, CH_UTF16);
 	if (descriptor == (smb_iconv_t)-1) {
 		*size = 1;
 		return INVALID_CODEPOINT;
@@ -446,6 +454,21 @@ _PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic
 }
 
 /*
+  return the unicode codepoint for the next multi-byte CH_UNIX character
+  in the string
+
+  also return the number of bytes consumed (which tells the caller
+  how many bytes to skip to get to the next CH_UNIX character)
+
+  return INVALID_CODEPOINT if the next character cannot be converted
+*/
+_PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic,
+				    const char *str, size_t *size)
+{
+	return next_codepoint_convenience_ext(ic, str, CH_UNIX, size);
+}
+
+/*
   push a single codepoint into a CH_UNIX string the target string must
   be able to hold the full character, which is guaranteed if it is at
   least 5 bytes in size. The caller may pass less than 5 bytes if they
diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h
index a66e24f2b9..7960631484 100644
--- a/lib/util/charset/charset.h
+++ b/lib/util/charset/charset.h
@@ -172,6 +172,9 @@ codepoint_t next_codepoint(const char *str, size_t *size);
 ssize_t push_codepoint(char *str, codepoint_t c);
 
 /* codepoints */
+codepoint_t next_codepoint_convenience_ext(struct smb_iconv_convenience *ic,
+			    const char *str, charset_t src_charset,
+			    size_t *size);
 codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic, 
 			    const char *str, size_t *size);
 ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic,
author	Michael Adam <obnox@samba.org>	2010-10-29 22:06:05 +0200
committer	Michael Adam <obnox@samba.org>	2010-11-03 22:45:19 +0000
commit	5829cb0a984b6f3ee2ef114a4d2ff85f1a5a592b (patch)
tree	e44cbf22f6c4ddfe17e95d8af4950b451db88a16 /lib/util/charset
parent	a50d3638a7440e30a0cbfb3bffe4712ff68e350a (diff)
download	samba-5829cb0a984b6f3ee2ef114a4d2ff85f1a5a592b.tar.gz samba-5829cb0a984b6f3ee2ef114a4d2ff85f1a5a592b.tar.bz2 samba-5829cb0a984b6f3ee2ef114a4d2ff85f1a5a592b.zip