Use common header file for character set handling in Samba 3 and Samba 4.

author: Jelmer Vernooij <jelmer@samba.org> 2009-03-01 22:24:34 +0100
committer: Jelmer Vernooij <jelmer@samba.org> 2009-03-01 22:24:34 +0100
commit: 7ab8f373c84c328b197f923287163e83371e7ccb (patch)
tree: 8a009647735ccc1cf181d98b01c1084f45e0d711 /lib/util
parent: 94069bd2747a8397308c0b0b384f7bb4edd8f68b (diff)
download: samba-7ab8f373c84c328b197f923287163e83371e7ccb.tar.gz
samba-7ab8f373c84c328b197f923287163e83371e7ccb.tar.bz2
samba-7ab8f373c84c328b197f923287163e83371e7ccb.zip
3 files changed, 122 insertions, 38 deletions
diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h
index 1f24f8985f..655bae7bcd 100644
--- a/lib/util/charset/charset.h
+++ b/lib/util/charset/charset.h
@@ -28,9 +28,16 @@
 #include <talloc.h>
 
 /* this defines the charset types used in samba */
-typedef enum {CH_UTF16=0, CH_UNIX, CH_DOS, CH_UTF8, CH_UTF16BE, CH_UTF16MUNGED} charset_t;
+typedef enum {CH_UTF16LE=0, CH_UTF16=0, CH_UNIX, CH_DISPLAY, CH_DOS, CH_UTF8, CH_UTF16BE, CH_UTF16MUNGED} charset_t;
 
-#define NUM_CHARSETS 6
+#define NUM_CHARSETS 7
+
+/*
+ * SMB UCS2 (16-bit unicode) internal type.
+ * smb_ucs2_t is *always* in little endian format.
+ */
+
+typedef uint16_t smb_ucs2_t;
 
 /*
  *   for each charset we have a function that pulls from that charset to
@@ -51,6 +58,17 @@ typedef uint32_t codepoint_t;
 
 #define INVALID_CODEPOINT ((codepoint_t)-1)
 
+/*
+ * This is an auxiliary struct used by the source/script/gen-8bit-gap.sh script
+ * during generation of an encoding table for a charset module.
+ */
+
+struct charset_gap_table {
+  uint16_t start;
+  uint16_t end;
+  int32_t idx;
+};
+
 
 /* generic iconv conversion structure */
 typedef struct smb_iconv_s {
@@ -106,12 +124,12 @@ bool strhaslower(const char *string);
 char *strrchr_m(const char *s, char c);
 char *strchr_m(const char *s, char c);
 
-ssize_t push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src);
-ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, void **dest, const char *src);
-ssize_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src);
-ssize_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src);
-ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const void *src);
-ssize_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src);
+bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
+bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size);
+bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
+bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
+bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size);
+bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
 ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags);
 ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags);
 
@@ -171,4 +189,81 @@ smb_iconv_t smb_iconv_open_ex(TALLOC_CTX *mem_ctx, const char *tocode,
 void load_case_tables(void);
 bool charset_register_backend(const void *_funcs);
 
+/*
+ *   Define a stub for a charset module which implements an 8-bit encoding with gaps.
+ *   Encoding tables for such a module should be produced from glibc's CHARMAPs
+ *   using the script source/script/gen-8bit-gap.sh.
+ *   CHARSETNAME is the CAPITALIZED charset name.
+ *
+ */
+#define SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP(CHARSETNAME) 					\
+static size_t CHARSETNAME ## _push(void *cd, const char **inbuf, size_t *inbytesleft,			\
+			 char **outbuf, size_t *outbytesleft) 					\
+{ 												\
+	while (*inbytesleft >= 2 && *outbytesleft >= 1) { 					\
+		int i; 										\
+		int done = 0; 									\
+												\
+		uint16 ch = SVAL(*inbuf,0); 							\
+												\
+		for (i=0; from_idx[i].start != 0xffff; i++) {					\
+			if ((from_idx[i].start <= ch) && (from_idx[i].end >= ch)) {		\
+				((unsigned char*)(*outbuf))[0] = from_ucs2[from_idx[i].idx+ch];	\
+				(*inbytesleft) -= 2;						\
+				(*outbytesleft) -= 1;						\
+				(*inbuf)  += 2;							\
+				(*outbuf) += 1;							\
+				done = 1;							\
+				break;								\
+			}									\
+		}										\
+		if (!done) {									\
+			errno = EINVAL;								\
+			return -1;								\
+		}										\
+												\
+	}											\
+												\
+	if (*inbytesleft == 1) {								\
+		errno = EINVAL;									\
+		return -1;									\
+	}											\
+												\
+	if (*inbytesleft > 1) {									\
+		errno = E2BIG;									\
+		return -1;									\
+	}											\
+												\
+	return 0;										\
+}												\
+												\
+static size_t CHARSETNAME ## _pull(void *cd, const char **inbuf, size_t *inbytesleft,				\
+			 char **outbuf, size_t *outbytesleft)					\
+{												\
+	while (*inbytesleft >= 1 && *outbytesleft >= 2) {					\
+		*(uint16*)(*outbuf) = to_ucs2[((unsigned char*)(*inbuf))[0]];			\
+		(*inbytesleft)  -= 1;								\
+		(*outbytesleft) -= 2;								\
+		(*inbuf)  += 1;									\
+		(*outbuf) += 2;									\
+	}											\
+												\
+	if (*inbytesleft > 0) {									\
+		errno = E2BIG;									\
+		return -1;									\
+	}											\
+												\
+	return 0;										\
+}												\
+												\
+struct charset_functions CHARSETNAME ## _functions = 						\
+		{#CHARSETNAME, CHARSETNAME ## _pull, CHARSETNAME ## _push};			\
+												\
+NTSTATUS charset_ ## CHARSETNAME ## _init(void);							\
+NTSTATUS charset_ ## CHARSETNAME ## _init(void)							\
+{												\
+	return smb_register_charset(& CHARSETNAME ## _functions);				\
+}												\
+
+
 #endif /* __CHARSET_H__ */
diff --git a/lib/util/charset/iconv.c b/lib/util/charset/iconv.c
index b6842a49aa..98284ce9bd 100644
--- a/lib/util/charset/iconv.c
+++ b/lib/util/charset/iconv.c
@@ -22,6 +22,7 @@
 #include "../lib/util/dlinklist.h"
 #include "system/iconv.h"
 #include "system/filesys.h"
+#undef strcasecmp
 
 
 /**
diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c
index 13178ea5cc..ec88e784d0 100644
--- a/lib/util/charset/util_unistr.c
+++ b/lib/util/charset/util_unistr.c
@@ -668,13 +668,11 @@ static ssize_t push_ascii(void *dest, const char *src, size_t dest_len, int flag
  * @returns The number of bytes occupied by the string in the destination
  *         or -1 in case of error.
  **/
-_PUBLIC_ ssize_t push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
+_PUBLIC_ bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 {
-	size_t src_len = strlen(src)+1, ret;
+	size_t src_len = strlen(src)+1;
 	*dest = NULL;
-	if (!convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest, &ret, false))
-		return -1;
-	return (ssize_t)ret;
+	return convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest, converted_size, false);
 }
 
 
@@ -781,13 +779,11 @@ static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags
  * @returns The number of bytes occupied by the string in the destination
  *         or -1 in case of error.
  **/
-_PUBLIC_ ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, void **dest, const char *src)
+_PUBLIC_ bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size)
 {
-	size_t src_len = strlen(src)+1, ret;
+	size_t src_len = strlen(src)+1;
 	*dest = NULL;
-	if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, dest, &ret, false))
-		return -1;
-	return ret;
+	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (void **)dest, converted_size, false);
 }
 
 
@@ -799,13 +795,11 @@ _PUBLIC_ ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, void **dest, const char *src)
  * @returns The number of bytes occupied by the string in the destination
  **/
 
-_PUBLIC_ ssize_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
+_PUBLIC_ bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 {
-	size_t src_len = strlen(src)+1, ret;
+	size_t src_len = strlen(src)+1;
 	*dest = NULL;
-	if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, &ret, false))
-		return -1;
-	return ret;
+	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, converted_size, false);
 }
 
 /**
@@ -856,13 +850,11 @@ static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src
  * @returns The number of bytes occupied by the string in the destination
  **/
 
-_PUBLIC_ ssize_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
+_PUBLIC_ bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 {
-	size_t src_len = strlen(src)+1, ret;
+	size_t src_len = strlen(src)+1;
 	*dest = NULL;
-	if (!convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, &ret, false))
-		return -1;
-	return ret;
+	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 }
 
 /**
@@ -873,13 +865,11 @@ _PUBLIC_ ssize_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src
  * @returns The number of bytes occupied by the string in the destination
  **/
 
-_PUBLIC_ ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const void *src)
+_PUBLIC_ bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size)
 {
-	size_t src_len = utf16_len(src), ret;
+	size_t src_len = utf16_len(src);
 	*dest = NULL;
-	if (!convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest, &ret, false))
-		return -1;
-	return ret;
+	return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 }
 
 /**
@@ -890,13 +880,11 @@ _PUBLIC_ ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const void *src)
  * @returns The number of bytes occupied by the string in the destination
  **/
 
-_PUBLIC_ ssize_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
+_PUBLIC_ bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
 {
-	size_t src_len = strlen(src)+1, ret;
+	size_t src_len = strlen(src)+1;
 	*dest = NULL;
-	if (!convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, &ret, false))
-		return -1;
-	return ret;
+	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
 }
 
 /**
author	Jelmer Vernooij <jelmer@samba.org>	2009-03-01 22:24:34 +0100
committer	Jelmer Vernooij <jelmer@samba.org>	2009-03-01 22:24:34 +0100
commit	7ab8f373c84c328b197f923287163e83371e7ccb (patch)
tree	8a009647735ccc1cf181d98b01c1084f45e0d711 /lib/util
parent	94069bd2747a8397308c0b0b384f7bb4edd8f68b (diff)
download	samba-7ab8f373c84c328b197f923287163e83371e7ccb.tar.gz samba-7ab8f373c84c328b197f923287163e83371e7ccb.tar.bz2 samba-7ab8f373c84c328b197f923287163e83371e7ccb.zip