From 7ab8f373c84c328b197f923287163e83371e7ccb Mon Sep 17 00:00:00 2001 From: Jelmer Vernooij Date: Sun, 1 Mar 2009 22:24:34 +0100 Subject: Use common header file for character set handling in Samba 3 and Samba 4. --- lib/util/charset/charset.h | 111 ++++++++++++++++++++++++++++++++++++++--- lib/util/charset/iconv.c | 1 + lib/util/charset/util_unistr.c | 48 +++++++----------- 3 files changed, 122 insertions(+), 38 deletions(-) (limited to 'lib') diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h index 1f24f8985f..655bae7bcd 100644 --- a/lib/util/charset/charset.h +++ b/lib/util/charset/charset.h @@ -28,9 +28,16 @@ #include <talloc.h> /* this defines the charset types used in samba */ -typedef enum {CH_UTF16=0, CH_UNIX, CH_DOS, CH_UTF8, CH_UTF16BE, CH_UTF16MUNGED} charset_t; +typedef enum {CH_UTF16LE=0, CH_UTF16=0, CH_UNIX, CH_DISPLAY, CH_DOS, CH_UTF8, CH_UTF16BE, CH_UTF16MUNGED} charset_t; -#define NUM_CHARSETS 6 +#define NUM_CHARSETS 7 + +/* + * SMB UCS2 (16-bit unicode) internal type. + * smb_ucs2_t is *always* in little endian format.
+ */ + +typedef uint16_t smb_ucs2_t; /* * for each charset we have a function that pulls from that charset to @@ -51,6 +58,17 @@ typedef uint32_t codepoint_t; #define INVALID_CODEPOINT ((codepoint_t)-1) +/* + * This is an auxiliary struct used by the source/script/gen-8bit-gap.sh script + * during generation of an encoding table for a charset module + * */ + +struct charset_gap_table { + uint16_t start; + uint16_t end; + int32_t idx; +}; + /* generic iconv conversion structure */ typedef struct smb_iconv_s { @@ -106,12 +124,12 @@ bool strhaslower(const char *string); char *strrchr_m(const char *s, char c); char *strchr_m(const char *s, char c); -ssize_t push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src); -ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, void **dest, const char *src); -ssize_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src); -ssize_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src); -ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const void *src); -ssize_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src); +bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size); +bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size); +bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size); +bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size); +bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size); +bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size); ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags); ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags); @@ -171,4 +189,81 @@ smb_iconv_t smb_iconv_open_ex(TALLOC_CTX *mem_ctx, const char *tocode, void load_case_tables(void); bool
charset_register_backend(const void *_funcs); +/* + * Define stub for charset module which implements 8-bit encoding with gaps. + * Encoding tables for such module should be produced from glibc's CHARMAPs + * using script source/script/gen-8bit-gap.sh + * CHARSETNAME is CAPITALIZED charset name + * + * */ +#define SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP(CHARSETNAME) \ +static size_t CHARSETNAME ## _push(void *cd, const char **inbuf, size_t *inbytesleft, \ + char **outbuf, size_t *outbytesleft) \ +{ \ + while (*inbytesleft >= 2 && *outbytesleft >= 1) { \ + int i; \ + int done = 0; \ + \ + uint16 ch = SVAL(*inbuf,0); \ + \ + for (i=0; from_idx[i].start != 0xffff; i++) { \ + if ((from_idx[i].start <= ch) && (from_idx[i].end >= ch)) { \ + ((unsigned char*)(*outbuf))[0] = from_ucs2[from_idx[i].idx+ch]; \ + (*inbytesleft) -= 2; \ + (*outbytesleft) -= 1; \ + (*inbuf) += 2; \ + (*outbuf) += 1; \ + done = 1; \ + break; \ + } \ + } \ + if (!done) { \ + errno = EINVAL; \ + return -1; \ + } \ + \ + } \ + \ + if (*inbytesleft == 1) { \ + errno = EINVAL; \ + return -1; \ + } \ + \ + if (*inbytesleft > 1) { \ + errno = E2BIG; \ + return -1; \ + } \ + \ + return 0; \ +} \ + \ +static size_t CHARSETNAME ## _pull(void *cd, const char **inbuf, size_t *inbytesleft, \ + char **outbuf, size_t *outbytesleft) \ +{ \ + while (*inbytesleft >= 1 && *outbytesleft >= 2) { \ + *(uint16*)(*outbuf) = to_ucs2[((unsigned char*)(*inbuf))[0]]; \ + (*inbytesleft) -= 1; \ + (*outbytesleft) -= 2; \ + (*inbuf) += 1; \ + (*outbuf) += 2; \ + } \ + \ + if (*inbytesleft > 0) { \ + errno = E2BIG; \ + return -1; \ + } \ + \ + return 0; \ +} \ + \ +struct charset_functions CHARSETNAME ## _functions = \ + {#CHARSETNAME, CHARSETNAME ## _pull, CHARSETNAME ## _push}; \ + \ +NTSTATUS charset_ ## CHARSETNAME ## _init(void); \ +NTSTATUS charset_ ## CHARSETNAME ## _init(void) \ +{ \ + return smb_register_charset(& CHARSETNAME ## _functions); \ +} \ + + #endif /* __CHARSET_H__ */ diff --git a/lib/util/charset/iconv.c 
b/lib/util/charset/iconv.c index b6842a49aa..98284ce9bd 100644 --- a/lib/util/charset/iconv.c +++ b/lib/util/charset/iconv.c @@ -22,6 +22,7 @@ #include "../lib/util/dlinklist.h" #include "system/iconv.h" #include "system/filesys.h" +#undef strcasecmp /** diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c index 13178ea5cc..ec88e784d0 100644 --- a/lib/util/charset/util_unistr.c +++ b/lib/util/charset/util_unistr.c @@ -668,13 +668,11 @@ static ssize_t push_ascii(void *dest, const char *src, size_t dest_len, int flag * @returns The number of bytes occupied by the string in the destination * or -1 in case of error. **/ -_PUBLIC_ ssize_t push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src) +_PUBLIC_ bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size) { - size_t src_len = strlen(src)+1, ret; + size_t src_len = strlen(src)+1; *dest = NULL; - if (!convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest, &ret, false)) - return -1; - return (ssize_t)ret; + return convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest, converted_size, false); } @@ -781,13 +779,11 @@ static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags * @returns The number of bytes occupied by the string in the destination * or -1 in case of error. 
**/ -_PUBLIC_ ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, void **dest, const char *src) +_PUBLIC_ bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size) { - size_t src_len = strlen(src)+1, ret; + size_t src_len = strlen(src)+1; *dest = NULL; - if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, dest, &ret, false)) - return -1; - return ret; + return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (void **)dest, converted_size, false); } @@ -799,13 +795,11 @@ _PUBLIC_ ssize_t push_ucs2_talloc(TALLOC_CTX *ctx, void **dest, const char *src) * @returns The number of bytes occupied by the string in the destination **/ -_PUBLIC_ ssize_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src) +_PUBLIC_ bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size) { - size_t src_len = strlen(src)+1, ret; + size_t src_len = strlen(src)+1; *dest = NULL; - if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, &ret, false)) - return -1; - return ret; + return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, converted_size, false); } /** @@ -856,13 +850,11 @@ static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src * @returns The number of bytes occupied by the string in the destination **/ -_PUBLIC_ ssize_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src) +_PUBLIC_ bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size) { - size_t src_len = strlen(src)+1, ret; + size_t src_len = strlen(src)+1; *dest = NULL; - if (!convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, &ret, false)) - return -1; - return ret; + return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, converted_size, false); } /** @@ -873,13 +865,11 @@ _PUBLIC_ ssize_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src * @returns 
The number of bytes occupied by the string in the destination **/ -_PUBLIC_ ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const void *src) +_PUBLIC_ bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size) { - size_t src_len = utf16_len(src), ret; + size_t src_len = utf16_len(src); *dest = NULL; - if (!convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest, &ret, false)) - return -1; - return ret; + return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest, converted_size, false); } /** @@ -890,13 +880,11 @@ _PUBLIC_ ssize_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const void *src) * @returns The number of bytes occupied by the string in the destination **/ -_PUBLIC_ ssize_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src) +_PUBLIC_ bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size) { - size_t src_len = strlen(src)+1, ret; + size_t src_len = strlen(src)+1; *dest = NULL; - if (!convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, &ret, false)) - return -1; - return ret; + return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, converted_size, false); } /** -- cgit