diff options
author | Andrew Tridgell <tridge@samba.org> | 2001-07-22 07:38:32 +0000 |
---|---|---|
committer | Andrew Tridgell <tridge@samba.org> | 2001-07-22 07:38:32 +0000 |
commit | 49514266131aa55b4a97f02b39b2d5e5c1f625a2 (patch) | |
tree | 81cc9bf796345a882305af58bd18caa705d4e774 /source3 | |
parent | 512351db92b4e13c2944f740090ec58730d8fa06 (diff) | |
download | samba-49514266131aa55b4a97f02b39b2d5e5c1f625a2.tar.gz samba-49514266131aa55b4a97f02b39b2d5e5c1f625a2.tar.bz2 samba-49514266131aa55b4a97f02b39b2d5e5c1f625a2.zip |
changed the iconv interface to go via ucs2 for all conversions. This
fixes some problems with some character sets and allows for using
internal charsets in conjunction with iconv charsets
this makes us slower but more correct. speed will come later.
(This used to be commit 594f84b4e39182dcf344c02dc0185376a2726395)
Diffstat (limited to 'source3')
-rw-r--r-- | source3/include/includes.h | 5 | ||||
-rw-r--r-- | source3/include/smb.h | 10 | ||||
-rw-r--r-- | source3/lib/charcnv.c | 2 | ||||
-rw-r--r-- | source3/lib/iconv.c | 257 |
4 files changed, 195 insertions, 79 deletions
diff --git a/source3/include/includes.h b/source3/include/includes.h index 3d6147f587..51f2ca9ee8 100644 --- a/source3/include/includes.h +++ b/source3/include/includes.h @@ -1029,6 +1029,11 @@ extern int DEBUGLEVEL; #define RTLD_NOW 0 #endif +/* needed for some systems without iconv */ +#ifndef EILSEQ +#define EILSEQ EINVAL +#endif + /* add varargs prototypes with printf checking */ int fdprintf(int , char *, ...) PRINTF_ATTRIBUTE(2,3); #ifndef HAVE_SNPRINTF_DECL diff --git a/source3/include/smb.h b/source3/include/smb.h index 091ed2ac66..2e83e1c637 100644 --- a/source3/include/smb.h +++ b/source3/include/smb.h @@ -1687,15 +1687,13 @@ typedef struct { /* generic iconv conversion structure */ typedef struct { - size_t (*direct)(char **inbuf, size_t *inbytesleft, + size_t (*direct)(void *cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); - size_t (*pull)(char **inbuf, size_t *inbytesleft, + size_t (*pull)(void *cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); - size_t (*push)(char **inbuf, size_t *inbytesleft, + size_t (*push)(void *cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); -#ifdef HAVE_NATIVE_ICONV - iconv_t cd; -#endif + void *cd_direct, *cd_pull, *cd_push; } *smb_iconv_t; #endif /* _SMB_H */ diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c index 6c7646673d..49c4ba4063 100644 --- a/source3/lib/charcnv.c +++ b/source3/lib/charcnv.c @@ -97,9 +97,7 @@ static size_t convert_string(smb_iconv_t *descriptor, DEBUG(0, ("Required %d, available %d\n", srclen, destlen)); break; -#ifdef EILSEQ case EILSEQ: reason="Illegal myltybyte sequence"; break; -#endif } DEBUG(0,("Conversion error:%s(%s)\n",reason,inbuf)); /* smb_panic(reason); */ diff --git a/source3/lib/iconv.c b/source3/lib/iconv.c index b73ff6ff39..2285d8debf 100644 --- a/source3/lib/iconv.c +++ b/source3/lib/iconv.c @@ -21,13 +21,15 @@ #include "includes.h" -static size_t ascii_pull(char **, size_t *, char **, size_t 
*); -static size_t ascii_push(char **, size_t *, char **, size_t *); -static size_t utf8_pull(char **, size_t *, char **, size_t *); -static size_t utf8_push(char **, size_t *, char **, size_t *); -static size_t weird_pull(char **, size_t *, char **, size_t *); -static size_t weird_push(char **, size_t *, char **, size_t *); -static size_t iconv_copy(char **, size_t *, char **, size_t *); +static size_t ascii_pull(void *,char **, size_t *, char **, size_t *); +static size_t ascii_push(void *,char **, size_t *, char **, size_t *); +static size_t utf8_pull(void *,char **, size_t *, char **, size_t *); +static size_t utf8_push(void *,char **, size_t *, char **, size_t *); +static size_t weird_pull(void *,char **, size_t *, char **, size_t *); +static size_t weird_push(void *,char **, size_t *, char **, size_t *); +static size_t ucs2hex_pull(void *,char **, size_t *, char **, size_t *); +static size_t ucs2hex_push(void *,char **, size_t *, char **, size_t *); +static size_t iconv_copy(void *,char **, size_t *, char **, size_t *); /* for each charset we have a function that pulls from that charset to @@ -35,18 +37,39 @@ static size_t iconv_copy(char **, size_t *, char **, size_t *); */ static struct { char *name; - size_t (*pull)(char **inbuf, size_t *inbytesleft, + size_t (*pull)(void *, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); - size_t (*push)(char **inbuf, size_t *inbytesleft, + size_t (*push)(void *, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); } charsets[] = { {"UCS-2LE", iconv_copy, iconv_copy}, {"UTF8", utf8_pull, utf8_push}, {"ASCII", ascii_pull, ascii_push}, {"WEIRD", weird_pull, weird_push}, + {"UCS2-HEX", ucs2hex_pull, ucs2hex_push}, {NULL, NULL, NULL} }; + +/* if there was an error then reset the internal state, + this ensures that we don't have a shift state remaining for + character sets like SJIS */ +static size_t sys_iconv(void *cd, + char **inbuf, size_t *inbytesleft, + char **outbuf, size_t 
*outbytesleft) +{ +#ifdef HAVE_NATIVE_ICONV + size_t ret = iconv((iconv_t)cd, + inbuf, inbytesleft, + outbuf, outbytesleft); + if (ret == (size_t)-1) iconv(cd, NULL, NULL, NULL, NULL); + return ret; +#else + errno = EINVAL; + return -1; +#endif +} + /* this is a simple portable iconv() implementaion. It only knows about a very small number of character sets - just enough that Samba works @@ -60,36 +83,28 @@ size_t smb_iconv(smb_iconv_t cd, char *bufp = cvtbuf; size_t bufsize; -#ifdef HAVE_NATIVE_ICONV - if (cd->cd) { - size_t ret; - ret = iconv(cd->cd, inbuf, inbytesleft, outbuf, outbytesleft); - - /* if there was an error then reset the internal state, - this ensures that we don't have a shift state remaining for - character sets like SJIS */ - if (ret == (size_t)-1) { - iconv(cd->cd, NULL, NULL, NULL, NULL); - } - return ret; - } -#endif - - /* in most cases we can go direct */ + /* in many cases we can go direct */ if (cd->direct) { - return cd->direct(inbuf, inbytesleft, outbuf, outbytesleft); + return cd->direct(cd->cd_direct, + inbuf, inbytesleft, outbuf, outbytesleft); } + /* otherwise we have to do it chunks at a time */ while (*inbytesleft > 0) { bufp = cvtbuf; bufsize = sizeof(cvtbuf); - if (cd->pull(inbuf, inbytesleft, &bufp, &bufsize) == -1 && - errno != E2BIG) return -1; + + if (cd->pull(cd->cd_pull, + inbuf, inbytesleft, &bufp, &bufsize) == -1 + && errno != E2BIG) return -1; bufp = cvtbuf; bufsize = sizeof(cvtbuf) - bufsize; - if (cd->push(&bufp, &bufsize, outbuf, outbytesleft) == -1) return -1; + + if (cd->push(cd->cd_push, + &bufp, &bufsize, + outbuf, outbytesleft) == -1) return -1; } return 0; @@ -102,9 +117,19 @@ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode) { smb_iconv_t ret; int from, to; -#ifdef HAVE_NATIVE_ICONV - iconv_t cd = NULL; -#endif + + ret = (smb_iconv_t)malloc(sizeof(*ret)); + if (!ret) { + errno = ENOMEM; + return (smb_iconv_t)-1; + } + memset(ret, 0, sizeof(*ret)); + + /* check for the simplest null 
conversion */ + if (strcmp(fromcode, tocode) == 0) { + ret->direct = iconv_copy; + return ret; + } for (from=0; charsets[from].name; from++) { if (strcasecmp(charsets[from].name, fromcode) == 0) break; @@ -113,53 +138,57 @@ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode) if (strcasecmp(charsets[to].name, tocode) == 0) break; } - if (!charsets[from].name || !charsets[to].name) { #ifdef HAVE_NATIVE_ICONV - /* its not builtin - see if iconv() has it */ - cd = iconv_open(tocode, fromcode); - if (!cd) -#endif - { - errno = EINVAL; - return (smb_iconv_t)-1; - } + if (!charsets[from].name) { + ret->pull = sys_iconv; + ret->cd_pull = iconv_open("UCS-2LE", fromcode); + if (!ret->cd_pull) goto failed; } - - ret = (smb_iconv_t)malloc(sizeof(*ret)); - if (!ret) { - errno = ENOMEM; - return (smb_iconv_t)-1; + if (!charsets[to].name) { + ret->push = sys_iconv; + ret->cd_push = iconv_open(tocode, "UCS-2LE"); + if (!ret->cd_push) goto failed; } - memset(ret, 0, sizeof(*ret)); - -#ifdef HAVE_NATIVE_ICONV - /* see if we will be using the native iconv */ - if (cd) { - ret->cd = cd; - return ret; +#else + if (!charsets[from].name || !charsets[to].name) { + goto failed; } #endif - /* check for the simplest null conversion */ - if (from == to) { - ret->direct = iconv_copy; + /* check for conversion to/from ucs2 */ + if (from == 0 && charsets[to].name) { + ret->direct = charsets[to].push; + return ret; + } + if (to == 0 && charsets[from].name) { + ret->direct = charsets[from].pull; return ret; } - /* check for conversion to/from ucs2 */ +#ifdef HAVE_NATIVE_ICONV if (from == 0) { - ret->direct = charsets[to].push; + ret->direct = sys_iconv; + ret->cd_direct = ret->cd_push; + ret->cd_push = NULL; return ret; } if (to == 0) { - ret->direct = charsets[from].pull; + ret->direct = sys_iconv; + ret->cd_direct = ret->cd_pull; + ret->cd_pull = NULL; return ret; } +#endif /* the general case has to go via a buffer */ - ret->pull = charsets[from].pull; - ret->push = 
charsets[to].push; + if (!ret->pull) ret->pull = charsets[from].pull; + if (!ret->push) ret->push = charsets[to].push; return ret; + +failed: + free(ret); + errno = EINVAL; + return (smb_iconv_t)-1; } /* @@ -168,10 +197,11 @@ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode) int smb_iconv_close (smb_iconv_t cd) { #ifdef HAVE_NATIVE_ICONV - if (cd->cd) { - iconv_close(cd->cd); - } + if (cd->cd_direct) iconv_close((iconv_t)cd->cd_direct); + if (cd->cd_pull) iconv_close((iconv_t)cd->cd_pull); + if (cd->cd_push) iconv_close((iconv_t)cd->cd_push); #endif + memset(cd, 0, sizeof(*cd)); free(cd); return 0; @@ -184,7 +214,7 @@ int smb_iconv_close (smb_iconv_t cd) multi-byte character set support for english users ***********************************************************************/ -static size_t ascii_pull(char **inbuf, size_t *inbytesleft, +static size_t ascii_pull(void *cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { while (*inbytesleft >= 1 && *outbytesleft >= 2) { @@ -204,7 +234,7 @@ static size_t ascii_pull(char **inbuf, size_t *inbytesleft, return 0; } -static size_t ascii_push(char **inbuf, size_t *inbytesleft, +static size_t ascii_push(void *cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { int ir_count=0; @@ -232,6 +262,91 @@ static size_t ascii_push(char **inbuf, size_t *inbytesleft, } +static size_t ucs2hex_pull(void *cd, char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + while (*inbytesleft >= 1 && *outbytesleft >= 2) { + unsigned v; + + if ((*inbuf)[0] != '@') { + /* seven bit ascii case */ + (*outbuf)[0] = (*inbuf)[0]; + (*outbuf)[1] = 0; + (*inbytesleft) -= 1; + (*outbytesleft) -= 2; + (*inbuf) += 1; + (*outbuf) += 2; + continue; + } + /* it's a hex character */ + if (*inbytesleft < 5) { + errno = EINVAL; + return -1; + } + + if (sscanf(&(*inbuf)[1], "%04x", &v) != 1) { + errno = EILSEQ; + return -1; + } + + (*outbuf)[0] = v&0xff; + (*outbuf)[1] = 
v>>8; + (*inbytesleft) -= 5; + (*outbytesleft) -= 2; + (*inbuf) += 5; + (*outbuf) += 2; + } + + if (*inbytesleft > 0) { + errno = E2BIG; + return -1; + } + + return 0; +} + +static size_t ucs2hex_push(void *cd, char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + while (*inbytesleft >= 2 && *outbytesleft >= 1) { + char buf[6]; + + if ((*inbuf)[1] == 0 && + ((*inbuf)[0] & 0x80) == 0 && + (*inbuf)[0] != '@') { + (*outbuf)[0] = (*inbuf)[0]; + (*inbytesleft) -= 2; + (*outbytesleft) -= 1; + (*inbuf) += 2; + (*outbuf) += 1; + continue; + } + if (*outbytesleft < 5) { + errno = E2BIG; + return -1; + } + snprintf(buf, 6, "@%04x", SVAL(*inbuf, 0)); + memcpy(*outbuf, buf, 5); + (*inbytesleft) -= 2; + (*outbytesleft) -= 5; + (*inbuf) += 2; + (*outbuf) += 5; + } + + if (*inbytesleft == 1) { + errno = EINVAL; + return -1; + } + + if (*inbytesleft > 1) { + errno = E2BIG; + return -1; + } + + return 0; +} + + /* the "weird" character set is very useful for testing multi-byte support and finding bugs. Don't use on a production system! 
*/ @@ -245,7 +360,7 @@ static struct { {0, NULL} }; -static size_t weird_pull(char **inbuf, size_t *inbytesleft, +static size_t weird_pull(void *cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { while (*inbytesleft >= 1 && *outbytesleft >= 2) { @@ -288,7 +403,7 @@ static size_t weird_pull(char **inbuf, size_t *inbytesleft, return 0; } -static size_t weird_push(char **inbuf, size_t *inbytesleft, +static size_t weird_push(void *cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { int ir_count=0; @@ -337,7 +452,7 @@ static size_t weird_push(char **inbuf, size_t *inbytesleft, return ir_count; } -static size_t iconv_copy(char **inbuf, size_t *inbytesleft, +static size_t iconv_copy(void *cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { int n; @@ -359,7 +474,7 @@ static size_t iconv_copy(char **inbuf, size_t *inbytesleft, return 0; } -static size_t utf8_pull(char **inbuf, size_t *inbytesleft, +static size_t utf8_pull(void *cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { while (*inbytesleft >= 1 && *outbytesleft >= 2) { @@ -406,7 +521,7 @@ badseq: return -1; } -static size_t utf8_push(char **inbuf, size_t *inbytesleft, +static size_t utf8_push(void *cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { while (*inbytesleft >= 2 && *outbytesleft >= 1) { |