From 87fbb7092b8f8b2f0db0f361c3d625e19de57cd9 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 4 Jul 2001 07:15:53 +0000 Subject: The big character set handling changeover! This commit gets rid of all our old codepage handling and replaces it with iconv. All internal strings in Samba are now in "unix" charset, which may be multi-byte. See internals.doc and my posting to samba-technical for a more complete explanation. (This used to be commit debb471267960e56005a741817ebd227ecfc512a) --- source3/lib/charcnv.c | 584 ++++++++-------- source3/lib/charset.c | 287 -------- source3/lib/cmd_interp.c | 4 - source3/lib/doscalls.c | 345 --------- source3/lib/iconv.c | 346 ++++++++++ source3/lib/kanji.c | 1691 --------------------------------------------- source3/lib/ms_fnmatch.c | 145 ++-- source3/lib/sysacls.c | 2 +- source3/lib/system.c | 170 ----- source3/lib/util.c | 19 +- source3/lib/util_file.c | 25 +- source3/lib/util_str.c | 808 +++++----------------- source3/lib/util_unistr.c | 1676 +++----------------------------------------- 13 files changed, 963 insertions(+), 5139 deletions(-) delete mode 100644 source3/lib/doscalls.c create mode 100644 source3/lib/iconv.c delete mode 100644 source3/lib/kanji.c (limited to 'source3/lib') diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c index 4a3d7090e3..388c05f816 100644 --- a/source3/lib/charcnv.c +++ b/source3/lib/charcnv.c @@ -1,8 +1,9 @@ /* Unix SMB/Netbios implementation. - Version 1.9. + Version 3.0 Character set conversion Extensions - Copyright (C) Andrew Tridgell 1992-1998 + Copyright (C) Igor Vergeichik 2001 + Copyright (C) Andrew Tridgell 2001 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,351 +21,336 @@ */ #include "includes.h" -#define CTRLZ 26 + extern int DEBUGLEVEL; -static char cvtbuf[sizeof(pstring)]; +static pstring cvtbuf; -static BOOL mapsinited = 0; +static smb_iconv_t + ucs2_to_unix=(smb_iconv_t)-1, /*ucs2 (MS) <-> unix format */ + unix_to_ucs2=(smb_iconv_t)-1, + dos_to_unix=(smb_iconv_t)-1, /*unix format <-> dos codepage*/ + unix_to_dos=(smb_iconv_t)-1; /*for those clients who does not support unicode*/ -static char unix2dos[256]; -static char dos2unix[256]; + +/**************************************************************************** + Initialize iconv conversion descriptors +****************************************************************************/ +void init_iconv(char *unix_charset, char *dos_charset) +{ +#define ICONV(descr, from_name, to_name)\ + if(descr!=(smb_iconv_t)-1) smb_iconv_close(descr);\ + descr = smb_iconv_open(to_name, from_name);\ + if(descr==(smb_iconv_t)-1)\ + DEBUG(0,("Conversion from %s to %s is not supported\n",from_name,to_name)); + + if (!unix_charset || !*unix_charset) unix_charset = "ASCII"; + if (!dos_charset || !*dos_charset) dos_charset = "ASCII"; + + ICONV(ucs2_to_unix, "UCS2", unix_charset) + ICONV(unix_to_ucs2, unix_charset, "UCS2") + ICONV(dos_to_unix, dos_charset, unix_charset) + ICONV(unix_to_dos, unix_charset, dos_charset) + +#undef ICONV +} -static void initmaps(void) { - int k; +/**************************************************************************** + Convert string from one encoding to another, makeing error checking etc + Parameters: + descriptor - conversion descriptor, created in init_iconv + src - pointer to source string (multibute or singlebyte) + srclen - length of the source string in bytes + dest - pointer to destination string (multibyte or singlebyte) + destlen - maximal length allowed for string +return the number of bytes occupied in the destination +****************************************************************************/ +static size_t convert_string(smb_iconv_t descriptor, + void const *src, size_t srclen, + void *dest, size_t destlen) +{ + size_t i_len, o_len; + size_t retval; + char* inbuf = (char*)src; + char* outbuf = (char*)dest; + + if (descriptor == (smb_iconv_t)-1) { + /* conversion not supported, use as is */ + int len = MIN(srclen,destlen); + memcpy(dest,src,len); + return len; + } + + i_len=srclen; + o_len=destlen; + retval=smb_iconv(descriptor,&inbuf, &i_len, &outbuf, &o_len); + if(retval==-1) + { char *reason; + switch(errno) + { case EINVAL: reason="Incomplete multybyte sequence"; break; + case E2BIG: reason="No more room"; + DEBUG(0, ("Required %d, available %d\n", + srclen, destlen)); + break; + case EILSEQ: reason="Illegal myltybyte sequence"; break; + } + DEBUG(0,("Conversion error:%s(%s)\n",reason,inbuf)); + /* smb_panic(reason); */ + } + return destlen-o_len; +} - for (k = 0; k < 256; k++) unix2dos[k] = k; - for (k = 0; k < 256; k++) dos2unix[k] = k; +int unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen) +{ + int size,len; + smb_ucs2_t *buffer=(smb_ucs2_t*)cvtbuf; + size=convert_string(unix_to_ucs2, src, srclen, buffer, sizeof(cvtbuf)); + len=size/2; + strupper_w(buffer); + return convert_string(ucs2_to_unix, buffer, size, dest, destlen); +} - mapsinited = True; +int unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen) +{ + int size,len; + smb_ucs2_t *buffer=(smb_ucs2_t*)cvtbuf; + size=convert_string(unix_to_ucs2, src, srclen, buffer, sizeof(cvtbuf)); + len=size/2; + strlower_w(buffer); + return convert_string(ucs2_to_unix, buffer, size, dest, destlen); } -static void update_map(char *str) { - char *p; - for (p = str; *p; p++) { - if (p[1]) { - unix2dos[(unsigned char)*p] = p[1]; - dos2unix[(unsigned char)p[1]] = *p; - p++; - } - } +int ucs2_align(const void *base_ptr, const void *p, int flags) +{ + if (flags & (STR_NOALIGN|STR_ASCII)) return 0; + return PTR_DIFF(p, base_ptr) & 1; } -static void setupmaps(void) + +/**************************************************************************** +copy a string from a char* unix src to a dos codepage string destination +return the number of bytes occupied by the string in the destination +flags can have: + STR_TERMINATE means include the null termination + STR_UPPER means uppercase in the destination +dest_len is the maximum length allowed in the destination. If dest_len +is -1 then no maxiumum is used +****************************************************************************/ +int push_ascii(void *dest, const char *src, int dest_len, int flags) { - int i; - if (!mapsinited) initmaps(); - - /* Do not map undefined characters to some accidental code */ - for (i = 128; i < 256; i++) - { - unix2dos[i] = CTRLZ; - dos2unix[i] = CTRLZ; - } -} + int src_len = strlen(src); + pstring tmpbuf; -static void init_iso8859_1(int codepage) { - - setupmaps(); - - if (codepage == 437) { - /* MSDOS Code Page 437 -> ISO-8859-1 */ - update_map("\xA1\xAD\xA2\x98\xA3\x9C\xA4\xED\xA5\x9D\xA6\xB3\xA7\xEE"); - update_map("\xAA\xA6\xAB\xAE\xAC\xAA\xAE\xE9\xAF\xC4"); - update_map("\xB0\xF8\xB1\xF1\xB2\xFD\xB5\xE6\xB7\xFA\xBA\xA7\xBC\xAC\xBD\xAB\xBF\xA8"); - update_map("\xC0\x85\xC1\xA0\xC2\x83\xC4\x8E\xC5\x8F\xC6\x92\xC7\x80\xC8\x8A"); - update_map("\xC9\x90\xCA\x88\xCB\x89\xCC\x8D\xCD\xA1\xCE\x8C\xCF\x8B"); - update_map("\xD1\xA5\xD2\x96\xD3\xA2\xD4\x93\xD6\x99\xD9\x97\xDA\xA3\xDB\x96\xDC\x9A\xDF\xE1"); - update_map("\xE0\x85\xE1\xA0\xE2\x83\xE4\x84\xE5\x86\xE6\x91\xE7\x87\xE8\x8A\xE9\x82\xEA\x88\xEB\x89\xEC\x8D\xED\xA1\xEE\x8C\xEF\x8B"); - update_map("\xF0\xEB\xF1\xA4\xF2\x95\xF3\xA2\xF4\x93\xF6\x94\xF7\xF6\xF8\xED\xF9\x97\xFA\xA3\xFB\x96\xFC\x81\xFF\x98"); - } else { - /* MSDOS Code Page 850 -> ISO-8859-1 */ - update_map("\240\377\241\255\242\275\243\234\244\317\245\276\246\335\247\365"); - update_map("\250\371\251\270\252\246\253\256\254\252\255\360\256\251\257\356"); - update_map("\260\370\261\361\262\375\263\374\264\357\265\346\266\364\267\372"); - update_map("\270\367\271\373\272\247\273\257\274\254\275\253\276\363\277\250"); - update_map("\300\267\301\265\302\266\303\307\304\216\305\217\306\222\307\200"); - update_map("\310\324\311\220\312\322\313\323\314\336\315\326\316\327\317\330"); - update_map("\320\321\321\245\322\343\323\340\324\342\325\345\326\231\327\236"); - update_map("\330\235\331\353\332\351\333\352\334\232\335\355\336\350\337\341"); - update_map("\340\205\341\240\342\203\343\306\344\204\345\206\346\221\347\207"); - update_map("\350\212\351\202\352\210\353\211\354\215\355\241\356\214\357\213"); - update_map("\360\320\361\244\362\225\363\242\364\223\365\344\366\224\367\366"); - update_map("\370\233\371\227\372\243\373\226\374\201\375\354\376\347\377\230"); - } -} + /* treat a pstring as "unlimited" length */ + if (dest_len == -1) { + dest_len = sizeof(pstring); + } -static void init_iso8859_15(int codepage) { - - setupmaps(); - - - if (codepage == 775) { - /* MSDOS Code Page 775 -> ISO-8859-15 this is for estonian */ -update_map("\240\377\242\226\243\234\246\276\247\365"); -update_map("\250\325\251\250\253\256\254\252\255\360\256\251"); -update_map("\260\370\261\361\262\375\263\374\264\317\265\346\266\364\267\372"); -update_map("\270\330\271\373\273\257"); -update_map("\304\216\305\217\306\222"); -update_map("\311\220"); -update_map("\323\340\325\345\326\231\327\236"); -update_map("\330\235\334\232\337\341"); -update_map("\344\204\345\206\346\221"); -update_map("\351\202"); -update_map("\363\242\365\344\366\224\367\366"); -update_map("\370\233\374\201"); - } else { - /* MSDOS Code Page 850 -> ISO-8859-15 */ -update_map("\240\377\241\255\242\275\243\234\244\317\245\276\246\321\247\365"); -update_map("\250\320\251\270\252\246\253\256\254\252\255\360\256\251\257\356"); -update_map("\260\370\261\361\262\375\263\374\264\350\265\346\266\364\267\372"); -update_map("\270\347\271\373\272\247\273\257\274\254\275\253\276\363\277\250"); -update_map("\300\267\301\265\302\266\303\307\304\216\305\217\306\222\307\200"); -update_map("\310\324\311\220\312\322\313\323\314\336\315\326\316\327\317\330"); -update_map("\320\321\321\245\322\343\323\340\324\342\325\345\326\231\327\236"); -update_map("\330\235\331\353\332\351\333\352\334\232\335\355\336\350\337\341"); -update_map("\340\205\341\240\342\203\343\306\344\204\345\206\346\221\347\207"); -update_map("\350\212\351\202\352\210\353\211\354\215\355\241\356\214\357\213"); -update_map("\360\320\361\244\362\225\363\242\364\223\365\344\366\224\367\366"); -update_map("\370\233\371\227\372\243\373\226\374\201\375\354\376\347\377\230"); -} -} + if (flags & STR_UPPER) { + pstrcpy(tmpbuf, src); + strupper(tmpbuf); + src = tmpbuf; + } -/* Init for eastern european languages. */ - -static void init_iso8859_2(void) { - - setupmaps(); - -/* - * Tranlation table created by Petr Hubeny - * Requires client code page = 852 - * and character set = ISO8859-2 in smb.conf - */ - -/* MSDOS Code Page 852 -> ISO-8859-2 */ -update_map("\240\377"); /* Fix for non-breaking space */ -update_map("\241\244\242\364\243\235\244\317\245\225\246\227\247\365"); -update_map("\250\371\251\346\252\270\253\233\254\215\256\246\257\275"); -update_map("\261\245\262\362\263\210\264\357\265\226\266\230\267\363"); -update_map("\270\367\271\347\272\255\273\234\274\253\275\361\276\247\277\276"); -update_map("\300\350\301\265\302\266\303\306\304\216\305\221\306\217\307\200"); -update_map("\310\254\311\220\312\250\313\323\314\267\315\326\316\327\317\322"); -update_map("\320\321\321\343\322\325\323\340\324\342\325\212\326\231\327\236"); -update_map("\330\374\331\336\332\351\333\353\334\232\335\355\336\335\337\341"); -update_map("\340\352\341\240\342\203\343\307\344\204\345\222\346\206\347\207"); -update_map("\350\237\351\202\352\251\353\211\354\330\355\241\356\214\357\324"); -update_map("\360\320\361\344\362\345\363\242\364\223\365\213\366\224\367\366"); -update_map("\370\375\371\205\372\243\373\373\374\201\375\354\376\356\377\372"); + if (flags & STR_TERMINATE) { + src_len++; + } + + return convert_string(unix_to_dos, src, src_len, dest, dest_len); } -/* Init for russian language (iso8859-5) */ +int push_ascii_fstring(void *dest, const char *src) +{ + return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE); +} -/* Added by Max Khon */ +int push_ascii_pstring(void *dest, const char *src) +{ + return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE); +} -static void init_iso8859_5(void) +int push_pstring(void *dest, const char *src) { - setupmaps(); - -/* MSDOS Code Page 866 -> ISO8859-5 */ -update_map("\260\200\261\201\262\202\263\203\264\204\265\205\266\206\267\207"); -update_map("\270\210\271\211\272\212\273\213\274\214\275\215\276\216\277\217"); -update_map("\300\220\301\221\302\222\303\223\304\224\305\225\306\226\307\227"); -update_map("\310\230\311\231\312\232\313\233\314\234\315\235\316\236\317\237"); -update_map("\320\240\321\241\322\242\323\243\324\244\325\245\326\246\327\247"); -update_map("\330\250\331\251\332\252\333\253\334\254\335\255\336\256\337\257"); -update_map("\340\340\341\341\342\342\343\343\344\344\345\345\346\346\347\347"); -update_map("\350\350\351\351\352\352\353\353\354\354\355\355\356\356\357\357"); -update_map("\241\360\361\361\244\362\364\363\247\364\367\365\256\366\376\367"); -update_map("\360\374\240\377"); + return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE); } -/* Added by Antonios Kavarnos (Antonios.Kavarnos@softlab.ece.ntua.gr */ -static void init_iso8859_7(void) +/**************************************************************************** +copy a string from a dos codepage source to a unix char* destination +flags can have: + STR_TERMINATE means the string in src is null terminated +if STR_TERMINATE is set then src_len is ignored +src_len is the length of the source area in bytes +return the number of bytes occupied by the string in src +the resulting string in "dest" is always null terminated +****************************************************************************/ +int pull_ascii(char *dest, const void *src, int dest_len, int src_len, int flags) { - setupmaps(); - -/* MSDOS Code Page 737 -> ISO-8859-7 (Greek-Hellenic) */ - -update_map("\301\200\302\201\303\202\304\203\305\204\306\205\307\206"); -update_map("\310\207\311\210\312\211\313\212\314\213\315\214\316\215\317\216"); -update_map("\320\217\321\220\323\221\324\222\325\223\326\224\327\225"); -update_map("\330\226\331\227"); -update_map("\341\230\342\231\343\232\344\233\345\234\346\235\347\236"); -update_map("\350\237\351\240\352\241\353\242\354\243\355\244\356\245\357\246"); -update_map("\360\247\361\250\362\252\363\251\364\253\365\254\366\255\367\256"); -update_map("\370\257\371\340"); -update_map("\332\364\333\365\334\341\335\342\336\343\337\345"); -update_map("\372\344\373\350\374\346\375\347\376\351"); -update_map("\266\352"); -update_map("\270\353\271\354\272\355\274\356\276\357\277\360"); + int ret; + + if (dest_len == -1) { + dest_len = sizeof(pstring); + } + + if (flags & STR_TERMINATE) src_len = strlen(src)+1; + + ret = convert_string(dos_to_unix, src, src_len, dest, dest_len); + + if (dest_len) dest[MIN(ret, dest_len-1)] = 0; + + return src_len; } -/* Added by Deniz Akkus (akkus@alum.mit.edu) */ +int pull_ascii_pstring(char *dest, const void *src) +{ + return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE); +} -static void init_iso8859_9(void) +int pull_ascii_fstring(char *dest, const void *src) { - setupmaps(); - - /* MSDOS Code Page 857 -> ISO-8859-9 (Turkish) */ - - update_map("\xa0\xff\xa1\xad\xa2\xbd\xa3\x9c\xa4\xcf\xA5\xbe\xa6\xdd\xa7\xf5"); - update_map("\xa8\xf9\xa9\xb8\xaa\xd1\xab\xae\xac\xaa\xad\xf0\xae\xa9\xaf\xee"); - update_map("\xb0\xf8\xb1\xf1\xb2\xfd\xb3\xfc\xb4\xef\xb5\xe6\xb6\xf4\xb7\xfa"); - update_map("\xb8\xf7\xb9\xfb\xba\xd0\xbb\xaf\xbc\xac\xbd\xab\xbe\xf3\xbf\xa8"); - update_map("\xc0\xb7\xc1\xb5\xc2\xb6\xc3\xc7\xc4\x8e\xc5\x8f\xc6\x92\xc7\x80"); - update_map("\xc8\xd4\xc9\x90\xca\xd2\xcb\xd3\xcc\xde\xcd\xd6\xce\xd7\xcf\xd8"); - update_map("\xd0\xa6\xd1\xa5\xd2\xe3\xd3\xe0\xd4\xe2\xd5\xe5\xd6\x99\xd7\xe8"); - update_map("\xd8\x9d\xd9\xeb\xda\xe9\xdb\xea\xdc\x9a\xdd\x98\xde\x9e\xdf\xe1"); - update_map("\xe0\x85\xe1\xa0\xe2\x83\xe3\xc6\xe4\x84\xe5\x86\xe6\x91\xe7\x87"); - update_map("\xe8\x8a\xe9\x82\xea\x88\xeb\x89\xec\xec\xed\xa1\xee\x8c\xef\x8b"); - update_map("\xf0\xa7\xf1\xa4\xf2\x95\xf3\xa2\xf4\x93\xf5\xe4\xf6\x94\xf7\xf6"); - update_map("\xf8\x9b\xf9\x97\xfa\xa3\xfb\x96\xfc\x81\xfd\x8d\xfe\x9f\xff\xed"); + return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE); } -/* init for Baltic Rim */ - -static void init_iso8859_13(void) { - - setupmaps(); - - /* MSDOS Code Page 775 -> ISO-8859-13 */ -update_map("\240\377\241\246\242\226\243\234\244\237\245\367\246\247\247\365"); -update_map("\250\235\251\250\252\212\253\256\254\252\255\360\256\251\257\222"); -update_map("\260\370\261\361\262\375\263\374\264\362\265\346\266\364\267\372"); -update_map("\270\233\271\373\272\213\273\257\274\254\275\253\276\363\277\221"); -update_map("\300\265\301\275\302\240\303\200\304\216\305\217\306\267\307\355"); -update_map("\310\266\311\220\312\215\313\270\314\225\315\350\316\241\317\352"); -update_map("\320\276\321\343\322\356\323\340\324\342\325\345\326\231\327\236"); -update_map("\330\306\331\255\332\227\333\307\334\232\335\243\336\317\337\341"); -update_map("\340\320\341\324\342\203\343\207\344\204\345\206\346\322\347\211"); -update_map("\350\321\351\202\352\245\353\323\354\205\355\351\356\214\357\353"); -update_map("\360\325\361\347\362\354\363\242\364\223\365\344\366\224\367\366"); -update_map("\370\326\371\210\372\230\373\327\374\201\375\244\376\330\377\357"); +/**************************************************************************** +copy a string from a char* src to a unicode destination +return the number of bytes occupied by the string in the destination +flags can have: + STR_TERMINATE means include the null termination + STR_UPPER means uppercase in the destination + STR_NOALIGN means don't do alignment +dest_len is the maximum length allowed in the destination. If dest_len +is -1 then no maxiumum is used +****************************************************************************/ +int push_ucs2(const void *base_ptr, void *dest, const char *src, int dest_len, int flags) +{ + int len=0; + int src_len = strlen(src); + pstring tmpbuf; + + /* treat a pstring as "unlimited" length */ + if (dest_len == -1) { + dest_len = sizeof(pstring); + } + + if (flags & STR_UPPER) { + pstrcpy(tmpbuf, src); + strupper(tmpbuf); + src = tmpbuf; + } + + if (flags & STR_TERMINATE) { + src_len++; + } + + if (ucs2_align(base_ptr, dest, flags)) { + *(char *)dest = 0; + dest = (void *)((char *)dest + 1); + if (dest_len) dest_len--; + len++; + } + + len += convert_string(unix_to_ucs2, src, src_len, dest, dest_len); + return len; } -/* Init for russian language (koi8) */ -static void init_koi8_r(void) +/**************************************************************************** +copy a string from a ucs2 source to a unix char* destination +flags can have: + STR_TERMINATE means the string in src is null terminated + STR_NOALIGN means don't try to align +if STR_TERMINATE is set then src_len is ignored +src_len is the length of the source area in bytes +return the number of bytes occupied by the string in src +the resulting string in "dest" is always null terminated +****************************************************************************/ +int pull_ucs2(const void *base_ptr, char *dest, const void *src, int dest_len, int src_len, int flags) { - setupmaps(); - -/* MSDOS Code Page 866 -> KOI8-R */ -update_map("\200\304\201\263\202\332\203\277\204\300\205\331\206\303\207\264"); -update_map("\210\302\211\301\212\305\213\337\214\334\215\333\216\335\217\336"); -update_map("\220\260\221\261\222\262\223\364\224\376\225\371\226\373\227\367"); -update_map("\230\363\231\362\232\377\233\365\234\370\235\375\236\372\237\366"); -update_map("\240\315\241\272\242\325\243\361\244\326\245\311\246\270\247\267"); -update_map("\250\273\251\324\252\323\253\310\254\276\255\275\256\274\257\306"); -update_map("\260\307\261\314\262\265\263\360\264\266\265\271\266\321\267\322"); -update_map("\270\313\271\317\272\320\273\312\274\330\275\327\276\316\277\374"); -update_map("\300\356\301\240\302\241\303\346\304\244\305\245\306\344\307\243"); -update_map("\310\345\311\250\312\251\313\252\314\253\315\254\316\255\317\256"); -update_map("\320\257\321\357\322\340\323\341\324\342\325\343\326\246\327\242"); -update_map("\330\354\331\353\332\247\333\350\334\355\335\351\336\347\337\352"); -update_map("\340\236\341\200\342\201\343\226\344\204\345\205\346\224\347\203"); -update_map("\350\225\351\210\352\211\353\212\354\213\355\214\356\215\357\216"); -update_map("\360\217\361\237\362\220\363\221\364\222\365\223\366\206\367\202"); -update_map("\370\234\371\233\372\207\373\230\374\235\375\231\376\227\377\232"); -} + int ret; + + if (dest_len == -1) { + dest_len = sizeof(pstring); + } + if (ucs2_align(base_ptr, src, flags)) { + src = (const void *)((const char *)src + 1); + if (src_len > 0) src_len--; + } -/* Init for ROMAN-8 (HP-UX) */ + if (flags & STR_TERMINATE) src_len = strlen_w(src)*2+2; -static void init_roman8(void) { + ret = convert_string(ucs2_to_unix, src, src_len, dest, dest_len); + if (dest_len) dest[MIN(ret, dest_len-1)] = 0; - setupmaps(); + return src_len; +} -/* MSDOS Code Page 850 -> ROMAN8 */ -update_map("\240\377\241\267\242\266\243\324\244\322\245\323\246\327\247\330"); -update_map("\250\357\253\371\255\353\256\352\257\234"); -update_map("\260\356\261\355\262\354\263\370\264\200\265\207\266\245\267\244"); -update_map("\270\255\271\250\272\317\273\234\274\276\275\365\276\237\277\275"); -update_map("\300\203\301\210\302\223\303\226\304\240\305\202\306\242\307\243"); -update_map("\310\205\311\212\312\225\313\227\314\204\315\211\316\224\317\201"); -update_map("\320\217\321\214\322\235\323\222\324\206\325\241\326\233\327\221"); -update_map("\330\216\331\215\332\231\333\232\334\220\335\213\336\341\337\342"); -update_map("\340\265\341\307\342\306\343\321\344\320\345\326\346\336\347\340"); -update_map("\350\343\351\345\352\344\355\351\357\230"); -update_map("\360\350\361\347\362\372\363\346\364\364\365\363\366\360\367\254"); -update_map("\370\253\371\246\372\247\373\256\374\376\375\257\376\361"); +int pull_ucs2_pstring(char *dest, const void *src) +{ + return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE); } -/* - * Convert unix to dos - */ -char *unix2dos_format(char *str,BOOL overwrite) +int pull_ucs2_fstring(char *dest, const void *src) { - char *p; - char *dp; - - if (!mapsinited) - initmaps(); - - if (overwrite) { - for (p = str; *p; p++) - *p = unix2dos[(unsigned char)*p]; - return str; - } else { - for (p = str, dp = cvtbuf;*p && (dp - cvtbuf < sizeof(cvtbuf) - 1); p++,dp++) - *dp = unix2dos[(unsigned char)*p]; - *dp = 0; - return cvtbuf; - } + return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE); } -/* - * Convert dos to unix - */ -char *dos2unix_format(char *str, BOOL overwrite) + +/**************************************************************************** +copy a string from a char* src to a unicode or ascii +dos code page destination choosing unicode or ascii based on the +flags in the SMB buffer starting at base_ptr +return the number of bytes occupied by the string in the destination +flags can have: + STR_TERMINATE means include the null termination + STR_UPPER means uppercase in the destination + STR_ASCII use ascii even with unicode packet + STR_NOALIGN means don't do alignment +dest_len is the maximum length allowed in the destination. If dest_len +is -1 then no maxiumum is used +****************************************************************************/ +int push_string(const void *base_ptr, void *dest, const char *src, int dest_len, int flags) { - char *p; - char *dp; - - if (!mapsinited) - initmaps(); - - if (overwrite) { - for (p = str; *p; p++) - *p = dos2unix[(unsigned char)*p]; - return str; - } else { - for (p = str, dp = cvtbuf;*p && (dp - cvtbuf < sizeof(cvtbuf) - 1); p++,dp++) - *dp = dos2unix[(unsigned char)*p]; - *dp = 0; - return cvtbuf; - } + if (!(flags & STR_ASCII) && \ + ((flags & STR_UNICODE || \ + (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) { + return push_ucs2(base_ptr, dest, src, dest_len, flags); + } + return push_ascii(dest, src, dest_len, flags); } -/* - * Interpret character set. - */ -void interpret_character_set(char *str, int codepage) +/**************************************************************************** +copy a string from a unicode or ascii source (depending on +the packet flags) to a char* destination +flags can have: + STR_TERMINATE means the string in src is null terminated + STR_UNICODE means to force as unicode + STR_ASCII use ascii even with unicode packet + STR_NOALIGN means don't do alignment +if STR_TERMINATE is set then src_len is ignored +src_len is the length of the source area in bytes +return the number of bytes occupied by the string in src +the resulting string in "dest" is always null terminated +****************************************************************************/ +int pull_string(const void *base_ptr, char *dest, const void *src, int dest_len, int src_len, + int flags) +{ + if (!(flags & STR_ASCII) && \ + ((flags & STR_UNICODE || \ + (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) { + return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags); + } + return pull_ascii(dest, src, dest_len, src_len, flags); +} + +int align_string(const void *base_ptr, const char *p, int flags) { - if (strequal (str, "iso8859-1")) { - init_iso8859_1(codepage); - } else if (strequal (str, "iso8859-2")) { - init_iso8859_2(); - } else if (strequal (str, "iso8859-5")) { - init_iso8859_5(); - } else if (strequal (str, "iso8859-7")) { - init_iso8859_7(); - } else if (strequal (str, "iso8859-9")) { - init_iso8859_9(); - } else if (strequal (str, "iso8859-13")) { - init_iso8859_13(); - } else if (strequal (str, "iso8859-15")) { - init_iso8859_15(codepage); - } else if (strequal (str, "koi8-r")) { - init_koi8_r(); - } else if (strequal (str, "roman8")) { - init_roman8(); - } else { - DEBUG(0,("unrecognized character set %s\n", str)); - } - - load_unix_unicode_map(str); + if (!(flags & STR_ASCII) && \ + ((flags & STR_UNICODE || \ + (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) { + return ucs2_align(base_ptr, p, flags); + } + return 0; } diff --git a/source3/lib/charset.c b/source3/lib/charset.c index d699df3e2b..cca5fdd326 100644 --- a/source3/lib/charset.c +++ b/source3/lib/charset.c @@ -111,290 +111,3 @@ char *dos_char_map = xx_dos_char_map; char *upper_char_map = xx_upper_char_map; char *lower_char_map = xx_lower_char_map; -/* - * This code has been extended to deal with ascynchronous mappings - * like MS-DOS Latin US (Code page 437) where things like : - * a acute are capitalized to 'A', but the reverse mapping - * must not hold true. This allows the filename case insensitive - * matching in do_match() to work, as the DOS/Win95/NT client - * uses 'A' as a mask to match against characters like a acute. - * This is the meaning behind the parameters that allow a - * mapping from lower to upper, but not upper to lower. - */ - -static void add_dos_char(int lower, BOOL map_lower_to_upper, - int upper, BOOL map_upper_to_lower) -{ - lower &= 0xff; - upper &= 0xff; - DEBUGADD( 6, ( "Adding chars 0x%x 0x%x (l->u = %s) (u->l = %s)\n", - lower, upper, - map_lower_to_upper ? "True" : "False", - map_upper_to_lower ? "True" : "False" ) ); - if (lower) dos_char_map[lower] = 1; - if (upper) dos_char_map[upper] = 1; - lower_char_map[lower] = (char)lower; /* Define tolower(lower) */ - upper_char_map[upper] = (char)upper; /* Define toupper(upper) */ - if (lower && upper) { - if(map_upper_to_lower) - lower_char_map[upper] = (char)lower; - if(map_lower_to_upper) - upper_char_map[lower] = (char)upper; - } -} - -/**************************************************************************** -initialise the charset arrays -****************************************************************************/ -void charset_initialise(void) -{ - int i; - -#ifdef LC_ALL - /* include in includes.h if available for OS */ - /* we take only standard 7-bit ASCII definitions from ctype */ - setlocale(LC_ALL,"C"); -#endif - - for (i= 0;i<=255;i++) { - dos_char_map[i] = 0; - } - - for (i=0;i<=127;i++) { - if (isalnum(i) || strchr("._^$~!#%&-{}()@'`",(char)i)) - add_dos_char(i,False,0,False); - } - - for (i=0; i<=255; i++) { - char c = (char)i; - upper_char_map[i] = lower_char_map[i] = c; - - /* Some systems have buggy isupper/islower for characters - above 127. Best not to rely on them. */ - if(i < 128) { - if (isupper((int)c)) lower_char_map[i] = tolower(c); - if (islower((int)c)) upper_char_map[i] = toupper(c); - } - } -} - -/**************************************************************************** -load the client codepage. -****************************************************************************/ - -typedef unsigned char (*codepage_p)[4]; - -static codepage_p load_client_codepage( int client_codepage ) -{ - pstring codepage_file_name; - unsigned char buf[8]; - int fd = -1; - SMB_OFF_T size; - codepage_p cp_p = NULL; - SMB_STRUCT_STAT st; - - DEBUG(5, ("load_client_codepage: loading codepage %d.\n", client_codepage)); - - if(strlen(lp_codepagedir()) + 14 > sizeof(codepage_file_name)) - { - DEBUG(0,("load_client_codepage: filename too long to load\n")); - return NULL; - } - - pstrcpy(codepage_file_name, lp_codepagedir()); - pstrcat(codepage_file_name, "/"); - pstrcat(codepage_file_name, "codepage."); - slprintf(&codepage_file_name[strlen(codepage_file_name)], - sizeof(pstring)-(strlen(codepage_file_name)+1)-1, - "%03d", - client_codepage); - - if(sys_stat(codepage_file_name,&st)!=0) - { - DEBUG(0,("load_client_codepage: filename %s does not exist.\n", - codepage_file_name)); - return NULL; - } - - /* Check if it is at least big enough to hold the required - data. Should be 2 byte version, 2 byte codepage, 4 byte length, - plus zero or more bytes of data. Note that the data cannot be more - than 4 * MAXCODEPAGELINES bytes. - */ - size = st.st_size; - - if( size < CODEPAGE_HEADER_SIZE || size > (CODEPAGE_HEADER_SIZE + 4 * MAXCODEPAGELINES)) - { - DEBUG(0,("load_client_codepage: file %s is an incorrect size for a \ -code page file (size=%d).\n", codepage_file_name, (int)size)); - return NULL; - } - - /* Read the first 8 bytes of the codepage file - check - the version number and code page number. All the data - is held in little endian format. - */ - - if((fd = open(codepage_file_name, O_RDONLY)) == -1) - { - DEBUG(0,("load_client_codepage: cannot open file %s. Error was %s\n", - codepage_file_name, strerror(errno))); - return NULL; - } - - if (read(fd, buf, CODEPAGE_HEADER_SIZE)!=CODEPAGE_HEADER_SIZE) - { - DEBUG(0,("load_client_codepage: cannot read header from file %s. Error was %s\n", - codepage_file_name, strerror(errno))); - goto clean_and_exit; - } - - /* Check the version value */ - if(SVAL(buf,CODEPAGE_VERSION_OFFSET) != CODEPAGE_FILE_VERSION_ID) - { - DEBUG(0,("load_client_codepage: filename %s has incorrect version id. \ -Needed %hu, got %hu.\n", - codepage_file_name, (uint16)CODEPAGE_FILE_VERSION_ID, - SVAL(buf,CODEPAGE_VERSION_OFFSET))); - goto clean_and_exit; - } - - /* Check the codepage matches */ - if(SVAL(buf,CODEPAGE_CLIENT_CODEPAGE_OFFSET) != (uint16)client_codepage) - { - DEBUG(0,("load_client_codepage: filename %s has incorrect codepage. \ -Needed %hu, got %hu.\n", - codepage_file_name, (uint16)client_codepage, - SVAL(buf,CODEPAGE_CLIENT_CODEPAGE_OFFSET))); - goto clean_and_exit; - } - - /* Check the length is correct. */ - if(IVAL(buf,CODEPAGE_LENGTH_OFFSET) != (size - CODEPAGE_HEADER_SIZE)) - { - DEBUG(0,("load_client_codepage: filename %s has incorrect size headers. \ -Needed %u, got %u.\n", codepage_file_name, (uint32)(size - CODEPAGE_HEADER_SIZE), - IVAL(buf,CODEPAGE_LENGTH_OFFSET))); - goto clean_and_exit; - } - - size -= CODEPAGE_HEADER_SIZE; /* Remove header */ - - /* Make sure the size is a multiple of 4. */ - if((size % 4 ) != 0) - { - DEBUG(0,("load_client_codepage: filename %s has a codepage size not a \ -multiple of 4.\n", codepage_file_name)); - goto clean_and_exit; - } - - /* Allocate space for the code page file and read it all in. */ - if((cp_p = (codepage_p)malloc( size + 4 )) == NULL) - { - DEBUG(0,("load_client_codepage: malloc fail.\n")); - goto clean_and_exit; - } - - if(read(fd, (char *)cp_p, size)!=size) - { - DEBUG(0,("load_client_codepage: read fail on file %s. Error was %s.\n", - codepage_file_name, strerror(errno))); - goto clean_and_exit; - } - - /* Ensure array is correctly terminated. */ - memset(((char *)cp_p) + size, '\0', 4); - - close(fd); - return cp_p; - -clean_and_exit: - - /* pseudo destructor :-) */ - - if(fd != -1) - close(fd); - if(cp_p) - free((char *)cp_p); - return NULL; -} - -/**************************************************************************** - Initialise the client codepage. -****************************************************************************/ - -void codepage_initialise(int client_codepage) -{ - int i; - static codepage_p cp = NULL; - - if(cp != NULL) - { - DEBUG(6, - ("codepage_initialise: called twice - ignoring second client code page = %d\n", - client_codepage)); - return; - } - - DEBUG(6,("codepage_initialise: client code page = %d\n", client_codepage)); - - /* - * Known client codepages - these can be added to. - */ - cp = load_client_codepage( client_codepage ); - - if(cp == NULL) - { -#ifdef KANJI - DEBUG(6,("codepage_initialise: loading dynamic codepage file %s/codepage.%d \ -for code page %d failed. Using default client codepage 932\n", - lp_codepagedir(), client_codepage, client_codepage)); - cp = cp_932; - client_codepage = KANJI_CODEPAGE; -#else /* KANJI */ - DEBUG(6,("codepage_initialise: loading dynamic codepage file %s/codepage.%d \ -for code page %d failed. Using default client codepage 850\n", - lp_codepagedir(), client_codepage, client_codepage)); - cp = cp_850; - client_codepage = MSDOS_LATIN_1_CODEPAGE; -#endif /* KANJI */ - } - - /* - * Setup the function pointers for the loaded codepage. - */ - initialize_multibyte_vectors( client_codepage ); - - if(cp) - { - for(i = 0; !((cp[i][0] == '\0') && (cp[i][1] == '\0')); i++) - add_dos_char(cp[i][0], (BOOL)cp[i][2], cp[i][1], (BOOL)cp[i][3]); - } - - /* Try and load the unicode map. */ - load_dos_unicode_map(client_codepage); -} - -/******************************************************************* -add characters depending on a string passed by the user -********************************************************************/ -void add_char_string(char *s) -{ - char *extra_chars = (char *)strdup(s); - char *t; - if (!extra_chars) return; - - for (t=strtok(extra_chars," \t\r\n"); t; t=strtok(NULL," \t\r\n")) { - char c1=0,c2=0; - int i1=0,i2=0; - if (isdigit((unsigned char)*t) || (*t)=='-') { - sscanf(t,"%i:%i",&i1,&i2); - add_dos_char(i1,True,i2,True); - } else { - sscanf(t,"%c:%c",&c1,&c2); - add_dos_char((unsigned char)c1,True,(unsigned char)c2, True); - } - } - - free(extra_chars); -} diff --git a/source3/lib/cmd_interp.c b/source3/lib/cmd_interp.c index 20812d3f9a..ef6f94bd49 100644 --- a/source3/lib/cmd_interp.c +++ b/source3/lib/cmd_interp.c @@ -1293,8 +1293,6 @@ int command_main(int argc, char *argv[]) DEBUGLEVEL = 2; - charset_initialise(); - /* add in the internal command set and the various client RPC groups--spoolss, lsa, etc... */ add_command_set(general_commands); @@ -1357,8 +1355,6 @@ int command_main(int argc, char *argv[]) cmd_set_options &= ~CMD_USER; cmd_set_options &= ~CMD_PASS; - codepage_initialise(lp_client_code_page()); - /* parse the command line args init the first connection if possible process a command if passed in on the command line */ diff --git a/source3/lib/doscalls.c b/source3/lib/doscalls.c deleted file mode 100644 index 8d0071dde6..0000000000 --- a/source3/lib/doscalls.c +++ /dev/null @@ -1,345 +0,0 @@ -/* - Unix SMB/Netbios implementation. - Version 1.9. - Samba system utilities - Copyright (C) Jeremy Allison 1992-1998 - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include "includes.h" - -/* - * Wrappers for calls that need to translate to - * DOS/Windows semantics. Note that the pathnames - * in all these functions referred to as 'DOS' names - * are actually in UNIX path format (ie. '/' instead of - * '\' directory separators etc.), but the codepage they - * are in is still the client codepage, hence the 'DOS' - * name. - */ - -extern int DEBUGLEVEL; - -#if 0 /* Use vfs_unlink. */ -/******************************************************************* - Unlink wrapper that calls dos_to_unix. -********************************************************************/ - -int dos_unlink(char *fname) -{ - return(unlink(dos_to_unix(fname,False))); -} -#endif - -/******************************************************************* - Open() wrapper that calls dos_to_unix. -********************************************************************/ - -int dos_open(char *fname,int flags,mode_t mode) -{ - return(sys_open(dos_to_unix(fname,False),flags,mode)); -} - -/******************************************************************* - Opendir() wrapper that calls dos_to_unix. Should use the - vfs_ops->opendir() function instead. -********************************************************************/ - -#if 0 -DIR *dos_opendir(char *dname) -{ - return(opendir(dos_to_unix(dname,False))); -} -#endif - -/******************************************************************* - Readdirname() wrapper that calls unix_to_dos. Should use the - vfs_readdirname() function instead. -********************************************************************/ - -#if 0 -char *dos_readdirname(DIR *p) -{ - char *dname = readdirname(p); - - if (!dname) - return(NULL); - - unix_to_dos(dname, True); - return(dname); -} -#endif - -/******************************************************************* - A chown() wrapper that calls dos_to_unix. -********************************************************************/ - -#if 0 /* Use vfs_chown. */ -int dos_chown(char *fname, uid_t uid, gid_t gid) -{ - return(sys_chown(dos_to_unix(fname,False),uid,gid)); -} -#endif - -/******************************************************************* - A stat() wrapper that calls dos_to_unix. -********************************************************************/ - -int dos_stat(char *fname,SMB_STRUCT_STAT *sbuf) -{ - return(sys_stat(dos_to_unix(fname,False),sbuf)); -} - -/******************************************************************* - An lstat() that calls dos_to_unix. -********************************************************************/ - -int dos_lstat(char *fname,SMB_STRUCT_STAT *sbuf) -{ - return(sys_lstat(dos_to_unix(fname,False),sbuf)); -} - -#if 0 /* VFS */ -/******************************************************************* - Mkdir() that calls dos_to_unix. - Cope with UNIXes that don't allow high order mode bits on mkdir. - Patch from gcarter@lanier.com. - Don't use this call unless you really want to access a file on - disk. Use the vfs_ops.mkdir() function instead. -********************************************************************/ - -int dos_mkdir(char *dname,mode_t mode) -{ - int ret = mkdir(dos_to_unix(dname,False),mode); - if(!ret) - return(dos_chmod(dname,mode)); - else - return ret; -} -#endif - -/******************************************************************* - Rmdir() - call dos_to_unix. -********************************************************************/ - -int dos_rmdir(char *dname) -{ - return(rmdir(dos_to_unix(dname,False))); -} - -#if 0 /* VFS */ -/******************************************************************* - chdir() - call dos_to_unix. -********************************************************************/ - -int dos_chdir(char *dname) -{ - return(chdir(dos_to_unix(dname,False))); -} -#endif - -/******************************************************************* - Utime() - call dos_to_unix. -********************************************************************/ - -int dos_utime(char *fname,struct utimbuf *times) -{ - /* if the modtime is 0 or -1 then ignore the call and - return success */ - if (times->modtime == (time_t)0 || times->modtime == (time_t)-1) - return 0; - - /* if the access time is 0 or -1 then set it to the modtime */ - if (times->actime == (time_t)0 || times->actime == (time_t)-1) - times->actime = times->modtime; - - return(utime(dos_to_unix(fname,False),times)); -} - -/********************************************************* - For rename across filesystems Patch from Warren Birnbaum - -**********************************************************/ - -int copy_reg(char *source, const char *dest) -{ - SMB_STRUCT_STAT source_stats; - int ifd; - int ofd; - char *buf; - int len; /* Number of bytes read into `buf'. */ - - sys_lstat (source, &source_stats); - if (!S_ISREG (source_stats.st_mode)) - return 1; - - if (unlink (dest) && errno != ENOENT) - return 1; - - if((ifd = sys_open (source, O_RDONLY, 0)) < 0) - return 1; - - if((ofd = sys_open (dest, O_WRONLY | O_CREAT | O_TRUNC, 0600)) < 0 ) - { - close (ifd); - return 1; - } - - if((buf = malloc( COPYBUF_SIZE )) == NULL) - { - close (ifd); - close (ofd); - unlink (dest); - return 1; - } - - while ((len = read(ifd, buf, COPYBUF_SIZE)) > 0) - { - if (write_data(ofd, buf, len) < 0) - { - close (ifd); - close (ofd); - unlink (dest); - free(buf); - return 1; - } - } - free(buf); - if (len < 0) - { - close (ifd); - close (ofd); - unlink (dest); - return 1; - } - - if (close (ifd) < 0) - { - close (ofd); - return 1; - } - if (close (ofd) < 0) - return 1; - - /* chown turns off set[ug]id bits for non-root, - so do the chmod last. */ - - /* Try to copy the old file's modtime and access time. */ - { - struct utimbuf tv; - - tv.actime = source_stats.st_atime; - tv.modtime = source_stats.st_mtime; - if (utime (dest, &tv)) - return 1; - } - - /* Try to preserve ownership. For non-root it might fail, but that's ok. - But root probably wants to know, e.g. if NFS disallows it. */ - if (chown (dest, source_stats.st_uid, source_stats.st_gid) - && (errno != EPERM)) - return 1; - - if (chmod (dest, source_stats.st_mode & 07777)) - return 1; - - unlink (source); - return 0; -} - -/******************************************************************* - Rename() - call dos_to_unix. -********************************************************************/ - -int dos_rename(char *from, char *to) -{ - int rcode; - pstring zfrom, zto; - - pstrcpy (zfrom, dos_to_unix (from, False)); - pstrcpy (zto, dos_to_unix (to, False)); - rcode = rename (zfrom, zto); - - if (errno == EXDEV) - { - /* Rename across filesystems needed. */ - rcode = copy_reg (zfrom, zto); - } - return rcode; -} - -/******************************************************************* - Chmod - call dos_to_unix. -********************************************************************/ - -int dos_chmod(char *fname,mode_t mode) -{ - return(chmod(dos_to_unix(fname,False),mode)); -} - -#if 0 /* VFS */ -/******************************************************************* - Getwd - takes a UNIX directory name and returns the name - in dos format. -********************************************************************/ - -char *dos_getwd(char *unix_path) -{ - char *wd; - wd = sys_getwd(unix_path); - if (wd) - unix_to_dos(wd, True); - return wd; -} -#endif /* VFS */ - -/******************************************************************* - Check if a DOS file exists. Use vfs_file_exist function instead. -********************************************************************/ - -#if 0 -BOOL dos_file_exist(char *fname,SMB_STRUCT_STAT *sbuf) -{ - return file_exist(dos_to_unix(fname, False), sbuf); -} -#endif - -/******************************************************************* - Check if a DOS directory exists. -********************************************************************/ - -BOOL dos_directory_exist(char *dname,SMB_STRUCT_STAT *st) -{ - return directory_exist(dos_to_unix(dname, False), st); -} - -/******************************************************************* - Return the modtime of a DOS pathname. -********************************************************************/ - -time_t dos_file_modtime(char *fname) -{ - return file_modtime(dos_to_unix(fname, False)); -} - -/******************************************************************* - Return the file size of a DOS pathname. -********************************************************************/ - -SMB_OFF_T dos_file_size(char *file_name) -{ - return get_file_size(dos_to_unix(file_name, False)); -} diff --git a/source3/lib/iconv.c b/source3/lib/iconv.c new file mode 100644 index 0000000000..a22c84997d --- /dev/null +++ b/source3/lib/iconv.c @@ -0,0 +1,346 @@ +/* + Unix SMB/Netbios implementation. + Version 3.0 + minimal iconv implementation + Copyright (C) Andrew Tridgell 2001 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "includes.h" + +static size_t ascii_pull(char **, size_t *, char **, size_t *); +static size_t ascii_push(char **, size_t *, char **, size_t *); +static size_t weird_pull(char **, size_t *, char **, size_t *); +static size_t weird_push(char **, size_t *, char **, size_t *); +static size_t iconv_copy(char **, size_t *, char **, size_t *); + +/* + for each charset we have a function that pulls from that charset to + a ucs2 buffer, and a function that pushes to a ucs2 buffer +*/ +static struct { + char *name; + size_t (*pull)(char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft); + size_t (*push)(char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft); +} charsets[] = { + {"UCS2", iconv_copy, iconv_copy}, + {"ASCII", ascii_pull, ascii_push}, + {"WEIRD", weird_pull, weird_push}, + {NULL, NULL, NULL} +}; + +/* + this is a simple portable iconv() implementaion. It only knows about + a very small number of character sets - just enough that Samba works + on systems that don't have iconv + */ +size_t smb_iconv(smb_iconv_t cd, + char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + char cvtbuf[2048]; + char *bufp = cvtbuf; + size_t bufsize; + +#ifdef HAVE_NATIVE_ICONV + if (cd->cd) { + return iconv(cd->cd, inbuf, inbytesleft, outbuf, outbytesleft); + } +#endif + + if (!inbuf || ! *inbuf || !outbuf || ! *outbuf) return 0; + + /* in most cases we can go direct */ + if (cd->direct) { + return cd->direct(inbuf, inbytesleft, outbuf, outbytesleft); + } + + /* otherwise we have to do it chunks at a time */ + while (*inbytesleft > 0) { + bufp = cvtbuf; + bufsize = sizeof(cvtbuf); + if (cd->pull(inbuf, inbytesleft, &bufp, &bufsize) == -1 && + errno != E2BIG) return -1; + + bufp = cvtbuf; + bufsize = sizeof(cvtbuf) - bufsize; + if (cd->push(&bufp, &bufsize, outbuf, outbytesleft) == -1) return -1; + } + + return 0; +} + +/* + simple iconv_open() wrapper + */ +smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode) +{ + smb_iconv_t ret; + int from, to; +#ifdef HAVE_NATIVE_ICONV + iconv_t cd = NULL; +#endif + + for (from=0; charsets[from].name; from++) { + if (strcasecmp(charsets[from].name, fromcode) == 0) break; + } + for (to=0; charsets[to].name; to++) { + if (strcasecmp(charsets[to].name, tocode) == 0) break; + } + + if (!charsets[from].name || !charsets[to].name) { +#ifdef HAVE_NATIVE_ICONV + cd = iconv_open(tocode, fromcode); + if (!cd) +#endif + { + errno = EINVAL; + return (smb_iconv_t)-1; + } + } + + ret = (smb_iconv_t)malloc(sizeof(*ret)); + if (!ret) { + errno = ENOMEM; + return (smb_iconv_t)-1; + } + memset(ret, 0, sizeof(*ret)); + +#ifdef HAVE_NATIVE_ICONV + /* see if we wil be using the native iconv */ + if (cd) { + ret->cd = cd; + return ret; + } +#endif + + /* check for the simplest null conversion */ + if (from == to) { + ret->direct = iconv_copy; + return ret; + } + + /* check for conversion to/from ucs2 */ + if (from == 0) { + ret->direct = charsets[to].push; + return ret; + } + if (to == 0) { + ret->direct = charsets[from].pull; + return ret; + } + + /* the general case has to go via a buffer */ + ret->pull = charsets[from].pull; + ret->push = charsets[to].push; + return ret; +} + +/* + simple iconv_close() wrapper +*/ +int smb_iconv_close (smb_iconv_t cd) +{ +#ifdef HAVE_NATIVE_ICONV + if (cd->cd) { + iconv_close(cd->cd); + } +#endif + memset(cd, 0, sizeof(*cd)); + free(cd); + return 0; +} + + +/********************************************************************** + the following functions implement the builtin character sets in Samba + and also the "test" character sets that are designed to test + multi-byte character set support for english users +***********************************************************************/ + +static size_t ascii_pull(char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + while (*inbytesleft >= 1 && *outbytesleft >= 2) { + (*outbuf)[0] = (*inbuf)[0]; + (*outbuf)[1] = 0; + (*inbytesleft) -= 1; + (*outbytesleft) -= 2; + (*inbuf) += 1; + (*outbuf) += 2; + } + + if (*inbytesleft > 0) { + errno = E2BIG; + return -1; + } + + return 0; +} + +static size_t ascii_push(char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + int ir_count=0; + + while (*inbytesleft >= 2 && *outbytesleft >= 1) { + (*outbuf)[0] = (*inbuf)[0]; + if ((*inbuf)[1]) ir_count++; + (*inbytesleft) -= 2; + (*outbytesleft) -= 1; + (*inbuf) += 2; + (*outbuf) += 1; + } + + if (*inbytesleft == 1) { + errno = EINVAL; + return -1; + } + + if (*inbytesleft > 1) { + errno = E2BIG; + return -1; + } + + return ir_count; +} + + +/* the "weird" character set is very useful for testing multi-byte + support and finding bugs. Don't use on a production system! +*/ +static struct { + char from; + char *to; + int len; +} weird_table[] = { + {'q', "^q^", 3}, + {'Q', "^Q^", 3}, + {'x', "\\.q\\.", 5}, + {'X', "\\.Z\\.", 5}, + {0, NULL} +}; + +static size_t weird_pull(char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + while (*inbytesleft >= 1 && *outbytesleft >= 2) { + int i; + for (i=0;weird_table[i].from;i++) { + if (strncmp((*inbuf), + weird_table[i].to, + weird_table[i].len) == 0) { + if (*inbytesleft < weird_table[i].len) { + DEBUG(0,("ERROR: truncated weird string\n")); + smb_panic(__FUNCTION__); + + } else { + (*outbuf)[0] = weird_table[i].from; + (*outbuf)[1] = 0; + (*inbytesleft) -= weird_table[i].len; + (*outbytesleft) -= 2; + (*inbuf) += weird_table[i].len; + (*outbuf) += 2; + goto next; + } + } + } + (*outbuf)[0] = (*inbuf)[0]; + (*outbuf)[1] = 0; + (*inbytesleft) -= 1; + (*outbytesleft) -= 2; + (*inbuf) += 1; + (*outbuf) += 2; + next: + } + + if (*inbytesleft > 0) { + errno = E2BIG; + return -1; + } + + return 0; +} + +static size_t weird_push(char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + int ir_count=0; + + while (*inbytesleft >= 2 && *outbytesleft >= 1) { + int i; + for (i=0;weird_table[i].from;i++) { + if ((*inbuf)[0] == weird_table[i].from && + (*inbuf)[1] == 0) { + if (*outbytesleft < weird_table[i].len) { + DEBUG(0,("No room for weird character\n")); + smb_panic(__FUNCTION__); + } else { + memcpy(*outbuf, weird_table[i].to, + weird_table[i].len); + (*inbytesleft) -= 2; + (*outbytesleft) -= weird_table[i].len; + (*inbuf) += 2; + (*outbuf) += weird_table[i].len; + goto next; + } + } + } + (*outbuf)[0] = (*inbuf)[0]; + if ((*inbuf)[1]) ir_count++; + (*inbytesleft) -= 2; + (*outbytesleft) -= 1; + (*inbuf) += 2; + (*outbuf) += 1; + next: + } + + if (*inbytesleft == 1) { + errno = EINVAL; + return -1; + } + + if (*inbytesleft > 1) { + errno = E2BIG; + return -1; + } + + return ir_count; +} + +static size_t iconv_copy(char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + int n; + + n = MIN(*inbytesleft, *outbytesleft); + + memmove(*outbuf, *inbuf, n); + + (*inbytesleft) -= n; + (*outbytesleft) -= n; + (*inbuf) += n; + (*outbuf) += n; + + if (*inbytesleft > 0) { + errno = E2BIG; + return -1; + } + + return 0; +} diff --git a/source3/lib/kanji.c b/source3/lib/kanji.c deleted file mode 100644 index 39e9933842..0000000000 --- a/source3/lib/kanji.c +++ /dev/null @@ -1,1691 +0,0 @@ -/* - Unix SMB/Netbios implementation. - Version 1.9. - Kanji Extensions - Copyright (C) Andrew Tridgell 1992-1998 - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - Adding for Japanese language by 1994.9.5 - and extend coding system to EUC/SJIS/JIS/HEX at 1994.10.11 - and add all jis codes sequence type at 1995.8.16 - Notes: Hexadecimal code by - Adding features about Machine dependent codes and User Defined Codes - by Hiroshi MIURA 2000.3.19 -*/ - -#define _KANJI_C_ -#include "includes.h" - -/* - * Function pointers that get overridden when multi-byte code pages - * are loaded. - */ - -const char *(*multibyte_strchr)(const char *, int ) = (const char *(*)(const char *, int )) strchr; -const char *(*multibyte_strrchr)(const char *, int ) = (const char *(*)(const char *, int )) strrchr; -const char *(*multibyte_strstr)(const char *, const char *) = (const char *(*)(const char *, const char *)) strstr; -char *(*multibyte_strtok)(char *, const char *) = (char *(*)(char *, const char *)) strtok; - -/* - * Kanji is treated differently here due to historical accident of - * it being the first non-English codepage added to Samba. - * The define 'KANJI' is being overloaded to mean 'use kanji codepage - * by default' and also 'this is the filename-to-disk conversion - * method to use'. This really should be removed and all control - * over this left in the smb.conf parameters 'client codepage' - * and 'coding system'. - */ - -#ifndef KANJI - -/* - * Set the default conversion to be the functions in - * charcnv.c. - */ - -static size_t skip_non_multibyte_char(char); -static BOOL not_multibyte_char_1(char); - -char *(*_dos_to_unix)(char *, BOOL) = dos2unix_format; -char *(*_unix_to_dos)(char *, BOOL) = unix2dos_format; -size_t (*_skip_multibyte_char)(char) = skip_non_multibyte_char; -BOOL (*is_multibyte_char_1)(char) = not_multibyte_char_1; - -#else /* KANJI */ - -/* - * Set the default conversion to be the function - * sj_to_sj in this file. - */ - -static char *sj_to_sj(char *from, BOOL overwrite); -static size_t skip_kanji_multibyte_char(char); -static BOOL is_kanji_multibyte_char_1(char); - -char *(*_dos_to_unix)(char *, BOOL) = sj_to_sj; -char *(*_unix_to_dos)(char *, BOOL) = sj_to_sj; -size_t (*_skip_multibyte_char)(char) = skip_kanji_multibyte_char; -int (*is_multibyte_char_1)(char) = is_kanji_multibyte_char_1; - -#endif /* KANJI */ - -BOOL global_is_multibyte_codepage = False; - -/* jis si/so sequence */ -static char jis_kso = JIS_KSO; -static char jis_ksi = JIS_KSI; -static char hex_tag = HEXTAG; - -/******************************************************************* - SHIFT JIS functions -********************************************************************/ - -/******************************************************************* - search token from S1 separated any char of S2 - S1 contains SHIFT JIS chars. -********************************************************************/ - -static char *sj_strtok(char *s1, const char *s2) -{ - static char *s = NULL; - char *q; - if (!s1) { - if (!s) { - return NULL; - } - s1 = s; - } - for (q = s1; *s1; ) { - if (is_shift_jis (*s1)) { - s1 += 2; - } else if (is_kana (*s1)) { - s1++; - } else { - char *p = strchr (s2, *s1); - if (p) { - if (s1 != q) { - s = s1 + 1; - *s1 = '\0'; - return q; - } - q = s1 + 1; - } - s1++; - } - } - s = NULL; - if (*q) { - return q; - } - return NULL; -} - -/******************************************************************* - search string S2 from S1 - S1 contains SHIFT JIS chars. -********************************************************************/ - -static const char *sj_strstr(const char *s1, const char *s2) -{ - size_t len = strlen (s2); - if (!*s2) - return (const char *) s1; - for (;*s1;) { - if (*s1 == *s2) { - if (strncmp (s1, s2, len) == 0) - return (const char *) s1; - } - if (is_shift_jis (*s1)) { - s1 += 2; - } else { - s1++; - } - } - return NULL; -} - -/******************************************************************* - Search char C from beginning of S. - S contains SHIFT JIS chars. -********************************************************************/ - -static const char *sj_strchr (const char *s, int c) -{ - for (; *s; ) { - if (*s == c) - return (const char *) s; - if (is_shift_jis (*s)) { - s += 2; - } else { - s++; - } - } - return NULL; -} - -/******************************************************************* - Search char C end of S. - S contains SHIFT JIS chars. -********************************************************************/ - -static const char *sj_strrchr(const char *s, int c) -{ - const char *q; - - for (q = 0; *s; ) { - if (*s == c) { - q = (const char *) s; - } - if (is_shift_jis (*s)) { - s += 2; - } else { - s++; - } - } - return q; -} - -/******************************************************************* - Kanji multibyte char skip function. -*******************************************************************/ - -static size_t skip_kanji_multibyte_char(char c) -{ - if(is_shift_jis(c)) { - return 2; - } else if (is_kana(c)) { - return 1; - } - return 0; -} - -/******************************************************************* - Kanji multibyte char identification. -*******************************************************************/ - -static BOOL is_kanji_multibyte_char_1(char c) -{ - return is_shift_jis(c); -} - -/******************************************************************* - The following functions are the only ones needed to do multibyte - support for Hangul, Big5 and Simplified Chinese. Most of the - real work for these codepages is done in the generic multibyte - functions. The only reason these functions are needed at all - is that the is_xxx(c) calls are really preprocessor macros. -********************************************************************/ - -/******************************************************************* - Hangul (Korean - code page 949) function. -********************************************************************/ - -static BOOL hangul_is_multibyte_char_1(char c) -{ - return is_hangul(c); -} - -/******************************************************************* - Big5 Traditional Chinese (code page 950) function. -********************************************************************/ - -static BOOL big5_is_multibyte_char_1(char c) -{ - return is_big5_c1(c); -} - -/******************************************************************* - Simplified Chinese (code page 936) function. -********************************************************************/ - -static BOOL simpch_is_multibyte_char_1(char c) -{ - return is_simpch_c1(c); -} - -/******************************************************************* - Generic multibyte functions - used by Hangul, Big5 and Simplified - Chinese codepages. -********************************************************************/ - -/******************************************************************* - search token from S1 separated any char of S2 - S1 contains generic multibyte chars. -********************************************************************/ - -static char *generic_multibyte_strtok(char *s1, const char *s2) -{ - static char *s = NULL; - char *q; - if (!s1) { - if (!s) { - return NULL; - } - s1 = s; - } - for (q = s1; *s1; ) { - if ((*is_multibyte_char_1)(*s1)) { - s1 += 2; - } else { - char *p = strchr (s2, *s1); - if (p) { - if (s1 != q) { - s = s1 + 1; - *s1 = '\0'; - return q; - } - q = s1 + 1; - } - s1++; - } - } - s = NULL; - if (*q) { - return q; - } - return NULL; -} - -/******************************************************************* - search string S2 from S1 - S1 contains generic multibyte chars. -********************************************************************/ - -static const char *generic_multibyte_strstr(const char *s1, const char *s2) -{ - size_t len = strlen (s2); - if (!*s2) - return (const char *) s1; - for (;*s1;) { - if (*s1 == *s2) { - if (strncmp (s1, s2, len) == 0) - return (const char *) s1; - } - if ((*is_multibyte_char_1)(*s1)) { - s1 += 2; - } else { - s1++; - } - } - return NULL; -} - -/******************************************************************* - Search char C from beginning of S. - S contains generic multibyte chars. -********************************************************************/ - -static const char *generic_multibyte_strchr(const char *s, int c) -{ - for (; *s; ) { - if (*s == c) - return (const char *) s; - if ((*is_multibyte_char_1)(*s)) { - s += 2; - } else { - s++; - } - } - return NULL; -} - -/******************************************************************* - Search char C end of S. - S contains generic multibyte chars. -********************************************************************/ - -static const char *generic_multibyte_strrchr(const char *s, int c) -{ - const char *q; - - for (q = 0; *s; ) { - if (*s == c) { - q = (const char *) s; - } - if ((*is_multibyte_char_1)(*s)) { - s += 2; - } else { - s++; - } - } - return q; -} - -/******************************************************************* - Generic multibyte char skip function. -*******************************************************************/ - -static size_t skip_generic_multibyte_char(char c) -{ - if( (*is_multibyte_char_1)(c)) { - return 2; - } - return 0; -} - -/******************************************************************* - Code conversion -********************************************************************/ - -/* convesion buffer */ -static char cvtbuf[2*sizeof(pstring)]; - -/******************************************************************* - EUC <-> SJIS -********************************************************************/ - -static int euc2sjis (int hi, int lo) -{ - int w; - int maxidx = SJISREVTBLSIZ; - int minidx = 0; - int i = 2; - - if (hi & 1) { - hi = hi / 2 + (hi < 0xdf ? 0x31 : 0x71); - w = (hi << 8) | (lo - (lo >= 0xe0 ? 0x60 : 0x61)); - } else { - hi = hi / 2 + (hi < 0xdf ? 0x30 : 0x70); - w = (hi << 8) | (lo - 2); - } - if ( (0x87 < hi ) && (hi < 0xed ) ) { - return w; - } - while ( maxidx >= minidx ) { - if ( sjisrev[i].start > w ) { - maxidx = i-1; - } else if ( w > sjisrev[i].end ) { - minidx = i+1; - } else { - w -= sjisrev[i].start; - w += sjisrev[i].rstart; - break; - } - i = (int)( minidx + (maxidx - minidx) % 2 ); - } - return w; -} - -static int sjis2euc (int hi, int lo) -{ - int minidx = 0; - int maxidx = SJISCONVTBLSIZ -1; /* max index 1 less than number of entries */ - int i = ( 0 + SJISCONVTBLSIZ ) % 2; - int w = (int)((hi << 8) | lo); - - if ( (sjisconv[0].start < w) && (w < sjisconv[SJISCONVTBLSIZ-1].end) ) { - while (maxidx >= minidx) { - if ( sjisconv[i].start > w ) { - maxidx = i-1; - } else if (w > sjisconv[i].end) { - minidx = i+1; - } else { - w -= sjisconv[i].start; - w += sjisconv[i].rstart; - break; - } - i = (int)( minidx + (maxidx-minidx)%2 ); - } - hi = (int) ((w >> 8) & 0xff); - lo = (int) (w & 0xff); - } - if (hi >= 0xf0) { - hi = GETAHI; - lo = GETALO; - } - if (lo >= 0x9f) - return ((hi * 2 - (hi >= 0xe0 ? 0xe0 : 0x60)) << 8) | (lo + 2); - else - return ((hi * 2 - (hi >= 0xe0 ? 0xe1 : 0x61)) << 8) | - (lo + (lo >= 0x7f ? 0x60 : 0x61)); -} - -/******************************************************************* - Convert FROM contain SHIFT JIS codes to EUC codes - return converted buffer -********************************************************************/ - -static char *sj_to_euc(char *from, BOOL overwrite) -{ - char *out; - char *save; - - save = (char *) from; - for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3);) { - if (is_shift_jis (*from)) { - int code = sjis2euc ((int) from[0] & 0xff, (int) from[1] & 0xff); - *out++ = (code >> 8) & 0xff; - *out++ = code & 0xff; - from += 2; - } else if (is_kana (*from)) { - *out++ = (char)euc_kana; - *out++ = *from++; - } else { - *out++ = *from++; - } - } - *out = 0; - if (overwrite) { - pstrcpy((char *) save, (char *) cvtbuf); - return (char *) save; - } else { - return cvtbuf; - } -} - -/******************************************************************* - Convert FROM contain EUC codes to SHIFT JIS codes - return converted buffer -********************************************************************/ - -static char *euc_to_sj(char *from, BOOL overwrite) -{ - char *out; - char *save; - - save = (char *) from; - for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3); ) { - if (is_euc (*from)) { - int code = euc2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff); - *out++ = (code >> 8) & 0xff; - *out++ = code & 0xff; - from += 2; - } else if (is_euc_kana (*from)) { - *out++ = from[1]; - from += 2; - } else { - *out++ = *from++; - } - } - *out = 0; - - if (overwrite) { - pstrcpy(save, (char *) cvtbuf); - return save; - } else { - return cvtbuf; - } -} - -/******************************************************************* - EUC3 <-> SJIS -********************************************************************/ -static int sjis3euc (int hi, int lo, int *len) -{ - int i,w; - int minidx; - int maxidx; - - w = (int)((hi << 8) | lo); - - /* no sjis */ - if ( ( 0x40 >= lo ) && (lo >= 0xfc) && (lo == 0x7f )) { - w = (GETAHI << 8) | GETALO; - - /* IBM Extended Kanji */ - } else if (( w == 0xfa54 )||( w == 0x81ca )) { - *len = 2; - return (0xa2cc); - - } else if (( w == 0xfa5b )||( w == 0x81e6)) { - *len = 2; - return (0xa2e8); - - } else if (( 0xfa <= hi ) && ( hi <= 0xfc ) ) { - i = w - 0xfa40 - ( hi - 0xfa )*( 0xfb40 - 0xfafc) - ((lo < 0x7f)? 0 : 1 ); - if ( i <= EUC3CONVTBLSIZ ){ - *len = 3; - return euc3conv[i]; - } - -/* NEC selected IBM Extend Kanji */ - /* there are 3 code that is not good for conv */ - } else if (( 0x8754 <= w ) && ( w <= 0x878a)) { - minidx = 0; - maxidx = EUC3CONV2TBLSIZ; - i = minidx + (maxidx - minidx) % 2; - while ( maxidx >= minidx ) { - if ( euc3conv2[i].sjis > w ) { - maxidx = i-1; - } else if ( w > euc3conv2[i].sjis ) { - minidx = i+1; - } else { - *len = 3; - return (euc3conv2[i].euc); - } - i = (int)( minidx + (maxidx - minidx) % 2 ); - } - /* else normal EUC */ - - } else if (( w == 0xeef9 ) || ( w == 0x81ca )) { - *len = 2; - return (0xa2cc); - - } else if (( 0xed <= hi ) && ( hi <= 0xef )) { - minidx = 0; - maxidx = SJISREVTBLSIZ; - i = 10; - while ( maxidx >= minidx ) { - if ( sjisrev[i].start > w ) { - maxidx = i-1; - } else if ( w > sjisrev[i].end ) { - minidx = i+1; - } else { - w -= sjisrev[i].start; - w += sjisrev[i].rstart; - break; - } - i = (int)( minidx + (maxidx - minidx) % 2 ); - } - if ( w >= 0xfa40 ) { - i = w - 0xfa40 - ( hi - 0xfa )*( 0xfb40 - 0xfafc) - ((lo < 0x7f)? 0 : 1 ); - if ( i <= EUC3CONVTBLSIZ ){ - *len = 3; - return euc3conv[i]; - } else { - w = (GETAHI << 8) | GETALO; - } - } - /* else normal EUC */ - -/* UDC half low*/ -/* this area maps to the G2 UDC area: 0xf5a1 -- 0xfefe */ - } else if ((0xf0 <= hi) && (hi <= 0xf4)) { - *len = 2; - if (lo >= 0x9f) { - return (((hi * 2 - 0xea) << 8) | (lo + 2)); - } else { - return (((hi * 2 - 0xeb) << 8) | (lo + (lo >=0x7f ? 0x60: 0x61 ))); - } - -/* UDC half high*/ -/* this area maps to the G3 UDC area: 0xf8f5a1 -- 0xf8fefe */ - } else if ((0xf5 <= hi) && (hi <= 0xf9)) { - *len = 3; - if (lo >= 0x9f) { - return (((hi*2 - 0xf4) << 8) | (lo + 2)); - } else { - return (((hi*2 - 0xf5) << 8) | (lo + (lo >= 0x7f ? 0x60: 0x61 ))); - } - /* ....checked all special case */ - } - - /* These Normal 2 byte EUC */ - *len = 2; - hi = (int) ((w >> 8) & 0xff); - lo = (int) (w & 0xff); - - if (hi >= 0xf0) { /* Check range */ - hi = GETAHI; - lo = GETALO; - } - - if (lo >= 0x9f) - return ((hi * 2 - (hi >= 0xe0 ? 0xe0 : 0x60)) << 8) | (lo + 2); - else - return ((hi * 2 - (hi >= 0xe0 ? 0xe1 : 0x61)) << 8) | - (lo + (lo >= 0x7f ? 0x60 : 0x61)); -} - -static int euc3sjis (int hi, int lo, BOOL is_3byte) -{ - int w; - - w = (int)((hi << 8) | lo); - if (is_3byte) { - if (( 0xf5 <= hi) && ( hi <= 0xfe)) { - /* UDC half high*/ - /* this area maps to the G3 UDC area */ - /* 0xf8f5a1 -- 0xf8fefe --> 0xf540 -- 0xf9fc */ - if (hi & 1) { - return (((hi / 2 + 0x7b) << 8) | (lo - (lo >= 0xe0 ? 0x60 : 0x61))); - } else { - return (((hi / 2 + 0x7a) << 8) | (lo - 2)); - } - } else { - /* Using map table */ - int minidx = 0; - int maxidx = EUC3REVTBLSIZ; - int i = minidx + (maxidx - minidx) % 2; - - while ( maxidx >= minidx ) { - if (euc3rev[i].euc > w) { - maxidx = i-1; - } else if (euc3rev[i].euc < w) { - minidx = i+1; - } else { - return (euc3rev[i].sjis); - } - i = (int)( minidx + ( maxidx - minidx ) % 2); - } - return ((GETAHI << 8 ) | GETALO); - } - } else { /* is_2byte */ - if ((0xf5 <= hi) && (hi <= 0xfe)) { - /* UDC half low*/ - /* this area maps to the G2 UDC area */ - /* 0xf5a1 -- 0xfefe --> 0xf040 -- 0xf4fc */ - if (hi & 1) { - return (((hi / 2 + 0x76) << 8) | (lo - (lo >= 0xe0 ? 0x60 : 0x61))); - } else { - return (((hi / 2 + 0x75) << 8) | (lo - 2)); - } - } else { /* Normal EUC */ - if (hi & 1) { - hi = hi / 2 + (hi < 0xdf ? 0x31 : 0x71); - return ((hi << 8) | (lo - (lo >= 0xe0 ? 0x60 : 0x61))); - } else { - hi = hi / 2 + (hi < 0xdf ? 0x30 : 0x70); - return ((hi << 8) | (lo - 2)); - } - } - } -} - -/******************************************************************* - Convert FROM contain SHIFT JIS codes to EUC codes (with SS2) - return converted buffer -********************************************************************/ - -static char *sj_to_euc3(char *from, BOOL overwrite) -{ - char *out; - char *save; - int len; - - save = (char *) from; - for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-4);) { - if (is_shift_jis (*from)) { - int code = sjis3euc ((int) from[0] & 0xff, (int) from[1] & 0xff, &len); - if (len == 3) { - *out++ = (char)euc_sup; - } - *out++ = (code >> 8) & 0xff; - *out++ = code & 0xff; - from += 2; - } else if (is_kana (*from)) { - *out++ = (char)euc_kana; - *out++ = *from++; - } else { - *out++ = *from++; - } - } - *out = 0; - if (overwrite) { - pstrcpy((char *) save, (char *) cvtbuf); - return (char *) save; - } else { - return cvtbuf; - } -} - -/******************************************************************* - Convert FROM contain EUC codes (with Sup-Kanji) to SHIFT JIS codes - return converted buffer -********************************************************************/ -static char *euc3_to_sj(char *from, BOOL overwrite) -{ - char *out; - char *save; - - save = (char *) from; - for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3); ) { - if (is_euc_sup (*from)) { - int code = euc3sjis((int) from[1] & 0xff, (int) from[2] & 0xff, True); - *out++ = (code >> 8) & 0xff; - *out++ = code & 0xff; - from += 3; - } else if (is_euc (*from)) { - int code = euc3sjis ((int) from[0] & 0xff, (int) from[1] & 0xff,False); - *out++ = (code >> 8) & 0xff; - *out++ = code & 0xff; - from += 2; - } else if (is_euc_kana (*from)) { - *out++ = from[1]; - from += 2; - } else { - *out++ = *from++; - } - } - *out = 0; - - if (overwrite) { - pstrcpy(save, (char *) cvtbuf); - return save; - } else { - return cvtbuf; - } -} - -/******************************************************************* - JIS7,JIS8,JUNET <-> SJIS -********************************************************************/ - -static int sjis2jis(int hi, int lo) -{ - int minidx = 0; - int maxidx = SJISCONVTBLSIZ -1; /* max index 1 less than number of entries */ - int i = (0 + SJISCONVTBLSIZ) % 2; - int w = (int)((hi << 8) | lo); - - if ((sjisconv[0].start < w) && (w < sjisconv[SJISCONVTBLSIZ-1].end)) { - while (maxidx >= minidx) { - if (sjisconv[i].start > w) { - maxidx = i-1; - } else if (w > sjisconv[i].end) { - minidx = i+1; - } else { - w -= sjisconv[i].start; - w += sjisconv[i].rstart; - break; - } - i = (int)( minidx + (maxidx-minidx) %2 ); - } - hi = (int) ((w >> 8) & 0xff); - lo = (int) (w & 0xff); - } - if (hi >= 0xf0) { - hi = GETAHI; - lo = GETALO; - } - if (lo >= 0x9f) - return ((hi * 2 - (hi >= 0xe0 ? 0x160 : 0xe0)) << 8) | (lo - 0x7e); - else - return ((hi * 2 - (hi >= 0xe0 ? 0x161 : 0xe1)) << 8) | - (lo - (lo >= 0x7f ? 0x20 : 0x1f)); -} - -static int jis2sjis(int hi, int lo) -{ - int w; - int minidx = 0; - int maxidx = SJISREVTBLSIZ; - int i = 2; - - if (hi & 1) { - hi = hi / 2 + (hi < 0x5f ? 0x71 : 0xb1); - w = (hi << 8) | (lo + (lo >= 0x60 ? 0x20 : 0x1f)); - } else { - hi = hi / 2 + (hi < 0x5f ? 0x70 : 0xb0); - w = (hi << 8) | (lo + 0x7e); - } - - if (( 0x87 < hi ) && ( hi < 0xed )) { - return w; - } - while (maxidx >= minidx) { - if (sjisrev[i].start > w) { - maxidx = i-1; - } else if (w > sjisrev[i].end) { - minidx = i+1; - } else { - w -= sjisrev[i].start; - w += sjisrev[i].rstart; - break; - } - i = (int)( minidx + (maxidx-minidx) %2 ); - } - return w; -} - -/******************************************************************* - Convert FROM contain JIS codes to SHIFT JIS codes - return converted buffer -********************************************************************/ - -static char *jis8_to_sj(char *from, BOOL overwrite) -{ - char *out; - int shifted; - char *save; - - shifted = _KJ_ROMAN; - save = (char *) from; - for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3);) { - if (is_esc (*from)) { - if (is_so1 (from[1]) && is_so2 (from[2])) { - shifted = _KJ_KANJI; - from += 3; - } else if (is_si1 (from[1]) && is_si2 (from[2])) { - shifted = _KJ_ROMAN; - from += 3; - } else { /* sequence error */ - goto normal; - } - } else { - -normal: - - switch (shifted) { - default: - case _KJ_ROMAN: - *out++ = *from++; - break; - case _KJ_KANJI: - { - int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff); - *out++ = (code >> 8) & 0xff; - *out++ = code; - from += 2; - break; - } - } - } - } - - *out = 0; - if (overwrite) { - pstrcpy (save, (char *) cvtbuf); - return save; - } else { - return cvtbuf; - } -} - -/******************************************************************* - Convert FROM contain SHIFT JIS codes to JIS codes - return converted buffer -********************************************************************/ - -static char *sj_to_jis8(char *from, BOOL overwrite) -{ - char *out; - int shifted; - char *save; - - shifted = _KJ_ROMAN; - save = (char *) from; - for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-4); ) { - if (is_shift_jis (*from)) { - int code; - switch (shifted) { - case _KJ_ROMAN: /* to KANJI */ - *out++ = jis_esc; - *out++ = jis_so1; - *out++ = jis_kso; - shifted = _KJ_KANJI; - break; - } - code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff); - *out++ = (code >> 8) & 0xff; - *out++ = code; - from += 2; - } else { - switch (shifted) { - case _KJ_KANJI: /* to ROMAN/KANA */ - *out++ = jis_esc; - *out++ = jis_si1; - *out++ = jis_ksi; - shifted = _KJ_ROMAN; - break; - } - *out++ = *from++; - } - } - - switch (shifted) { - case _KJ_KANJI: /* to ROMAN/KANA */ - *out++ = jis_esc; - *out++ = jis_si1; - *out++ = jis_ksi; - shifted = _KJ_ROMAN; - break; - } - *out = 0; - if (overwrite) { - pstrcpy (save, (char *) cvtbuf); - return save; - } else { - return cvtbuf; - } -} - -/******************************************************************* - Convert FROM contain 7 bits JIS codes to SHIFT JIS codes - return converted buffer -********************************************************************/ -static char *jis7_to_sj(char *from, BOOL overwrite) -{ - char *out; - int shifted; - char *save; - - shifted = _KJ_ROMAN; - save = (char *) from; - for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3);) { - if (is_esc (*from)) { - if (is_so1 (from[1]) && is_so2 (from[2])) { - shifted = _KJ_KANJI; - from += 3; - } else if (is_si1 (from[1]) && is_si2 (from[2])) { - shifted = _KJ_ROMAN; - from += 3; - } else { /* sequence error */ - goto normal; - } - } else if (is_so (*from)) { - shifted = _KJ_KANA; /* to KANA */ - from++; - } else if (is_si (*from)) { - shifted = _KJ_ROMAN; /* to ROMAN */ - from++; - } else { - normal: - switch (shifted) { - default: - case _KJ_ROMAN: - *out++ = *from++; - break; - case _KJ_KANJI: - { - int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff); - *out++ = (code >> 8) & 0xff; - *out++ = code; - from += 2; - } - break; - case _KJ_KANA: - *out++ = ((int) from[0]) + 0x80; - break; - } - } - } - *out = 0; - if (overwrite) { - pstrcpy (save, (char *) cvtbuf); - return save; - } else { - return cvtbuf; - } -} - -/******************************************************************* - Convert FROM contain SHIFT JIS codes to 7 bits JIS codes - return converted buffer -********************************************************************/ -static char *sj_to_jis7(char *from, BOOL overwrite) -{ - char *out; - int shifted; - char *save; - - shifted = _KJ_ROMAN; - save = (char *) from; - for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-4); ) { - if (is_shift_jis (*from)) { - int code; - switch (shifted) { - case _KJ_KANA: - *out++ = jis_si; /* to ROMAN and through down */ - case _KJ_ROMAN: /* to KANJI */ - *out++ = jis_esc; - *out++ = jis_so1; - *out++ = jis_kso; - shifted = _KJ_KANJI; - break; - } - code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff); - *out++ = (code >> 8) & 0xff; - *out++ = code; - from += 2; - } else if (is_kana (from[0])) { - switch (shifted) { - case _KJ_KANJI: /* to ROMAN */ - *out++ = jis_esc; - *out++ = jis_si1; - *out++ = jis_ksi; - case _KJ_ROMAN: /* to KANA */ - *out++ = jis_so; - shifted = _KJ_KANA; - break; - } - *out++ = ((int) *from++) - 0x80; - } else { - switch (shifted) { - case _KJ_KANA: - *out++ = jis_si; /* to ROMAN */ - shifted = _KJ_ROMAN; - break; - case _KJ_KANJI: /* to ROMAN */ - *out++ = jis_esc; - *out++ = jis_si1; - *out++ = jis_ksi; - shifted = _KJ_ROMAN; - break; - } - *out++ = *from++; - } - } - switch (shifted) { - case _KJ_KANA: - *out++ = jis_si; /* to ROMAN */ - break; - case _KJ_KANJI: /* to ROMAN */ - *out++ = jis_esc; - *out++ = jis_si1; - *out++ = jis_ksi; - break; - } - *out = 0; - if (overwrite) { - pstrcpy (save, (char *) cvtbuf); - return save; - } else { - return cvtbuf; - } -} - -/******************************************************************* - Convert FROM contain 7 bits JIS(junet) codes to SHIFT JIS codes - return converted buffer -********************************************************************/ - -static char *junet_to_sj(char *from, BOOL overwrite) -{ - char *out; - int shifted; - char *save; - - shifted = _KJ_ROMAN; - save = (char *) from; - for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-3);) { - if (is_esc (*from)) { - if (is_so1 (from[1]) && is_so2 (from[2])) { - shifted = _KJ_KANJI; - from += 3; - } else if (is_si1 (from[1]) && is_si2 (from[2])) { - shifted = _KJ_ROMAN; - from += 3; - } else if (is_juk1(from[1]) && is_juk2 (from[2])) { - shifted = _KJ_KANA; - from += 3; - } else { /* sequence error */ - goto normal; - } - } else { - normal: - switch (shifted) { - default: - case _KJ_ROMAN: - *out++ = *from++; - break; - case _KJ_KANJI: - { - int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff); - *out++ = (code >> 8) & 0xff; - *out++ = code; - from += 2; - } - break; - case _KJ_KANA: - *out++ = ((int) from[0]) + 0x80; - break; - } - } - } - *out = 0; - if (overwrite) { - pstrcpy (save, (char *) cvtbuf); - return save; - } else { - return cvtbuf; - } -} - -/******************************************************************* - Convert FROM contain SHIFT JIS codes to 7 bits JIS(junet) codes - return converted buffer -********************************************************************/ -static char *sj_to_junet(char *from, BOOL overwrite) -{ - char *out; - int shifted; - char *save; - - shifted = _KJ_ROMAN; - save = (char *) from; - for (out = cvtbuf; *from && (out - cvtbuf < sizeof(cvtbuf)-4); ) { - if (is_shift_jis (*from)) { - int code; - switch (shifted) { - case _KJ_KANA: - case _KJ_ROMAN: /* to KANJI */ - *out++ = jis_esc; - *out++ = jis_so1; - *out++ = jis_so2; - shifted = _KJ_KANJI; - break; - } - code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff); - *out++ = (code >> 8) & 0xff; - *out++ = code; - from += 2; - } else if (is_kana (from[0])) { - switch (shifted) { - case _KJ_KANJI: /* to ROMAN */ - case _KJ_ROMAN: /* to KANA */ - *out++ = jis_esc; - *out++ = junet_kana1; - *out++ = junet_kana2; - shifted = _KJ_KANA; - break; - } - *out++ = ((int) *from++) - 0x80; - } else { - switch (shifted) { - case _KJ_KANA: - case _KJ_KANJI: /* to ROMAN */ - *out++ = jis_esc; - *out++ = jis_si1; - *out++ = jis_si2; - shifted = _KJ_ROMAN; - break; - } - *out++ = *from++; - } - } - switch (shifted) { - case _KJ_KANA: - case _KJ_KANJI: /* to ROMAN */ - *out++ = jis_esc; - *out++ = jis_si1; - *out++ = jis_si2; - break; - } - *out = 0; - if (overwrite) { - pstrcpy (save, (char *) cvtbuf); - return save; - } else { - return cvtbuf; - } -} - -/******************************************************************* - HEX <-> SJIS -********************************************************************/ -/* ":xx" -> a byte */ -static char *hex_to_sj(char *from, BOOL overwrite) -{ - char *sp, *dp; - - sp = (char *) from; - dp = cvtbuf; - while (*sp && (dp - cvtbuf < sizeof(cvtbuf)-3)) { - if (*sp == hex_tag && isxdigit((int)sp[1]) && isxdigit((int)sp[2])) { - *dp++ = (hex2bin (sp[1])<<4) | (hex2bin (sp[2])); - sp += 3; - } else - *dp++ = *sp++; - } - *dp = '\0'; - if (overwrite) { - pstrcpy ((char *) from, (char *) cvtbuf); - return (char *) from; - } else { - return cvtbuf; - } -} - -/******************************************************************* - kanji/kana -> ":xx" -********************************************************************/ -static char *sj_to_hex(char *from, BOOL overwrite) -{ - unsigned char *sp, *dp; - - sp = (unsigned char*) from; - dp = (unsigned char*) cvtbuf; - while (*sp && (((char *)dp)- cvtbuf < sizeof(cvtbuf)-7)) { - if (is_kana(*sp)) { - *dp++ = hex_tag; - *dp++ = bin2hex (((*sp)>>4)&0x0f); - *dp++ = bin2hex ((*sp)&0x0f); - sp++; - } else if (is_shift_jis (*sp) && is_shift_jis2 (sp[1])) { - *dp++ = hex_tag; - *dp++ = bin2hex (((*sp)>>4)&0x0f); - *dp++ = bin2hex ((*sp)&0x0f); - sp++; - *dp++ = hex_tag; - *dp++ = bin2hex (((*sp)>>4)&0x0f); - *dp++ = bin2hex ((*sp)&0x0f); - sp++; - } else - *dp++ = *sp++; - } - *dp = '\0'; - if (overwrite) { - pstrcpy ((char *) from, (char *) cvtbuf); - return (char *) from; - } else { - return cvtbuf; - } -} - -/******************************************************************* - CAP <-> SJIS -********************************************************************/ -/* ":xx" CAP -> a byte */ -static char *cap_to_sj(char *from, BOOL overwrite) -{ - char *sp, *dp; - - sp = (char *) from; - dp = cvtbuf; - while (*sp && (dp- cvtbuf < sizeof(cvtbuf)-2)) { - /* - * The only change between this and hex_to_sj is here. sj_to_cap only - * translates characters greater or equal to 0x80 - make sure that here - * we only do the reverse (that's why the strchr is used rather than - * isxdigit. Based on fix from ado@elsie.nci.nih.gov (Arthur David Olson). - */ - if (*sp == hex_tag && (strchr ("89abcdefABCDEF", sp[1]) != NULL) && isxdigit((int)sp[2])) { - *dp++ = (hex2bin (sp[1])<<4) | (hex2bin (sp[2])); - sp += 3; - } else - *dp++ = *sp++; - } - *dp = '\0'; - if (overwrite) { - pstrcpy ((char *) from, (char *) cvtbuf); - return (char *) from; - } else { - return cvtbuf; - } -} - -/******************************************************************* - kanji/kana -> ":xx" - CAP format. -********************************************************************/ -static char *sj_to_cap(char *from, BOOL overwrite) -{ - unsigned char *sp, *dp; - - sp = (unsigned char*) from; - dp = (unsigned char*) cvtbuf; - while (*sp && (((char *)dp) - cvtbuf < sizeof(cvtbuf)-4)) { - if (*sp >= 0x80) { - *dp++ = hex_tag; - *dp++ = bin2hex (((*sp)>>4)&0x0f); - *dp++ = bin2hex ((*sp)&0x0f); - sp++; - } else { - *dp++ = *sp++; - } - } - *dp = '\0'; - if (overwrite) { - pstrcpy ((char *) from, (char *) cvtbuf); - return (char *) from; - } else { - return cvtbuf; - } -} - -/******************************************************************* - sj to sj -********************************************************************/ -static char *sj_to_sj(char *from, BOOL overwrite) -{ - if (!overwrite) { - pstrcpy (cvtbuf, (char *) from); - return cvtbuf; - } else { - return (char *) from; - } -} - -/******************************************************************* - cp to utf8 -********************************************************************/ -static char *cp_to_utf8(char *from, BOOL overwrite) -{ - unsigned char *dst; - unsigned char *src; - smb_ucs2_t val; - int w; - size_t len; - - src = (unsigned char *)from; - dst = (unsigned char *)cvtbuf; - while (*src && (((char *)dst - cvtbuf) < sizeof(cvtbuf)-4)) { - len = _skip_multibyte_char(*src); - if ( len == 2 ) { - w = (int)(*src++ & 0xff); - w = (int)((w << 8)|(*src++ & 0xff)); - } else { - w = (int)(*src++ & 0xff); - } - val = doscp2ucs2(w); - - if ( val <= 0x7f ) { - *dst++ = (char)(val & 0xff); - } else if ( val <= 0x7ff ){ - *dst++ = (char)( 0xc0 | ((val >> 6) & 0xff)); - *dst++ = (char)( 0x80 | ( val & 0x3f )); - } else { - *dst++ = (char)( 0xe0 | ((val >> 12) & 0x0f)); - *dst++ = (char)( 0x80 | ((val >> 6) & 0x3f)); - *dst++ = (char)( 0x80 | (val & 0x3f)); - } - - } - *dst++='\0'; - if (overwrite) { - pstrcpy ((char *) from, (char *) cvtbuf); - return (char *) from; - } else { - return cvtbuf; - } -} - -/******************************************************************* - utf8 to cp -********************************************************************/ -static char *utf8_to_cp(char *from, BOOL overwrite) -{ - unsigned char *src; - unsigned char *dst; - smb_ucs2_t val; - int w; - - src = (unsigned char *)from; - dst = (unsigned char *)cvtbuf; - - while (*src && ((char *)dst - cvtbuf < sizeof(cvtbuf)-4)) { - val = (*src++ & 0xff); - if (val < 0x80) { - *dst++ = (char)(val & 0x7f); - } else if ((0xc0 <= val) && (val <= 0xdf) - && (0x80 <= *src) && (*src <= 0xbf)) { - w = ucs2doscp( ((val & 31) << 6) | ((*src++) & 63 )); - *dst++ = (char)((w >> 8) & 0xff); - *dst++ = (char)(w & 0xff); - } else { - val = (val & 0x0f) << 12; - val |= ((*src++ & 0x3f) << 6); - val |= (*src++ & 0x3f); - w = ucs2doscp(val); - *dst++ = (char)((w >> 8) & 0xff); - *dst++ = (char)(w & 0xff); - } - } - *dst++='\0'; - if (overwrite) { - pstrcpy ((char *) from, (char *) cvtbuf); - return (char *) from; - } else { - return cvtbuf; - } -} - -/************************************************************************ - conversion: - _dos_to_unix _unix_to_dos -************************************************************************/ - -static void setup_string_function(int codes) -{ - switch (codes) { - default: - _dos_to_unix = dos2unix_format; - _unix_to_dos = unix2dos_format; - break; - - case SJIS_CODE: - _dos_to_unix = sj_to_sj; - _unix_to_dos = sj_to_sj; - break; - - case EUC_CODE: - _dos_to_unix = sj_to_euc; - _unix_to_dos = euc_to_sj; - break; - - case JIS7_CODE: - _dos_to_unix = sj_to_jis7; - _unix_to_dos = jis7_to_sj; - break; - - case JIS8_CODE: - _dos_to_unix = sj_to_jis8; - _unix_to_dos = jis8_to_sj; - break; - - case JUNET_CODE: - _dos_to_unix = sj_to_junet; - _unix_to_dos = junet_to_sj; - break; - - case HEX_CODE: - _dos_to_unix = sj_to_hex; - _unix_to_dos = hex_to_sj; - break; - - case CAP_CODE: - _dos_to_unix = sj_to_cap; - _unix_to_dos = cap_to_sj; - break; - case UTF8_CODE: - _dos_to_unix = cp_to_utf8; - _unix_to_dos = utf8_to_cp; - break; - case EUC3_CODE: - _dos_to_unix = sj_to_euc3; - _unix_to_dos = euc3_to_sj; - break; - } -} - -/************************************************************************ - Interpret coding system. -************************************************************************/ - -void interpret_coding_system(char *str) -{ - int codes = UNKNOWN_CODE; - - if (strequal (str, "sjis")) { - codes = SJIS_CODE; - } else if (strequal (str, "euc")) { - codes = EUC_CODE; - } else if (strequal (str, "cap")) { - codes = CAP_CODE; - hex_tag = HEXTAG; - } else if (strequal (str, "hex")) { - codes = HEX_CODE; - hex_tag = HEXTAG; - } else if (!strncasecmp (str, "hex", 3)) { - codes = HEX_CODE; - hex_tag = (str[3] ? str[3] : HEXTAG); - } else if (strequal (str, "j8bb")) { - codes = JIS8_CODE; - jis_kso = 'B'; - jis_ksi = 'B'; - } else if (strequal (str, "j8bj") || strequal (str, "jis8")) { - codes = JIS8_CODE; - jis_kso = 'B'; - jis_ksi = 'J'; - } else if (strequal (str, "j8bh")) { - codes = JIS8_CODE; - jis_kso = 'B'; - jis_ksi = 'H'; - } else if (strequal (str, "j8@b")) { - codes = JIS8_CODE; - jis_kso = '@'; - jis_ksi = 'B'; - } else if (strequal (str, "j8@j")) { - codes = JIS8_CODE; - jis_kso = '@'; - jis_ksi = 'J'; - } else if (strequal (str, "j8@h")) { - codes = JIS8_CODE; - jis_kso = '@'; - jis_ksi = 'H'; - } else if (strequal (str, "j7bb")) { - codes = JIS7_CODE; - jis_kso = 'B'; - jis_ksi = 'B'; - } else if (strequal (str, "j7bj") || strequal (str, "jis7")) { - codes = JIS7_CODE; - jis_kso = 'B'; - jis_ksi = 'J'; - } else if (strequal (str, "j7bh")) { - codes = JIS7_CODE; - jis_kso = 'B'; - jis_ksi = 'H'; - } else if (strequal (str, "j7@b")) { - codes = JIS7_CODE; - jis_kso = '@'; - jis_ksi = 'B'; - } else if (strequal (str, "j7@j")) { - codes = JIS7_CODE; - jis_kso = '@'; - jis_ksi = 'J'; - } else if (strequal (str, "j7@h")) { - codes = JIS7_CODE; - jis_kso = '@'; - jis_ksi = 'H'; - } else if (strequal (str, "jubb")) { - codes = JUNET_CODE; - jis_kso = 'B'; - jis_ksi = 'B'; - } else if (strequal (str, "jubj") || strequal (str, "junet")) { - codes = JUNET_CODE; - jis_kso = 'B'; - jis_ksi = 'J'; - } else if (strequal (str, "jubh")) { - codes = JUNET_CODE; - jis_kso = 'B'; - jis_ksi = 'H'; - } else if (strequal (str, "ju@b")) { - codes = JUNET_CODE; - jis_kso = '@'; - jis_ksi = 'B'; - } else if (strequal (str, "ju@j")) { - codes = JUNET_CODE; - jis_kso = '@'; - jis_ksi = 'J'; - } else if (strequal (str, "ju@h")) { - codes = JUNET_CODE; - jis_kso = '@'; - jis_ksi = 'H'; - } else if (strequal (str, "utf8")) { - codes = UTF8_CODE; - } else if (strequal (str, "euc3")) { - codes = EUC3_CODE; - } - setup_string_function (codes); -} - -/******************************************************************* - Non multibyte char function. -*******************************************************************/ - -static size_t skip_non_multibyte_char(char c) -{ - return 0; -} - -/******************************************************************* - Function that always says a character isn't multibyte. -*******************************************************************/ - -static BOOL not_multibyte_char_1(char c) -{ - return False; -} - -/******************************************************************* - Setup the function pointers for the functions that are replaced - when multi-byte codepages are used. - - The dos_to_unix and unix_to_dos function pointers are only - replaced by setup_string_function called by interpret_coding_system - above. -*******************************************************************/ - -void initialize_multibyte_vectors( int client_codepage) -{ - switch( client_codepage ) - { - case KANJI_CODEPAGE: - multibyte_strchr = sj_strchr; - multibyte_strrchr = sj_strrchr; - multibyte_strstr = sj_strstr; - multibyte_strtok = sj_strtok; - _skip_multibyte_char = skip_kanji_multibyte_char; - is_multibyte_char_1 = is_kanji_multibyte_char_1; - global_is_multibyte_codepage = True; - break; - case HANGUL_CODEPAGE: - multibyte_strchr = generic_multibyte_strchr; - multibyte_strrchr = generic_multibyte_strrchr; - multibyte_strstr = generic_multibyte_strstr; - multibyte_strtok = generic_multibyte_strtok; - _skip_multibyte_char = skip_generic_multibyte_char; - is_multibyte_char_1 = hangul_is_multibyte_char_1; - global_is_multibyte_codepage = True; - break; - case BIG5_CODEPAGE: - multibyte_strchr = generic_multibyte_strchr; - multibyte_strrchr = generic_multibyte_strrchr; - multibyte_strstr = generic_multibyte_strstr; - multibyte_strtok = generic_multibyte_strtok; - _skip_multibyte_char = skip_generic_multibyte_char; - is_multibyte_char_1 = big5_is_multibyte_char_1; - global_is_multibyte_codepage = True; - break; - case SIMPLIFIED_CHINESE_CODEPAGE: - multibyte_strchr = generic_multibyte_strchr; - multibyte_strrchr = generic_multibyte_strrchr; - multibyte_strstr = generic_multibyte_strstr; - multibyte_strtok = generic_multibyte_strtok; - _skip_multibyte_char = skip_generic_multibyte_char; - is_multibyte_char_1 = simpch_is_multibyte_char_1; - global_is_multibyte_codepage = True; - break; - /* - * Single char size code page. - */ - default: - multibyte_strchr = (const char *(*)(const char *, int )) strchr; - multibyte_strrchr = (const char *(*)(const char *, int )) strrchr; - multibyte_strstr = (const char *(*)(const char *, const char *)) strstr; - multibyte_strtok = (char *(*)(char *, const char *)) strtok; - _skip_multibyte_char = skip_non_multibyte_char; - is_multibyte_char_1 = not_multibyte_char_1; - global_is_multibyte_codepage = False; - break; - } -} -/* ******************************************************* - function(s) for "dynamic" encoding of SWAT output. - in this version, only dos_to_dos, dos_to_unix, unix_to_dos - are used for bug fix. conversion to web encoding - (to catalog file encoding) is not needed because - they are using same character codes. - **************************************************** */ -static char *no_conversion(char *str, BOOL bOverwrite) -{ - static pstring temp; - if(bOverwrite) - return str; - pstrcpy(temp, str); - return temp; -} -char *(*_dos_to_dos)(char *, BOOL) = no_conversion; diff --git a/source3/lib/ms_fnmatch.c b/source3/lib/ms_fnmatch.c index 72f61c021c..39b3e0013c 100644 --- a/source3/lib/ms_fnmatch.c +++ b/source3/lib/ms_fnmatch.c @@ -31,37 +31,37 @@ #include "includes.h" #endif - - /* bugger. we need a separate wildcard routine for older versions of the protocol. This is not yet perfect, but its a lot - better thaan what we had */ -static int ms_fnmatch_lanman_core(const char *pattern, const char *string) + better than what we had */ +static int ms_fnmatch_lanman_core(const smb_ucs2_t *pattern, + const smb_ucs2_t *string) { - const char *p = pattern, *n = string; - char c; + const smb_ucs2_t *p = pattern, *n = string; + smb_ucs2_t c; - if (strcmp(p,"?")==0 && strcmp(n,".")==0) goto match; + if (strcmp_wa(p, "?")==0 && strcmp_wa(n, ".")) goto match; while ((c = *p++)) { switch (c) { - case '.': + case UCS2_CHAR('.'): if (! *n) goto next; - /* if (! *n && ! *p) goto match; */ - if (*n != '.') goto nomatch; + if (*n != UCS2_CHAR('.')) goto nomatch; n++; break; - case '?': + case UCS2_CHAR('?'): if (! *n) goto next; - if ((*n == '.' && n[1] != '.') || ! *n) goto next; + if ((*n == UCS2_CHAR('.') && + n[1] != UCS2_CHAR('.')) || ! *n) + goto next; n++; break; - case '>': + case UCS2_CHAR('>'): if (! *n) goto next; - if (n[0] == '.') { + if (n[0] == UCS2_CHAR('.')) { if (! n[1] && ms_fnmatch_lanman_core(p, n+1) == 0) goto match; if (ms_fnmatch_lanman_core(p, n) == 0) goto match; goto nomatch; @@ -69,7 +69,7 @@ static int ms_fnmatch_lanman_core(const char *pattern, const char *string) n++; break; - case '*': + case UCS2_CHAR('*'): if (! *n) goto next; if (! *p) goto match; for (; *n; n++) { @@ -77,19 +77,20 @@ static int ms_fnmatch_lanman_core(const char *pattern, const char *string) } break; - case '<': + case UCS2_CHAR('<'): for (; *n; n++) { if (ms_fnmatch_lanman_core(p, n) == 0) goto match; - if (*n == '.' && !strchr(n+1,'.')) { + if (*n == UCS2_CHAR('.') && + !strchr_w(n+1,UCS2_CHAR('.'))) { n++; break; } } break; - case '"': + case UCS2_CHAR('"'): if (*n == 0 && ms_fnmatch_lanman_core(p, n) == 0) goto match; - if (*n != '.') goto nomatch; + if (*n != UCS2_CHAR('.')) goto nomatch; n++; break; @@ -118,16 +119,19 @@ next: return 0; } -static int ms_fnmatch_lanman1(const char *pattern, const char *string) +static int ms_fnmatch_lanman1(const smb_ucs2_t *pattern, const smb_ucs2_t *string) { - if (!strpbrk(pattern, "?*<>\"")) { - if (strcmp(string,"..") == 0) string = "."; - return strcasecmp(pattern, string); + if (!strpbrk_wa(pattern, "?*<>\"")) { + smb_ucs2_t s[] = {UCS2_CHAR('.'), 0}; + if (strcmp_wa(string,"..") == 0) string = s; + return strcasecmp_w(pattern, string); } - if (strcmp(string,"..") == 0 || strcmp(string,".") == 0) { - return ms_fnmatch_lanman_core(pattern, "..") && - ms_fnmatch_lanman_core(pattern, "."); + if (strcmp_wa(string,"..") == 0 || strcmp_wa(string,".") == 0) { + smb_ucs2_t dot[] = {UCS2_CHAR('.'), 0}; + smb_ucs2_t dotdot[] = {UCS2_CHAR('.'), UCS2_CHAR('.'), 0}; + return ms_fnmatch_lanman_core(pattern, dotdot) && + ms_fnmatch_lanman_core(pattern, dot); } return ms_fnmatch_lanman_core(pattern, string); @@ -142,10 +146,10 @@ static int ms_fnmatch_lanman1(const char *pattern, const char *string) Returns 0 on match, -1 on fail. */ -int ms_fnmatch(const char *pattern, const char *string) +static int ms_fnmatch_w(const smb_ucs2_t *pattern, const smb_ucs2_t *string) { - const char *p = pattern, *n = string; - char c; + const smb_ucs2_t *p = pattern, *n = string; + smb_ucs2_t c; extern int Protocol; if (Protocol <= PROTOCOL_LANMAN2) { @@ -154,40 +158,40 @@ int ms_fnmatch(const char *pattern, const char *string) while ((c = *p++)) { switch (c) { - case '?': + case UCS2_CHAR('?'): if (! *n) return -1; n++; break; - case '>': - if (n[0] == '.') { - if (! n[1] && ms_fnmatch(p, n+1) == 0) return 0; - if (ms_fnmatch(p, n) == 0) return 0; + case UCS2_CHAR('>'): + if (n[0] == UCS2_CHAR('.')) { + if (! n[1] && ms_fnmatch_w(p, n+1) == 0) return 0; + if (ms_fnmatch_w(p, n) == 0) return 0; return -1; } - if (! *n) return ms_fnmatch(p, n); + if (! *n) return ms_fnmatch_w(p, n); n++; break; - case '*': + case UCS2_CHAR('*'): for (; *n; n++) { - if (ms_fnmatch(p, n) == 0) return 0; + if (ms_fnmatch_w(p, n) == 0) return 0; } break; - case '<': + case UCS2_CHAR('<'): for (; *n; n++) { - if (ms_fnmatch(p, n) == 0) return 0; - if (*n == '.' && !strchr(n+1,'.')) { + if (ms_fnmatch_w(p, n) == 0) return 0; + if (*n == UCS2_CHAR('.') && !strchr_wa(n+1,'.')) { n++; break; } } break; - case '"': - if (*n == 0 && ms_fnmatch(p, n) == 0) return 0; - if (*n != '.') return -1; + case UCS2_CHAR('"'): + if (*n == 0 && ms_fnmatch_w(p, n) == 0) return 0; + if (*n != UCS2_CHAR('.')) return -1; n++; break; @@ -203,57 +207,12 @@ int ms_fnmatch(const char *pattern, const char *string) } -#if FNMATCH_TEST - -static int match_one(char *pattern, char *file) +int ms_fnmatch(const char *pattern, const char *string) { - if (strcmp(file,"..") == 0) file = "."; - if (strcmp(pattern,".") == 0) return -1; - - return ms_fnmatch(pattern, file); -} + wpstring p, s; -static char *match_test(char *pattern, char *file, char *short_name) -{ - static char ret[4]; - strncpy(ret, "---", 3); - - if (match_one(pattern, ".") == 0) ret[0] = '+'; - if (match_one(pattern, "..") == 0) ret[1] = '+'; - if (match_one(pattern, file) == 0 || - (*short_name && match_one(pattern, short_name)==0)) ret[2] = '+'; - return ret; -} + pstrcpy_wa(p, pattern); + pstrcpy_wa(s, string); - int main(int argc, char *argv[]) -{ - int ret; - char ans[4], mask[100], file[100], mfile[100]; - char *ans2; - int n, i=0; - char line[200]; - - if (argc == 3) { - ret = ms_fnmatch(argv[1], argv[2]); - if (ret == 0) - printf("YES\n"); - else printf("NO\n"); - return ret; - } - mfile[0] = 0; - - while (fgets(line, sizeof(line)-1, stdin)) { - n = sscanf(line, "%3s %s %s %s\n", ans, mask, file, mfile); - if (n < 3) continue; - ans2 = match_test(mask, file, mfile); - if (strcmp(ans2, ans)) { - printf("%s %s %d mask=[%s] file=[%s] mfile=[%s]\n", - ans, ans2, i, mask, file, mfile); - } - i++; - mfile[0] = 0; - } - return 0; + return ms_fnmatch_w(p, s); } -#endif /* FNMATCH_TEST */ - diff --git a/source3/lib/sysacls.c b/source3/lib/sysacls.c index 0770a8856a..bdf18b52fc 100644 --- a/source3/lib/sysacls.c +++ b/source3/lib/sysacls.c @@ -161,7 +161,7 @@ int sys_acl_valid( SMB_ACL_T theacl ) return acl_valid(theacl); } -int sys_acl_set_file( const char *name, SMB_ACL_TYPE_T acltype, SMB_ACL_T theacl) +int sys_acl_set_file(const char *name, SMB_ACL_TYPE_T acltype, SMB_ACL_T theacl) { return acl_set_file(name, acltype, theacl); } diff --git a/source3/lib/system.c b/source3/lib/system.c index a402af77c9..8d4a872f14 100644 --- a/source3/lib/system.c +++ b/source3/lib/system.c @@ -726,176 +726,6 @@ struct passwd *sys_getpwuid(uid_t uid) return setup_pwret(getpwuid(uid)); } -/************************************************************************** - The following are the UNICODE versions of *all* system interface functions - called within Samba. Ok, ok, the exceptions are the gethostbyXX calls, - which currently are left as ascii as they are not used other than in name - resolution. -****************************************************************************/ - -/************************************************************************** - Wide stat. Just narrow and call sys_xxx. -****************************************************************************/ - -int wsys_stat(const smb_ucs2_t *wfname,SMB_STRUCT_STAT *sbuf) -{ - pstring fname; - return sys_stat(unicode_to_unix(fname,wfname,sizeof(fname)), sbuf); -} - -/************************************************************************** - Wide lstat. Just narrow and call sys_xxx. -****************************************************************************/ - -int wsys_lstat(const smb_ucs2_t *wfname,SMB_STRUCT_STAT *sbuf) -{ - pstring fname; - return sys_lstat(unicode_to_unix(fname,wfname,sizeof(fname)), sbuf); -} - -/************************************************************************** - Wide creat. Just narrow and call sys_xxx. -****************************************************************************/ - -int wsys_creat(const smb_ucs2_t *wfname, mode_t mode) -{ - pstring fname; - return sys_creat(unicode_to_unix(fname,wfname,sizeof(fname)), mode); -} - -/************************************************************************** - Wide open. Just narrow and call sys_xxx. -****************************************************************************/ - -int wsys_open(const smb_ucs2_t *wfname, int oflag, mode_t mode) -{ - pstring fname; - return sys_open(unicode_to_unix(fname,wfname,sizeof(fname)), oflag, mode); -} - -/************************************************************************** - Wide fopen. Just narrow and call sys_xxx. -****************************************************************************/ - -FILE *wsys_fopen(const smb_ucs2_t *wfname, const char *type) -{ - pstring fname; - return sys_fopen(unicode_to_unix(fname,wfname,sizeof(fname)), type); -} - -/************************************************************************** - Wide opendir. Just narrow and call sys_xxx. -****************************************************************************/ - -DIR *wsys_opendir(const smb_ucs2_t *wfname) -{ - pstring fname; - return opendir(unicode_to_unix(fname,wfname,sizeof(fname))); -} - -/************************************************************************** - Wide readdir. Return a structure pointer containing a wide filename. -****************************************************************************/ - -SMB_STRUCT_WDIRENT *wsys_readdir(DIR *dirp) -{ - static SMB_STRUCT_WDIRENT retval; - SMB_STRUCT_DIRENT *dirval = sys_readdir(dirp); - - if(!dirval) - return NULL; - - /* - * The only POSIX defined member of this struct is d_name. - */ - - unix_to_unicode(retval.d_name,dirval->d_name,sizeof(retval.d_name)); - - return &retval; -} - -/************************************************************************** - Wide getwd. Call sys_xxx and widen. Assumes s points to a wpstring. -****************************************************************************/ - -smb_ucs2_t *wsys_getwd(smb_ucs2_t *s) -{ - pstring fname; - char *p = sys_getwd(fname); - - if(!p) - return NULL; - - return unix_to_unicode(s, p, sizeof(wpstring)); -} - -/************************************************************************** - Wide chown. Just narrow and call sys_xxx. -****************************************************************************/ - -int wsys_chown(const smb_ucs2_t *wfname, uid_t uid, gid_t gid) -{ - pstring fname; - return chown(unicode_to_unix(fname,wfname,sizeof(fname)), uid, gid); -} - -/************************************************************************** - Wide chroot. Just narrow and call sys_xxx. -****************************************************************************/ - -int wsys_chroot(const smb_ucs2_t *wfname) -{ - pstring fname; - return chroot(unicode_to_unix(fname,wfname,sizeof(fname))); -} - -/************************************************************************** - Wide getpwnam. Return a structure pointer containing wide names. -****************************************************************************/ - -SMB_STRUCT_WPASSWD *wsys_getpwnam(const smb_ucs2_t *wname) -{ - static SMB_STRUCT_WPASSWD retval; - fstring name; - struct passwd *pwret = sys_getpwnam(unicode_to_unix(name,wname,sizeof(name))); - - if(!pwret) - return NULL; - - unix_to_unicode(retval.pw_name, pwret->pw_name, sizeof(retval.pw_name)); - retval.pw_passwd = pwret->pw_passwd; - retval.pw_uid = pwret->pw_uid; - retval.pw_gid = pwret->pw_gid; - unix_to_unicode(retval.pw_gecos, pwret->pw_gecos, sizeof(retval.pw_gecos)); - unix_to_unicode(retval.pw_dir, pwret->pw_dir, sizeof(retval.pw_dir)); - unix_to_unicode(retval.pw_shell, pwret->pw_shell, sizeof(retval.pw_shell)); - - return &retval; -} - -/************************************************************************** - Wide getpwuid. Return a structure pointer containing wide names. -****************************************************************************/ - -SMB_STRUCT_WPASSWD *wsys_getpwuid(uid_t uid) -{ - static SMB_STRUCT_WPASSWD retval; - struct passwd *pwret = sys_getpwuid(uid); - - if(!pwret) - return NULL; - - unix_to_unicode(retval.pw_name, pwret->pw_name, sizeof(retval.pw_name)); - retval.pw_passwd = pwret->pw_passwd; - retval.pw_uid = pwret->pw_uid; - retval.pw_gid = pwret->pw_gid; - unix_to_unicode(retval.pw_gecos, pwret->pw_gecos, sizeof(retval.pw_gecos)); - unix_to_unicode(retval.pw_dir, pwret->pw_dir, sizeof(retval.pw_dir)); - unix_to_unicode(retval.pw_shell, pwret->pw_shell, sizeof(retval.pw_shell)); - - return &retval; -} - /************************************************************************** Extract a command into an arg list. Uses a static pstring for storage. Caller frees returned arg list (which contains pointers into the static pstring). diff --git a/source3/lib/util.c b/source3/lib/util.c index 2e2c887b93..a8ef69e559 100644 --- a/source3/lib/util.c +++ b/source3/lib/util.c @@ -194,21 +194,6 @@ BOOL file_exist(char *fname,SMB_STRUCT_STAT *sbuf) return(S_ISREG(sbuf->st_mode)); } -/******************************************************************* - rename a unix file -********************************************************************/ -int file_rename(char *from, char *to) -{ - int rcode = rename (from, to); - - if (errno == EXDEV) - { - /* Rename across filesystems needed. */ - rcode = copy_reg (from, to); - } - return rcode; -} - /******************************************************************* check a files mod time ********************************************************************/ @@ -790,7 +775,7 @@ BOOL is_ipaddress(const char *str) interpret an internet address or name into an IP address in 4 byte form ****************************************************************************/ -uint32 interpret_addr(char *str) +uint32 interpret_addr(const char *str) { struct hostent *hp; uint32 res; @@ -823,7 +808,7 @@ uint32 interpret_addr(char *str) /******************************************************************* a convenient addition to interpret_addr() ******************************************************************/ -struct in_addr *interpret_addr2(char *str) +struct in_addr *interpret_addr2(const char *str) { static struct in_addr ret; uint32 a = interpret_addr(str); diff --git a/source3/lib/util_file.c b/source3/lib/util_file.c index 4e2adc97bc..7dc25a8dae 100644 --- a/source3/lib/util_file.c +++ b/source3/lib/util_file.c @@ -417,7 +417,7 @@ char *file_load(char *fname, size_t *size) /**************************************************************************** parse a buffer into lines ****************************************************************************/ -static char **file_lines_parse(char *p, size_t size, int *numlines, BOOL convert) +static char **file_lines_parse(char *p, size_t size, int *numlines) { int i; char *s, **ret; @@ -446,21 +446,15 @@ static char **file_lines_parse(char *p, size_t size, int *numlines, BOOL convert if (s[0] == '\r') s[0] = 0; } - if (convert) { - for (i = 0; ret[i]; i++) - unix_to_dos(ret[i], True); - } - return ret; } /**************************************************************************** load a file into memory and return an array of pointers to lines in the file -must be freed with file_lines_free(). If convert is true calls unix_to_dos on -the list. +must be freed with file_lines_free(). ****************************************************************************/ -char **file_lines_load(char *fname, int *numlines, BOOL convert) +char **file_lines_load(char *fname, int *numlines) { char *p; size_t size; @@ -468,7 +462,7 @@ char **file_lines_load(char *fname, int *numlines, BOOL convert) p = file_load(fname, &size); if (!p) return NULL; - return file_lines_parse(p, size, numlines, convert); + return file_lines_parse(p, size, numlines); } /**************************************************************************** @@ -476,7 +470,7 @@ load a fd into memory and return an array of pointers to lines in the file must be freed with file_lines_free(). If convert is true calls unix_to_dos on the list. ****************************************************************************/ -char **fd_lines_load(int fd, int *numlines, BOOL convert) +char **fd_lines_load(int fd, int *numlines) { char *p; size_t size; @@ -484,16 +478,15 @@ char **fd_lines_load(int fd, int *numlines, BOOL convert) p = fd_load(fd, &size); if (!p) return NULL; - return file_lines_parse(p, size, numlines, convert); + return file_lines_parse(p, size, numlines); } /**************************************************************************** load a pipe into memory and return an array of pointers to lines in the data -must be freed with file_lines_free(). If convert is true calls unix_to_dos on -the list. +must be freed with file_lines_free(). ****************************************************************************/ -char **file_lines_pload(char *syscmd, int *numlines, BOOL convert) +char **file_lines_pload(char *syscmd, int *numlines) { char *p; size_t size; @@ -501,7 +494,7 @@ char **file_lines_pload(char *syscmd, int *numlines, BOOL convert) p = file_pload(syscmd, &size); if (!p) return NULL; - return file_lines_parse(p, size, numlines, convert); + return file_lines_parse(p, size, numlines); } /**************************************************************************** diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c index d52ff82b10..8ff3e23443 100644 --- a/source3/lib/util_str.c +++ b/source3/lib/util_str.c @@ -1,8 +1,8 @@ /* Unix SMB/Netbios implementation. - Version 1.9. + Version 3.0 Samba utility functions - Copyright (C) Andrew Tridgell 1992-1998 + Copyright (C) Andrew Tridgell 1992-2001 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -83,6 +83,8 @@ BOOL next_token_nr(char **ptr,char *buff,char *sep, size_t bufsize) return ret; } +static uint16 tmpbuf[sizeof(pstring)]; + void set_first_token(char *ptr) { last_ptr = ptr; @@ -126,78 +128,15 @@ char **toktocliplist(int *ctok, char *sep) return ret; } - /******************************************************************* case insensitive string compararison ********************************************************************/ int StrCaseCmp(const char *s, const char *t) { - /* compare until we run out of string, either t or s, or find a difference */ - /* We *must* use toupper rather than tolower here due to the - asynchronous upper to lower mapping. - */ -#if !defined(KANJI_WIN95_COMPATIBILITY) - /* - * For completeness we should put in equivalent code for code pages - * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but - * doubt anyone wants Samba to behave differently from Win95 and WinNT - * here. They both treat full width ascii characters as case senstive - * filenames (ie. they don't do the work we do here). - * JRA. - */ - - if(lp_client_code_page() == KANJI_CODEPAGE) - { - /* Win95 treats full width ascii characters as case sensitive. */ - int diff; - for (;;) - { - if (!*s || !*t) - return toupper (*s) - toupper (*t); - else if (is_sj_alph (*s) && is_sj_alph (*t)) - { - diff = sj_toupper2 (*(s+1)) - sj_toupper2 (*(t+1)); - if (diff) - return diff; - s += 2; - t += 2; - } - else if (is_shift_jis (*s) && is_shift_jis (*t)) - { - diff = ((int) (unsigned char) *s) - ((int) (unsigned char) *t); - if (diff) - return diff; - diff = ((int) (unsigned char) *(s+1)) - ((int) (unsigned char) *(t+1)); - if (diff) - return diff; - s += 2; - t += 2; - } - else if (is_shift_jis (*s)) - return 1; - else if (is_shift_jis (*t)) - return -1; - else - { - diff = toupper (*s) - toupper (*t); - if (diff) - return diff; - s++; - t++; - } - } - } - else -#endif /* KANJI_WIN95_COMPATIBILITY */ - { - while (*s && *t && toupper(*s) == toupper(*t)) - { - s++; - t++; - } - - return(toupper(*s) - toupper(*t)); - } + pstring buf1, buf2; + unix_strlower(s, strlen(s)+1, buf1, sizeof(buf1)); + unix_strlower(t, strlen(t)+1, buf2, sizeof(buf2)); + return strcmp(buf1,buf2); } /******************************************************************* @@ -205,83 +144,10 @@ int StrCaseCmp(const char *s, const char *t) ********************************************************************/ int StrnCaseCmp(const char *s, const char *t, size_t n) { - /* compare until we run out of string, either t or s, or chars */ - /* We *must* use toupper rather than tolower here due to the - asynchronous upper to lower mapping. - */ -#if !defined(KANJI_WIN95_COMPATIBILITY) - /* - * For completeness we should put in equivalent code for code pages - * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but - * doubt anyone wants Samba to behave differently from Win95 and WinNT - * here. They both treat full width ascii characters as case senstive - * filenames (ie. they don't do the work we do here). - * JRA. - */ - - if(lp_client_code_page() == KANJI_CODEPAGE) - { - /* Win95 treats full width ascii characters as case sensitive. */ - int diff; - for (;n > 0;) - { - if (!*s || !*t) - return toupper (*s) - toupper (*t); - else if (is_sj_alph (*s) && is_sj_alph (*t)) - { - diff = sj_toupper2 (*(s+1)) - sj_toupper2 (*(t+1)); - if (diff) - return diff; - s += 2; - t += 2; - n -= 2; - } - else if (is_shift_jis (*s) && is_shift_jis (*t)) - { - diff = ((int) (unsigned char) *s) - ((int) (unsigned char) *t); - if (diff) - return diff; - diff = ((int) (unsigned char) *(s+1)) - ((int) (unsigned char) *(t+1)); - if (diff) - return diff; - s += 2; - t += 2; - n -= 2; - } - else if (is_shift_jis (*s)) - return 1; - else if (is_shift_jis (*t)) - return -1; - else - { - diff = toupper (*s) - toupper (*t); - if (diff) - return diff; - s++; - t++; - n--; - } - } - return 0; - } - else -#endif /* KANJI_WIN95_COMPATIBILITY */ - { - while (n && *s && *t && toupper(*s) == toupper(*t)) - { - s++; - t++; - n--; - } - - /* not run out of chars - strings are different lengths */ - if (n) - return(toupper(*s) - toupper(*t)); - - /* identical up to where we run out of chars, - and strings are same length */ - return(0); - } + pstring buf1, buf2; + unix_strlower(s, strlen(s)+1, buf1, sizeof(buf1)); + unix_strlower(t, strlen(t)+1, buf2, sizeof(buf2)); + return strncmp(buf1,buf2,n); } /******************************************************************* @@ -348,112 +214,6 @@ int strwicmp(char *psz1, char *psz2) } -/******************************************************************* - convert a string to lower case -********************************************************************/ -void strlower(char *s) -{ - while (*s) - { -#if !defined(KANJI_WIN95_COMPATIBILITY) - /* - * For completeness we should put in equivalent code for code pages - * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but - * doubt anyone wants Samba to behave differently from Win95 and WinNT - * here. They both treat full width ascii characters as case senstive - * filenames (ie. they don't do the work we do here). - * JRA. - */ - - if(lp_client_code_page() == KANJI_CODEPAGE) - { - /* Win95 treats full width ascii characters as case sensitive. */ - if (is_shift_jis (*s)) - { - if (is_sj_upper (s[0], s[1])) - s[1] = sj_tolower2 (s[1]); - s += 2; - } - else if (is_kana (*s)) - { - s++; - } - else - { - if (isupper(*s)) - *s = tolower(*s); - s++; - } - } - else -#endif /* KANJI_WIN95_COMPATIBILITY */ - { - size_t skip = get_character_len( *s ); - if( skip != 0 ) - s += skip; - else - { - if (isupper(*s)) - *s = tolower(*s); - s++; - } - } - } -} - -/******************************************************************* - convert a string to upper case -********************************************************************/ -void strupper(char *s) -{ - while (*s) - { -#if !defined(KANJI_WIN95_COMPATIBILITY) - /* - * For completeness we should put in equivalent code for code pages - * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but - * doubt anyone wants Samba to behave differently from Win95 and WinNT - * here. They both treat full width ascii characters as case senstive - * filenames (ie. they don't do the work we do here). - * JRA. - */ - - if(lp_client_code_page() == KANJI_CODEPAGE) - { - /* Win95 treats full width ascii characters as case sensitive. */ - if (is_shift_jis (*s)) - { - if (is_sj_lower (s[0], s[1])) - s[1] = sj_toupper2 (s[1]); - s += 2; - } - else if (is_kana (*s)) - { - s++; - } - else - { - if (islower(*s)) - *s = toupper(*s); - s++; - } - } - else -#endif /* KANJI_WIN95_COMPATIBILITY */ - { - size_t skip = get_character_len( *s ); - if( skip != 0 ) - s += skip; - else - { - if (islower(*s)) - *s = toupper(*s); - s++; - } - } - } -} - /******************************************************************* convert a string to "normal" form ********************************************************************/ @@ -471,44 +231,26 @@ check if a string is in "normal" case ********************************************************************/ BOOL strisnormal(char *s) { - extern int case_default; - if (case_default == CASE_UPPER) - return(!strhaslower(s)); - - return(!strhasupper(s)); + extern int case_default; + if (case_default == CASE_UPPER) + return(!strhaslower(s)); + + return(!strhasupper(s)); } /**************************************************************************** string replace + NOTE: oldc and newc must be 7 bit characters ****************************************************************************/ void string_replace(char *s,char oldc,char newc) { - size_t skip; - - /* - * sbcs optimization. - */ - if(!global_is_multibyte_codepage) { - while (*s) { - if (oldc == *s) - *s = newc; - s++; - } - } else { - while (*s) - { - skip = get_character_len( *s ); - if( skip != 0 ) - s += skip; - else - { - if (oldc == *s) - *s = newc; - s++; - } - } - } + smb_ucs2_t *ptr; + push_ucs2(NULL, tmpbuf,s, sizeof(tmpbuf), STR_TERMINATE); + for(ptr=tmpbuf;*ptr;ptr++) { + if(*ptr==UCS2_CHAR(oldc)) *ptr = UCS2_CHAR(newc); + } + pull_ucs2(NULL, s, tmpbuf, -1, sizeof(tmpbuf), STR_TERMINATE); } @@ -517,35 +259,20 @@ skip past some strings in a buffer ********************************************************************/ char *skip_string(char *buf,size_t n) { - while (n--) - buf += strlen(buf) + 1; - return(buf); + while (n--) + buf += strlen(buf) + 1; + return(buf); } /******************************************************************* Count the number of characters in a string. Normally this will be the same as the number of bytes in a string for single byte strings, but will be different for multibyte. - 16.oct.98, jdblair@cobaltnet.com. ********************************************************************/ - size_t str_charnum(const char *s) { - size_t len = 0; - - /* - * sbcs optimization. - */ - if(!global_is_multibyte_codepage) { - return strlen(s); - } else { - while (*s != '\0') { - int skip = get_character_len(*s); - s += (skip ? skip : 1); - len++; - } - } - return len; + push_ucs2(NULL, tmpbuf,s, sizeof(tmpbuf), STR_TERMINATE); + return strlen_w(tmpbuf); } /******************************************************************* @@ -554,110 +281,36 @@ trim the specified elements off the front and back of a string BOOL trim_string(char *s,const char *front,const char *back) { - BOOL ret = False; - size_t s_len; - size_t front_len; - size_t back_len; - char *sP; + BOOL ret = False; + size_t front_len; + size_t back_len; + size_t len; /* Ignore null or empty strings. */ + if (!s || (s[0] == '\0')) + return False; - if ( !s || (s[0] == '\0')) - return False; - - sP = s; - s_len = strlen( s ) + 1; - front_len = (front) ? strlen( front ) + 1 : 0; - back_len = (back) ? strlen( back ) + 1 : 0; - - /* - * remove "front" string from given "s", if it matches front part, - * repeatedly. - */ - if ( front && front_len > 1 ) { - while (( s_len >= front_len )&& - ( memcmp( sP, front, front_len - 1 )) == 0 ) { - ret = True; - sP += ( front_len - 1 ); - s_len -= ( front_len - 1 ); - } - } + front_len = front? strlen(front) : 0; + back_len = back? strlen(back) : 0; - /* - * we'll memmove sP to s later, after we're done with - * back part removal, for minimizing copy. - */ - - - /* - * We split out the multibyte code page - * case here for speed purposes. Under a - * multibyte code page we need to walk the - * string forwards only and multiple times. - * Thanks to John Blair for finding this - * one. JRA. - */ - /* - * This JRA's comment is partly correct, but partly wrong. - * You can always check from "end" part, and if it did not match, - * it means there is no possibility of finding one. - * If you found matching point, mark them, then look from front - * if marking point suits multi-byte string rule. - * Kenichi Okuyama. - */ - - if ( back && back_len > 1 && s_len >= back_len) { - char *bP = sP + s_len - back_len; - long b_len = s_len; - - while (( b_len >= back_len )&& - ( memcmp( bP, back, back_len - 1 ) == 0 )) { - bP -= ( back_len - 1 ); - b_len -= ( back_len - 1 ); - } + len = strlen(s); - /* - * You're here, means you ether have found match multiple times, - * or you found none. If you've found match, then bP should be - * moving. - */ - if ( bP != sP + s_len - back_len ) { - bP += ( back_len - 1 ); /* slide bP to first matching point. */ - - if( !global_is_multibyte_codepage ) { - /* simply terminate */ - (*bP) = '\0'; - s_len = b_len; - ret = True; - } else { - /* trace string from start. */ - char *cP = sP; - while ( cP < sP + s_len - back_len ) { - size_t skip; - skip = skip_multibyte_char( *cP ); - cP += ( skip ? skip : 1 ); - if ( cP == bP ) { - /* you found the match */ - (*bP) = '\0'; - ret = True; - s_len = b_len; - break; - } - while (( cP > bP )&&( bP < sP + s_len - back_len )) { - bP += ( back_len - 1 ); - b_len += ( back_len - 1 ); - } - } - } - } - } - - /* if front found matching point */ - if ( sP != s ) { - /* slide string to buffer top */ - memmove( s, sP, s_len ); - } - return ret; + if (front_len) { + while (len && strncmp(s, front, front_len)==0) { + memcpy(s, s+front_len, (len-front_len)+1); + len -= front_len; + ret=True; + } + } + + if (back_len) { + while (strncmp(s+len-back_len,back,back_len)==0) { + s[len-back_len]='\0'; + len -= back_len; + ret=True; + } + } + return ret; } @@ -666,46 +319,11 @@ does a string have any uppercase chars in it? ****************************************************************************/ BOOL strhasupper(const char *s) { - while (*s) - { -#if !defined(KANJI_WIN95_COMPATIBILITY) - /* - * For completeness we should put in equivalent code for code pages - * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but - * doubt anyone wants Samba to behave differently from Win95 and WinNT - * here. They both treat full width ascii characters as case senstive - * filenames (ie. they don't do the work we do here). - * JRA. - */ - - if(lp_client_code_page() == KANJI_CODEPAGE) - { - /* Win95 treats full width ascii characters as case sensitive. */ - if (is_shift_jis (*s)) - s += 2; - else if (is_kana (*s)) - s++; - else - { - if (isupper(*s)) - return(True); - s++; - } - } - else -#endif /* KANJI_WIN95_COMPATIBILITY */ - { - size_t skip = get_character_len( *s ); - if( skip != 0 ) - s += skip; - else { - if (isupper(*s)) - return(True); - s++; - } - } - } - return(False); + smb_ucs2_t *ptr; + push_ucs2(NULL, tmpbuf,s, sizeof(tmpbuf), STR_TERMINATE); + for(ptr=tmpbuf;*ptr;ptr++) + if(isupper_w(*ptr)) return True; + return(False); } /**************************************************************************** @@ -713,104 +331,23 @@ does a string have any lowercase chars in it? ****************************************************************************/ BOOL strhaslower(const char *s) { - while (*s) - { -#if !defined(KANJI_WIN95_COMPATIBILITY) - /* - * For completeness we should put in equivalent code for code pages - * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but - * doubt anyone wants Samba to behave differently from Win95 and WinNT - * here. They both treat full width ascii characters as case senstive - * filenames (ie. they don't do the work we do here). - * JRA. - */ - - if(lp_client_code_page() == KANJI_CODEPAGE) - { - /* Win95 treats full width ascii characters as case sensitive. */ - if (is_shift_jis (*s)) - { - if (is_sj_upper (s[0], s[1])) - return(True); - if (is_sj_lower (s[0], s[1])) - return (True); - s += 2; - } - else if (is_kana (*s)) - { - s++; - } - else - { - if (islower(*s)) - return(True); - s++; - } - } - else -#endif /* KANJI_WIN95_COMPATIBILITY */ - { - size_t skip = get_character_len( *s ); - if( skip != 0 ) - s += skip; - else { - if (islower(*s)) - return(True); - s++; - } - } - } - return(False); + smb_ucs2_t *ptr; + push_ucs2(NULL, tmpbuf,s, sizeof(tmpbuf), STR_TERMINATE); + for(ptr=tmpbuf;*ptr;ptr++) + if(islower_w(*ptr)) return True; + return(False); } /**************************************************************************** -find the number of chars in a string +find the number of 'c' chars in a string ****************************************************************************/ size_t count_chars(const char *s,char c) { - size_t count=0; - -#if !defined(KANJI_WIN95_COMPATIBILITY) - /* - * For completeness we should put in equivalent code for code pages - * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but - * doubt anyone wants Samba to behave differently from Win95 and WinNT - * here. They both treat full width ascii characters as case senstive - * filenames (ie. they don't do the work we do here). - * JRA. - */ - - if(lp_client_code_page() == KANJI_CODEPAGE) - { - /* Win95 treats full width ascii characters as case sensitive. */ - while (*s) - { - if (is_shift_jis (*s)) - s += 2; - else - { - if (*s == c) - count++; - s++; - } - } - } - else -#endif /* KANJI_WIN95_COMPATIBILITY */ - { - while (*s) - { - size_t skip = get_character_len( *s ); - if( skip != 0 ) - s += skip; - else { - if (*s == c) - count++; - s++; - } - } - } - return(count); + smb_ucs2_t *ptr; + int count; + push_ucs2(NULL, tmpbuf,s, sizeof(tmpbuf), STR_TERMINATE); + for(count=0,ptr=tmpbuf;*ptr;ptr++) if(*ptr==UCS2_CHAR(c)) count++; + return(count); } /******************************************************************* @@ -819,52 +356,15 @@ Return True if a string consists only of one particular character. BOOL str_is_all(const char *s,char c) { - if(s == NULL) - return False; - if(!*s) - return False; - -#if !defined(KANJI_WIN95_COMPATIBILITY) - /* - * For completeness we should put in equivalent code for code pages - * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but - * doubt anyone wants Samba to behave differently from Win95 and WinNT - * here. They both treat full width ascii characters as case senstive - * filenames (ie. they don't do the work we do here). - * JRA. - */ - - if(lp_client_code_page() == KANJI_CODEPAGE) - { - /* Win95 treats full width ascii characters as case sensitive. */ - while (*s) - { - if (is_shift_jis (*s)) - s += 2; - else - { - if (*s != c) - return False; - s++; - } - } - } - else -#endif /* KANJI_WIN95_COMPATIBILITY */ - { - while (*s) - { - size_t skip = get_character_len( *s ); - if( skip != 0 ) - s += skip; - else { - if (*s != c) - return False; - s++; - } - } - } - return True; + smb_ucs2_t *ptr; + + if(s == NULL) return False; + if(!*s) return False; + + push_ucs2(NULL, tmpbuf,s, sizeof(tmpbuf), STR_TERMINATE); + for(ptr=tmpbuf;*ptr;ptr++) if(*ptr!=UCS2_CHAR(c)) return False; + + return True; } /******************************************************************* @@ -874,29 +374,29 @@ include the terminating zero. char *safe_strcpy(char *dest,const char *src, size_t maxlength) { - size_t len; + size_t len; - if (!dest) { - DEBUG(0,("ERROR: NULL dest in safe_strcpy\n")); - return NULL; - } + if (!dest) { + DEBUG(0,("ERROR: NULL dest in safe_strcpy\n")); + return NULL; + } - if (!src) { - *dest = 0; - return dest; - } + if (!src) { + *dest = 0; + return dest; + } - len = strlen(src); + len = strlen(src); - if (len > maxlength) { - DEBUG(0,("ERROR: string overflow by %d in safe_strcpy [%.50s]\n", - (int)(len-maxlength), src)); - len = maxlength; - } + if (len > maxlength) { + DEBUG(0,("ERROR: string overflow by %d in safe_strcpy [%.50s]\n", + (int)(len-maxlength), src)); + len = maxlength; + } - memmove(dest, src, len); - dest[len] = 0; - return dest; + memmove(dest, src, len); + dest[len] = 0; + return dest; } /******************************************************************* @@ -906,29 +406,29 @@ include the terminating zero. char *safe_strcat(char *dest, const char *src, size_t maxlength) { - size_t src_len, dest_len; + size_t src_len, dest_len; - if (!dest) { - DEBUG(0,("ERROR: NULL dest in safe_strcat\n")); - return NULL; - } - - if (!src) { - return dest; - } - - src_len = strlen(src); - dest_len = strlen(dest); + if (!dest) { + DEBUG(0,("ERROR: NULL dest in safe_strcat\n")); + return NULL; + } - if (src_len + dest_len > maxlength) { - DEBUG(0,("ERROR: string overflow by %d in safe_strcat [%.50s]\n", - (int)(src_len + dest_len - maxlength), src)); - src_len = maxlength - dest_len; - } - - memcpy(&dest[dest_len], src, src_len); - dest[dest_len + src_len] = 0; - return dest; + if (!src) { + return dest; + } + + src_len = strlen(src); + dest_len = strlen(dest); + + if (src_len + dest_len > maxlength) { + DEBUG(0,("ERROR: string overflow by %d in safe_strcat [%.50s]\n", + (int)(src_len + dest_len - maxlength), src)); + src_len = maxlength - dest_len; + } + + memcpy(&dest[dest_len], src, src_len); + dest[dest_len + src_len] = 0; + return dest; } /******************************************************************* @@ -979,15 +479,15 @@ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, si char *StrnCpy(char *dest,const char *src,size_t n) { - char *d = dest; - if (!dest) return(NULL); - if (!src) { - *dest = 0; - return(dest); - } - while (n-- && (*d++ = *src++)) ; - *d = 0; - return(dest); + char *d = dest; + if (!dest) return(NULL); + if (!src) { + *dest = 0; + return(dest); + } + while (n-- && (*d++ = *src++)) ; + *d = 0; + return(dest); } /**************************************************************************** @@ -1303,3 +803,57 @@ char *string_truncate(char *s, int length) } return s; } + + +/**************************************************************************** +strchr and strrchr are very hard to do on general multi-byte strings. +we convert via ucs2 for now +****************************************************************************/ +char *strchr_m(const char *s, char c) +{ + wpstring ws; + pstring s2; + smb_ucs2_t *p; + + push_ucs2(NULL, ws, s, sizeof(ws), STR_TERMINATE); + p = strchr_wa(ws, c); + if (!p) return NULL; + *p = 0; + pull_ucs2_pstring(s2, ws); + return (char *)(s+strlen(s2)); +} + +char *strrchr_m(const char *s, char c) +{ + wpstring ws; + pstring s2; + smb_ucs2_t *p; + + push_ucs2(NULL, ws, s, sizeof(ws), STR_TERMINATE); + p = strrchr_wa(ws, c); + if (!p) return NULL; + *p = 0; + pull_ucs2_pstring(s2, ws); + return (char *)(s+strlen(s2)); +} + +/******************************************************************* + convert a string to lower case +********************************************************************/ +void strlower_m(char *s) +{ + /* I assume that lowercased string takes the same number of bytes + * as source string even in UTF-8 encoding. (VIV) */ + unix_strlower(s,strlen(s)+1,s,strlen(s)+1); +} + +/******************************************************************* + convert a string to upper case +********************************************************************/ +void strupper_m(char *s) +{ + /* I assume that lowercased string takes the same number of bytes + * as source string even in multibyte encoding. (VIV) */ + unix_strupper(s,strlen(s)+1,s,strlen(s)+1); +} + diff --git a/source3/lib/util_unistr.c b/source3/lib/util_unistr.c index 96aa62a283..1bfcd179f4 100644 --- a/source3/lib/util_unistr.c +++ b/source3/lib/util_unistr.c @@ -1,8 +1,8 @@ /* Unix SMB/Netbios implementation. - Version 1.9. + Version 3.0 Samba utility functions - Copyright (C) Andrew Tridgell 1992-1998 + Copyright (C) Andrew Tridgell 1992-2001 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,11 +31,7 @@ extern int DEBUGLEVEL; * These are dynamically loaded from a unicode translation file. */ -static smb_ucs2_t *doscp_to_ucs2; -static uint16 *ucs2_to_doscp; - -static smb_ucs2_t *unixcp_to_ucs2; -static uint16 *ucs2_to_unixcp; +#define CONV_DEBUGLEVEL 83 #ifndef MAXUNI #define MAXUNI 1024 @@ -54,62 +50,10 @@ static uint16 *ucs2_to_unixcp; size_t dos_PutUniCode(char *dst,const char *src, ssize_t len, BOOL null_terminate) { - size_t ret = 0; - while (*src && (len >= 2)) { - size_t skip = get_character_len(*src); - smb_ucs2_t val = (*src & 0xff); - - /* - * If this is a multibyte character (and all DOS/Windows - * codepages have at maximum 2 byte multibyte characters) - * then work out the index value for the unicode conversion. - */ - - if (skip == 2) - val = ((val << 8) | (src[1] & 0xff)); - - SSVAL(dst,ret,doscp_to_ucs2[val]); - ret += 2; - len -= 2; - if (skip) - src += skip; - else - src++; - } - if (null_terminate) { - SSVAL(dst,ret,0); - ret += 2; - } - return(ret); + return push_ucs2(NULL, dst, src, len, + STR_UNICODE|STR_NOALIGN | (null_terminate?STR_TERMINATE:0)); } -/******************************************************************* - Pull a DOS codepage string out of a UNICODE array. len is in bytes. -********************************************************************/ - -void unistr_to_dos(char *dest, const char *src, size_t len) -{ - char *destend = dest + len; - - while (dest < destend) { - uint16 ucs2_val = SVAL(src,0); - uint16 cp_val = ucs2_to_doscp[ucs2_val]; - - src += 2; - - if (ucs2_val == 0) - break; - - if (cp_val < 256) - *dest++ = (char)cp_val; - else { - *dest++ = (cp_val >> 8) & 0xff; - *dest++ = (cp_val & 0xff); - } - } - - *dest = 0; -} /******************************************************************* Skip past a unicode string, but not more than len. Always move @@ -129,933 +73,61 @@ char *skip_unibuf(char *src, size_t len) return src; } -/******************************************************************* - Return a DOS codepage version of a little-endian unicode string. - len is the filename length (ignoring any terminating zero) in uin16 - units. Always null terminates. - Hack alert: uses fixed buffer(s). -********************************************************************/ - -char *dos_unistrn2(uint16 *src, int len) -{ - static char lbufs[8][MAXUNI]; - static int nexti; - char *lbuf = lbufs[nexti]; - char *p; - - nexti = (nexti+1)%8; - - for (p = lbuf; (len > 0) && (p-lbuf < MAXUNI-3) && *src; len--, src++) { - uint16 ucs2_val = SVAL(src,0); - uint16 cp_val = ucs2_to_doscp[ucs2_val]; - - if (cp_val < 256) - *p++ = (char)cp_val; - else { - *p++ = (cp_val >> 8) & 0xff; - *p++ = (cp_val & 0xff); - } - } - - *p = 0; - return lbuf; -} - -static char lbufs[8][MAXUNI]; -static int nexti; - -/******************************************************************* - Return a DOS codepage version of a little-endian unicode string. - Hack alert: uses fixed buffer(s). -********************************************************************/ - -char *dos_unistr2(uint16 *src) -{ - char *lbuf = lbufs[nexti]; - char *p; - - nexti = (nexti+1)%8; - - for (p = lbuf; (p-lbuf < MAXUNI-3) && *src; src++) { - uint16 ucs2_val = SVAL(src,0); - uint16 cp_val = ucs2_to_doscp[ucs2_val]; - - if (cp_val < 256) - *p++ = (char)cp_val; - else { - *p++ = (cp_val >> 8) & 0xff; - *p++ = (cp_val & 0xff); - } - } - - *p = 0; - return lbuf; -} - -/******************************************************************* -Return a DOS codepage version of a little-endian unicode string -********************************************************************/ - -char *dos_unistr2_to_str(UNISTR2 *str) +/* Copy a string from little-endian or big-endian unicode source (depending + * on flags) to internal samba format destination + */ +int rpcstr_pull(char* dest, void *src, int dest_len, int src_len, int flags) { - char *lbuf = lbufs[nexti]; - char *p; - uint16 *src = str->buffer; - - nexti = (nexti+1)%8; - - for (p = lbuf; (p - lbuf < MAXUNI-3) && (src - str->buffer < str->uni_str_len) && *src; src++) { - uint16 ucs2_val = SVAL(src,0); - uint16 cp_val = ucs2_to_doscp[ucs2_val]; - - if (cp_val < 256) - *p++ = (char)cp_val; - else { - *p++ = (cp_val >> 8) & 0xff; - *p++ = (cp_val & 0xff); - } - } - - *p = 0; - return lbuf; + if(dest_len==-1) dest_len=MAXUNI-3; + return pull_ucs2(NULL, dest, src, dest_len, src_len, flags|STR_UNICODE|STR_NOALIGN); } -/******************************************************************* - Put an ASCII string into a UNICODE array (uint16's). - use little-endian ucs2 - ********************************************************************/ -void ascii_to_unistr(uint16 *dest, const char *src, int maxlen) +/* Converts a string from internal samba format to unicode + */ +int rpcstr_push(void* dest, const char *src, int dest_len, int flags) { - uint16 *destend = dest + maxlen; - char c; - - while (dest < destend) { - c = *(src++); - if (c == 0) - break; - - SSVAL(dest, 0, c); - dest++; - } - - *dest = 0; -} - -/******************************************************************* - Pull an ASCII string out of a UNICODE array (uint16's). - ********************************************************************/ - -void unistr_to_ascii(char *dest, const uint16 *src, int len) -{ - char *destend = dest + len; - uint16 c; - - if (src == NULL) { - *dest = '\0'; - return; - } - - /* normal code path for a valid 'src' */ - while (dest < destend) { - c = SVAL(src, 0); - src++; - if (c == 0) - break; - - *(dest++) = (char)c; - } - - *dest = 0; - return; -} - -/******************************************************************* - Convert a (little-endian) UNISTR2 structure to an ASCII string - Warning: this version does DOS codepage. -********************************************************************/ - -void unistr2_to_ascii(char *dest, const UNISTR2 *str, size_t maxlen) -{ - char *p; - uint16 *src; - size_t len; - - if (str == NULL) { - *dest='\0'; - return; - } - - src = str->buffer; - len = MIN(str->uni_str_len, maxlen); - - if (len == 0) { - *dest='\0'; - return; - } - - for (p = dest; (p-dest < len) && *src; src++) { - uint16 ucs2_val = SVAL(src,0); - uint16 cp_val = ucs2_to_doscp[ucs2_val]; - - if (cp_val < 256) - *p++ = (char)cp_val; - else { - *p = (cp_val >> 8) & 0xff; - *p++ = (cp_val & 0xff); - } - } - - *p = 0; -} - - -/******************************************************************* -Return a number stored in a buffer -********************************************************************/ - -uint32 buffer2_to_uint32(BUFFER2 *str) -{ - if (str->buf_len == 4) - return IVAL(str->buffer, 0); - else - return 0; -} - -/******************************************************************* -Return a DOS codepage version of a NOTunicode string -********************************************************************/ - -char *dos_buffer2_to_str(BUFFER2 *str) -{ - char *lbuf = lbufs[nexti]; - char *p; - uint16 *src = str->buffer; - - nexti = (nexti+1)%8; - - for (p = lbuf; (p - lbuf < sizeof(str->buffer)-3) && (src - str->buffer < str->buf_len/2) && *src; src++) { - uint16 ucs2_val = SVAL(src,0); - uint16 cp_val = ucs2_to_doscp[ucs2_val]; - - if (cp_val < 256) - *p++ = (char)cp_val; - else { - *p++ = (cp_val >> 8) & 0xff; - *p++ = (cp_val & 0xff); - } - } - - *p = 0; - return lbuf; -} - -/******************************************************************* - Return a dos codepage version of a NOTunicode string -********************************************************************/ - -char *dos_buffer2_to_multistr(BUFFER2 *str) -{ - char *lbuf = lbufs[nexti]; - char *p; - uint16 *src = str->buffer; - - nexti = (nexti+1)%8; - - for (p = lbuf; (p - lbuf < sizeof(str->buffer)-3) && (src - str->buffer < str->buf_len/2); src++) { - if (*src == 0) { - *p++ = ' '; - } else { - uint16 ucs2_val = SVAL(src,0); - uint16 cp_val = ucs2_to_doscp[ucs2_val]; - - if (cp_val < 256) - *p++ = (char)cp_val; - else { - *p++ = (cp_val >> 8) & 0xff; - *p++ = (cp_val & 0xff); - } - } - } - - *p = 0; - return lbuf; -} - -/******************************************************************* - Create a null-terminated unicode string from a null-terminated DOS - codepage string. - Return number of unicode chars copied, excluding the null character. - Unicode strings created are in little-endian format. -********************************************************************/ - -size_t dos_struni2(char *dst, const char *src, size_t max_len) -{ - size_t len = 0; - - if (dst == NULL) - return 0; - - if (src != NULL) { - for (; (len < max_len-2) && *src; len++, dst +=2) { - size_t skip = get_character_len(*src); - smb_ucs2_t val = (*src & 0xff); - - /* - * If this is a multibyte character (and all DOS/Windows - * codepages have at maximum 2 byte multibyte characters) - * then work out the index value for the unicode conversion. - */ - - if (skip == 2) - val = ((val << 8) | (src[1] & 0xff)); - - SSVAL(dst,0,doscp_to_ucs2[val]); - if (skip) - src += skip; - else - src++; - } - } - - SSVAL(dst,0,0); - - return len; + return push_ucs2(NULL, dest, src, dest_len, flags|STR_UNICODE|STR_NOALIGN); } /******************************************************************* Return a DOS codepage version of a little-endian unicode string. - Hack alert: uses fixed buffer(s). -********************************************************************/ - -char *dos_unistr(char *buf) -{ - char *lbuf = lbufs[nexti]; - uint16 *src = (uint16 *)buf; - char *p; - - nexti = (nexti+1)%8; - - for (p = lbuf; (p-lbuf < MAXUNI-3) && *src; src++) { - uint16 ucs2_val = SVAL(src,0); - uint16 cp_val = ucs2_to_doscp[ucs2_val]; - - if (cp_val < 256) - *p++ = (char)cp_val; - else { - *p++ = (cp_val >> 8) & 0xff; - *p++ = (cp_val & 0xff); - } - } - - *p = 0; - return lbuf; -} - -/******************************************************************* - Strcpy for unicode strings. returns length (in num of wide chars) -********************************************************************/ - -int unistrcpy(char *dst, char *src) -{ - int num_wchars = 0; - uint16 *wsrc = (uint16 *)src; - uint16 *wdst = (uint16 *)dst; - - while (*wsrc) { - *wdst++ = *wsrc++; - num_wchars++; - } - *wdst = 0; - - return num_wchars; -} - -/******************************************************************* - Free any existing maps. -********************************************************************/ - -static void free_maps(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp) -{ - /* this handles identity mappings where we share the pointer */ - if (*pp_ucs2_to_cp == *pp_cp_to_ucs2) { - *pp_ucs2_to_cp = NULL; - } - - if (*pp_cp_to_ucs2) { - free(*pp_cp_to_ucs2); - *pp_cp_to_ucs2 = NULL; - } - - if (*pp_ucs2_to_cp) { - free(*pp_ucs2_to_cp); - *pp_ucs2_to_cp = NULL; - } -} - -/******************************************************************* - Build a default (null) codepage to unicode map. -********************************************************************/ - -void default_unicode_map(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp) -{ - int i; - - free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp); - - if ((*pp_ucs2_to_cp = (uint16 *)malloc(2*65536)) == NULL) { - DEBUG(0,("default_unicode_map: malloc fail for ucs2_to_cp size %u.\n", 2*65536)); - abort(); - } - - *pp_cp_to_ucs2 = *pp_ucs2_to_cp; /* Default map is an identity. */ - for (i = 0; i < 65536; i++) - (*pp_cp_to_ucs2)[i] = i; -} - -/******************************************************************* - Load a codepage to unicode and vica-versa map. -********************************************************************/ - -BOOL load_unicode_map(const char *codepage, smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp) -{ - pstring unicode_map_file_name; - FILE *fp = NULL; - SMB_STRUCT_STAT st; - smb_ucs2_t *cp_to_ucs2 = *pp_cp_to_ucs2; - uint16 *ucs2_to_cp = *pp_ucs2_to_cp; - size_t cp_to_ucs2_size; - size_t ucs2_to_cp_size; - size_t i; - size_t size; - char buf[UNICODE_MAP_HEADER_SIZE]; - - DEBUG(5, ("load_unicode_map: loading unicode map for codepage %s.\n", codepage)); - - if (*codepage == '\0') - goto clean_and_exit; - - if(strlen(lp_codepagedir()) + 13 + strlen(codepage) > - sizeof(unicode_map_file_name)) { - DEBUG(0,("load_unicode_map: filename too long to load\n")); - goto clean_and_exit; - } - - pstrcpy(unicode_map_file_name, lp_codepagedir()); - pstrcat(unicode_map_file_name, "/"); - pstrcat(unicode_map_file_name, "unicode_map."); - pstrcat(unicode_map_file_name, codepage); - - if(sys_stat(unicode_map_file_name,&st)!=0) { - DEBUG(0,("load_unicode_map: filename %s does not exist.\n", - unicode_map_file_name)); - goto clean_and_exit; - } - - size = st.st_size; - - if ((size != UNICODE_MAP_HEADER_SIZE + 4*65536) && (size != UNICODE_MAP_HEADER_SIZE +(2*256 + 2*65536))) { - DEBUG(0,("load_unicode_map: file %s is an incorrect size for a \ -unicode map file (size=%d).\n", unicode_map_file_name, (int)size)); - goto clean_and_exit; - } - - if((fp = sys_fopen( unicode_map_file_name, "r")) == NULL) { - DEBUG(0,("load_unicode_map: cannot open file %s. Error was %s\n", - unicode_map_file_name, strerror(errno))); - goto clean_and_exit; - } - - if(fread( buf, 1, UNICODE_MAP_HEADER_SIZE, fp)!=UNICODE_MAP_HEADER_SIZE) { - DEBUG(0,("load_unicode_map: cannot read header from file %s. Error was %s\n", - unicode_map_file_name, strerror(errno))); - goto clean_and_exit; - } - - /* Check the version value */ - if(SVAL(buf,UNICODE_MAP_VERSION_OFFSET) != UNICODE_MAP_FILE_VERSION_ID) { - DEBUG(0,("load_unicode_map: filename %s has incorrect version id. \ -Needed %hu, got %hu.\n", - unicode_map_file_name, (uint16)UNICODE_MAP_FILE_VERSION_ID, - SVAL(buf,UNICODE_MAP_VERSION_OFFSET))); - goto clean_and_exit; - } - - /* Check the codepage value */ - if(!strequal(&buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], codepage)) { - DEBUG(0,("load_unicode_map: codepage %s in file %s is not the same as that \ -requested (%s).\n", &buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], unicode_map_file_name, codepage )); - goto clean_and_exit; - } - - ucs2_to_cp_size = 2*65536; - if (size == UNICODE_MAP_HEADER_SIZE + 4*65536) { - /* - * This is a multibyte code page. - */ - cp_to_ucs2_size = 2*65536; - } else { - /* - * Single byte code page. - */ - cp_to_ucs2_size = 2*256; - } - - /* - * Free any old translation tables. - */ - - free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp); - - if ((cp_to_ucs2 = (smb_ucs2_t *)malloc(cp_to_ucs2_size)) == NULL) { - DEBUG(0,("load_unicode_map: malloc fail for cp_to_ucs2 size %u.\n", cp_to_ucs2_size )); - goto clean_and_exit; - } - - if ((ucs2_to_cp = (uint16 *)malloc(ucs2_to_cp_size)) == NULL) { - DEBUG(0,("load_unicode_map: malloc fail for ucs2_to_cp size %u.\n", ucs2_to_cp_size )); - goto clean_and_exit; - } - - if(fread( (char *)cp_to_ucs2, 1, cp_to_ucs2_size, fp)!=cp_to_ucs2_size) { - DEBUG(0,("load_unicode_map: cannot read cp_to_ucs2 from file %s. Error was %s\n", - unicode_map_file_name, strerror(errno))); - goto clean_and_exit; - } - - if(fread( (char *)ucs2_to_cp, 1, ucs2_to_cp_size, fp)!=ucs2_to_cp_size) { - DEBUG(0,("load_unicode_map: cannot read ucs2_to_cp from file %s. Error was %s\n", - unicode_map_file_name, strerror(errno))); - goto clean_and_exit; - } - - /* - * Now ensure the 16 bit values are in the correct endianness. - */ - - for (i = 0; i < cp_to_ucs2_size/2; i++) - cp_to_ucs2[i] = SVAL(cp_to_ucs2,i*2); - - for (i = 0; i < ucs2_to_cp_size/2; i++) - ucs2_to_cp[i] = SVAL(ucs2_to_cp,i*2); - - fclose(fp); - - *pp_cp_to_ucs2 = cp_to_ucs2; - *pp_ucs2_to_cp = ucs2_to_cp; - - return True; - -clean_and_exit: - - /* pseudo destructor :-) */ - - if(fp != NULL) - fclose(fp); - - free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp); - - default_unicode_map(pp_cp_to_ucs2, pp_ucs2_to_cp); - - return False; -} - -/******************************************************************* - Load a dos codepage to unicode and vica-versa map. -********************************************************************/ - -BOOL load_dos_unicode_map(int codepage) -{ - fstring codepage_str; - - slprintf(codepage_str, sizeof(fstring)-1, "%03d", codepage); - return load_unicode_map(codepage_str, &doscp_to_ucs2, &ucs2_to_doscp); -} - -/******************************************************************* - Load a UNIX codepage to unicode and vica-versa map. -********************************************************************/ - -BOOL load_unix_unicode_map(const char *unix_char_set) -{ - fstring upper_unix_char_set; - - fstrcpy(upper_unix_char_set, unix_char_set); - strupper(upper_unix_char_set); - return load_unicode_map(upper_unix_char_set, &unixcp_to_ucs2, &ucs2_to_unixcp); -} - -/******************************************************************* - The following functions reproduce many of the non-UNICODE standard - string functions in Samba. -********************************************************************/ - -/******************************************************************* - Convert a UNICODE string to multibyte format. Note that the 'src' is in - native byte order, not little endian. Always zero terminates. - dst_len is in bytes. -********************************************************************/ - -static char *unicode_to_multibyte(char *dst, const smb_ucs2_t *src, - size_t dst_len, const uint16 *ucs2_to_cp) -{ - size_t dst_pos; - - for(dst_pos = 0; (dst_pos < dst_len - 1) && *src;) { - smb_ucs2_t val = ucs2_to_cp[*src++]; - if(val < 256) { - dst[dst_pos++] = (char)val; - } else { - - if(dst_pos >= dst_len - 2) - break; - - /* - * A 2 byte value is always written as - * high/low into the buffer stream. - */ - - dst[dst_pos++] = (char)((val >> 8) & 0xff); - dst[dst_pos++] = (char)(val & 0xff); - } - } - - dst[dst_pos] = '\0'; - - return dst; -} - -/******************************************************************* - Convert a multibyte string to UNICODE format. Note that the 'dst' is in - native byte order, not little endian. Always zero terminates. - dst_len is in bytes. -********************************************************************/ - -smb_ucs2_t *multibyte_to_unicode(smb_ucs2_t *dst, const char *src, - size_t dst_len, smb_ucs2_t *cp_to_ucs2) -{ - size_t i; - - dst_len /= sizeof(smb_ucs2_t); /* Convert to smb_ucs2_t units. */ - - for(i = 0; (i < (dst_len - 1)) && src[i];) { - size_t skip = skip_multibyte_char(*src); - smb_ucs2_t val = (*src & 0xff); - - /* - * If this is a multibyte character - * then work out the index value for the unicode conversion. - */ - - if (skip == 2) - val = ((val << 8) | (src[1] & 0xff)); - - dst[i++] = cp_to_ucs2[val]; - if (skip) - src += skip; - else - src++; - } - - dst[i] = 0; - - return dst; -} - -/******************************************************************* - Convert a UNICODE string to multibyte format. Note that the 'src' is in - native byte order, not little endian. Always zero terminates. - This function may be replaced if the MB codepage format is an - encoded one (ie. utf8, hex). See the code in lib/kanji.c - for details. dst_len is in bytes. -********************************************************************/ - -char *unicode_to_unix(char *dst, const smb_ucs2_t *src, size_t dst_len) -{ - return unicode_to_multibyte(dst, src, dst_len, ucs2_to_unixcp); -} - -/******************************************************************* - Convert a UNIX string to UNICODE format. Note that the 'dst' is in - native byte order, not little endian. Always zero terminates. - This function may be replaced if the UNIX codepage format is a - multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c - for details. dst_len is in bytes, not ucs2 units. -********************************************************************/ - -smb_ucs2_t *unix_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len) -{ - return multibyte_to_unicode(dst, src, dst_len, unixcp_to_ucs2); -} - -/******************************************************************* - Convert a UNICODE string to DOS format. Note that the 'src' is in - native byte order, not little endian. Always zero terminates. - dst_len is in bytes. -********************************************************************/ - -char *unicode_to_dos(char *dst, const smb_ucs2_t *src, size_t dst_len) -{ - return unicode_to_multibyte(dst, src, dst_len, ucs2_to_doscp); -} - -/******************************************************************* - Convert a single UNICODE character to DOS codepage. Returns the - number of bytes in the DOS codepage character. -********************************************************************/ - -size_t unicode_to_dos_char(char *dst, const smb_ucs2_t src) -{ - smb_ucs2_t val = ucs2_to_doscp[src]; - if(val < 256) { - *dst = (char)val; - return (size_t)1; - } - /* - * A 2 byte value is always written as - * high/low into the buffer stream. - */ - - dst[0] = (char)((val >> 8) & 0xff); - dst[1] = (char)(val & 0xff); - return (size_t)2; -} - -/******************************************************************* - Convert a DOS string to UNICODE format. Note that the 'dst' is in - native byte order, not little endian. Always zero terminates. - This function may be replaced if the DOS codepage format is a - multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c - for details. dst_len is in bytes, not ucs2 units. -********************************************************************/ - -smb_ucs2_t *dos_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len) -{ - return multibyte_to_unicode(dst, src, dst_len, doscp_to_ucs2); -} - -/******************************************************************* - Count the number of characters in a smb_ucs2_t string. -********************************************************************/ - -size_t strlen_w(const smb_ucs2_t *src) -{ - size_t len; - - for(len = 0; *src++; len++) - ; - - return len; -} - -/******************************************************************* - Safe wstring copy into a known length string. maxlength includes - the terminating zero. maxlength is in ucs2 units. -********************************************************************/ - -smb_ucs2_t *safe_strcpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src, size_t maxlength) -{ - size_t ucs2_len; - - if (!dest) { - DEBUG(0,("ERROR: NULL dest in safe_strcpy_w\n")); - return NULL; - } - - if (!src) { - *dest = 0; - return dest; - } - - maxlength /= sizeof(smb_ucs2_t); - - ucs2_len = strlen_w(src); - - if (ucs2_len >= maxlength) { - fstring out; - DEBUG(0,("ERROR: string overflow by %u bytes in safe_strcpy_w [%.50s]\n", - (unsigned int)((ucs2_len-maxlength)*sizeof(smb_ucs2_t)), - unicode_to_unix(out,src,sizeof(out))) ); - ucs2_len = maxlength - 1; - } - - memcpy(dest, src, ucs2_len*sizeof(smb_ucs2_t)); - dest[ucs2_len] = 0; - return dest; -} - -/******************************************************************* - Safe string cat into a string. maxlength includes the terminating zero. - maxlength is in ucs2 units. -********************************************************************/ - -smb_ucs2_t *safe_strcat_w(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength) -{ - size_t ucs2_src_len, ucs2_dest_len; - - if (!dest) { - DEBUG(0,("ERROR: NULL dest in safe_strcat_w\n")); - return NULL; - } - - if (!src) - return dest; - - ucs2_src_len = strlen_w(src); - ucs2_dest_len = strlen_w(dest); - - if (ucs2_src_len + ucs2_dest_len >= maxlength) { - fstring out; - int new_len = maxlength - ucs2_dest_len - 1; - DEBUG(0,("ERROR: string overflow by %u characters in safe_strcat_w [%.50s]\n", - (unsigned int)(sizeof(smb_ucs2_t)*(ucs2_src_len + ucs2_dest_len - maxlength)), - unicode_to_unix(out,src,sizeof(out))) ); - ucs2_src_len = (size_t)(new_len > 0 ? new_len : 0); - } - - memcpy(&dest[ucs2_dest_len], src, ucs2_src_len*sizeof(smb_ucs2_t)); - dest[ucs2_dest_len + ucs2_src_len] = 0; - return dest; -} - -/******************************************************************* - Compare the two strings s1 and s2. -********************************************************************/ - -int strcmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2) -{ - smb_ucs2_t c1, c2; - - for (;;) { - c1 = *s1++; - c2 = *s2++; - - if (c1 != c2) - return c1 - c2; - - if (c1 == 0) - break; - } - return 0; -} - -/******************************************************************* - Compare the first n characters of s1 to s2. len is in ucs2 units. -********************************************************************/ - -int strncmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2, size_t len) -{ - smb_ucs2_t c1, c2; - - for (; len != 0; --len) { - c1 = *s1++; - c2 = *s2++; - - if (c1 != c2) - return c1 - c2; - - if (c1 == 0) - break; - - } - return 0; -} - -/******************************************************************* - Search string s2 from s1. -********************************************************************/ - -smb_ucs2_t *strstr_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2) -{ - size_t len = strlen_w(s2); - - if (!*s2) - return (smb_ucs2_t *)s1; - - for(;*s1; s1++) { - if (*s1 == *s2) { - if (strncmp_w(s1, s2, len) == 0) - return (smb_ucs2_t *)s1; - } - } - return NULL; -} - -/******************************************************************* - Search for ucs2 char c from the beginning of s. -********************************************************************/ - -smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c) -{ - do { - if (*s == c) - return (smb_ucs2_t *)s; - } while (*s++); - - return NULL; -} - -/******************************************************************* - Search for ucs2 char c from the end of s. -********************************************************************/ - -smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c) + len is the filename length (ignoring any terminating zero) in uin16 + units. Always null terminates. + Hack alert: uses fixed buffer(s). +********************************************************************/ +char *dos_unistrn2(uint16 *src, int len) { - smb_ucs2_t *retval = 0; - - do { - if (*s == c) - retval = (smb_ucs2_t *)s; - } while (*s++); - - return retval; + static char lbufs[8][MAXUNI]; + static int nexti; + char *lbuf = lbufs[nexti]; + nexti = (nexti+1)%8; + pull_ucs2(NULL, lbuf, src, MAXUNI-3, len*2, STR_NOALIGN); + return lbuf; } /******************************************************************* - Search token from s1 separated by any ucs2 char of s2. + Convert a (little-endian) UNISTR2 structure to an ASCII string ********************************************************************/ - -smb_ucs2_t *strtok_w(smb_ucs2_t *s1, const smb_ucs2_t *s2) +void unistr2_to_ascii(char *dest, const UNISTR2 *str, size_t maxlen) { - static smb_ucs2_t *s = NULL; - smb_ucs2_t *q; - - if (!s1) { - if (!s) - return NULL; - s1 = s; - } - - for (q = s1; *s1; s1++) { - smb_ucs2_t *p = strchr_w(s2, *s1); - if (p) { - if (s1 != q) { - s = s1 + 1; - *s1 = '\0'; - return q; - } - q = s1 + 1; - } + if (str == NULL) { + *dest='\0'; + return; } - - s = NULL; - if (*q) - return q; - - return NULL; + pull_ucs2(NULL, dest, str->buffer, maxlen, str->uni_str_len, STR_NOALIGN); } + /******************************************************************* - Duplicate a ucs2 string. +Return a number stored in a buffer ********************************************************************/ -smb_ucs2_t *strdup_w(const smb_ucs2_t *s) +uint32 buffer2_to_uint32(BUFFER2 *str) { - size_t newlen = (strlen_w(s)+1)*sizeof(smb_ucs2_t); - smb_ucs2_t *newstr = (smb_ucs2_t *)malloc(newlen); - if (newstr == NULL) - return NULL; - safe_strcpy_w(newstr, s, newlen); - return newstr; + if (str->buf_len == 4) + return IVAL(str->buffer, 0); + else + return 0; } /******************************************************************* @@ -1124,33 +196,6 @@ int islower_w( smb_ucs2_t val) return (map_table_flags(val) & UNI_LOWER); } -/******************************************************************* - Is a digit wchar. -********************************************************************/ - -int isdigit_w( smb_ucs2_t val) -{ - return (map_table_flags(val) & UNI_DIGIT); -} - -/******************************************************************* - Is a hex digit wchar. -********************************************************************/ - -int isxdigit_w( smb_ucs2_t val) -{ - return (map_table_flags(val) & UNI_XDIGIT); -} - -/******************************************************************* - Is a space wchar. -********************************************************************/ - -int isspace_w( smb_ucs2_t val) -{ - return (map_table_flags(val) & UNI_SPACE); -} - /******************************************************************* Convert a wchar to upper case. ********************************************************************/ @@ -1170,102 +215,33 @@ smb_ucs2_t tolower_w( smb_ucs2_t val ) } /******************************************************************* - Case insensitive string compararison. + Count the number of characters in a smb_ucs2_t string. ********************************************************************/ - -int StrCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t) +size_t strlen_w(const smb_ucs2_t *src) { - /* - * Compare until we run out of string, either t or s, or find a difference. - */ + size_t len; - while (*s && *t && toupper_w(*s) == toupper_w(*t)) { - s++; - t++; - } + for(len = 0; *src++; len++) ; - return(toupper_w(*s) - toupper_w(*t)); + return len; } /******************************************************************* - Case insensitive string compararison, length limited. - n is in ucs2 units. +wide strchr() ********************************************************************/ - -int StrnCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t, size_t n) +smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c) { - /* - * Compare until we run out of string, either t or s, or chars. - */ - - while (n && *s && *t && toupper_w(*s) == toupper_w(*t)) { + while (*s != 0) { + if (c == *s) return (smb_ucs2_t *)s; s++; - t++; - n--; } - - /* - * Not run out of chars - strings are different lengths. - */ - - if (n) - return(toupper_w(*s) - toupper_w(*t)); - - /* - * Identical up to where we run out of chars, - * and strings are same length. - */ - - return(0); -} - -/******************************************************************* - Compare 2 strings. -********************************************************************/ - -BOOL strequal_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2) -{ - if (s1 == s2) - return(True); - if (!s1 || !s2) - return(False); - - return(StrCaseCmp_w(s1,s2)==0); -} - -/******************************************************************* - Compare 2 strings up to and including the nth char. n is in ucs2 - units. -******************************************************************/ - -BOOL strnequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2,size_t n) -{ - if (s1 == s2) - return(True); - if (!s1 || !s2 || !n) - return(False); - - return(StrnCaseCmp_w(s1,s2,n)==0); + return NULL; } -/******************************************************************* - Compare 2 strings (case sensitive). -********************************************************************/ - -BOOL strcsequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2) -{ - if (s1 == s2) - return(True); - if (!s1 || !s2) - return(False); - - return(strcmp_w(s1,s2)==0); -} /******************************************************************* Convert a string to lower case. ********************************************************************/ - void strlower_w(smb_ucs2_t *s) { while (*s) { @@ -1278,7 +254,6 @@ void strlower_w(smb_ucs2_t *s) /******************************************************************* Convert a string to upper case. ********************************************************************/ - void strupper_w(smb_ucs2_t *s) { while (*s) { @@ -1289,547 +264,70 @@ void strupper_w(smb_ucs2_t *s) } /******************************************************************* - Convert a string to "normal" form. -********************************************************************/ - -void strnorm_w(smb_ucs2_t *s) -{ - extern int case_default; - if (case_default == CASE_UPPER) - strupper_w(s); - else - strlower_w(s); -} - -/******************************************************************* - Check if a string is in "normal" case. +case insensitive string comparison ********************************************************************/ - -BOOL strisnormal_w(smb_ucs2_t *s) -{ - extern int case_default; - if (case_default == CASE_UPPER) - return(!strhaslower_w(s)); - - return(!strhasupper_w(s)); -} - -/**************************************************************************** - String replace. -****************************************************************************/ - -void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc) +int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b) { - while (*s) { - if (oldc == *s) - *s = newc; - s++; - } + while (*b && tolower_w(*a) == tolower_w(*b)) { a++; b++; } + return (tolower_w(*a) - tolower_w(*b)); } -/******************************************************************* - Skip past some strings in a buffer. n is in bytes. -********************************************************************/ - -smb_ucs2_t *skip_string_w(smb_ucs2_t *buf,size_t n) -{ - while (n--) - buf += (strlen_w(buf)*sizeof(smb_ucs2_t)) + 1; - return(buf); -} -/******************************************************************* - Count the number of characters in a string. Same as strlen_w in - smb_ucs2_t string units. -********************************************************************/ +/* + The *_wa() functions take a combination of 7 bit ascii + and wide characters They are used so that you can use string + functions combining C string constants with ucs2 strings -size_t str_charnum_w(const smb_ucs2_t *s) -{ - return strlen_w(s); -} + The char* arguments must NOT be multibyte - to be completely sure + of this only pass string constants */ -/******************************************************************* - Trim the specified elements off the front and back of a string. -********************************************************************/ -BOOL trim_string_w(smb_ucs2_t *s,const smb_ucs2_t *front,const smb_ucs2_t *back) +void pstrcpy_wa(smb_ucs2_t *dest, const char *src) { - BOOL ret = False; - size_t front_len = (front && *front) ? strlen_w(front) : 0; - size_t back_len = (back && *back) ? strlen_w(back) : 0; - size_t s_len; - - while (front_len && strncmp_w(s, front, front_len) == 0) { - smb_ucs2_t *p = s; - ret = True; - - while (1) { - if (!(*p = p[front_len])) - break; - p++; - } - } - - if(back_len) { - s_len = strlen_w(s); - while ((s_len >= back_len) && - (strncmp_w(s + s_len - back_len, back, back_len)==0)) { - ret = True; - s[s_len - back_len] = 0; - s_len = strlen_w(s); - } + int i; + for (i=0;i= maxlength) - len = maxlength - 1; - - if (!other_safe_chars) - other_safe_chars = &nullstr_w; - - for(i = 0; i < len; i++) { - smb_ucs2_t val = src[i]; - if(isupper_w(val) ||islower_w(val) || isdigit_w(val) || strchr_w(other_safe_chars, val)) - dest[i] = src[i]; - else - dest[i] = (smb_ucs2_t)'_'; - } - - dest[i] = 0; - - return dest; -} - -/**************************************************************************** - Like strncpy but always null terminates. Make sure there is room ! - The variable n should always be one less than the available size and is in - ucs2 units. -****************************************************************************/ - -smb_ucs2_t *StrnCpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src,size_t n) -{ - smb_ucs2_t *d = dest; - if (!dest) - return(NULL); - if (!src) { - *dest = 0; - return(dest); - } - - while (n-- && (*d++ = *src++)) - ; - *d = 0; - return(dest); -} - -/**************************************************************************** - Like strncpy but copies up to the character marker. Always null terminates. - returns a pointer to the character marker in the source string (src). - n is in ucs2 units. -****************************************************************************/ - -smb_ucs2_t *strncpyn_w(smb_ucs2_t *dest, const smb_ucs2_t *src,size_t n, smb_ucs2_t c) -{ - smb_ucs2_t *p; - size_t str_len; - - p = strchr_w(src, c); - if (p == NULL) { - fstring cval; - smb_ucs2_t mbcval[2]; - mbcval[0] = c; - mbcval[1] = 0; - DEBUG(5, ("strncpyn_w: separator character (%s) not found\n", - unicode_to_unix(cval,mbcval,sizeof(cval)) )); - return NULL; - } - - str_len = PTR_DIFF(p, src) + 1; - safe_strcpy_w(dest, src, MIN(n, str_len)); - - return p; -} - -/************************************************************* - Routine to get hex characters and turn them into a 16 byte array. - The array can be variable length, and any non-hex-numeric - characters are skipped. "0xnn" or "0Xnn" is specially catered - for. len is in bytes. - Valid examples: "0A5D15"; "0x15, 0x49, 0xa2"; "59\ta9\te3\n" -**************************************************************/ - -static smb_ucs2_t hexprefix[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'x', 0 }; -static smb_ucs2_t hexchars[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'1', (smb_ucs2_t)'2', (smb_ucs2_t)'3', - (smb_ucs2_t)'4', (smb_ucs2_t)'5', (smb_ucs2_t)'6', (smb_ucs2_t)'7', - (smb_ucs2_t)'8', (smb_ucs2_t)'9', (smb_ucs2_t)'A', (smb_ucs2_t)'B', - (smb_ucs2_t)'C', (smb_ucs2_t)'D', (smb_ucs2_t)'E', (smb_ucs2_t)'F', 0 }; - -size_t strhex_to_str_w(char *p, size_t len, const smb_ucs2_t *strhex) -{ - size_t i; - size_t num_chars = 0; - unsigned char lonybble, hinybble; - smb_ucs2_t *p1 = NULL, *p2 = NULL; - - /* - * Convert to smb_ucs2_t units. - */ - - len /= sizeof(smb_ucs2_t); - - for (i = 0; i < len && strhex[i] != 0; i++) { - if (strnequal_w(hexchars, hexprefix, 2)) { - i++; /* skip two chars */ - continue; - } - - if (!(p1 = strchr_w(hexchars, toupper_w(strhex[i])))) - break; - - i++; /* next hex digit */ - - if (!(p2 = strchr_w(hexchars, toupper_w(strhex[i])))) - break; - - /* get the two nybbles */ - hinybble = (PTR_DIFF(p1, hexchars)/sizeof(smb_ucs2_t)); - lonybble = (PTR_DIFF(p2, hexchars)/sizeof(smb_ucs2_t)); - - p[num_chars] = (hinybble << 4) | lonybble; - num_chars++; - - p1 = NULL; - p2 = NULL; - } - return num_chars; -} - - -/* This is used to prevent lots of mallocs of size 2 */ -static smb_ucs2_t *null_string = NULL; - -/**************************************************************************** - Set a string value, allocing the space for the string. -****************************************************************************/ - -BOOL string_init_w(smb_ucs2_t **dest,const smb_ucs2_t *src) -{ - size_t l; - - if (!null_string) { - if((null_string = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t))) == NULL) { - DEBUG(0,("string_init_w: malloc fail for null_string.\n")); - return False; - } - *null_string = 0; - } - - if (!src) - src = null_string; - - l = strlen_w(src); - - if (l == 0) - *dest = null_string; - else { - (*dest) = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t)*(l+1)); - if ((*dest) == NULL) { - DEBUG(0,("Out of memory in string_init_w\n")); - return False; - } - - wpstrcpy(*dest,src); - } - return(True); -} - -/**************************************************************************** - Free a string value. -****************************************************************************/ - -void string_free_w(smb_ucs2_t **s) -{ - if (!s || !(*s)) - return; - if (*s == null_string) - *s = NULL; - if (*s) - free((char *)*s); - *s = NULL; -} - -/**************************************************************************** - Set a string value, allocing the space for the string, and deallocating any - existing space. -****************************************************************************/ - -BOOL string_set_w(smb_ucs2_t **dest,const smb_ucs2_t *src) -{ - string_free_w(dest); - - return(string_init_w(dest,src)); -} - -/**************************************************************************** - Substitute a string for a pattern in another string. Make sure there is - enough room ! - - This routine looks for pattern in s and replaces it with - insert. It may do multiple replacements. - - Any of " ; ' $ or ` in the insert string are replaced with _ - if len==0 then no length check is performed - len is in ucs2 units. -****************************************************************************/ - -void string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len) -{ - smb_ucs2_t *p; - ssize_t ls,lp,li, i; - - if (!insert || !pattern || !s) - return; - - ls = (ssize_t)strlen_w(s); - lp = (ssize_t)strlen_w(pattern); - li = (ssize_t)strlen_w(insert); - - if (!*pattern) - return; - - while (lp <= ls && (p = strstr_w(s,pattern))) { - if (len && (ls + (li-lp) >= len)) { - fstring out; - DEBUG(0,("ERROR: string overflow by %d in string_sub_w(%.50s, %d)\n", - (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)), - unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t))); - break; - } - if (li != lp) - memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1)); - - for (i=0;i= len)) { - fstring out; - DEBUG(0,("ERROR: string overflow by %d in all_string_sub_w(%.50s, %d)\n", - (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)), - unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t))); - break; - } - if (li != lp) - memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1)); - - memcpy(p, insert, li*sizeof(smb_ucs2_t)); - s = p + li; - ls += (li-lp); - } -} - -/**************************************************************************** - Splits out the front and back at a separator. -****************************************************************************/ - -void split_at_last_component_w(smb_ucs2_t *path, smb_ucs2_t *front, smb_ucs2_t sep, smb_ucs2_t *back) -{ - smb_ucs2_t *p = strrchr_w(path, sep); - - if (p != NULL) - *p = 0; - - if (front != NULL) - wpstrcpy(front, path); - - if (p != NULL) { - if (back != NULL) - wpstrcpy(back, p+1); - *p = (smb_ucs2_t)'\\'; - } else { - if (back != NULL) - back[0] = 0; - } -} - - -/**************************************************************************** - Write an octal as a string. -****************************************************************************/ - -smb_ucs2_t *octal_string_w(int i) -{ - static smb_ucs2_t wret[64]; - char ret[64]; - - if (i == -1) - slprintf(ret, sizeof(ret)-1, "-1"); - else - slprintf(ret, sizeof(ret)-1, "0%o", i); - return unix_to_unicode(wret, ret, sizeof(wret)); -} - - -/**************************************************************************** - Truncate a string at a specified length. - length is in ucs2 units. -****************************************************************************/ - -smb_ucs2_t *string_truncate_w(smb_ucs2_t *s, size_t length) -{ - if (s && strlen_w(s) > length) - s[length] = 0; - - return s; -} - -/****************************************************************** - functions for UTF8 support (using in kanji.c) - ******************************************************************/ -smb_ucs2_t doscp2ucs2(int w) -{ - return ((smb_ucs2_t)doscp_to_ucs2[w]); -} - -int ucs2doscp(smb_ucs2_t w) -{ - return ((int)ucs2_to_doscp[w]); + return NULL; } -- cgit