diff options
-rw-r--r-- | source3/Makefile.in | 2 | ||||
-rw-r--r-- | source3/lib/charcnv.c | 1 | ||||
-rw-r--r-- | source3/lib/util_unistr.c | 52 | ||||
-rw-r--r-- | source3/torture/t_doschar.c | 42 |
4 files changed, 96 insertions, 1 deletions
diff --git a/source3/Makefile.in b/source3/Makefile.in
index ff1cf92f83..9910f0feb5 100644
--- a/source3/Makefile.in
+++ b/source3/Makefile.in
@@ -1735,6 +1735,8 @@ bin/t_strappend@EXEEXT@: $(BINARY_PREREQS) bin/libbigballofmud.@SHLIBEXT@ tortur
 bin/t_stringoverflow@EXEEXT@: $(BINARY_PREREQS) bin/libbigballofmud.@SHLIBEXT@ torture/t_stringoverflow.o
 	$(CC) $(FLAGS) -o $@ $(DYNEXP) torture/t_stringoverflow.o -L./bin -lbigballofmud
 
+bin/t_doschar@EXEEXT@: $(BINARY_PREREQS) bin/libbigballofmud.@SHLIBEXT@ torture/t_doschar.o
+	$(CC) $(FLAGS) -o $@ $(DYNEXP) $(LIBS) torture/t_doschar.o -L ./bin -lbigballofmud
 bin/t_push_ucs2@EXEEXT@: $(BINARY_PREREQS) bin/libbigballofmud.@SHLIBEXT@ torture/t_push_ucs2.o
 	$(CC) $(FLAGS) -o $@ $(DYNEXP) $(LIBS) torture/t_push_ucs2.o -L ./bin -lbigballofmud
 
diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c
index 7be073fafc..35343b2f0a 100644
--- a/source3/lib/charcnv.c
+++ b/source3/lib/charcnv.c
@@ -179,6 +179,7 @@ void init_iconv(void)
 		 * codepage changes? */
 		/* XXX: Is the did_reload test too strict? */
 		conv_silent = True;
+		init_doschar_table();
 		init_valid_table();
 		conv_silent = False;
 	}
diff --git a/source3/lib/util_unistr.c b/source3/lib/util_unistr.c
index 0221d19dc1..cf040a2dfc 100644
--- a/source3/lib/util_unistr.c
+++ b/source3/lib/util_unistr.c
@@ -36,6 +36,14 @@ static BOOL lowcase_table_use_unmap;
 static BOOL valid_table_use_unmap;
 
 /**
+ * This table says which Unicode characters are valid dos
+ * characters.
+ *
+ * Each value is just a single bit.
+ **/
+static uint8 doschar_table[8192]; /* 65536 characters / 8 bits/byte */
+
+/**
  * Destroy global objects allocated by load_case_tables()
  **/
 void gfree_case_tables(void)
@@ -142,6 +150,21 @@ void load_case_tables(void)
 #endif
 }
 
+/*
+  see if a ucs2 character can be mapped correctly to a dos character
+  and mapped back to the same character in ucs2
+*/
+
+int check_dos_char(smb_ucs2_t c)
+{
+	lazy_initialize_conv();
+
+	/* Find the right byte, and right bit within the byte; return
+	 * 1 or 0 */
+	return (doschar_table[(c & 0xffff) / 8] & (1 << (c & 7))) != 0;
+}
+
+
 static int check_dos_char_slowly(smb_ucs2_t c)
 {
 	char buf[10];
@@ -159,6 +182,33 @@ static int check_dos_char_slowly(smb_ucs2_t c)
 	return (c == c2);
 }
 
+
+/**
+ * Fill out doschar table the hard way, by examining each character
+ **/
+
+void init_doschar_table(void)
+{
+	int i, j, byteval;
+
+	/* For each byte of packed table */
+
+	for (i = 0; i <= 0xffff; i += 8) {
+		byteval = 0;
+		for (j = 0; j <= 7; j++) {
+			smb_ucs2_t c;
+
+			c = i + j;
+
+			if (check_dos_char_slowly(c)) {
+				byteval |= 1 << j;
+			}
+		}
+		doschar_table[i/8] = byteval;
+	}
+}
+
+
 /**
  * Load the valid character map table from <tt>valid.dat</tt> or
  * create from the configured codepage.
@@ -207,7 +257,7 @@ void init_valid_table(void)
 	for (;i<0x10000;i++) {
 		smb_ucs2_t c;
 		SSVAL(&c, 0, i);
-		valid_table[i] = check_dos_char_slowly(c);
+		valid_table[i] = check_dos_char(c);
 	}
 }
 
diff --git a/source3/torture/t_doschar.c b/source3/torture/t_doschar.c
new file mode 100644
index 0000000000..41698350d6
--- /dev/null
+++ b/source3/torture/t_doschar.c
@@ -0,0 +1,42 @@
+/*
+   Samba - Unix SMB/CIFS implementation
+   Test harness for check_dos_char
+   Copyright (C) Martin Pool 2003
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "includes.h"
+
+
+/*
+ * Just print out DOS validity or not for every character.
+ *
+ * DOS validity for a Unicode character set means that it can be
+ * represented in DOS codepage, and that the DOS character maps back
+ * to the same Unicode character.
+ *
+ * This depends on which DOS codepage is configured.
+ */
+ int main(void)
+{
+	int i;	/* wider than smb_ucs2_t so the loop can reach 0xffff and still stop */
+
+	for (i = 0; i <= 0xffff; i++) {
+		printf("%d %d\n", i, (int) check_dos_char((smb_ucs2_t) i));
+	}
+
+	return 0;
+}