summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- source3/Makefile.in 2
-rw-r--r-- source3/lib/charcnv.c 1
-rw-r--r-- source3/lib/util_unistr.c 52
-rw-r--r-- source3/torture/t_doschar.c 42
4 files changed, 96 insertions, 1 deletion
diff --git a/source3/Makefile.in b/source3/Makefile.in
index ff1cf92f83..9910f0feb5 100644
--- a/source3/Makefile.in
+++ b/source3/Makefile.in
@@ -1735,6 +1735,8 @@ bin/t_strappend@EXEEXT@: $(BINARY_PREREQS) bin/libbigballofmud.@SHLIBEXT@ tortur
bin/t_stringoverflow@EXEEXT@: $(BINARY_PREREQS) bin/libbigballofmud.@SHLIBEXT@ torture/t_stringoverflow.o
$(CC) $(FLAGS) -o $@ $(DYNEXP) torture/t_stringoverflow.o -L./bin -lbigballofmud
+bin/t_doschar@EXEEXT@: $(BINARY_PREREQS) bin/libbigballofmud.@SHLIBEXT@ torture/t_doschar.o
+ $(CC) $(FLAGS) -o $@ $(DYNEXP) $(LIBS) torture/t_doschar.o -L ./bin -lbigballofmud
bin/t_push_ucs2@EXEEXT@: $(BINARY_PREREQS) bin/libbigballofmud.@SHLIBEXT@ torture/t_push_ucs2.o
$(CC) $(FLAGS) -o $@ $(DYNEXP) $(LIBS) torture/t_push_ucs2.o -L ./bin -lbigballofmud
diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c
index 7be073fafc..35343b2f0a 100644
--- a/source3/lib/charcnv.c
+++ b/source3/lib/charcnv.c
@@ -179,6 +179,7 @@ void init_iconv(void)
* codepage changes? */
/* XXX: Is the did_reload test too strict? */
conv_silent = True;
+ init_doschar_table();
init_valid_table();
conv_silent = False;
}
diff --git a/source3/lib/util_unistr.c b/source3/lib/util_unistr.c
index 0221d19dc1..cf040a2dfc 100644
--- a/source3/lib/util_unistr.c
+++ b/source3/lib/util_unistr.c
@@ -36,6 +36,14 @@ static BOOL lowcase_table_use_unmap;
static BOOL valid_table_use_unmap;
/**
+ * Packed bitmap recording which Unicode (UCS-2) characters are valid
+ * DOS characters.
+ *
+ * Each character owns a single bit: character c is stored in bit
+ * (c & 7) of byte (c / 8).  The table is filled in by
+ * init_doschar_table() and read by check_dos_char().
+ **/
+static uint8 doschar_table[8192]; /* 65536 characters / 8 bits/byte */
+
+/**
* Destroy global objects allocated by load_case_tables()
**/
void gfree_case_tables(void)
@@ -142,6 +150,21 @@ void load_case_tables(void)
#endif
}
+/*
+  Report whether a ucs2 character can be mapped correctly to a dos
+  character and mapped back to the same character in ucs2.
+
+  The answer is read from the packed doschar_table rather than being
+  recomputed on every call.  Returns 1 if the character's bit is set,
+  0 otherwise.
+*/
+
+int check_dos_char(smb_ucs2_t c)
+{
+	unsigned int codepoint;
+	unsigned int bit_mask;
+
+	/* Must run before the table is consulted. */
+	lazy_initialize_conv();
+
+	/* Eight characters share one byte: the upper bits of the code
+	 * point select the byte, the low three bits select the bit. */
+	codepoint = c & 0xffff;
+	bit_mask = 1 << (c & 7);
+
+	return (doschar_table[codepoint / 8] & bit_mask) ? 1 : 0;
+}
+
+
static int check_dos_char_slowly(smb_ucs2_t c)
{
char buf[10];
@@ -159,6 +182,33 @@ static int check_dos_char_slowly(smb_ucs2_t c)
return (c == c2);
}
+
+/**
+ * Populate doschar_table the slow way: run check_dos_char_slowly() on
+ * every one of the 65536 characters and pack the results, eight
+ * characters per byte.
+ **/
+
+void init_doschar_table(void)
+{
+	int byte_idx;
+
+	/* One pass per byte of the packed table */
+	for (byte_idx = 0; byte_idx < 0x10000 / 8; byte_idx++) {
+		int packed = 0;
+		int bit;
+
+		/* Bit N of this byte describes character byte_idx * 8 + N */
+		for (bit = 0; bit < 8; bit++) {
+			smb_ucs2_t c = (byte_idx * 8) + bit;
+
+			if (check_dos_char_slowly(c)) {
+				packed |= 1 << bit;
+			}
+		}
+
+		doschar_table[byte_idx] = packed;
+	}
+}
+
+
/**
* Load the valid character map table from <tt>valid.dat</tt> or
* create from the configured codepage.
@@ -207,7 +257,7 @@ void init_valid_table(void)
for (;i<0x10000;i++) {
smb_ucs2_t c;
SSVAL(&c, 0, i);
- valid_table[i] = check_dos_char_slowly(c);
+ valid_table[i] = check_dos_char(c);
}
}
diff --git a/source3/torture/t_doschar.c b/source3/torture/t_doschar.c
new file mode 100644
index 0000000000..41698350d6
--- /dev/null
+++ b/source3/torture/t_doschar.c
@@ -0,0 +1,42 @@
+/*
+ Samba - Unix SMB/CIFS implementation
+ Test harness for check_dos_char
+ Copyright (C) Martin Pool 2003
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "includes.h"
+
+
+/*
+ * Print DOS validity or not for every character, one per line, as
+ * "<character code> <0 or 1>".
+ *
+ * DOS validity for a Unicode character means that it can be
+ * represented in the DOS codepage, and that the DOS character maps
+ * back to the same Unicode character.
+ *
+ * This depends on which DOS codepage is configured.
+ */
+int main(void)
+{
+	/* Count in an int rather than smb_ucs2_t: if smb_ucs2_t is 16 bits
+	 * wide (as its name suggests) a counter of that type can never
+	 * exceed 0xffff, so a loop that includes 0xffff would not
+	 * terminate. */
+	int i;
+
+	/* Inclusive bound, so that character 0xffff is reported too. */
+	for (i = 0; i <= 0xffff; i++) {
+		printf("%d %d\n", i, (int) check_dos_char((smb_ucs2_t) i));
+	}
+
+	return 0;
+}