summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Tridgell <tridge@samba.org>2001-10-03 12:18:20 +0000
committerAndrew Tridgell <tridge@samba.org>2001-10-03 12:18:20 +0000
commit9bcd133e9e7b0cfe974f273fb23409d660af8358 (patch)
treeaed9e919a60602c2a7d9826038a990e51be949cf
parent5b24e783dd60b01e2cef1e47cc4b181e7cf2bc38 (diff)
downloadsamba-9bcd133e9e7b0cfe974f273fb23409d660af8358.tar.gz
samba-9bcd133e9e7b0cfe974f273fb23409d660af8358.tar.bz2
samba-9bcd133e9e7b0cfe974f273fb23409d660af8358.zip
switched over to a new method of handling uppercase/lowercase mappings
for unicode strings. The new method relies on 3 files that are mmap'd at startup to provide the mapping tables. The upcase.dat and lowcase.dat tables should be the same on all systems. The valid.dat table says what characters are valid in 8.3 names, and differs between systems. I'm committing the Japanese valid.dat here; in future we need some way of automatically installing and choosing an appropriate table. This commit also adds my mini tdb based gettext replacement in intl/lang_tdb.c. I have not enabled this yet and have not removed the old gettext code as the new code is still being looked at by Monyo. Right now the code assumes that the upcase.dat, lowcase.dat and valid.dat files are installed in the Samba lib directory. That is not a good choice, but I'll leave them there until we work out the new install directory structure for Samba 3.0. simo - please look at the isvalid83_w() function and think about using it in your new mangling code. That should be the final step to correctly passing the chargen test code from Monyo. (This used to be commit 1c221994f118dd542a158b2db51e07d04d0e9314)
-rw-r--r--source3/Makefile.in5
-rw-r--r--source3/codepages/lowcase.datbin0 -> 131072 bytes
-rw-r--r--source3/codepages/upcase.datbin0 -> 131072 bytes
-rw-r--r--source3/codepages/valid.datbin0 -> 65536 bytes
-rw-r--r--source3/intl/lang_tdb.c181
-rw-r--r--source3/lib/charcnv.c1
-rw-r--r--source3/lib/util.c10
-rw-r--r--source3/lib/util_file.c35
-rw-r--r--source3/lib/util_str.c12
-rw-r--r--source3/lib/util_unistr.c139
-rw-r--r--source3/smbd/server.c2
-rw-r--r--source3/torture/utable.c20
12 files changed, 326 insertions, 79 deletions
diff --git a/source3/Makefile.in b/source3/Makefile.in
index b2c12b97fa..a5951372a4 100644
--- a/source3/Makefile.in
+++ b/source3/Makefile.in
@@ -72,7 +72,7 @@ PASSWD_FLAGS = -DSMB_PASSWD_FILE=\"$(SMB_PASSWD_FILE)\" -DPRIVATE_DIR=\"$(PRIVAT
FLAGS1 = $(CFLAGS) -Iinclude -I$(srcdir)/include -I$(srcdir)/ubiqx -I$(srcdir)/smbwrapper $(CPPFLAGS) -DLOGFILEBASE=\"$(LOGFILEBASE)\"
FLAGS2 = -DCONFIGFILE=\"$(CONFIGFILE)\" -DLMHOSTSFILE=\"$(LMHOSTSFILE)\"
FLAGS3 = -DSWATDIR=\"$(SWATDIR)\" -DSBINDIR=\"$(SBINDIR)\" -DLOCKDIR=\"$(LOCKDIR)\"
-FLAGS4 = -DDRIVERFILE=\"$(DRIVERFILE)\" -DBINDIR=\"$(BINDIR)\" -DFORMSFILE=\"$(FORMSFILE)\" -DNTDRIVERSDIR=\"$(NTDRIVERSDIR)\"
+FLAGS4 = -DDRIVERFILE=\"$(DRIVERFILE)\" -DBINDIR=\"$(BINDIR)\" -DFORMSFILE=\"$(FORMSFILE)\" -DNTDRIVERSDIR=\"$(NTDRIVERSDIR)\" -DLIBDIR=\"$(LIBDIR)\"
FLAGS5 = $(FLAGS1) $(FLAGS2) $(FLAGS3) $(FLAGS4) -DHAVE_INCLUDES_H -DI18N_PACKAGE=\"$(I18N_PACKAGE)\" -DI18N_LOCALEDIR=\"$(i18n_localedir)\" -Iintl -I$(srcdir)/intl
FLAGS = $(ISA) $(FLAGS5) $(PASSWD_FLAGS)
FLAGS32 = $(ISA32) $(FLAGS5) $(PASSWD_FLAGS)
@@ -114,7 +114,8 @@ LIB_OBJ = lib/charcnv.o lib/debug.o lib/fault.o \
lib/talloc.o lib/hash.o lib/substitute.o lib/fsusage.o \
lib/ms_fnmatch.o lib/select.o lib/error.o lib/messages.o \
lib/md5.o lib/hmacmd5.o lib/iconv.o lib/smbpasswd.o \
- nsswitch/wb_client.o nsswitch/wb_common.o $(TDB_OBJ)
+ nsswitch/wb_client.o nsswitch/wb_common.o \
+ intl/lang_tdb.o $(TDB_OBJ)
READLINE_OBJ = lib/readline.o
diff --git a/source3/codepages/lowcase.dat b/source3/codepages/lowcase.dat
new file mode 100644
index 0000000000..62b6e2e952
--- /dev/null
+++ b/source3/codepages/lowcase.dat
Binary files differ
diff --git a/source3/codepages/upcase.dat b/source3/codepages/upcase.dat
new file mode 100644
index 0000000000..bb6f9beb4e
--- /dev/null
+++ b/source3/codepages/upcase.dat
Binary files differ
diff --git a/source3/codepages/valid.dat b/source3/codepages/valid.dat
new file mode 100644
index 0000000000..78c14b33f0
--- /dev/null
+++ b/source3/codepages/valid.dat
Binary files differ
diff --git a/source3/intl/lang_tdb.c b/source3/intl/lang_tdb.c
new file mode 100644
index 0000000000..52a84d59a2
--- /dev/null
+++ b/source3/intl/lang_tdb.c
@@ -0,0 +1,181 @@
+/*
+ Unix SMB/Netbios implementation.
+ Version 3.0
+ tdb based replacement for gettext
+ Copyright (C) Andrew Tridgell 2001
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "includes.h"
+
+static TDB_CONTEXT *tdb;
+
+/*
+ * Load a po file into the tdb.
+ *
+ * The tdb is locked, every existing record is wiped, and then each
+ * `msgid "..."` line is paired with the following `msgstr "..."` line.
+ * Key and value are stored including their terminating NUL. An empty
+ * msgstr falls back to the msgid itself, so untranslated entries map to
+ * their own text.
+ *
+ * Returns False if the file cannot be loaded or the tdb cannot be
+ * locked, True otherwise (individual tdb_store() failures are ignored).
+ */
+static BOOL load_po(const char *po_file)
+{
+	char **lines;
+	int num_lines, i;
+	char *msgid = NULL;
+	char *msgstr;
+	TDB_DATA key, data;
+
+	lines = file_lines_load(po_file, &num_lines);
+
+	if (!lines) {
+		return False;
+	}
+
+	if (tdb_lockall(tdb) != 0) {
+		/* don't leak the loaded file when the lock can't be taken */
+		file_lines_free(lines);
+		return False;
+	}
+
+	/* wipe the db */
+	tdb_traverse(tdb, (tdb_traverse_func) tdb_delete, NULL);
+
+	for (i=0;i<num_lines;i++) {
+		if (strncmp(lines[i], "msgid \"", 7) == 0) {
+			msgid = lines[i] + 7;
+		}
+		if (strncmp(lines[i], "msgstr \"", 8) == 0) {
+			/* a msgstr with no msgid before it has nothing to
+			   be keyed on - skip it rather than use a wild
+			   pointer */
+			if (!msgid) {
+				continue;
+			}
+			msgstr = lines[i] + 8;
+			/* trim the closing quote from both strings */
+			trim_string(msgid, NULL, "\"");
+			trim_string(msgstr, NULL, "\"");
+			if (*msgstr == 0) {
+				msgstr = msgid;
+			}
+			key.dptr = msgid;
+			key.dsize = strlen(msgid)+1;
+			data.dptr = msgstr;
+			data.dsize = strlen(msgstr)+1;
+			tdb_store(tdb, key, data, 0);
+		}
+	}
+
+	file_lines_free(lines);
+	tdb_unlockall(tdb);
+
+	return True;
+}
+
+
+/*
+ * Work out what language to use from the locale environment variables.
+ * LANGUAGE, LC_ALL, LC_LANG and LANG are consulted in that order and the
+ * value of the first one that is set is returned unchanged.
+ * Returns NULL when none of them is set.
+ */
+static char *get_lang(void)
+{
+	static const char *const env_names[] = {"LANGUAGE", "LC_ALL", "LC_LANG", "LANG"};
+	size_t n;
+
+	for (n = 0; n < sizeof(env_names)/sizeof(env_names[0]); n++) {
+		char *value = getenv(env_names[n]);
+		if (value != NULL) {
+			return value;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Initialise the message translation subsystem.
+ *
+ * Runs at most once per process. When a language can be determined from
+ * the environment, the tdb "lang_<lang>.tdb" under the lock directory is
+ * opened. If the matching "<lang>.po" file is newer than the load time
+ * recorded in the tdb (or no load time is recorded), the po file is
+ * reloaded into the tdb.
+ *
+ * On any failure the function returns quietly; `tdb` then stays NULL (no
+ * translation) or keeps whatever contents it already had.
+ */
+void lang_tdb_init(void)
+{
+	char *lang;
+	char *path = NULL;
+	struct stat st;
+	static int initialised;
+	time_t loadtime;
+
+	/* we only want to init once per process */
+	if (initialised) return;
+	initialised = 1;
+
+	lang = get_lang();
+
+	/* if no lang then we don't translate */
+	if (!lang) return;
+
+	/* asprintf() leaves path undefined on failure, so check it */
+	if (asprintf(&path, "%s%s.tdb", lock_path("lang_"), lang) < 0) {
+		return;
+	}
+
+	tdb = tdb_open_log(path, 0, TDB_DEFAULT, O_RDWR|O_CREAT, 0644);
+	if (!tdb) {
+		/* can't write to it - settle for a read-only open and
+		   skip refreshing from the po file */
+		tdb = tdb_open_log(path, 0, TDB_DEFAULT, O_RDONLY, 0);
+		free(path);
+		return;
+	}
+
+	free(path);
+	path = NULL;
+
+	if (asprintf(&path, "%s.po", lock_path(lang)) < 0) {
+		return;
+	}
+
+	loadtime = tdb_fetch_int(tdb, "/LOADTIME/");
+
+	if (stat(path, &st) == 0 && (loadtime == -1 || loadtime < st.st_mtime)) {
+		/* only record the load time when the load really happened,
+		   otherwise a failed load would not be retried until the
+		   po file changes again */
+		if (load_po(path)) {
+			tdb_store_int(tdb, "/LOADTIME/", (int)time(NULL));
+		}
+	}
+	free(path);
+}
+
+/*
+ * Translate a msgid to a message string in the current language.
+ *
+ * The result must be released with lang_msg_free(). When no translation
+ * tdb is open the msgid pointer itself is handed back; when the tdb has
+ * no entry for msgid a strdup()ed copy of msgid is returned so that the
+ * caller still gets something that can be freed.
+ */
+const char *lang_msg(const char *msgid)
+{
+	TDB_DATA lookup, found;
+
+	lang_tdb_init();
+
+	if (tdb) {
+		/* keys are stored with their terminating NUL */
+		lookup.dptr = (char *)msgid;
+		lookup.dsize = strlen(msgid)+1;
+
+		found = tdb_fetch(tdb, lookup);
+
+		if (found.dptr) {
+			return (const char *)found.dptr;
+		}
+
+		/* not in the catalogue - hand back a freeable copy */
+		return strdup(msgid);
+	}
+
+	return msgid;
+}
+
+
+/*
+ * Free up a string from lang_msg().
+ * Nothing is freed while no translation tdb is open, because lang_msg()
+ * then returns the caller's own msgid pointer rather than an allocation.
+ */
+void lang_msg_free(const char *msgstr)
+{
+	if (tdb) {
+		free((void *)msgstr);
+	}
+}
+
+
+/*
+  when the _() translation macro is used there is no obvious place to free
+  the resulting string and there is no easy way to give a static pointer.
+  All we can do is rotate between some static buffers and hope a single d_printf()
+  doesn't have more calls to _() than the number of buffers
+
+  The translated string is copied into the next static buffer and then
+  released again, so the caller never has to free anything. Returns msgid
+  itself if no translation string could be obtained.
+*/
+const char *lang_msg_rotate(const char *msgid)
+{
+#define NUM_LANG_BUFS 4
+	const char *translated;
+	static pstring bufs[NUM_LANG_BUFS];
+	static int next;
+
+	translated = lang_msg(msgid);
+	if (!translated) return msgid;
+
+	pstrcpy(bufs[next], translated);
+
+	/* the copy in bufs[] is what we hand out - give the original back,
+	   otherwise every call leaks one string */
+	lang_msg_free(translated);
+	translated = bufs[next];
+
+	next = (next+1) % NUM_LANG_BUFS;
+
+	return translated;
+}
diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c
index 45d7d4d8d5..b7af6fef4b 100644
--- a/source3/lib/charcnv.c
+++ b/source3/lib/charcnv.c
@@ -101,6 +101,7 @@ size_t convert_string(charset_t from, charset_t to,
if (!initialised) {
initialised = 1;
+ load_case_tables();
init_iconv();
}
diff --git a/source3/lib/util.c b/source3/lib/util.c
index ce39bb3b1d..62e08333dd 100644
--- a/source3/lib/util.c
+++ b/source3/lib/util.c
@@ -1687,6 +1687,16 @@ char *lock_path(char *name)
return fname;
}
+/*****************************************************************
+ Build the path of a file in the Samba lib directory (LIBDIR).
+ The result is held in a static buffer, so it is overwritten by the
+ next call.
+ *****************************************************************/
+char *lib_path(char *name)
+{
+	static pstring libfile;
+
+	snprintf(libfile, sizeof(libfile), "%s/%s", LIBDIR, name);
+
+	return libfile;
+}
+
/*******************************************************************
Given a filename - get its directory name
NB: Returned in static storage. Caveats:
diff --git a/source3/lib/util_file.c b/source3/lib/util_file.c
index 3d072bb170..77c0d7888e 100644
--- a/source3/lib/util_file.c
+++ b/source3/lib/util_file.c
@@ -422,6 +422,41 @@ char *file_load(const char *fname, size_t *size)
}
+/*******************************************************************
+mmap (if possible) or read a file
+
+ fname - file to load
+ size  - number of bytes the caller expects the file to hold
+
+ Returns a pointer to `size` bytes of file content, or NULL on failure.
+ With HAVE_MMAP the file is mapped read-only; if the mapping cannot be
+ set up, or without HAVE_MMAP, the file is read into memory with
+ file_load() and rejected unless it is exactly `size` bytes long.
+
+ NOTE: the mmap path does not check the length of the file itself.
+********************************************************************/
+void *map_file(char *fname, size_t size)
+{
+	size_t s2 = 0;
+	void *p = NULL;
+#ifdef HAVE_MMAP
+	int fd;
+	fd = open(fname, O_RDONLY, 0);
+	if (fd == -1) {
+		DEBUG(1,("Failed to load %s - %s\n", fname, strerror(errno)));
+		return NULL;
+	}
+	p = mmap(NULL, size, PROT_READ, MAP_SHARED|MAP_FILE, fd, 0);
+	if (p == MAP_FAILED) {
+		/* report before close() gets a chance to change errno, then
+		   fall through to the plain read below */
+		DEBUG(1,("Failed to mmap %s - %s\n", fname, strerror(errno)));
+		p = NULL;
+	}
+	close(fd);
+#endif
+	if (!p) {
+		p = file_load(fname, &s2);
+		if (!p || s2 != size) {
+			/* size_t is not an int - cast for the format */
+			DEBUG(1,("incorrect size for %s - got %lu expected %lu\n",
+				 fname, (unsigned long)s2, (unsigned long)size));
+			free(p);
+			return NULL;
+		}
+	}
+
+	return p;
+}
+
+
/****************************************************************************
parse a buffer into lines
****************************************************************************/
diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c
index 0e7a7c02f5..3a77098e09 100644
--- a/source3/lib/util_str.c
+++ b/source3/lib/util_str.c
@@ -132,8 +132,8 @@ char **toktocliplist(int *ctok, char *sep)
int StrCaseCmp(const char *s, const char *t)
{
pstring buf1, buf2;
- unix_strlower(s, strlen(s)+1, buf1, sizeof(buf1));
- unix_strlower(t, strlen(t)+1, buf2, sizeof(buf2));
+ unix_strupper(s, strlen(s)+1, buf1, sizeof(buf1));
+ unix_strupper(t, strlen(t)+1, buf2, sizeof(buf2));
return strcmp(buf1,buf2);
}
@@ -142,10 +142,10 @@ int StrCaseCmp(const char *s, const char *t)
********************************************************************/
int StrnCaseCmp(const char *s, const char *t, size_t n)
{
- pstring buf1, buf2;
- unix_strlower(s, strlen(s)+1, buf1, sizeof(buf1));
- unix_strlower(t, strlen(t)+1, buf2, sizeof(buf2));
- return strncmp(buf1,buf2,n);
+ pstring buf1, buf2;
+ unix_strupper(s, strlen(s)+1, buf1, sizeof(buf1));
+ unix_strupper(t, strlen(t)+1, buf2, sizeof(buf2));
+ return strncmp(buf1,buf2,n);
}
/*******************************************************************
diff --git a/source3/lib/util_unistr.c b/source3/lib/util_unistr.c
index 1054eab6e1..a0e1b88eb8 100644
--- a/source3/lib/util_unistr.c
+++ b/source3/lib/util_unistr.c
@@ -25,6 +25,53 @@
#define MAXUNI 1024
#endif
+/* these 3 tables define the unicode case handling. They are loaded
+   at startup either via mmap() or read() from the lib directory, or
+   point at built-in fallback tables when the files are unavailable */
+static smb_ucs2_t *upcase_table;
+static smb_ucs2_t *lowcase_table;
+static uint8 *valid_table;
+
+/*******************************************************************
+load the case handling tables
+
+ Safe to call repeatedly; only the first call does any work. Each of
+ upcase.dat, lowcase.dat and valid.dat is loaded from the lib directory
+ with map_file(). A table that cannot be loaded is replaced by a "lame"
+ one that only knows about the first 256 code points (via the C ctype
+ functions) and treats everything above as unchanged / not valid.
+
+ The fallback tables live in static storage rather than on the heap, so
+ building them cannot fail and the table pointers are never left NULL.
+********************************************************************/
+void load_case_tables(void)
+{
+	static int initialised;
+	static smb_ucs2_t lame_upcase[0x10000];
+	static smb_ucs2_t lame_lowcase[0x10000];
+	static uint8 lame_valid[0x10000];
+	int i;
+
+	if (initialised) return;
+	initialised = 1;
+
+	upcase_table = map_file(lib_path("upcase.dat"), 0x20000);
+	lowcase_table = map_file(lib_path("lowcase.dat"), 0x20000);
+	valid_table = map_file(lib_path("valid.dat"), 0x10000);
+
+	/* we would like Samba to limp along even if these tables are
+	   not available */
+	if (!upcase_table) {
+		DEBUG(1,("creating lame upcase table\n"));
+		upcase_table = lame_upcase;
+		for (i=0;i<256;i++) upcase_table[i] = islower(i)?toupper(i):i;
+		for (;i<0x10000;i++) upcase_table[i] = i;
+	}
+
+	if (!lowcase_table) {
+		DEBUG(1,("creating lame lowcase table\n"));
+		lowcase_table = lame_lowcase;
+		for (i=0;i<256;i++) lowcase_table[i] = isupper(i)?tolower(i):i;
+		for (;i<0x10000;i++) lowcase_table[i] = i;
+	}
+
+	if (!valid_table) {
+		DEBUG(1,("creating lame valid table\n"));
+		valid_table = lame_valid;
+		for (i=0;i<256;i++) valid_table[i] =
+			isalnum(i) && !strchr("*\\/?<>|\":", i);
+		for (;i<0x10000;i++) valid_table[i] = 0;
+	}
+}
+
+
/*******************************************************************
Write a string in (little-endian) unicode format. src is in
the current DOS codepage. len is the length in bytes of the
@@ -129,94 +176,46 @@ uint32 buffer2_to_uint32(BUFFER2 *str)
}
/*******************************************************************
- Mapping tables for UNICODE character. Allows toupper/tolower and
- isXXX functions to work.
-
- tridge: split into 2 pieces. This saves us 5/6 of the memory
- with a small speed penalty
- The magic constants are the lower/upper range of the tables two
- parts
+ Convert a wchar to upper case.
********************************************************************/
-typedef struct {
- smb_ucs2_t lower;
- smb_ucs2_t upper;
- unsigned char flags;
-} smb_unicode_table_t;
-
-#define TABLE1_BOUNDARY 9450
-#define TABLE2_BOUNDARY 64256
-
-static smb_unicode_table_t map_table1[] = {
-#include "unicode_map_table1.h"
-};
-
-static smb_unicode_table_t map_table2[] = {
-#include "unicode_map_table2.h"
-};
-
-static unsigned char map_table_flags(smb_ucs2_t v)
-{
- v = SVAL(&v,0);
- if (v < TABLE1_BOUNDARY) return map_table1[v].flags;
- if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].flags;
- return 0;
-}
-
-static smb_ucs2_t map_table_lower(smb_ucs2_t v)
-{
- v = SVAL(&v,0);
- if (v < TABLE1_BOUNDARY) return map_table1[v].lower;
- if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].lower;
- return v;
-}
-
-static smb_ucs2_t map_table_upper(smb_ucs2_t v)
+smb_ucs2_t toupper_w(smb_ucs2_t val)
{
- v = SVAL(&v,0);
- if (v < TABLE1_BOUNDARY) return map_table1[v].upper;
- if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].upper;
- return v;
+ return upcase_table[val];
}
/*******************************************************************
- Is an upper case wchar.
+ Convert a wchar to lower case.
********************************************************************/
-int isupper_w( smb_ucs2_t val)
+smb_ucs2_t tolower_w( smb_ucs2_t val )
{
- return (map_table_flags(val) & UNI_UPPER);
+ return lowcase_table[val];
}
/*******************************************************************
- Is a lower case wchar.
+determine if a character is lowercase
********************************************************************/
-
-int islower_w( smb_ucs2_t val)
+BOOL islower_w(smb_ucs2_t c)
{
- return (map_table_flags(val) & UNI_LOWER);
+ return upcase_table[c] != c;
}
/*******************************************************************
- Convert a wchar to upper case.
+determine if a character is uppercase
********************************************************************/
-
-smb_ucs2_t toupper_w( smb_ucs2_t val )
+BOOL isupper_w(smb_ucs2_t c)
{
- val = map_table_upper(val);
- val = SVAL(&val,0);
- return val;
+ return lowcase_table[c] != c;
}
+
/*******************************************************************
- Convert a wchar to lower case.
+determine if a character is valid in a 8.3 name
********************************************************************/
-
-smb_ucs2_t tolower_w( smb_ucs2_t val )
+BOOL isvalid83_w(smb_ucs2_t c)
{
- val = map_table_lower(val);
- val = SVAL(&val,0);
- return val;
+ return valid_table[c] != 0;
}
/*******************************************************************
@@ -252,8 +251,9 @@ BOOL strlower_w(smb_ucs2_t *s)
{
BOOL ret = False;
while (*s) {
- if (isupper_w(*s)) {
- *s = tolower_w(*s);
+ smb_ucs2_t v = tolower_w(*s);
+ if (v != *s) {
+ *s = v;
ret = True;
}
s++;
@@ -269,8 +269,9 @@ BOOL strupper_w(smb_ucs2_t *s)
{
BOOL ret = False;
while (*s) {
- if (islower_w(*s)) {
- *s = toupper_w(*s);
+ smb_ucs2_t v = toupper_w(*s);
+ if (v != *s) {
+ *s = v;
ret = True;
}
s++;
@@ -283,7 +284,7 @@ case insensitive string comparison
********************************************************************/
int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
{
- while (*b && tolower_w(*a) == tolower_w(*b)) { a++; b++; }
+	/* scan and result must fold with the same table: if the scan stops
+	   on a pair that differs in upper case, a lower case difference of
+	   zero would wrongly report the strings as equal */
+	while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; }
- return (tolower_w(*a) - tolower_w(*b));
+	return (toupper_w(*a) - toupper_w(*b));
}
diff --git a/source3/smbd/server.c b/source3/smbd/server.c
index a851a8656d..f707a61376 100644
--- a/source3/smbd/server.c
+++ b/source3/smbd/server.c
@@ -663,6 +663,8 @@ static void usage(char *pname)
sec_init();
+ load_case_tables();
+
append_log = True;
TimeInit();
diff --git a/source3/torture/utable.c b/source3/torture/utable.c
index daf9bd49d6..fb262f91b5 100644
--- a/source3/torture/utable.c
+++ b/source3/torture/utable.c
@@ -29,8 +29,9 @@ BOOL torture_utable(int dummy)
fstring fname, alt_name;
int fnum;
smb_ucs2_t c2;
- int c, len;
+ int c, len, fd;
int chars_allowed=0, alt_allowed=0;
+ uint8 valid[0x10000];
printf("starting utable\n");
@@ -38,7 +39,10 @@ BOOL torture_utable(int dummy)
return False;
}
+ memset(valid, 0, sizeof(valid));
+
cli_mkdir(&cli, "\\utable");
+ cli_unlink(&cli, "\\utable\\*");
for (c=1; c < 0x10000; c++) {
char *p;
@@ -62,6 +66,7 @@ BOOL torture_utable(int dummy)
if (strncmp(alt_name, "X_A_L", 5) != 0) {
alt_allowed++;
+ valid[c] = 1;
/* d_printf("fname=[%s] alt_name=[%s]\n", fname, alt_name); */
}
@@ -78,6 +83,15 @@ BOOL torture_utable(int dummy)
d_printf("%d chars allowed %d alt chars allowed\n", chars_allowed, alt_allowed);
+ fd = open("valid.dat", O_WRONLY|O_CREAT|O_TRUNC, 0644);
+ if (fd == -1) {
+ d_printf("Failed to create valid.dat - %s", strerror(errno));
+ return False;
+ }
+ write(fd, valid, 0x10000);
+ close(fd);
+ d_printf("wrote valid.dat\n");
+
return True;
}
@@ -122,7 +136,9 @@ BOOL torture_casetable(int dummy)
for (c=1; c < 0x10000; c++) {
size_t size;
- if (c == '.') continue;
+ if (c == '.' || c == '\\') continue;
+
+ printf("%04x\n", c);
fname = form_name(c);
fnum = cli_nt_create_full(&cli, fname,