Change the multibyte character set support so that

Kanji support is one case of multibyte character support, rather than being a specific case in single byte character support. This allows us to add Big5 Chinese support (code page 950) and Korean Hangul support (code page 949) at very little cost. It also allows us to easily add future multibyte code pages. Makefile: Added codepages 949, 950 as we now support more multibyte codepages. asyncdns.c: Fixed problem with child being re-spawned when parent killed. charcnv.c charset.c client.c clitar.c kanji.c kanji.h smb.h util.c loadparm.c: Generic multibyte codepage support (adding Big5 Chinese and Korean Hangul). nmbd.c: Fixed problem with child being re-spawned when parent killed. mangle.c: Modified str_checksum so that first 15 characters have more effect on outcome. This helps with short name mangling as most 'long' names are still shorter than 15 chars (the bug was that foobar_mng and foobar_sum would hash to the same value; with the modified code they hash differently). Jeremy. (This used to be commit 299016338cfb47f0c585875ef9b468121fcee97d)
author: Jeremy Allison <jra@samba.org> 1998-03-03 20:19:14 +0000
committer: Jeremy Allison <jra@samba.org> 1998-03-03 20:19:14 +0000
commit: b7fb6c6b38784d25c9c85e9b27b08e30111dbd0c (patch)
tree: ff98efd1c5b4f92782b9687791a321037e2f9e6d /source3/lib
parent: f0e121d100ef207b683fbb8d3079403e22929d0a (diff)
download: samba-b7fb6c6b38784d25c9c85e9b27b08e30111dbd0c.tar.gz
samba-b7fb6c6b38784d25c9c85e9b27b08e30111dbd0c.tar.bz2
samba-b7fb6c6b38784d25c9c85e9b27b08e30111dbd0c.zip
4 files changed, 438 insertions, 39 deletions
diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c
index f02fcb2f92..20db58e4ab 100644
--- a/source3/lib/charcnv.c
+++ b/source3/lib/charcnv.c
@@ -164,9 +164,6 @@ char *unix2dos_format(char *str,BOOL overwrite)
 
     if (!mapsinited) initmaps();
 
-    if(lp_client_code_page() == KANJI_CODEPAGE)
-      return (*_unix_to_dos)(str, overwrite);
-    else {
       if (overwrite) {
           for (p = str; *p; p++) *p = unix2dos[(unsigned char)*p];
           return str;
@@ -175,7 +172,6 @@ char *unix2dos_format(char *str,BOOL overwrite)
           *dp = 0;
           return cvtbuf;
       }
-    }
 }
 
 /*
@@ -188,9 +184,6 @@ char *dos2unix_format(char *str, BOOL overwrite)
 
     if (!mapsinited) initmaps();
 
-    if(lp_client_code_page() == KANJI_CODEPAGE)
-      return (*_dos_to_unix)(str, overwrite);
-    else {
       if (overwrite) {
           for (p = str; *p; p++) *p = dos2unix[(unsigned char)*p];
           return str;
@@ -199,7 +192,6 @@ char *dos2unix_format(char *str, BOOL overwrite)
           *dp = 0;
           return cvtbuf;
       }
-    }
 }
 
 
diff --git a/source3/lib/charset.c b/source3/lib/charset.c
index 79a82f8587..fe170bdcf5 100644
--- a/source3/lib/charset.c
+++ b/source3/lib/charset.c
@@ -347,14 +347,21 @@ void codepage_initialise(int client_codepage)
 for code page %d failed. Using default client codepage 932\n", 
              CODEPAGEDIR, client_codepage, client_codepage));
     cp = cp_932;
+    client_codepage = KANJI_CODEPAGE;
 #else /* KANJI */
     DEBUG(6,("codepage_initialise: loading dynamic codepage file %s/codepage.%d \
 for code page %d failed. Using default client codepage 850\n", 
              CODEPAGEDIR, client_codepage, client_codepage));
     cp = cp_850;
+    client_codepage = MSDOS_LATIN_1_CODEPAGE;
 #endif /* KANJI */
   }
 
+  /*
+   * Setup the function pointers for the loaded codepage.
+   */
+  initialize_multibyte_vectors( client_codepage );
+
   if(cp)
   {
     for(i = 0; !((cp[i][0] == '\0') && (cp[i][1] == '\0')); i++)
diff --git a/source3/lib/kanji.c b/source3/lib/kanji.c
index d63798914e..9360405547 100644
--- a/source3/lib/kanji.c
+++ b/source3/lib/kanji.c
@@ -27,6 +27,55 @@
 #define _KANJI_C_
 #include "includes.h"
 
+/*
+ * Function pointers that get overridden when multi-byte code pages
+ * are loaded.
+ */
+
+char *(*multibyte_strchr)(char *, int ) = (char *(*)(char *, int )) strchr;
+char *(*multibyte_strrchr)(char *, int ) = (char *(*)(char *, int )) strrchr;
+char *(*multibyte_strstr)(char *, char *) = (char *(*)(char *, char *)) strstr;
+char *(*multibyte_strtok)(char *, char *) = (char *(*)(char *, char *)) strtok;
+
+/*
+ * Kanji is treated differently here due to historical accident of
+ * it being the first non-English codepage added to Samba.
+ * The define 'KANJI' is being overloaded to mean 'use kanji codepage
+ * by default' and also 'this is the filename-to-disk conversion 
+ * method to use'. This really should be removed and all control
+ * over this left in the smb.conf parameters 'client codepage'
+ * and 'coding system'.
+ */
+
+#ifndef KANJI
+
+/*
+ * Set the default conversion to be the functions in
+ * charcnv.c.
+ */
+
+static int not_multibyte_char(char);
+
+char *(*_dos_to_unix)(char *, BOOL) = dos2unix_format;
+char *(*_unix_to_dos)(char *, BOOL) = unix2dos_format;
+int (*is_multibyte_char)(char) = not_multibyte_char;
+
+#else /* KANJI */
+
+/*
+ * Set the default conversion to be the function
+ * sj_to_sj in this file.
+ */
+
+static char *sj_to_sj(char *from, BOOL overwrite);
+static int kanji_multibyte_char(char);
+
+char *(*_dos_to_unix)(char *, BOOL) = sj_to_sj;
+char *(*_unix_to_dos)(char *, BOOL) = sj_to_sj;
+int (*is_multibyte_char)(char) = kanji_multibyte_char;
+
+#endif /* KANJI */
+
 /* jis si/so sequence */
 static char jis_kso = JIS_KSO;
 static char jis_ksi = JIS_KSI;
@@ -37,13 +86,10 @@ static char hex_tag = HEXTAG;
 ********************************************************************/
 /*******************************************************************
  search token from S1 separated any char of S2
- S1 contain SHIFT JIS chars.
+ S1 contains SHIFT JIS chars.
 ********************************************************************/
-char *sj_strtok(char *s1, char *s2)
+static char *sj_strtok(char *s1, char *s2)
 {
-  if (lp_client_code_page() != KANJI_CODEPAGE) {
-   return strtok(s1, s2);
-  } else {
     static char *s = NULL;
     char *q;
     if (!s1) {
@@ -75,18 +121,14 @@ char *sj_strtok(char *s1, char *s2)
 	return q;
     }
     return NULL;
-  }
 }
 
 /*******************************************************************
  search string S2 from S1
- S1 contain SHIFT JIS chars.
+ S1 contains SHIFT JIS chars.
 ********************************************************************/
-char *sj_strstr(char *s1, char *s2)
+static char *sj_strstr(char *s1, char *s2)
 {
-  if (lp_client_code_page() != KANJI_CODEPAGE) {
-    return strstr(s1, s2);
-  } else {
     int len = strlen ((char *) s2);
     if (!*s2) 
 	return (char *) s1;
@@ -102,18 +144,14 @@ char *sj_strstr(char *s1, char *s2)
 	}
     }
     return 0;
-  }
 }
 
 /*******************************************************************
  Search char C from beginning of S.
- S contain SHIFT JIS chars.
+ S contains SHIFT JIS chars.
 ********************************************************************/
-char *sj_strchr (char *s, int c)
+static char *sj_strchr (char *s, int c)
 {
-  if (lp_client_code_page() != KANJI_CODEPAGE) {
-    return strchr(s, c);
-  } else {
     for (; *s; ) {
 	if (*s == c)
 	    return (char *) s;
@@ -124,18 +162,14 @@ char *sj_strchr (char *s, int c)
 	}
     }
     return 0;
-  }
 }
 
 /*******************************************************************
  Search char C end of S.
- S contain SHIFT JIS chars.
+ S contains SHIFT JIS chars.
 ********************************************************************/
-char *sj_strrchr(char *s, int c)
+static char *sj_strrchr(char *s, int c)
 {
-  if (lp_client_code_page() != KANJI_CODEPAGE) {
-    return strrchr(s, c);
-  } else {
     char *q;
 
     for (q = 0; *s; ) {
@@ -149,7 +183,249 @@ char *sj_strrchr(char *s, int c)
 	}
     }
     return q;
+}
+
+/*******************************************************************
+ Kanji multibyte char function.
+*******************************************************************/
+   
+static int kanji_multibyte_char(char c)
+{
+  if(is_shift_jis(c)) {
+    return 2;
+  } else if (is_kana(c)) {
+    return 1;
+  }
+  return 0;
+}
+
+/*******************************************************************
+  Hangul (Korean - code page 949) functions
+********************************************************************/
+/*******************************************************************
+ Search for a token in S1 separated by any char of S2.
+ S1 contains hangul chars.
+********************************************************************/
+static char *hangul_strtok(char *s1, char *s2)
+{
+    static char *s = NULL;
+    char *q;
+    if (!s1) {
+        if (!s) {
+            return NULL;
+        }
+        s1 = s;
+    }
+    for (q = s1; *s1; ) {
+        if (is_hangul (*s1)) {
+            s1 += 2;
+        } else {
+            char *p = strchr (s2, *s1);
+            if (p) {
+                if (s1 != q) {
+                    s = s1 + 1;
+                    *s1 = '\0';
+                    return q;
+                }
+                q = s1 + 1;
+            }
+            s1++;
+        }
+    }
+    s = NULL;
+    if (*q) {
+        return q;
+    }
+    return NULL;
+}
+
+/*******************************************************************
+ Search for string S2 in S1.
+ S1 contains hangul chars.
+********************************************************************/
+static char *hangul_strstr(char *s1, char *s2)
+{
+    int len = strlen ((char *) s2);
+    if (!*s2)
+        return (char *) s1;
+    for (;*s1;) {
+        if (*s1 == *s2) {
+            if (strncmp (s1, s2, len) == 0)
+                return (char *) s1;
+        }
+        if (is_hangul (*s1)) {
+            s1 += 2;
+        } else {
+            s1++;
+        }
+    }
+    return 0;
+}
+
+/*******************************************************************
+ Search for char C from the beginning of S.
+ S contains hangul chars.
+********************************************************************/
+static char *hangul_strchr (char *s, int c)
+{
+    for (; *s; ) {
+        if (*s == c)
+            return (char *) s;
+        if (is_hangul (*s)) {
+            s += 2;
+        } else {
+            s++;
+        }
+    }
+    return 0;
+}
+
+/*******************************************************************
+ Search for the last occurrence of char C in S.
+ S contains hangul chars.
+********************************************************************/
+static char *hangul_strrchr(char *s, int c)
+{
+    char *q;
+ 
+    for (q = 0; *s; ) {
+        if (*s == c) {
+            q = (char *) s;
+        }
+        if (is_hangul (*s)) {
+            s += 2;
+        } else {
+            s++;
+        }
+    }
+    return q;
+}
+
+/*******************************************************************
+ Hangul multibyte char function.
+*******************************************************************/
+
+static int hangul_multibyte_char(char c)
+{
+  if( is_hangul(c)) {
+    return 2;
+  }
+  return 0;
+}
+
+/*******************************************************************
+  Big5 Traditional Chinese (code page 950) functions
+********************************************************************/
+
+/*******************************************************************
+ Search for a token in S1 separated by any char of S2.
+ S1 contains big5 chars.
+********************************************************************/
+static char *big5_strtok(char *s1, char *s2)
+{
+    static char *s = NULL;
+    char *q;
+    if (!s1) {
+        if (!s) {
+            return NULL;
+        }
+        s1 = s;
+    }
+    for (q = s1; *s1; ) {
+        if (is_big5_c1 (*s1)) {
+            s1 += 2;
+        } else {
+            char *p = strchr (s2, *s1);
+            if (p) {
+                if (s1 != q) {
+                    s = s1 + 1;
+                    *s1 = '\0';
+                    return q;
+                }
+                q = s1 + 1;
+            }
+            s1++;
+        }
+    }
+    s = NULL;
+    if (*q) {
+        return q;
+    }
+    return NULL;
+}
+
+/*******************************************************************
+ Search for string S2 in S1.
+ S1 contains big5 chars.
+********************************************************************/
+static char *big5_strstr(char *s1, char *s2)
+{
+    int len = strlen ((char *) s2);
+    if (!*s2)
+        return (char *) s1;
+    for (;*s1;) {
+        if (*s1 == *s2) {
+            if (strncmp (s1, s2, len) == 0)
+                return (char *) s1;
+        }
+        if (is_big5_c1 (*s1)) {
+            s1 += 2;
+        } else {
+            s1++;
+        }
+    }
+    return 0;
+}
+
+/*******************************************************************
+ Search for char C from the beginning of S.
+ S contains big5 chars.
+********************************************************************/
+static char *big5_strchr (char *s, int c)
+{
+    for (; *s; ) {
+        if (*s == c)
+            return (char *) s;
+        if (is_big5_c1 (*s)) {
+            s += 2;
+        } else {
+            s++;
+        }
+    }
+    return 0;
+}
+
+/*******************************************************************
+ Search for the last occurrence of char C in S.
+ S contains big5 chars.
+********************************************************************/
+static char *big5_strrchr(char *s, int c)
+{
+    char *q;
+ 
+    for (q = 0; *s; ) {
+        if (*s == c) {
+            q = (char *) s;
+        }
+        if (is_big5_c1 (*s)) {
+            s += 2;
+        } else {
+            s++;
+        }
+    }
+    return q;
+}
+
+/*******************************************************************
+ Big5 multibyte char function.
+*******************************************************************/
+
+static int big5_multibyte_char(char c)
+{
+  if( is_big5_c1(c)) {
+    return 2;
   }
+  return 0;
 }
 
 /*******************************************************************
@@ -770,17 +1046,17 @@ static char *sj_to_sj(char *from, BOOL overwrite)
  _dos_to_unix		_unix_to_dos
 ************************************************************************/
 
-char *(*_dos_to_unix)(char *str, BOOL overwrite) = sj_to_sj;
-char *(*_unix_to_dos)(char *str, BOOL overwrite) = sj_to_sj;
-
-static int setup_string_function(int codes)
+static void setup_string_function(int codes)
 {
     switch (codes) {
     default:
+        _dos_to_unix = dos2unix_format;
+        _unix_to_dos = unix2dos_format;
+        break;
+
     case SJIS_CODE:
 	_dos_to_unix = sj_to_sj;
 	_unix_to_dos = sj_to_sj;
-
 	break;
 	
     case EUC_CODE:
@@ -813,13 +1089,12 @@ static int setup_string_function(int codes)
 	_unix_to_dos = cap_to_sj;
 	break;
     }
-    return codes;
 }
 
 /*
  * Interpret coding system.
  */
-int interpret_coding_system(char *str)
+void interpret_coding_system(char *str)
 {
     int codes = UNKNOWN_CODE;
     
@@ -909,5 +1184,58 @@ int interpret_coding_system(char *str)
 	jis_kso = '@';
 	jis_ksi = 'H';
     }	
-    return setup_string_function (codes);
+    setup_string_function (codes);
+}
+
+/*******************************************************************
+ Non multibyte char function.
+*******************************************************************/
+   
+static int not_multibyte_char(char c)
+{
+  return 0;
+}
+
+/*******************************************************************
+ Setup the function pointers for the functions that are replaced
+ when multi-byte codepages are used.
+
+ The dos_to_unix and unix_to_dos function pointers are only
+ replaced by setup_string_function called by interpret_coding_system
+ above.
+*******************************************************************/
+
+void initialize_multibyte_vectors( int client_codepage)
+{
+  switch( client_codepage )
+  {
+  case KANJI_CODEPAGE:
+    multibyte_strchr = (char *(*)(char *, int )) sj_strchr;
+    multibyte_strrchr = (char *(*)(char *, int )) sj_strrchr;
+    multibyte_strstr = (char *(*)(char *, char *)) sj_strstr;
+    multibyte_strtok = (char *(*)(char *, char *)) sj_strtok;
+    is_multibyte_char = kanji_multibyte_char;
+    break;
+  case HANGUL_CODEPAGE:
+    multibyte_strchr = (char *(*)(char *, int )) hangul_strchr;
+    multibyte_strrchr = (char *(*)(char *, int )) hangul_strrchr;
+    multibyte_strstr = (char *(*)(char *, char *)) hangul_strstr;
+    multibyte_strtok = (char *(*)(char *, char *)) hangul_strtok;
+    is_multibyte_char = hangul_multibyte_char;
+    break;
+  case BIG5_CODEPAGE:
+    multibyte_strchr = (char *(*)(char *, int )) big5_strchr;
+    multibyte_strrchr = (char *(*)(char *, int )) big5_strrchr;
+    multibyte_strstr = (char *(*)(char *, char *)) big5_strstr;
+    multibyte_strtok = (char *(*)(char *, char *)) big5_strtok;
+    is_multibyte_char = big5_multibyte_char;
+    break;
+  default:
+    multibyte_strchr = (char *(*)(char *, int )) strchr;
+    multibyte_strrchr = (char *(*)(char *, int )) strrchr;
+    multibyte_strstr = (char *(*)(char *, char *)) strstr;
+    multibyte_strtok = (char *(*)(char *, char *)) strtok;
+    is_multibyte_char = not_multibyte_char;
+    break; 
+  }
 }
diff --git a/source3/lib/util.c b/source3/lib/util.c
index 18614caeed..5af41cc06c 100644
--- a/source3/lib/util.c
+++ b/source3/lib/util.c
@@ -887,6 +887,15 @@ int StrCaseCmp(char *s, char *t)
      asynchronous upper to lower mapping.
    */
 #if !defined(KANJI_WIN95_COMPATIBILITY)
+  /*
+   * For completeness we should put in equivalent code for code pages
+   * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but
+   * doubt anyone wants Samba to behave differently from Win95 and WinNT
+   * here. They both treat full width ascii characters as case sensitive
+   * filenames (ie. they don't do the work we do here).
+   * JRA.
+   */
+
   if(lp_client_code_page() == KANJI_CODEPAGE)
   {
     /* Win95 treats full width ascii characters as case sensitive. */
@@ -951,6 +960,15 @@ int StrnCaseCmp(char *s, char *t, int n)
      asynchronous upper to lower mapping.
    */
 #if !defined(KANJI_WIN95_COMPATIBILITY)
+  /*
+   * For completeness we should put in equivalent code for code pages
+   * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but
+   * doubt anyone wants Samba to behave differently from Win95 and WinNT
+   * here. They both treat full width ascii characters as case sensitive
+   * filenames (ie. they don't do the work we do here).
+   * JRA. 
+   */
+
   if(lp_client_code_page() == KANJI_CODEPAGE)
   {
     /* Win95 treats full width ascii characters as case sensitive. */
@@ -1058,6 +1076,15 @@ void strlower(char *s)
   while (*s)
   {
 #if !defined(KANJI_WIN95_COMPATIBILITY)
+  /*
+   * For completeness we should put in equivalent code for code pages
+   * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but
+   * doubt anyone wants Samba to behave differently from Win95 and WinNT
+   * here. They both treat full width ascii characters as case sensitive
+   * filenames (ie. they don't do the work we do here).
+   * JRA. 
+   */
+
     if(lp_client_code_page() == KANJI_CODEPAGE)
     {
       /* Win95 treats full width ascii characters as case sensitive. */
@@ -1096,6 +1123,15 @@ void strupper(char *s)
   while (*s)
   {
 #if !defined(KANJI_WIN95_COMPATIBILITY)
+  /*
+   * For completeness we should put in equivalent code for code pages
+   * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but
+   * doubt anyone wants Samba to behave differently from Win95 and WinNT
+   * here. They both treat full width ascii characters as case sensitive
+   * filenames (ie. they don't do the work we do here).
+   * JRA. 
+   */
+
     if(lp_client_code_page() == KANJI_CODEPAGE)
     {
       /* Win95 treats full width ascii characters as case sensitive. */
@@ -1157,6 +1193,15 @@ void string_replace(char *s,char oldc,char newc)
   while (*s)
   {
 #if !defined(KANJI_WIN95_COMPATIBILITY)
+  /*
+   * For completeness we should put in equivalent code for code pages
+   * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but
+   * doubt anyone wants Samba to behave differently from Win95 and WinNT
+   * here. They both treat full width ascii characters as case sensitive
+   * filenames (ie. they don't do the work we do here).
+   * JRA. 
+   */
+
     if(lp_client_code_page() == KANJI_CODEPAGE)
     {
       /* Win95 treats full width ascii characters as case sensitive. */
@@ -1783,6 +1828,15 @@ BOOL strhasupper(char *s)
   while (*s) 
   {
 #if !defined(KANJI_WIN95_COMPATIBILITY)
+  /*
+   * For completeness we should put in equivalent code for code pages
+   * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but
+   * doubt anyone wants Samba to behave differently from Win95 and WinNT
+   * here. They both treat full width ascii characters as case sensitive
+   * filenames (ie. they don't do the work we do here).
+   * JRA. 
+   */
+
     if(lp_client_code_page() == KANJI_CODEPAGE)
     {
       /* Win95 treats full width ascii characters as case sensitive. */
@@ -1816,6 +1870,15 @@ BOOL strhaslower(char *s)
   while (*s) 
   {
 #if !defined(KANJI_WIN95_COMPATIBILITY)
+  /*
+   * For completeness we should put in equivalent code for code pages
+   * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but
+   * doubt anyone wants Samba to behave differently from Win95 and WinNT
+   * here. They both treat full width ascii characters as case sensitive
+   * filenames (ie. they don't do the work we do here).
+   * JRA. 
+   */
+
     if(lp_client_code_page() == KANJI_CODEPAGE)
     {
       /* Win95 treats full width ascii characters as case sensitive. */
@@ -1857,6 +1920,15 @@ int count_chars(char *s,char c)
   int count=0;
 
 #if !defined(KANJI_WIN95_COMPATIBILITY)
+  /*
+   * For completeness we should put in equivalent code for code pages
+   * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but
+   * doubt anyone wants Samba to behave differently from Win95 and WinNT
+   * here. They both treat full width ascii characters as case sensitive
+   * filenames (ie. they don't do the work we do here).
+   * JRA. 
+   */
+
   if(lp_client_code_page() == KANJI_CODEPAGE)
   {
     /* Win95 treats full width ascii characters as case sensitive. */
author	Jeremy Allison <jra@samba.org>	1998-03-03 20:19:14 +0000
committer	Jeremy Allison <jra@samba.org>	1998-03-03 20:19:14 +0000
commit	b7fb6c6b38784d25c9c85e9b27b08e30111dbd0c (patch)
tree	ff98efd1c5b4f92782b9687791a321037e2f9e6d /source3/lib
parent	f0e121d100ef207b683fbb8d3079403e22929d0a (diff)
download	samba-b7fb6c6b38784d25c9c85e9b27b08e30111dbd0c.tar.gz samba-b7fb6c6b38784d25c9c85e9b27b08e30111dbd0c.tar.bz2 samba-b7fb6c6b38784d25c9c85e9b27b08e30111dbd0c.zip