From 3339f170c2d8a40c8941555b3ea0ad8b8b2f457f Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra@samba.org>
Date: Thu, 9 Apr 1998 00:07:17 +0000
Subject: Added codepage 936 (simplified Chinese).

In doing so I realized that much code was being
duplicated between Hangul, Big5 and Simplified
Chinese - so I re-arranged kanji.[ch] to go
through generic functions for all multibyte
characters that can be identified by a single
code range (not Kanji - but all the others).

Jeremy.
(This used to be commit b6c965c396eb3d4f0e6dfd863e70b28390c59f66)
---
 source3/codepages/codepage_def.936 |  24 ++++
 source3/include/kanji.h            |   6 +-
 source3/include/smb.h              |   1 +
 source3/lib/kanji.c                | 224 +++++++++++++++----------------------
 4 files changed, 123 insertions(+), 132 deletions(-)
 create mode 100644 source3/codepages/codepage_def.936

(limited to 'source3')

diff --git a/source3/codepages/codepage_def.936 b/source3/codepages/codepage_def.936
new file mode 100644
index 0000000000..25a317ffea
--- /dev/null
+++ b/source3/codepages/codepage_def.936
@@ -0,0 +1,24 @@
+#
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#   
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#   
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the Free Software
+#   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+
+# Codepage definition file for IBM Code Page 936 - MS-DOS Simplified Chinese.
+# defines lower->upper mapping.
+# Written by Jeremy Allison <jallison@whistle.com>
+
+# The columns are :
+# lower    upper   map upper to lower    map lower to upper
+#
+# This file is intentionally empty - no mappings are done.
diff --git a/source3/include/kanji.h b/source3/include/kanji.h
index 302db13a27..db3731e41b 100644
--- a/source3/include/kanji.h
+++ b/source3/include/kanji.h
@@ -109,6 +109,9 @@
 /* For traditional Chinese (known as Big5 encoding - code page 950). */
 #define is_big5_c1(c) ((0xa1 <= ((unsigned char) (c)) && ((unsigned char) (c)) <= 0xf9)) 
 
+/* For simplified Chinese (code page 936). */
+#define is_simpch_c1(c) ((0xa1 <= ((unsigned char) (c)) && ((unsigned char) (c)) <= 0xf7))
+
 #else /* not _KANJI_C_ */
 
 /*
@@ -143,6 +146,7 @@ extern char *(*multibyte_strtok)(char *s1, char *s2);
 extern char *(*_dos_to_unix)(char *str, BOOL overwrite);
 extern char *(*_unix_to_dos)(char *str, BOOL overwrite);
 extern BOOL (*is_multibyte_char)(char c);
+extern int (*_skip_multibyte_char)(char c);
 
 #define strchr(s1, c) ((*multibyte_strchr)((s1), (c)))
 #define strrchr(s1, c) ((*multibyte_strrchr)((s1), (c)))
@@ -150,7 +154,7 @@ extern BOOL (*is_multibyte_char)(char c);
 #define strtok(s1, s2) ((*multibyte_strtok)((s1), (s2)))
 #define dos_to_unix(x,y) ((*_dos_to_unix)((x), (y)))
 #define unix_to_dos(x,y) ((*_unix_to_dos)((x), (y)))
-#define skip_multibyte_char(c) ((*is_multibyte_char)((c)))
+#define skip_multibyte_char(c) ((*_skip_multibyte_char)((c)))
 
 #endif /* _KANJI_C_ */
 
diff --git a/source3/include/smb.h b/source3/include/smb.h
index 88d9a9de09..21bf346c62 100644
--- a/source3/include/smb.h
+++ b/source3/include/smb.h
@@ -1115,6 +1115,7 @@ enum case_handling {CASE_LOWER,CASE_UPPER};
 #define KANJI_CODEPAGE 932
 #define HANGUL_CODEPAGE 949
 #define BIG5_CODEPAGE 950
+#define SIMPLIFIED_CHINESE_CODEPAGE 936
 
 #ifdef KANJI
 /* 
diff --git a/source3/lib/kanji.c b/source3/lib/kanji.c
index 994cf6e1bd..e430c1a986 100644
--- a/source3/lib/kanji.c
+++ b/source3/lib/kanji.c
@@ -54,11 +54,13 @@ char *(*multibyte_strtok)(char *, char *) = (char *(*)(char *, char *)) strtok;
  * charcnv.c.
  */
 
-static int not_multibyte_char(char);
+static int skip_non_multibyte_char(char);
+static BOOL not_multibyte_char_1(char);
 
 char *(*_dos_to_unix)(char *, BOOL) = dos2unix_format;
 char *(*_unix_to_dos)(char *, BOOL) = unix2dos_format;
-int (*is_multibyte_char)(char) = not_multibyte_char;
+int (*_skip_multibyte_char)(char) = skip_non_multibyte_char;
+BOOL (*is_multibyte_char_1)(char) = not_multibyte_char_1;
 
 #else /* KANJI */
 
@@ -68,11 +70,13 @@ int (*is_multibyte_char)(char) = not_multibyte_char;
  */
 
 static char *sj_to_sj(char *from, BOOL overwrite);
-static int kanji_multibyte_char(char);
+static int skip_kanji_multibyte_char(char);
+static BOOL is_kanji_multibyte_char_1(char);
 
 char *(*_dos_to_unix)(char *, BOOL) = sj_to_sj;
 char *(*_unix_to_dos)(char *, BOOL) = sj_to_sj;
-int (*is_multibyte_char)(char) = kanji_multibyte_char;
+int (*_skip_multibyte_char)(char) = skip_kanji_multibyte_char;
+BOOL (*is_multibyte_char_1)(char) = is_kanji_multibyte_char_1;
 
 #endif /* KANJI */
 
@@ -186,10 +190,10 @@ static char *sj_strrchr(char *s, int c)
 }
 
 /*******************************************************************
- Kanji multibyte char function.
+ Kanji multibyte char skip function.
 *******************************************************************/
    
-static int kanji_multibyte_char(char c)
+static int skip_kanji_multibyte_char(char c)
 {
   if(is_shift_jis(c)) {
     return 2;
@@ -200,128 +204,60 @@ static int kanji_multibyte_char(char c)
 }
 
 /*******************************************************************
-  Hangul (Korean - code page 949) functions
-********************************************************************/
-/*******************************************************************
- search token from S1 separated any char of S2
- S1 contains hangul chars.
-********************************************************************/
-static char *hangul_strtok(char *s1, char *s2)
+ Kanji multibyte char identification.
+*******************************************************************/
+   
+static BOOL is_kanji_multibyte_char_1(char c)
 {
-    static char *s = NULL;
-    char *q;
-    if (!s1) {
-        if (!s) {
-            return NULL;
-        }
-        s1 = s;
-    }
-    for (q = s1; *s1; ) {
-        if (is_hangul (*s1)) {
-            s1 += 2;
-        } else {
-            char *p = strchr (s2, *s1);
-            if (p) {
-                if (s1 != q) {
-                    s = s1 + 1;
-                    *s1 = '\0';
-                    return q;
-                }
-                q = s1 + 1;
-            }
-            s1++;
-        }
-    }
-    s = NULL;
-    if (*q) {
-        return q;
-    }
-    return NULL;
+  return is_shift_jis(c);
 }
 
 /*******************************************************************
- search string S2 from S1
- S1 contains hangul chars.
+ The following functions are the only ones needed to do multibyte
+ support for Hangul, Big5 and Simplified Chinese. Most of the
+ real work for these codepages is done in the generic multibyte
+ functions. The only reason these functions are needed at all
+ is that the is_xxx(c) calls are really preprocessor macros.
 ********************************************************************/
-static char *hangul_strstr(char *s1, char *s2)
-{
-    int len = strlen ((char *) s2);
-    if (!*s2)
-        return (char *) s1;
-    for (;*s1;) {
-        if (*s1 == *s2) {
-            if (strncmp (s1, s2, len) == 0)
-                return (char *) s1;
-        }
-        if (is_hangul (*s1)) {
-            s1 += 2;
-        } else {
-            s1++;
-        }
-    }
-    return 0;
-}
 
 /*******************************************************************
- Search char C from beginning of S.
- S contains hangul chars.
+  Hangul (Korean - code page 949) function.
 ********************************************************************/
-static char *hangul_strchr (char *s, int c)
+
+static BOOL hangul_is_multibyte_char_1(char c)
 {
-    for (; *s; ) {
-        if (*s == c)
-            return (char *) s;
-        if (is_hangul (*s)) {
-            s += 2;
-        } else {
-            s++;
-        }
-    }
-    return 0;
+  return is_hangul(c);
 }
 
 /*******************************************************************
- Search char C end of S.
- S contains hangul chars.
+  Big5 Traditional Chinese (code page 950) function.
 ********************************************************************/
-static char *hangul_strrchr(char *s, int c)
+
+static BOOL big5_is_multibyte_char_1(char c)
 {
-    char *q;
- 
-    for (q = 0; *s; ) {
-        if (*s == c) {
-            q = (char *) s;
-        }
-        if (is_hangul (*s)) {
-            s += 2;
-        } else {
-            s++;
-        }
-    }
-    return q;
+  return is_big5_c1(c);
 }
 
 /*******************************************************************
- Hangul multibyte char function.
-*******************************************************************/
+  Simplified Chinese (code page 936) function.
+********************************************************************/
 
-static int hangul_multibyte_char(char c)
+static BOOL simpch_is_multibyte_char_1(char c)
 {
-  if( is_hangul(c)) {
-    return 2;
-  }
-  return 0;
+  return is_simpch_c1(c);
 }
 
 /*******************************************************************
-  Big5 Traditional Chinese (code page 950) functions
+  Generic multibyte functions - used by Hangul, Big5 and Simplified
+  Chinese codepages.
 ********************************************************************/
 
 /*******************************************************************
  search token from S1 separated any char of S2
- S1 contains big5 chars.
+ S1 contains generic multibyte chars.
 ********************************************************************/
-static char *big5_strtok(char *s1, char *s2)
+
+static char *generic_multibyte_strtok(char *s1, char *s2)
 {
     static char *s = NULL;
     char *q;
@@ -332,7 +268,7 @@ static char *big5_strtok(char *s1, char *s2)
         s1 = s;
     }
     for (q = s1; *s1; ) {
-        if (is_big5_c1 (*s1)) {
+        if ((*is_multibyte_char_1)(*s1)) {
             s1 += 2;
         } else {
             char *p = strchr (s2, *s1);
@@ -356,9 +292,10 @@ static char *big5_strtok(char *s1, char *s2)
 
 /*******************************************************************
  search string S2 from S1
- S1 contains big5 chars.
+ S1 contains generic multibyte chars.
 ********************************************************************/
-static char *big5_strstr(char *s1, char *s2)
+
+static char *generic_multibyte_strstr(char *s1, char *s2)
 {
     int len = strlen ((char *) s2);
     if (!*s2)
@@ -368,7 +305,7 @@ static char *big5_strstr(char *s1, char *s2)
             if (strncmp (s1, s2, len) == 0)
                 return (char *) s1;
         }
-        if (is_big5_c1 (*s1)) {
+        if ((*is_multibyte_char_1)(*s1)) {
             s1 += 2;
         } else {
             s1++;
@@ -379,14 +316,15 @@ static char *big5_strstr(char *s1, char *s2)
 
 /*******************************************************************
  Search char C from beginning of S.
- S contains big5 chars.
+ S contains generic multibyte chars.
 ********************************************************************/
-static char *big5_strchr (char *s, int c)
+
+static char *generic_multibyte_strchr(char *s, int c)
 {
     for (; *s; ) {
         if (*s == c)
             return (char *) s;
-        if (is_big5_c1 (*s)) {
+        if ((*is_multibyte_char_1)(*s)) {
             s += 2;
         } else {
             s++;
@@ -397,9 +335,10 @@ static char *big5_strchr (char *s, int c)
 
 /*******************************************************************
  Search char C end of S.
- S contains big5 chars.
+ S contains generic multibyte chars.
 ********************************************************************/
-static char *big5_strrchr(char *s, int c)
+
+static char *generic_multibyte_strrchr(char *s, int c)
 {
     char *q;
  
@@ -407,7 +346,7 @@ static char *big5_strrchr(char *s, int c)
         if (*s == c) {
             q = (char *) s;
         }
-        if (is_big5_c1 (*s)) {
+        if ((*is_multibyte_char_1)(*s)) {
             s += 2;
         } else {
             s++;
@@ -417,12 +356,12 @@ static char *big5_strrchr(char *s, int c)
 }
 
 /*******************************************************************
- Big5 multibyte char function.
+ Generic multibyte char skip function.
 *******************************************************************/
 
-static int big5_multibyte_char(char c)
+static int skip_generic_multibyte_char(char c)
 {
-  if( is_big5_c1(c)) {
+  if( (*is_multibyte_char_1)(c)) {
     return 2;
   }
   return 0;
@@ -1091,9 +1030,10 @@ static void setup_string_function(int codes)
     }
 }
 
-/*
- * Interpret coding system.
- */
+/************************************************************************
+ Interpret coding system.
+************************************************************************/
+
 void interpret_coding_system(char *str)
 {
     int codes = UNKNOWN_CODE;
@@ -1191,11 +1131,20 @@ void interpret_coding_system(char *str)
  Non multibyte char function.
 *******************************************************************/
    
-static int not_multibyte_char(char c)
+static int skip_non_multibyte_char(char c)
 {
   return 0;
 }
 
+/*******************************************************************
+ Function that always says a character isn't multibyte.
+*******************************************************************/
+
+static BOOL not_multibyte_char_1(char c)
+{
+  return False;
+}
+
 /*******************************************************************
  Setup the function pointers for the functions that are replaced
  when multi-byte codepages are used.
@@ -1214,28 +1163,43 @@ void initialize_multibyte_vectors( int client_codepage)
     multibyte_strrchr = (char *(*)(char *, int )) sj_strrchr;
     multibyte_strstr = (char *(*)(char *, char *)) sj_strstr;
     multibyte_strtok = (char *(*)(char *, char *)) sj_strtok;
-    is_multibyte_char = kanji_multibyte_char;
+    _skip_multibyte_char = skip_kanji_multibyte_char;
+    is_multibyte_char_1 = is_kanji_multibyte_char_1;
     break;
   case HANGUL_CODEPAGE:
-    multibyte_strchr = (char *(*)(char *, int )) hangul_strchr;
-    multibyte_strrchr = (char *(*)(char *, int )) hangul_strrchr;
-    multibyte_strstr = (char *(*)(char *, char *)) hangul_strstr;
-    multibyte_strtok = (char *(*)(char *, char *)) hangul_strtok;
-    is_multibyte_char = hangul_multibyte_char;
+    multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
+    multibyte_strrchr = (char *(*)(char *, int )) generic_multibyte_strrchr;
+    multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr;
+    multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
+    _skip_multibyte_char = skip_generic_multibyte_char;
+    is_multibyte_char_1 = hangul_is_multibyte_char_1;
     break;
   case BIG5_CODEPAGE:
-    multibyte_strchr = (char *(*)(char *, int )) big5_strchr;
-    multibyte_strrchr = (char *(*)(char *, int )) big5_strrchr;
-    multibyte_strstr = (char *(*)(char *, char *)) big5_strstr;
-    multibyte_strtok = (char *(*)(char *, char *)) big5_strtok;
-    is_multibyte_char = big5_multibyte_char;
+    multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
+    multibyte_strrchr = (char *(*)(char *, int )) generic_multibyte_strrchr;
+    multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr;
+    multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
+    _skip_multibyte_char = skip_generic_multibyte_char;
+    is_multibyte_char_1 = big5_is_multibyte_char_1;
+    break;
+  case SIMPLIFIED_CHINESE_CODEPAGE:
+    multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
+    multibyte_strrchr = (char *(*)(char *, int )) generic_multibyte_strrchr;
+    multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr;
+    multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
+    _skip_multibyte_char = skip_generic_multibyte_char;
+    is_multibyte_char_1 = simpch_is_multibyte_char_1;
     break;
+  /*
+   * Single char size code page.
+   */
   default:
     multibyte_strchr = (char *(*)(char *, int )) strchr;
     multibyte_strrchr = (char *(*)(char *, int )) strrchr;
     multibyte_strstr = (char *(*)(char *, char *)) strstr;
     multibyte_strtok = (char *(*)(char *, char *)) strtok;
-    is_multibyte_char = not_multibyte_char;
+    _skip_multibyte_char = skip_non_multibyte_char;
+    is_multibyte_char_1 = not_multibyte_char_1;
     break; 
   }
 }
-- 
cgit