summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeremy Allison <jra@samba.org>1998-04-09 00:07:17 +0000
committerJeremy Allison <jra@samba.org>1998-04-09 00:07:17 +0000
commit3339f170c2d8a40c8941555b3ea0ad8b8b2f457f (patch)
treeca4c9947eb2cd45e8c07bf58e7664ec240a9bd1e
parent1af95effe96284c30c0f3f1782677e8f1873ceca (diff)
downloadsamba-3339f170c2d8a40c8941555b3ea0ad8b8b2f457f.tar.gz
samba-3339f170c2d8a40c8941555b3ea0ad8b8b2f457f.tar.bz2
samba-3339f170c2d8a40c8941555b3ea0ad8b8b2f457f.zip
Added codepage 936 (Simplified Chinese).
In doing so I realized that much code was being duplicated between Hangul, Big5 and Simplified Chinese - so I re-arranged kanji.[ch] to go through generic functions for all multibyte characters that can be identified by a single code range (not Kanji - but all the others). Jeremy. (This used to be commit b6c965c396eb3d4f0e6dfd863e70b28390c59f66)
-rw-r--r--source3/codepages/codepage_def.93624
-rw-r--r--source3/include/kanji.h6
-rw-r--r--source3/include/smb.h1
-rw-r--r--source3/lib/kanji.c224
4 files changed, 123 insertions, 132 deletions
diff --git a/source3/codepages/codepage_def.936 b/source3/codepages/codepage_def.936
new file mode 100644
index 0000000000..25a317ffea
--- /dev/null
+++ b/source3/codepages/codepage_def.936
@@ -0,0 +1,24 @@
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+
+# Codepage definition file for IBM Code Page 936 - MS-DOS Simplified Chinese.
+# defines lower->upper mapping.
+# Written by Jeremy Allison <jallison@whistle.com>
+
+# The columns are :
+# lower upper map upper to lower map lower to upper
+#
+# This file is intentionally empty - no mappings are done.
diff --git a/source3/include/kanji.h b/source3/include/kanji.h
index 302db13a27..db3731e41b 100644
--- a/source3/include/kanji.h
+++ b/source3/include/kanji.h
@@ -109,6 +109,9 @@
/* For traditional Chinese (known as Big5 encoding - code page 950). */
#define is_big5_c1(c) ((0xa1 <= ((unsigned char) (c)) && ((unsigned char) (c)) <= 0xf9))
+/* For simplified Chinese (code page - 936). */
+#define is_simpch_c1(c) ((0xa1 <= ((unsigned char) (c)) && ((unsigned char) (c)) <= 0xf7))
+
#else /* not _KANJI_C_ */
/*
@@ -143,6 +146,7 @@ extern char *(*multibyte_strtok)(char *s1, char *s2);
extern char *(*_dos_to_unix)(char *str, BOOL overwrite);
extern char *(*_unix_to_dos)(char *str, BOOL overwrite);
extern BOOL (*is_multibyte_char)(char c);
+extern int (*_skip_multibyte_char)(char c);
#define strchr(s1, c) ((*multibyte_strchr)((s1), (c)))
#define strrchr(s1, c) ((*multibyte_strrchr)((s1), (c)))
@@ -150,7 +154,7 @@ extern BOOL (*is_multibyte_char)(char c);
#define strtok(s1, s2) ((*multibyte_strtok)((s1), (s2)))
#define dos_to_unix(x,y) ((*_dos_to_unix)((x), (y)))
#define unix_to_dos(x,y) ((*_unix_to_dos)((x), (y)))
-#define skip_multibyte_char(c) ((*is_multibyte_char)((c)))
+#define skip_multibyte_char(c) ((*_skip_multibyte_char)((c)))
#endif /* _KANJI_C_ */
diff --git a/source3/include/smb.h b/source3/include/smb.h
index 88d9a9de09..21bf346c62 100644
--- a/source3/include/smb.h
+++ b/source3/include/smb.h
@@ -1115,6 +1115,7 @@ enum case_handling {CASE_LOWER,CASE_UPPER};
#define KANJI_CODEPAGE 932
#define HANGUL_CODEPAGE 949
#define BIG5_CODEPAGE 950
+#define SIMPLIFIED_CHINESE_CODEPAGE 936
#ifdef KANJI
/*
diff --git a/source3/lib/kanji.c b/source3/lib/kanji.c
index 994cf6e1bd..e430c1a986 100644
--- a/source3/lib/kanji.c
+++ b/source3/lib/kanji.c
@@ -54,11 +54,13 @@ char *(*multibyte_strtok)(char *, char *) = (char *(*)(char *, char *)) strtok;
* charcnv.c.
*/
-static int not_multibyte_char(char);
+static int skip_non_multibyte_char(char);
+static BOOL not_multibyte_char_1(char);
char *(*_dos_to_unix)(char *, BOOL) = dos2unix_format;
char *(*_unix_to_dos)(char *, BOOL) = unix2dos_format;
-int (*is_multibyte_char)(char) = not_multibyte_char;
+int (*_skip_multibyte_char)(char) = skip_non_multibyte_char;
+BOOL (*is_multibyte_char_1)(char) = not_multibyte_char_1;
#else /* KANJI */
@@ -68,11 +70,13 @@ int (*is_multibyte_char)(char) = not_multibyte_char;
*/
static char *sj_to_sj(char *from, BOOL overwrite);
-static int kanji_multibyte_char(char);
+static int skip_kanji_multibyte_char(char);
+static BOOL is_kanji_multibyte_char_1(char);
char *(*_dos_to_unix)(char *, BOOL) = sj_to_sj;
char *(*_unix_to_dos)(char *, BOOL) = sj_to_sj;
-int (*is_multibyte_char)(char) = kanji_multibyte_char;
+int (*_skip_multibyte_char)(char) = skip_kanji_multibyte_char;
+BOOL (*is_multibyte_char_1)(char) = is_kanji_multibyte_char_1;
#endif /* KANJI */
@@ -186,10 +190,10 @@ static char *sj_strrchr(char *s, int c)
}
/*******************************************************************
- Kanji multibyte char function.
+ Kanji multibyte char skip function.
*******************************************************************/
-static int kanji_multibyte_char(char c)
+static int skip_kanji_multibyte_char(char c)
{
if(is_shift_jis(c)) {
return 2;
@@ -200,128 +204,60 @@ static int kanji_multibyte_char(char c)
}
/*******************************************************************
- Hangul (Korean - code page 949) functions
-********************************************************************/
-/*******************************************************************
- search token from S1 separated any char of S2
- S1 contains hangul chars.
-********************************************************************/
-static char *hangul_strtok(char *s1, char *s2)
+ Kanji multibyte char identification.
+*******************************************************************/
+
+static BOOL is_kanji_multibyte_char_1(char c)
{
- static char *s = NULL;
- char *q;
- if (!s1) {
- if (!s) {
- return NULL;
- }
- s1 = s;
- }
- for (q = s1; *s1; ) {
- if (is_hangul (*s1)) {
- s1 += 2;
- } else {
- char *p = strchr (s2, *s1);
- if (p) {
- if (s1 != q) {
- s = s1 + 1;
- *s1 = '\0';
- return q;
- }
- q = s1 + 1;
- }
- s1++;
- }
- }
- s = NULL;
- if (*q) {
- return q;
- }
- return NULL;
+ return is_shift_jis(c);
}
/*******************************************************************
- search string S2 from S1
- S1 contains hangul chars.
+ The following functions are the only ones needed to do multibyte
+ support for Hangul, Big5 and Simplified Chinese. Most of the
+ real work for these codepages is done in the generic multibyte
+ functions. The only reason these functions are needed at all
+ is that the is_xxx(c) calls are really preprocessor macros.
********************************************************************/
-static char *hangul_strstr(char *s1, char *s2)
-{
- int len = strlen ((char *) s2);
- if (!*s2)
- return (char *) s1;
- for (;*s1;) {
- if (*s1 == *s2) {
- if (strncmp (s1, s2, len) == 0)
- return (char *) s1;
- }
- if (is_hangul (*s1)) {
- s1 += 2;
- } else {
- s1++;
- }
- }
- return 0;
-}
/*******************************************************************
- Search char C from beginning of S.
- S contains hangul chars.
+ Hangul (Korean - code page 949) function.
********************************************************************/
-static char *hangul_strchr (char *s, int c)
+
+static BOOL hangul_is_multibyte_char_1(char c)
{
- for (; *s; ) {
- if (*s == c)
- return (char *) s;
- if (is_hangul (*s)) {
- s += 2;
- } else {
- s++;
- }
- }
- return 0;
+ return is_hangul(c);
}
/*******************************************************************
- Search char C end of S.
- S contains hangul chars.
+ Big5 Traditional Chinese (code page 950) function.
********************************************************************/
-static char *hangul_strrchr(char *s, int c)
+
+static BOOL big5_is_multibyte_char_1(char c)
{
- char *q;
-
- for (q = 0; *s; ) {
- if (*s == c) {
- q = (char *) s;
- }
- if (is_hangul (*s)) {
- s += 2;
- } else {
- s++;
- }
- }
- return q;
+ return is_big5_c1(c);
}
/*******************************************************************
- Hangul multibyte char function.
-*******************************************************************/
+ Simplified Chinese (code page 936) function.
+********************************************************************/
-static int hangul_multibyte_char(char c)
+static BOOL simpch_is_multibyte_char_1(char c)
{
- if( is_hangul(c)) {
- return 2;
- }
- return 0;
+ return is_simpch_c1(c);
}
/*******************************************************************
- Big5 Traditional Chinese (code page 950) functions
+ Generic multibyte functions - used by Hangul, Big5 and Simplified
+ Chinese codepages.
********************************************************************/
/*******************************************************************
search token from S1 separated any char of S2
- S1 contains big5 chars.
+ S1 contains generic multibyte chars.
********************************************************************/
-static char *big5_strtok(char *s1, char *s2)
+
+static char *generic_multibyte_strtok(char *s1, char *s2)
{
static char *s = NULL;
char *q;
@@ -332,7 +268,7 @@ static char *big5_strtok(char *s1, char *s2)
s1 = s;
}
for (q = s1; *s1; ) {
- if (is_big5_c1 (*s1)) {
+ if ((*is_multibyte_char_1)(*s1)) {
s1 += 2;
} else {
char *p = strchr (s2, *s1);
@@ -356,9 +292,10 @@ static char *big5_strtok(char *s1, char *s2)
/*******************************************************************
search string S2 from S1
- S1 contains big5 chars.
+ S1 contains generic multibyte chars.
********************************************************************/
-static char *big5_strstr(char *s1, char *s2)
+
+static char *generic_multibyte_strstr(char *s1, char *s2)
{
int len = strlen ((char *) s2);
if (!*s2)
@@ -368,7 +305,7 @@ static char *big5_strstr(char *s1, char *s2)
if (strncmp (s1, s2, len) == 0)
return (char *) s1;
}
- if (is_big5_c1 (*s1)) {
+ if ((*is_multibyte_char_1)(*s1)) {
s1 += 2;
} else {
s1++;
@@ -379,14 +316,15 @@ static char *big5_strstr(char *s1, char *s2)
/*******************************************************************
Search char C from beginning of S.
- S contains big5 chars.
+ S contains generic multibyte chars.
********************************************************************/
-static char *big5_strchr (char *s, int c)
+
+static char *generic_multibyte_strchr(char *s, int c)
{
for (; *s; ) {
if (*s == c)
return (char *) s;
- if (is_big5_c1 (*s)) {
+ if ((*is_multibyte_char_1)(*s)) {
s += 2;
} else {
s++;
@@ -397,9 +335,10 @@ static char *big5_strchr (char *s, int c)
/*******************************************************************
Search char C end of S.
- S contains big5 chars.
+ S contains generic multibyte chars.
********************************************************************/
-static char *big5_strrchr(char *s, int c)
+
+static char *generic_multibyte_strrchr(char *s, int c)
{
char *q;
@@ -407,7 +346,7 @@ static char *big5_strrchr(char *s, int c)
if (*s == c) {
q = (char *) s;
}
- if (is_big5_c1 (*s)) {
+ if ((*is_multibyte_char_1)(*s)) {
s += 2;
} else {
s++;
@@ -417,12 +356,12 @@ static char *big5_strrchr(char *s, int c)
}
/*******************************************************************
- Big5 multibyte char function.
+ Generic multibyte char skip function.
*******************************************************************/
-static int big5_multibyte_char(char c)
+static int skip_generic_multibyte_char(char c)
{
- if( is_big5_c1(c)) {
+ if( (*is_multibyte_char_1)(c)) {
return 2;
}
return 0;
@@ -1091,9 +1030,10 @@ static void setup_string_function(int codes)
}
}
-/*
- * Interpret coding system.
- */
+/************************************************************************
+ Interpret coding system.
+************************************************************************/
+
void interpret_coding_system(char *str)
{
int codes = UNKNOWN_CODE;
@@ -1191,12 +1131,21 @@ void interpret_coding_system(char *str)
Non multibyte char function.
*******************************************************************/
-static int not_multibyte_char(char c)
+static int skip_non_multibyte_char(char c)
{
return 0;
}
/*******************************************************************
+ Function that always says a character isn't multibyte.
+*******************************************************************/
+
+static BOOL not_multibyte_char_1(char c)
+{
+ return False;
+}
+
+/*******************************************************************
Setup the function pointers for the functions that are replaced
when multi-byte codepages are used.
@@ -1214,28 +1163,43 @@ void initialize_multibyte_vectors( int client_codepage)
multibyte_strrchr = (char *(*)(char *, int )) sj_strrchr;
multibyte_strstr = (char *(*)(char *, char *)) sj_strstr;
multibyte_strtok = (char *(*)(char *, char *)) sj_strtok;
- is_multibyte_char = kanji_multibyte_char;
+ _skip_multibyte_char = skip_kanji_multibyte_char;
+ is_multibyte_char_1 = is_kanji_multibyte_char_1;
break;
case HANGUL_CODEPAGE:
- multibyte_strchr = (char *(*)(char *, int )) hangul_strchr;
- multibyte_strrchr = (char *(*)(char *, int )) hangul_strrchr;
- multibyte_strstr = (char *(*)(char *, char *)) hangul_strstr;
- multibyte_strtok = (char *(*)(char *, char *)) hangul_strtok;
- is_multibyte_char = hangul_multibyte_char;
+ multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
+ multibyte_strrchr = (char *(*)(char *, int )) generic_multibyte_strrchr;
+ multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr;
+ multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
+ _skip_multibyte_char = skip_generic_multibyte_char;
+ is_multibyte_char_1 = hangul_is_multibyte_char_1;
break;
case BIG5_CODEPAGE:
- multibyte_strchr = (char *(*)(char *, int )) big5_strchr;
- multibyte_strrchr = (char *(*)(char *, int )) big5_strrchr;
- multibyte_strstr = (char *(*)(char *, char *)) big5_strstr;
- multibyte_strtok = (char *(*)(char *, char *)) big5_strtok;
- is_multibyte_char = big5_multibyte_char;
+ multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
+ multibyte_strrchr = (char *(*)(char *, int )) generic_multibyte_strrchr;
+ multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr;
+ multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
+ _skip_multibyte_char = skip_generic_multibyte_char;
+ is_multibyte_char_1 = big5_is_multibyte_char_1;
+ break;
+ case SIMPLIFIED_CHINESE_CODEPAGE:
+ multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
+ multibyte_strrchr = (char *(*)(char *, int )) generic_multibyte_strrchr;
+ multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr;
+ multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
+ _skip_multibyte_char = skip_generic_multibyte_char;
+ is_multibyte_char_1 = simpch_is_multibyte_char_1;
break;
+ /*
+ * Single char size code page.
+ */
default:
multibyte_strchr = (char *(*)(char *, int )) strchr;
multibyte_strrchr = (char *(*)(char *, int )) strrchr;
multibyte_strstr = (char *(*)(char *, char *)) strstr;
multibyte_strtok = (char *(*)(char *, char *)) strtok;
- is_multibyte_char = not_multibyte_char;
+ _skip_multibyte_char = skip_non_multibyte_char;
+ is_multibyte_char_1 = not_multibyte_char_1;
break;
}
}