From 3339f170c2d8a40c8941555b3ea0ad8b8b2f457f Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 9 Apr 1998 00:07:17 +0000 Subject: Added codepage 936 (simplified Chinese). In doing so I realized that much code was being duplicated between Hangul, Big5 and Simplified Chinese - so I re-arranged kanji.[ch] to go through generic functions for all multibyte characters that can be identified by a single code range (not Kanji - but all the others). Jeremy. (This used to be commit b6c965c396eb3d4f0e6dfd863e70b28390c59f66) --- source3/codepages/codepage_def.936 | 24 ++++ source3/include/kanji.h | 6 +- source3/include/smb.h | 1 + source3/lib/kanji.c | 224 +++++++++++++++---------------------- 4 files changed, 123 insertions(+), 132 deletions(-) create mode 100644 source3/codepages/codepage_def.936 (limited to 'source3') diff --git a/source3/codepages/codepage_def.936 b/source3/codepages/codepage_def.936 new file mode 100644 index 0000000000..25a317ffea --- /dev/null +++ b/source3/codepages/codepage_def.936 @@ -0,0 +1,24 @@ +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# + +# Codepage definition file for IBM Code Page 936 - MS-DOS Simplified Chinese. +# defines lower->upper mapping. 
+# Written by Jeremy Allison + +# The columns are : +# lower upper map upper to lower map lower to upper +# +# This file is intentionally empty - no mappings are done. diff --git a/source3/include/kanji.h b/source3/include/kanji.h index 302db13a27..db3731e41b 100644 --- a/source3/include/kanji.h +++ b/source3/include/kanji.h @@ -109,6 +109,9 @@ /* For traditional Chinese (known as Big5 encoding - code page 950). */ #define is_big5_c1(c) ((0xa1 <= ((unsigned char) (c)) && ((unsigned char) (c)) <= 0xf9)) +/* For simplified Chinese (code page - 936). */ +#define is_simpch_c1(c) ((0xa1 <= ((unsigned char) (c)) && ((unsigned char) (c)) <= 0xf7)) + #else /* not _KANJI_C_ */ /* @@ -143,6 +146,7 @@ extern char *(*multibyte_strtok)(char *s1, char *s2); extern char *(*_dos_to_unix)(char *str, BOOL overwrite); extern char *(*_unix_to_dos)(char *str, BOOL overwrite); extern BOOL (*is_multibyte_char)(char c); +extern int (*_skip_multibyte_char)(char c); #define strchr(s1, c) ((*multibyte_strchr)((s1), (c))) #define strrchr(s1, c) ((*multibyte_strrchr)((s1), (c))) @@ -150,7 +154,7 @@ extern BOOL (*is_multibyte_char)(char c); #define strtok(s1, s2) ((*multibyte_strtok)((s1), (s2))) #define dos_to_unix(x,y) ((*_dos_to_unix)((x), (y))) #define unix_to_dos(x,y) ((*_unix_to_dos)((x), (y))) -#define skip_multibyte_char(c) ((*is_multibyte_char)((c))) +#define skip_multibyte_char(c) ((*_skip_multibyte_char)((c))) #endif /* _KANJI_C_ */ diff --git a/source3/include/smb.h b/source3/include/smb.h index 88d9a9de09..21bf346c62 100644 --- a/source3/include/smb.h +++ b/source3/include/smb.h @@ -1115,6 +1115,7 @@ enum case_handling {CASE_LOWER,CASE_UPPER}; #define KANJI_CODEPAGE 932 #define HANGUL_CODEPAGE 949 #define BIG5_CODEPAGE 950 +#define SIMPLIFIED_CHINESE_CODEPAGE 936 #ifdef KANJI /* diff --git a/source3/lib/kanji.c b/source3/lib/kanji.c index 994cf6e1bd..e430c1a986 100644 --- a/source3/lib/kanji.c +++ b/source3/lib/kanji.c @@ -54,11 +54,13 @@ char *(*multibyte_strtok)(char *, char *) = 
(char *(*)(char *, char *)) strtok; * charcnv.c. */ -static int not_multibyte_char(char); +static int skip_non_multibyte_char(char); +static BOOL not_multibyte_char_1(char); char *(*_dos_to_unix)(char *, BOOL) = dos2unix_format; char *(*_unix_to_dos)(char *, BOOL) = unix2dos_format; -int (*is_multibyte_char)(char) = not_multibyte_char; +int (*_skip_multibyte_char)(char) = skip_non_multibyte_char; +BOOL (*is_multibyte_char_1)(char) = not_multibyte_char_1; #else /* KANJI */ @@ -68,11 +70,13 @@ int (*is_multibyte_char)(char) = not_multibyte_char; */ static char *sj_to_sj(char *from, BOOL overwrite); -static int kanji_multibyte_char(char); +static int skip_kanji_multibyte_char(char); +static BOOL kanji_multibyte_char_1(char); char *(*_dos_to_unix)(char *, BOOL) = sj_to_sj; char *(*_unix_to_dos)(char *, BOOL) = sj_to_sj; -int (*is_multibyte_char)(char) = kanji_multibyte_char; +int (*_skip_multibyte_char)(char) = skip_kanji_multibyte_char; +int (*is_multibyte_char_1)(char) = is_kanji_multibyte_char_1; #endif /* KANJI */ @@ -186,10 +190,10 @@ static char *sj_strrchr(char *s, int c) } /******************************************************************* - Kanji multibyte char function. + Kanji multibyte char skip function. *******************************************************************/ -static int kanji_multibyte_char(char c) +static int skip_kanji_multibyte_char(char c) { if(is_shift_jis(c)) { return 2; @@ -200,128 +204,60 @@ static int kanji_multibyte_char(char c) } /******************************************************************* - Hangul (Korean - code page 949) functions -********************************************************************/ -/******************************************************************* - search token from S1 separated any char of S2 - S1 contains hangul chars. -********************************************************************/ -static char *hangul_strtok(char *s1, char *s2) + Kanji multibyte char identification. 
+*******************************************************************/ + +static BOOL is_kanji_multibyte_char_1(char c) { - static char *s = NULL; - char *q; - if (!s1) { - if (!s) { - return NULL; - } - s1 = s; - } - for (q = s1; *s1; ) { - if (is_hangul (*s1)) { - s1 += 2; - } else { - char *p = strchr (s2, *s1); - if (p) { - if (s1 != q) { - s = s1 + 1; - *s1 = '\0'; - return q; - } - q = s1 + 1; - } - s1++; - } - } - s = NULL; - if (*q) { - return q; - } - return NULL; + return is_shift_jis(c); } /******************************************************************* - search string S2 from S1 - S1 contains hangul chars. + The following functions are the only ones needed to do multibyte + support for Hangul, Big5 and Simplified Chinese. Most of the + real work for these codepages is done in the generic multibyte + functions. The only reason these functions are needed at all + is that the is_xxx(c) calls are really preprocessor macros. ********************************************************************/ -static char *hangul_strstr(char *s1, char *s2) -{ - int len = strlen ((char *) s2); - if (!*s2) - return (char *) s1; - for (;*s1;) { - if (*s1 == *s2) { - if (strncmp (s1, s2, len) == 0) - return (char *) s1; - } - if (is_hangul (*s1)) { - s1 += 2; - } else { - s1++; - } - } - return 0; -} /******************************************************************* - Search char C from beginning of S. - S contains hangul chars. + Hangul (Korean - code page 949) function. ********************************************************************/ -static char *hangul_strchr (char *s, int c) + +static BOOL hangul_is_multibyte_char_1(char c) { - for (; *s; ) { - if (*s == c) - return (char *) s; - if (is_hangul (*s)) { - s += 2; - } else { - s++; - } - } - return 0; + return is_hangul(c); } /******************************************************************* - Search char C end of S. - S contains hangul chars. + Big5 Traditional Chinese (code page 950) function. 
********************************************************************/ -static char *hangul_strrchr(char *s, int c) + +static BOOL big5_is_multibyte_char_1(char c) { - char *q; - - for (q = 0; *s; ) { - if (*s == c) { - q = (char *) s; - } - if (is_hangul (*s)) { - s += 2; - } else { - s++; - } - } - return q; + return is_big5_c1(c); } /******************************************************************* - Hangul multibyte char function. -*******************************************************************/ + Simplified Chinese (code page 936) function. +********************************************************************/ -static int hangul_multibyte_char(char c) +static BOOL simpch_is_multibyte_char_1(char c) { - if( is_hangul(c)) { - return 2; - } - return 0; + return is_simpch_c1(c); } /******************************************************************* - Big5 Traditional Chinese (code page 950) functions + Generic multibyte functions - used by Hangul, Big5 and Simplified + Chinese codepages. ********************************************************************/ /******************************************************************* search token from S1 separated any char of S2 - S1 contains big5 chars. + S1 contains generic multibyte chars. ********************************************************************/ -static char *big5_strtok(char *s1, char *s2) + +static char *generic_multibyte_strtok(char *s1, char *s2) { static char *s = NULL; char *q; @@ -332,7 +268,7 @@ static char *big5_strtok(char *s1, char *s2) s1 = s; } for (q = s1; *s1; ) { - if (is_big5_c1 (*s1)) { + if ((*is_multibyte_char_1)(*s1)) { s1 += 2; } else { char *p = strchr (s2, *s1); @@ -356,9 +292,10 @@ static char *big5_strtok(char *s1, char *s2) /******************************************************************* search string S2 from S1 - S1 contains big5 chars. + S1 contains generic multibyte chars. 
********************************************************************/ -static char *big5_strstr(char *s1, char *s2) + +static char *generic_multibyte_strstr(char *s1, char *s2) { int len = strlen ((char *) s2); if (!*s2) @@ -368,7 +305,7 @@ static char *big5_strstr(char *s1, char *s2) if (strncmp (s1, s2, len) == 0) return (char *) s1; } - if (is_big5_c1 (*s1)) { + if ((*is_multibyte_char_1)(*s1)) { s1 += 2; } else { s1++; @@ -379,14 +316,15 @@ static char *big5_strstr(char *s1, char *s2) /******************************************************************* Search char C from beginning of S. - S contains big5 chars. + S contains generic multibyte chars. ********************************************************************/ -static char *big5_strchr (char *s, int c) + +static char *generic_multibyte_strchr(char *s, int c) { for (; *s; ) { if (*s == c) return (char *) s; - if (is_big5_c1 (*s)) { + if ((*is_multibyte_char_1)(*s)) { s += 2; } else { s++; @@ -397,9 +335,10 @@ static char *big5_strchr (char *s, int c) /******************************************************************* Search char C end of S. - S contains big5 chars. + S contains generic multibyte chars. ********************************************************************/ -static char *big5_strrchr(char *s, int c) + +static char *generic_multibyte_strrchr(char *s, int c) { char *q; @@ -407,7 +346,7 @@ static char *big5_strrchr(char *s, int c) if (*s == c) { q = (char *) s; } - if (is_big5_c1 (*s)) { + if ((*is_multibyte_char_1)(*s)) { s += 2; } else { s++; @@ -417,12 +356,12 @@ static char *big5_strrchr(char *s, int c) } /******************************************************************* - Big5 multibyte char function. + Generic multibyte char skip function. 
*******************************************************************/ -static int big5_multibyte_char(char c) +static int skip_generic_multibyte_char(char c) { - if( is_big5_c1(c)) { + if( (*is_multibyte_char_1)(c)) { return 2; } return 0; @@ -1091,9 +1030,10 @@ static void setup_string_function(int codes) } } -/* - * Interpret coding system. - */ +/************************************************************************ + Interpret coding system. +************************************************************************/ + void interpret_coding_system(char *str) { int codes = UNKNOWN_CODE; @@ -1191,11 +1131,20 @@ void interpret_coding_system(char *str) Non multibyte char function. *******************************************************************/ -static int not_multibyte_char(char c) +static int skip_non_multibyte_char(char c) { return 0; } +/******************************************************************* + Function that always says a character isn't multibyte. +*******************************************************************/ + +static BOOL not_multibyte_char_1(char c) +{ + return False; +} + /******************************************************************* Setup the function pointers for the functions that are replaced when multi-byte codepages are used. 
@@ -1214,28 +1163,41 @@ void initialize_multibyte_vectors( int client_codepage) multibyte_strrchr = (char *(*)(char *, int )) sj_strrchr; multibyte_strstr = (char *(*)(char *, char *)) sj_strstr; multibyte_strtok = (char *(*)(char *, char *)) sj_strtok; - is_multibyte_char = kanji_multibyte_char; + _skip_multibyte_char = skip_kanji_multibyte_char; + is_multibyte_char_1 = is_kanji_multibyte_char_1; break; case HANGUL_CODEPAGE: - multibyte_strchr = (char *(*)(char *, int )) hangul_strchr; - multibyte_strrchr = (char *(*)(char *, int )) hangul_strrchr; - multibyte_strstr = (char *(*)(char *, char *)) hangul_strstr; - multibyte_strtok = (char *(*)(char *, char *)) hangul_strtok; - is_multibyte_char = hangul_multibyte_char; - break; + multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr; + multibyte_strrchr = (char *(*)(char *, int )) generic_multibyte_strrchr; + multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr; + multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok; + _skip_multibyte_char = skip_generic_multibyte_char; + is_multibyte_char_1 = hangul_is_multibyte_char_1; case BIG5_CODEPAGE: - multibyte_strchr = (char *(*)(char *, int )) big5_strchr; - multibyte_strrchr = (char *(*)(char *, int )) big5_strrchr; - multibyte_strstr = (char *(*)(char *, char *)) big5_strstr; - multibyte_strtok = (char *(*)(char *, char *)) big5_strtok; - is_multibyte_char = big5_multibyte_char; + multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr; + multibyte_strrchr = (char *(*)(char *, int )) generic_multibyte_strrchr; + multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr; + multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok; + _skip_multibyte_char = skip_generic_multibyte_char; + is_multibyte_char_1 = big5_is_multibyte_char_1; + case SIMPLIFIED_CHINESE_CODEPAGE: + multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr; + multibyte_strrchr = (char *(*)(char 
*, int )) generic_multibyte_strrchr; + multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr; + multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok; + _skip_multibyte_char = skip_generic_multibyte_char; + is_multibyte_char_1 = simpch_is_multibyte_char_1; break; + /* + * Single char size code page. + */ default: multibyte_strchr = (char *(*)(char *, int )) strchr; multibyte_strrchr = (char *(*)(char *, int )) strrchr; multibyte_strstr = (char *(*)(char *, char *)) strstr; multibyte_strtok = (char *(*)(char *, char *)) strtok; - is_multibyte_char = not_multibyte_char; + _skip_multibyte_char = skip_non_multibyte_char; + is_multibyte_char_1 = not_multibyte_char_1; break; } } -- cgit