summaryrefslogtreecommitdiff
path: root/source3/lib
diff options
context:
space:
mode:
Diffstat (limited to 'source3/lib')
-rw-r--r--source3/lib/charcnv.c8
-rw-r--r--source3/lib/charset.c7
-rw-r--r--source3/lib/kanji.c390
-rw-r--r--source3/lib/util.c72
4 files changed, 438 insertions, 39 deletions
diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c
index f02fcb2f92..20db58e4ab 100644
--- a/source3/lib/charcnv.c
+++ b/source3/lib/charcnv.c
@@ -164,9 +164,6 @@ char *unix2dos_format(char *str,BOOL overwrite)
if (!mapsinited) initmaps();
- if(lp_client_code_page() == KANJI_CODEPAGE)
- return (*_unix_to_dos)(str, overwrite);
- else {
if (overwrite) {
for (p = str; *p; p++) *p = unix2dos[(unsigned char)*p];
return str;
@@ -175,7 +172,6 @@ char *unix2dos_format(char *str,BOOL overwrite)
*dp = 0;
return cvtbuf;
}
- }
}
/*
@@ -188,9 +184,6 @@ char *dos2unix_format(char *str, BOOL overwrite)
if (!mapsinited) initmaps();
- if(lp_client_code_page() == KANJI_CODEPAGE)
- return (*_dos_to_unix)(str, overwrite);
- else {
if (overwrite) {
for (p = str; *p; p++) *p = dos2unix[(unsigned char)*p];
return str;
@@ -199,7 +192,6 @@ char *dos2unix_format(char *str, BOOL overwrite)
*dp = 0;
return cvtbuf;
}
- }
}
diff --git a/source3/lib/charset.c b/source3/lib/charset.c
index 79a82f8587..fe170bdcf5 100644
--- a/source3/lib/charset.c
+++ b/source3/lib/charset.c
@@ -347,14 +347,21 @@ void codepage_initialise(int client_codepage)
for code page %d failed. Using default client codepage 932\n",
CODEPAGEDIR, client_codepage, client_codepage));
cp = cp_932;
+ client_codepage = KANJI_CODEPAGE;
#else /* KANJI */
DEBUG(6,("codepage_initialise: loading dynamic codepage file %s/codepage.%d \
for code page %d failed. Using default client codepage 850\n",
CODEPAGEDIR, client_codepage, client_codepage));
cp = cp_850;
+ client_codepage = MSDOS_LATIN_1_CODEPAGE;
#endif /* KANJI */
}
+ /*
+ * Setup the function pointers for the loaded codepage.
+ */
+ initialize_multibyte_vectors( client_codepage );
+
if(cp)
{
for(i = 0; !((cp[i][0] == '\0') && (cp[i][1] == '\0')); i++)
diff --git a/source3/lib/kanji.c b/source3/lib/kanji.c
index d63798914e..9360405547 100644
--- a/source3/lib/kanji.c
+++ b/source3/lib/kanji.c
@@ -27,6 +27,55 @@
#define _KANJI_C_
#include "includes.h"
+/*
+ * Function pointers that get overridden when multi-byte code pages
+ * are loaded.
+ */
+
+char *(*multibyte_strchr)(char *, int ) = (char *(*)(char *, int )) strchr;
+char *(*multibyte_strrchr)(char *, int ) = (char *(*)(char *, int )) strrchr;
+char *(*multibyte_strstr)(char *, char *) = (char *(*)(char *, char *)) strstr;
+char *(*multibyte_strtok)(char *, char *) = (char *(*)(char *, char *)) strtok;
+
+/*
+ * Kanji is treated differently here due to historical accident of
+ * it being the first non-English codepage added to Samba.
+ * The define 'KANJI' is being overloaded to mean 'use kanji codepage
+ * by default' and also 'this is the filename-to-disk conversion
+ * method to use'. This really should be removed and all control
+ * over this left in the smb.conf parameters 'client codepage'
+ * and 'coding system'.
+ */
+
+#ifndef KANJI
+
+/*
+ * Set the default conversion to be the functions in
+ * charcnv.c.
+ */
+
+static int not_multibyte_char(char);
+
+char *(*_dos_to_unix)(char *, BOOL) = dos2unix_format;
+char *(*_unix_to_dos)(char *, BOOL) = unix2dos_format;
+int (*is_multibyte_char)(char) = not_multibyte_char;
+
+#else /* KANJI */
+
+/*
+ * Set the default conversion to be the function
+ * sj_to_sj in this file.
+ */
+
+static char *sj_to_sj(char *from, BOOL overwrite);
+static int kanji_multibyte_char(char);
+
+char *(*_dos_to_unix)(char *, BOOL) = sj_to_sj;
+char *(*_unix_to_dos)(char *, BOOL) = sj_to_sj;
+int (*is_multibyte_char)(char) = kanji_multibyte_char;
+
+#endif /* KANJI */
+
/* jis si/so sequence */
static char jis_kso = JIS_KSO;
static char jis_ksi = JIS_KSI;
@@ -37,13 +86,10 @@ static char hex_tag = HEXTAG;
********************************************************************/
/*******************************************************************
search token from S1 separated any char of S2
- S1 contain SHIFT JIS chars.
+ S1 contains SHIFT JIS chars.
********************************************************************/
-char *sj_strtok(char *s1, char *s2)
+static char *sj_strtok(char *s1, char *s2)
{
- if (lp_client_code_page() != KANJI_CODEPAGE) {
- return strtok(s1, s2);
- } else {
static char *s = NULL;
char *q;
if (!s1) {
@@ -75,18 +121,14 @@ char *sj_strtok(char *s1, char *s2)
return q;
}
return NULL;
- }
}
/*******************************************************************
search string S2 from S1
- S1 contain SHIFT JIS chars.
+ S1 contains SHIFT JIS chars.
********************************************************************/
-char *sj_strstr(char *s1, char *s2)
+static char *sj_strstr(char *s1, char *s2)
{
- if (lp_client_code_page() != KANJI_CODEPAGE) {
- return strstr(s1, s2);
- } else {
int len = strlen ((char *) s2);
if (!*s2)
return (char *) s1;
@@ -102,18 +144,14 @@ char *sj_strstr(char *s1, char *s2)
}
}
return 0;
- }
}
/*******************************************************************
Search char C from beginning of S.
- S contain SHIFT JIS chars.
+ S contains SHIFT JIS chars.
********************************************************************/
-char *sj_strchr (char *s, int c)
+static char *sj_strchr (char *s, int c)
{
- if (lp_client_code_page() != KANJI_CODEPAGE) {
- return strchr(s, c);
- } else {
for (; *s; ) {
if (*s == c)
return (char *) s;
@@ -124,18 +162,14 @@ char *sj_strchr (char *s, int c)
}
}
return 0;
- }
}
/*******************************************************************
Search char C end of S.
- S contain SHIFT JIS chars.
+ S contains SHIFT JIS chars.
********************************************************************/
-char *sj_strrchr(char *s, int c)
+static char *sj_strrchr(char *s, int c)
{
- if (lp_client_code_page() != KANJI_CODEPAGE) {
- return strrchr(s, c);
- } else {
char *q;
for (q = 0; *s; ) {
@@ -149,7 +183,249 @@ char *sj_strrchr(char *s, int c)
}
}
return q;
+}
+
+/*******************************************************************
+ Kanji multibyte char function.
+*******************************************************************/
+
+static int kanji_multibyte_char(char c)
+{
+ if(is_shift_jis(c)) {
+ return 2;
+ } else if (is_kana(c)) {
+ return 1;
+ }
+ return 0;
+}
+
+/*******************************************************************
+ Hangul (Korean - code page 949) functions
+********************************************************************/
+/*******************************************************************
+ search token from S1 separated any char of S2
+ S1 contains hangul chars.
+********************************************************************/
+static char *hangul_strtok(char *s1, char *s2)
+{
+ static char *s = NULL;
+ char *q;
+ if (!s1) {
+ if (!s) {
+ return NULL;
+ }
+ s1 = s;
+ }
+ for (q = s1; *s1; ) {
+ if (is_hangul (*s1)) {
+ s1 += 2;
+ } else {
+ char *p = strchr (s2, *s1);
+ if (p) {
+ if (s1 != q) {
+ s = s1 + 1;
+ *s1 = '\0';
+ return q;
+ }
+ q = s1 + 1;
+ }
+ s1++;
+ }
+ }
+ s = NULL;
+ if (*q) {
+ return q;
+ }
+ return NULL;
+}
+
+/*******************************************************************
+ search string S2 from S1
+ S1 contains hangul chars.
+********************************************************************/
+static char *hangul_strstr(char *s1, char *s2)
+{
+ int len = strlen ((char *) s2);
+ if (!*s2)
+ return (char *) s1;
+ for (;*s1;) {
+ if (*s1 == *s2) {
+ if (strncmp (s1, s2, len) == 0)
+ return (char *) s1;
+ }
+ if (is_hangul (*s1)) {
+ s1 += 2;
+ } else {
+ s1++;
+ }
+ }
+ return 0;
+}
+
+/*******************************************************************
+ Search char C from beginning of S.
+ S contains hangul chars.
+********************************************************************/
+static char *hangul_strchr (char *s, int c)
+{
+ for (; *s; ) {
+ if (*s == c)
+ return (char *) s;
+ if (is_hangul (*s)) {
+ s += 2;
+ } else {
+ s++;
+ }
+ }
+ return 0;
+}
+
+/*******************************************************************
+ Search char C end of S.
+ S contains hangul chars.
+********************************************************************/
+static char *hangul_strrchr(char *s, int c)
+{
+ char *q;
+
+ for (q = 0; *s; ) {
+ if (*s == c) {
+ q = (char *) s;
+ }
+ if (is_hangul (*s)) {
+ s += 2;
+ } else {
+ s++;
+ }
+ }
+ return q;
+}
+
+/*******************************************************************
+ Hangul multibyte char function.
+*******************************************************************/
+
+static int hangul_multibyte_char(char c)
+{
+ if( is_hangul(c)) {
+ return 2;
+ }
+ return 0;
+}
+
+/*******************************************************************
+ Big5 Traditional Chinese (code page 950) functions
+********************************************************************/
+
+/*******************************************************************
+ search token from S1 separated any char of S2
+ S1 contains big5 chars.
+********************************************************************/
+static char *big5_strtok(char *s1, char *s2)
+{
+ static char *s = NULL;
+ char *q;
+ if (!s1) {
+ if (!s) {
+ return NULL;
+ }
+ s1 = s;
+ }
+ for (q = s1; *s1; ) {
+ if (is_big5_c1 (*s1)) {
+ s1 += 2;
+ } else {
+ char *p = strchr (s2, *s1);
+ if (p) {
+ if (s1 != q) {
+ s = s1 + 1;
+ *s1 = '\0';
+ return q;
+ }
+ q = s1 + 1;
+ }
+ s1++;
+ }
+ }
+ s = NULL;
+ if (*q) {
+ return q;
+ }
+ return NULL;
+}
+
+/*******************************************************************
+ search string S2 from S1
+ S1 contains big5 chars.
+********************************************************************/
+static char *big5_strstr(char *s1, char *s2)
+{
+ int len = strlen ((char *) s2);
+ if (!*s2)
+ return (char *) s1;
+ for (;*s1;) {
+ if (*s1 == *s2) {
+ if (strncmp (s1, s2, len) == 0)
+ return (char *) s1;
+ }
+ if (is_big5_c1 (*s1)) {
+ s1 += 2;
+ } else {
+ s1++;
+ }
+ }
+ return 0;
+}
+
+/*******************************************************************
+ Search char C from beginning of S.
+ S contains big5 chars.
+********************************************************************/
+static char *big5_strchr (char *s, int c)
+{
+ for (; *s; ) {
+ if (*s == c)
+ return (char *) s;
+ if (is_big5_c1 (*s)) {
+ s += 2;
+ } else {
+ s++;
+ }
+ }
+ return 0;
+}
+
+/*******************************************************************
+ Search char C end of S.
+ S contains big5 chars.
+********************************************************************/
+static char *big5_strrchr(char *s, int c)
+{
+ char *q;
+
+ for (q = 0; *s; ) {
+ if (*s == c) {
+ q = (char *) s;
+ }
+ if (is_big5_c1 (*s)) {
+ s += 2;
+ } else {
+ s++;
+ }
+ }
+ return q;
+}
+
+/*******************************************************************
+ Big5 multibyte char function.
+*******************************************************************/
+
+static int big5_multibyte_char(char c)
+{
+ if( is_big5_c1(c)) {
+ return 2;
}
+ return 0;
}
/*******************************************************************
@@ -770,17 +1046,17 @@ static char *sj_to_sj(char *from, BOOL overwrite)
_dos_to_unix _unix_to_dos
************************************************************************/
-char *(*_dos_to_unix)(char *str, BOOL overwrite) = sj_to_sj;
-char *(*_unix_to_dos)(char *str, BOOL overwrite) = sj_to_sj;
-
-static int setup_string_function(int codes)
+static void setup_string_function(int codes)
{
switch (codes) {
default:
+ _dos_to_unix = dos2unix_format;
+ _unix_to_dos = unix2dos_format;
+ break;
+
case SJIS_CODE:
_dos_to_unix = sj_to_sj;
_unix_to_dos = sj_to_sj;
-
break;
case EUC_CODE:
@@ -813,13 +1089,12 @@ static int setup_string_function(int codes)
_unix_to_dos = cap_to_sj;
break;
}
- return codes;
}
/*
* Interpret coding system.
*/
-int interpret_coding_system(char *str)
+void interpret_coding_system(char *str)
{
int codes = UNKNOWN_CODE;
@@ -909,5 +1184,58 @@ int interpret_coding_system(char *str)
jis_kso = '@';
jis_ksi = 'H';
}
- return setup_string_function (codes);
+ setup_string_function (codes);
+}
+
+/*******************************************************************
+ Non multibyte char function.
+*******************************************************************/
+
+static int not_multibyte_char(char c)
+{
+ return 0;
+}
+
+/*******************************************************************
+ Setup the function pointers for the functions that are replaced
+ when multi-byte codepages are used.
+
+ The dos_to_unix and unix_to_dos function pointers are only
+ replaced by setup_string_function called by interpret_coding_system
+ above.
+*******************************************************************/
+
+void initialize_multibyte_vectors( int client_codepage)
+{
+ switch( client_codepage )
+ {
+ case KANJI_CODEPAGE:
+ multibyte_strchr = (char *(*)(char *, int )) sj_strchr;
+ multibyte_strrchr = (char *(*)(char *, int )) sj_strrchr;
+ multibyte_strstr = (char *(*)(char *, char *)) sj_strstr;
+ multibyte_strtok = (char *(*)(char *, char *)) sj_strtok;
+ is_multibyte_char = kanji_multibyte_char;
+ break;
+ case HANGUL_CODEPAGE:
+ multibyte_strchr = (char *(*)(char *, int )) hangul_strchr;
+ multibyte_strrchr = (char *(*)(char *, int )) hangul_strrchr;
+ multibyte_strstr = (char *(*)(char *, char *)) hangul_strstr;
+ multibyte_strtok = (char *(*)(char *, char *)) hangul_strtok;
+ is_multibyte_char = hangul_multibyte_char;
+ break;
+ case BIG5_CODEPAGE:
+ multibyte_strchr = (char *(*)(char *, int )) big5_strchr;
+ multibyte_strrchr = (char *(*)(char *, int )) big5_strrchr;
+ multibyte_strstr = (char *(*)(char *, char *)) big5_strstr;
+ multibyte_strtok = (char *(*)(char *, char *)) big5_strtok;
+ is_multibyte_char = big5_multibyte_char;
+ break;
+ default:
+ multibyte_strchr = (char *(*)(char *, int )) strchr;
+ multibyte_strrchr = (char *(*)(char *, int )) strrchr;
+ multibyte_strstr = (char *(*)(char *, char *)) strstr;
+ multibyte_strtok = (char *(*)(char *, char *)) strtok;
+ is_multibyte_char = not_multibyte_char;
+ break;
+ }
}
diff --git a/source3/lib/util.c b/source3/lib/util.c
index 18614caeed..5af41cc06c 100644
--- a/source3/lib/util.c
+++ b/source3/lib/util.c
@@ -887,6 +887,15 @@ int StrCaseCmp(char *s, char *t)
asynchronous upper to lower mapping.
*/
#if !defined(KANJI_WIN95_COMPATIBILITY)
+ /*
+ * For completeness we should put in equivalent code for code pages
+ * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but
+ * doubt anyone wants Samba to behave differently from Win95 and WinNT
+ * here. They both treat full width ascii characters as case senstive
+ * filenames (ie. they don't do the work we do here).
+ * JRA.
+ */
+
if(lp_client_code_page() == KANJI_CODEPAGE)
{
/* Win95 treats full width ascii characters as case sensitive. */
@@ -951,6 +960,15 @@ int StrnCaseCmp(char *s, char *t, int n)
asynchronous upper to lower mapping.
*/
#if !defined(KANJI_WIN95_COMPATIBILITY)
+ /*
+ * For completeness we should put in equivalent code for code pages
+ * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but
+ * doubt anyone wants Samba to behave differently from Win95 and WinNT
+ * here. They both treat full width ascii characters as case senstive
+ * filenames (ie. they don't do the work we do here).
+ * JRA.
+ */
+
if(lp_client_code_page() == KANJI_CODEPAGE)
{
/* Win95 treats full width ascii characters as case sensitive. */
@@ -1058,6 +1076,15 @@ void strlower(char *s)
while (*s)
{
#if !defined(KANJI_WIN95_COMPATIBILITY)
+ /*
+ * For completeness we should put in equivalent code for code pages
+ * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but
+ * doubt anyone wants Samba to behave differently from Win95 and WinNT
+ * here. They both treat full width ascii characters as case senstive
+ * filenames (ie. they don't do the work we do here).
+ * JRA.
+ */
+
if(lp_client_code_page() == KANJI_CODEPAGE)
{
/* Win95 treats full width ascii characters as case sensitive. */
@@ -1096,6 +1123,15 @@ void strupper(char *s)
while (*s)
{
#if !defined(KANJI_WIN95_COMPATIBILITY)
+ /*
+ * For completeness we should put in equivalent code for code pages
+ * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but
+ * doubt anyone wants Samba to behave differently from Win95 and WinNT
+ * here. They both treat full width ascii characters as case senstive
+ * filenames (ie. they don't do the work we do here).
+ * JRA.
+ */
+
if(lp_client_code_page() == KANJI_CODEPAGE)
{
/* Win95 treats full width ascii characters as case sensitive. */
@@ -1157,6 +1193,15 @@ void string_replace(char *s,char oldc,char newc)
while (*s)
{
#if !defined(KANJI_WIN95_COMPATIBILITY)
+ /*
+ * For completeness we should put in equivalent code for code pages
+ * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but
+ * doubt anyone wants Samba to behave differently from Win95 and WinNT
+ * here. They both treat full width ascii characters as case senstive
+ * filenames (ie. they don't do the work we do here).
+ * JRA.
+ */
+
if(lp_client_code_page() == KANJI_CODEPAGE)
{
/* Win95 treats full width ascii characters as case sensitive. */
@@ -1783,6 +1828,15 @@ BOOL strhasupper(char *s)
while (*s)
{
#if !defined(KANJI_WIN95_COMPATIBILITY)
+ /*
+ * For completeness we should put in equivalent code for code pages
+ * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but
+ * doubt anyone wants Samba to behave differently from Win95 and WinNT
+ * here. They both treat full width ascii characters as case senstive
+ * filenames (ie. they don't do the work we do here).
+ * JRA.
+ */
+
if(lp_client_code_page() == KANJI_CODEPAGE)
{
/* Win95 treats full width ascii characters as case sensitive. */
@@ -1816,6 +1870,15 @@ BOOL strhaslower(char *s)
while (*s)
{
#if !defined(KANJI_WIN95_COMPATIBILITY)
+ /*
+ * For completeness we should put in equivalent code for code pages
+ * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but
+ * doubt anyone wants Samba to behave differently from Win95 and WinNT
+ * here. They both treat full width ascii characters as case senstive
+ * filenames (ie. they don't do the work we do here).
+ * JRA.
+ */
+
if(lp_client_code_page() == KANJI_CODEPAGE)
{
/* Win95 treats full width ascii characters as case sensitive. */
@@ -1857,6 +1920,15 @@ int count_chars(char *s,char c)
int count=0;
#if !defined(KANJI_WIN95_COMPATIBILITY)
+ /*
+ * For completeness we should put in equivalent code for code pages
+ * 949 (Korean hangul) and 950 (Big5 Traditional Chinese) here - but
+ * doubt anyone wants Samba to behave differently from Win95 and WinNT
+ * here. They both treat full width ascii characters as case senstive
+ * filenames (ie. they don't do the work we do here).
+ * JRA.
+ */
+
if(lp_client_code_page() == KANJI_CODEPAGE)
{
/* Win95 treats full width ascii characters as case sensitive. */