From c6e63aa896a10656f6205828e744b722fc72f8ac Mon Sep 17 00:00:00 2001 From: Samba Release Account Date: Wed, 11 Jun 1997 01:03:06 +0000 Subject: Makefile: Added quoata changes for Linux from Thorvald Natvig Makefile.RPM: Added quoata changes for Linux from Thorvald Natvig charset.c: Large changes to add multiple client code pages. charset.h: Changed charset_initialise() proto. client.c: Fixed message sending bug. Changed charset_initialise(). ipc.c: Fixed #ifdef compile problems. loadparm.c: Added "client code page" option. nmbd.c: Changed charset_initialise(). Fixed lmhosts read. nmblookup.c: Changed charset_initialise(). proto.h: Added lp_client_code_page(void). quotas.c: Added quoata changes for Linux from Thorvald Natvig reply.c: Changed debug level. Made SMBecho ignore tid. server.c: Changed charset_initialise(). smb.h: Added DEFAULT_CLIENT_CODE_PAGE as 850. smbpasswd.c: Changed charset_initialise(). status.c: Changed charset_initialise(). testparm.c: Changed charset_initialise(). testprns.c: Changed charset_initialise(). Jeremy Allison (jallison@whistle.com) (This used to be commit 957025bace1bcff34d21a6caeca498e85abccb23) --- source3/lib/charset.c | 264 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 189 insertions(+), 75 deletions(-) (limited to 'source3/lib') diff --git a/source3/lib/charset.c b/source3/lib/charset.c index 1c396db241..a83d1133fe 100644 --- a/source3/lib/charset.c +++ b/source3/lib/charset.c @@ -24,6 +24,140 @@ extern int DEBUGLEVEL; +/* + * Codepage definitions. + */ + +/* lower->upper mapping for IBM Code Page 850 - MS-DOS Latin 1 */ +unsigned char cp_850[][4] = { +/* dec col/row oct hex description */ +/* 133 08/05 205 85 a grave */ +/* 183 11/07 267 B7 A grave */ {0x85,0xB7,1,1}, +/* 160 10/00 240 A0 a acute */ +/* 181 11/05 265 B5 A acute */ {0xA0,0xB5,1,1}, +/* 131 08/03 203 83 a circumflex */ +/* 182 11/06 266 B6 A circumflex */ {0x83,0xB6,1,1}, +/* 198 12/06 306 C6 a tilde */ +/* 199 12/07 307 C7 A tilde */ {0xC6,0xC7,1,1}, +/* 132 08/04 204 84 a diaeresis */ +/* 142 08/14 216 8E A diaeresis */ {0x84,0x8E,1,1}, +/* 134 08/06 206 86 a ring */ +/* 143 08/15 217 8F A ring */ {0x86,0x8F,1,1}, +/* 145 09/01 221 91 ae diphthong */ +/* 146 09/02 222 92 AE diphthong */ {0x91,0x92,1,1}, +/* 135 08/07 207 87 c cedilla */ +/* 128 08/00 200 80 C cedilla */ {0x87,0x80,1,1}, +/* 138 08/10 212 8A e grave */ +/* 212 13/04 324 D4 E grave */ {0x8A,0xD4,1,1}, +/* 130 08/02 202 82 e acute */ +/* 144 09/00 220 90 E acute */ {0x82,0x90,1,1}, +/* 136 08/08 210 88 e circumflex */ +/* 210 13/02 322 D2 E circumflex */ {0x88,0xD2,1,1}, +/* 137 08/09 211 89 e diaeresis */ +/* 211 13/03 323 D3 E diaeresis */ {0x89,0xD3,1,1}, +/* 141 08/13 215 8D i grave */ +/* 222 13/14 336 DE I grave */ {0x8D,0xDE,1,1}, +/* 161 10/01 241 A1 i acute */ +/* 214 13/06 326 D6 I acute */ {0xA1,0xD6,1,1}, +/* 140 08/12 214 8C i circumflex */ +/* 215 13/07 327 D7 I circumflex */ {0x8C,0xD7,1,1}, +/* 139 08/11 213 8B i diaeresis */ +/* 216 13/08 330 D8 I diaeresis */ {0x8B,0xD8,1,1}, +/* 208 13/00 320 D0 Icelandic eth */ +/* 209 13/01 321 D1 Icelandic Eth */ {0xD0,0xD1,1,1}, +/* 164 10/04 244 A4 n tilde */ +/* 165 10/05 245 A5 N tilde */ {0xA4,0xA5,1,1}, +/* 149 09/05 225 95 o grave */ +/* 227 14/03 343 E3 O grave */ {0x95,0xE3,1,1}, +/* 162 10/02 242 A2 o acute */ +/* 224 14/00 340 E0 O acute */ {0xA2,0xE0,1,1}, +/* 147 09/03 223 93 o circumflex */ +/* 226 14/02 342 E2 O circumflex */ {0x93,0xE2,1,1}, +/* 228 14/04 344 E4 o tilde */ +/* 229 14/05 345 E5 O tilde */ {0xE4,0xE5,1,1}, +/* 148 09/04 224 94 o diaeresis */ +/* 153 09/09 231 99 O diaeresis */ {0x94,0x99,1,1}, +/* 155 09/11 233 9B o slash */ +/* 157 09/13 235 9D O slash */ {0x9B,0x9D,1,1}, +/* 151 09/07 227 97 u grave */ +/* 235 14/11 353 EB U grave */ {0x97,0xEB,1,1}, +/* 163 10/03 243 A3 u acute */ +/* 233 14/09 351 E9 U acute */ {0xA3,0xE9,1,1}, +/* 150 09/06 226 96 u circumflex */ +/* 234 14/10 352 EA U circumflex */ {0x96,0xEA,1,1}, +/* 129 08/01 201 81 u diaeresis */ +/* 154 09/10 232 9A U diaeresis */ {0x81,0x9A,1,1}, +/* 236 14/12 354 EC y acute */ +/* 237 14/13 355 ED Y acute */ {0xEC,0xED,1,1}, +/* 231 14/07 347 E7 Icelandic thorn */ +/* 232 14/08 350 E8 Icelandic Thorn */ {0xE7,0xE8,1,1}, + + {0x9C,0,0,0}, /* Pound */ + {0,0,0,0} +}; + +/* lower->upper mapping for IBM Code Page 437 - MS-DOS Latin US */ +unsigned char cp_437[][4] = { +/* 135 08/07 207 87 c cedilla */ +/* 128 08/00 200 80 C cedilla */ {0x87,0x80,1,1}, +/* 129 08/01 201 81 u diaeresis */ +/* 154 09/10 232 9A U diaeresis */ {0x81,0x9A,1,1}, +/* 130 08/02 202 82 e acute */ +/* 144 09/00 220 90 E acute */ {0x82,0x90,1,1}, +/* 131 08/03 203 83 a circumflex */ {0x83,0x41,1,0}, +/* 132 08/04 204 84 a diaeresis */ +/* 142 08/14 216 8E A diaeresis */ {0x84,0x8E,1,1}, +/* 133 08/05 205 85 a grave */ {0x85,0x41,1,0}, +/* 134 08/06 206 86 a ring */ {0x86,0x8F,1,1}, +/* 136 08/08 210 88 e circumflex */ {0x88,0x45,1,0}, +/* 137 08/09 211 89 e diaeresis */ {0x89,0x45,1,0}, +/* 138 08/10 212 8A e grave */ {0x8A,0x45,1,0}, +/* 139 08/11 213 8B i diaeresis */ {0x8B,0x49,1,0}, +/* 140 08/12 214 8C i circumflex */ {0x8C,0x49,1,0}, +/* 141 08/13 215 8D i grave */ {0x8D,0x49,1,0}, +/* 145 09/01 221 91 ae diphthong */ +/* 146 09/02 222 92 AE diphthong */ {0x91,0x92,1,1}, +/* 147 09/03 223 93 o circumflex */ {0x93,0x4F,1,0}, +/* 148 09/04 224 94 o diaeresis */ +/* 153 09/09 231 99 O diaeresis */ {0x94,0x99,1,1}, +/* 149 09/05 225 95 o grave */ {0x95,0x4F,1,0}, +/* 150 09/06 226 96 u circumflex */ {0x96,0x55,1,0}, +/* 151 09/07 227 97 u grave */ {0x97,0x55,1,0}, +/* 152 ??/?? 201 98 u diaeresis */ + {0x9B,0,0,0}, /* Cent */ + {0x9C,0,0,0}, /* Pound */ + {0x9D,0,0,0}, /* Yen */ +/* 160 10/00 240 A0 a acute */ {0xA0,0x41,1,0}, +/* 161 10/01 241 A1 i acute */ {0xA1,0x49,1,0}, +/* 162 10/02 242 A2 o acute */ {0xA2,0x4F,1,0}, +/* 163 10/03 243 A3 u acute */ {0xA3,0x55,1,0}, +/* 164 10/04 244 A4 n tilde */ +/* 165 10/05 245 A5 N tilde */ {0xA4,0xA5,1,1}, +/* Punctuation... */ + {0xA8,0,0,0}, + {0xAD,0,0,0}, + {0xAE,0,0,0}, + {0xAF,0,0,0}, +/* Greek character set */ + {0xE0,0,0,0}, + {0xE1,0,0,0}, + {0xE2,0,0,0}, + {0xE3,0,0,0}, + {0xE4,0,0,0}, + {0xE5,0,0,0}, + {0xE6,0,0,0}, + {0xE7,0,0,0}, + {0xE8,0,0,0}, + {0xE9,0,0,0}, + {0xEA,0,0,0}, + {0xEB,0,0,0}, + {0xEC,0,0,0}, + {0xED,0,0,0}, + {0xEE,0,0,0}, + {0xEF,0,0,0}, + {0,0,0,0} +}; + char xx_dos_char_map[256]; char xx_upper_char_map[256]; char xx_lower_char_map[256]; @@ -32,25 +166,42 @@ char *dos_char_map = xx_dos_char_map; char *upper_char_map = xx_upper_char_map; char *lower_char_map = xx_lower_char_map; -static void add_dos_char(int lower, int upper) +/* + * This code has been extended to deal with ascynchronous mappings + * like MS-DOS Latin US (Code page 437) where things like : + * a acute are capitalized to 'A', but the reverse mapping + * must not hold true. This allows the filename case insensitive + * matching in do_match() to work, as the DOS/Win95/NT client + * uses 'A' as a mask to match against characters like a acute. + * This is the meaning behind the parameters that allow a + * mapping from lower to upper, but not upper to lower. + */ + +static void add_dos_char(int lower, BOOL map_lower_to_upper, + int upper, BOOL map_upper_to_lower) { lower &= 0xff; upper &= 0xff; - DEBUG(6,("Adding chars 0%o 0%o\n",lower,upper)); + DEBUG(6,("Adding chars 0x%x 0x%x (l->u = %s) (u->l = %s)\n",lower,upper, + map_lower_to_upper ? "True" : "False", + map_upper_to_lower ? "True" : "False")); if (lower) dos_char_map[lower] = 1; if (upper) dos_char_map[upper] = 1; if (lower && upper) { - lower_char_map[upper] = (char)lower; - upper_char_map[lower] = (char)upper; + if(map_upper_to_lower) + lower_char_map[upper] = (char)lower; + if(map_lower_to_upper) + upper_char_map[lower] = (char)upper; } } /**************************************************************************** initialise the charset arrays ****************************************************************************/ -void charset_initialise(void) +void charset_initialise(int client_codepage) { int i; + unsigned char (*cp)[4]; #ifdef LC_ALL /* include in includes.h if available for OS */ @@ -64,7 +215,7 @@ void charset_initialise(void) for (i=0;i<=127;i++) { if (isalnum((char)i) || strchr("._^$~!#%&-{}()@'`",(char)i)) - add_dos_char(i,0); + add_dos_char(i,0,False,False); } for (i=0; i<=255; i++) { @@ -74,74 +225,37 @@ void charset_initialise(void) if (islower(c)) upper_char_map[i] = toupper(c); } -#define CP850 -#ifdef CP850 -/* lower->upper mapping for IBM Code Page 850 */ + if(client_codepage != -1) + DEBUG(1,("charset_initialise: client code page = %d\n", client_codepage)); -/* dec col/row oct hex description */ -/* 133 08/05 205 85 a grave */ -/* 183 11/07 267 B7 A grave */ add_dos_char(0205,0267); -/* 160 10/00 240 A0 a acute */ -/* 181 11/05 265 B5 A acute */ add_dos_char(0240,0265); -/* 131 08/03 203 83 a circumflex */ -/* 182 11/06 266 B6 A circumflex */ add_dos_char(0203,0266); -/* 198 12/06 306 C6 a tilde */ -/* 199 12/07 307 C7 A tilde */ add_dos_char(0306,0307); -/* 132 08/04 204 84 a diaeresis */ -/* 142 08/14 216 8E A diaeresis */ add_dos_char(0204,0216); -/* 134 08/06 206 86 a ring */ -/* 143 08/15 217 8F A ring */ add_dos_char(0206,0217); -/* 145 09/01 221 91 ae diphthong */ -/* 146 09/02 222 92 AE diphthong */ add_dos_char(0221,0222); -/* 128 08/00 200 80 C cedilla */ -/* 135 08/07 207 87 c cedilla */ add_dos_char(0207,0200); -/* 138 08/10 212 8A e grave */ -/* 212 13/04 324 D4 E grave */ add_dos_char(0212,0324); -/* 130 08/02 202 82 e acute */ -/* 144 09/00 220 90 E acute */ add_dos_char(0202,0220); -/* 136 08/08 210 88 e circumflex */ -/* 210 13/02 322 D2 E circumflex */ add_dos_char(0210,0322); -/* 137 08/09 211 89 e diaeresis */ -/* 211 13/03 323 D3 E diaeresis */ add_dos_char(0211,0323); -/* 141 08/13 215 8D i grave */ -/* 222 13/14 336 DE I grave */ add_dos_char(0215,0336); -/* 161 10/01 241 A1 i acute */ -/* 214 13/06 326 D6 I acute */ add_dos_char(0241,0326); -/* 140 08/12 214 8C i circumflex */ -/* 215 13/07 327 D7 I circumflex */ add_dos_char(0214,0327); -/* 139 08/11 213 8B i diaeresis */ -/* 216 13/08 330 D8 I diaeresis */ add_dos_char(0213,0330); -/* 208 13/00 320 D0 Icelandic eth */ -/* 209 13/01 321 D1 Icelandic Eth */ add_dos_char(0320,0321); -/* 164 10/04 244 A4 n tilde */ -/* 165 10/05 245 A5 N tilde */ add_dos_char(0244,0245); -/* 149 09/05 225 95 o grave */ -/* 227 14/03 343 E3 O grave */ add_dos_char(0225,0343); -/* 162 10/02 242 A2 o acute */ -/* 224 14/00 340 E0 O acute */ add_dos_char(0242,0340); -/* 147 09/03 223 93 o circumflex */ -/* 226 14/02 342 E2 O circumflex */ add_dos_char(0223,0342); -/* 228 14/04 344 E4 o tilde */ -/* 229 14/05 345 E5 O tilde */ add_dos_char(0344,0345); -/* 148 09/04 224 94 o diaeresis */ -/* 153 09/09 231 99 O diaeresis */ add_dos_char(0224,0231); -/* 155 09/11 233 9B o slash */ -/* 157 09/13 235 9D O slash */ add_dos_char(0233,0235); -/* 151 09/07 227 97 u grave */ -/* 235 14/11 353 EB U grave */ add_dos_char(0227,0353); -/* 163 10/03 243 A3 u acute */ -/* 233 14/09 351 E9 U acute */ add_dos_char(0243,0351); -/* 150 09/06 226 96 u circumflex */ -/* 234 14/10 352 EA U circumflex */ add_dos_char(0226,0352); -/* 129 08/01 201 81 u diaeresis */ -/* 154 09/10 232 9A U diaeresis */ add_dos_char(0201,0232); -/* 236 14/12 354 EC y acute */ -/* 237 14/13 355 ED Y acute */ add_dos_char(0354,0355); -/* 231 14/07 347 E7 Icelandic thorn */ -/* 232 14/08 350 E8 Icelandic Thorn */ add_dos_char(0347,0350); - - add_dos_char(156,0); /* Pound */ -#endif + /* + * Known client codepages - these can be added to. + */ + switch(client_codepage) + { + case 850: + cp = cp_850; + break; + case 437: + cp = cp_437; + break; + case -1: /* pre-initialize call so that toupper/tolower work + before smb.conf is read. */ + cp = NULL; + break; + default: + /* Default charset - currently 850 */ + DEBUG(1,("charset_initialise: Using default client codepage %d\n", 850)); + cp = cp_850; + break; + + } + + if(cp) + { + for(i = 0; (cp[i][0] != '\0') && (cp[i][1] != '\0'); i++) + add_dos_char(cp[i][0], (BOOL)cp[i][2], cp[i][1], (BOOL)cp[i][3]); + } } /******************************************************************* @@ -158,10 +272,10 @@ void add_char_string(char *s) int i1=0,i2=0; if (isdigit(*t) || (*t)=='-') { sscanf(t,"%i:%i",&i1,&i2); - add_dos_char(i1,i2); + add_dos_char(i1,True,i2,True); } else { sscanf(t,"%c:%c",&c1,&c2); - add_dos_char(c1,c2); + add_dos_char(c1,True,c2, True); } } -- cgit