From f18cca9a0a690013eb609719c4798da9b0e1054b Mon Sep 17 00:00:00 2001 From: Andrew Bartlett Date: Fri, 13 May 2011 10:26:20 +0200 Subject: lib/util/charset Move built-in charset modules to the top level This removes the 'charset' subsystem and allows these modules to be used across the whole of Samba. Andrew Bartlett --- lib/util/charset/CP437.c | 135 ++++++++ lib/util/charset/CP850.c | 121 ++++++++ lib/util/charset/charset.h | 2 +- lib/util/charset/charset_macosxfs.c | 604 ++++++++++++++++++++++++++++++++++++ lib/util/charset/weird.c | 134 ++++++++ lib/util/charset/wscript_build | 33 ++ 6 files changed, 1028 insertions(+), 1 deletion(-) create mode 100644 lib/util/charset/CP437.c create mode 100644 lib/util/charset/CP850.c create mode 100644 lib/util/charset/charset_macosxfs.c create mode 100644 lib/util/charset/weird.c (limited to 'lib/util') diff --git a/lib/util/charset/CP437.c b/lib/util/charset/CP437.c new file mode 100644 index 0000000000..1e478d678f --- /dev/null +++ b/lib/util/charset/CP437.c @@ -0,0 +1,135 @@ +/* + * Conversion table for CP437 charset also known as IBM437 + * + * Copyright (C) Alexander Bokovoy 2003 + * + * Conversion tables are generated using GNU libc 2.2.5's + * localedata/charmaps/IBM437 table and source/script/gen-8bit-gap.sh script + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "includes.h" + +static const uint16_t to_ucs2[256] = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, + 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, + 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, + 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, + 0x00FF, 0x00D6, 0x00DC, 0x00A2, 0x00A3, 0x00A5, 0x20A7, 0x0192, + 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, + 0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, + 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510, + 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, + 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567, + 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B, + 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580, + 0x03B1, 0x00DF, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4, + 0x03A6, 0x0398, 0x03A9, 0x03B4, 0x221E, 0x03C6, 0x03B5, 0x2229, + 0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248, + 0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x25A0, 0x00A0, +}; + +static const struct charset_gap_table from_idx[] = { + { 0x0000, 0x007f, 0 }, + { 0x00a0, 0x00c9, -32 }, + { 0x00d1, 0x00ff, -39 }, + { 0x0192, 0x0192, -185 }, + { 0x0393, 0x0398, -697 }, + { 0x03a3, 0x03a9, -707 }, + { 0x03b1, 0x03b5, -714 }, + { 0x03c0, 0x03c6, -724 }, + { 0x207f, 0x207f, -8076 }, + { 0x20a7, 0x20a7, -8115 }, + { 0x2219, 0x221e, -8484 }, + { 0x2229, 0x2229, -8494 }, + { 0x2248, 0x2248, -8524 }, + { 0x2261, 0x2265, -8548 }, + { 0x2310, 0x2310, -8718 }, + { 0x2320, 0x2321, -8733 }, + { 0x2500, 0x2502, -9211 }, + { 0x250c, 0x251c, -9220 }, + { 0x2524, 0x2524, -9227 }, + { 0x252c, 0x252c, -9234 }, + { 0x2534, 0x2534, -9241 }, + { 0x253c, 0x253c, -9248 }, + { 0x2550, 0x256c, -9267 }, + { 0x2580, 0x2593, -9286 }, + { 0x25a0, 0x25a0, -9298 }, + { 0xffff, 0xffff, 0 } +}; + +static const unsigned char from_ucs2[] = { + + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0xff, 0xad, 0x9b, 0x9c, 0x00, 0x9d, 0x00, 0x00, + 0x00, 0x00, 0xa6, 0xae, 0xaa, 0x00, 0x00, 0x00, + 0xf8, 0xf1, 0xfd, 0x00, 0x00, 0xe6, 0x00, 0xfa, + 0x00, 0x00, 0xa7, 0xaf, 0xac, 0xab, 0x00, 0xa8, + 0x00, 0x00, 0x00, 0x00, 0x8e, 0x8f, 0x92, 0x80, + 0x00, 0x90, 0xa5, 0x00, 0x00, 0x00, 0x00, 0x99, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x9a, 0x00, 0x00, + 0xe1, 0x85, 0xa0, 0x83, 0x00, 0x84, 0x86, 0x91, + 0x87, 0x8a, 0x82, 0x88, 0x89, 0x8d, 0xa1, 0x8c, + 0x8b, 0x00, 0xa4, 0x95, 0xa2, 0x93, 0x00, 0x94, + 0xf6, 0x00, 0x97, 0xa3, 0x96, 0x81, 0x00, 0x00, + 0x98, 0x9f, 0xe2, 0x00, 0x00, 0x00, 0x00, 0xe9, + 0xe4, 0x00, 0x00, 0xe8, 0x00, 0x00, 0xea, 0xe0, + 0x00, 0x00, 0xeb, 0xee, 0xe3, 0x00, 0x00, 0xe5, + 0xe7, 0x00, 0xed, 0xfc, 0x9e, 0xf9, 0xfb, 0x00, + 0x00, 0x00, 0xec, 0xef, 0xf7, 0xf0, 0x00, 0x00, + 0xf3, 0xf2, 0xa9, 0xf4, 0xf5, 0xc4, 0x00, 0xb3, + 0xda, 0x00, 0x00, 0x00, 0xbf, 0x00, 0x00, 0x00, + 0xc0, 0x00, 0x00, 0x00, 0xd9, 0x00, 0x00, 0x00, + 0xc3, 0xb4, 0xc2, 0xc1, 0xc5, 0xcd, 0xba, 0xd5, + 0xd6, 0xc9, 0xb8, 0xb7, 0xbb, 0xd4, 0xd3, 0xc8, + 0xbe, 0xbd, 0xbc, 0xc6, 0xc7, 0xcc, 0xb5, 0xb6, + 0xb9, 0xd1, 0xd2, 0xcb, 0xcf, 0xd0, 0xca, 0xd8, + 0xd7, 0xce, 0xdf, 0x00, 0x00, 0x00, 0xdc, 0x00, + 0x00, 0x00, 0xdb, 0x00, 0x00, 0x00, 0xdd, 0x00, + 0x00, 0x00, 0xde, 0xb0, 0xb1, 0xb2, 0xfe, +}; + +SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP(CP437) diff --git a/lib/util/charset/CP850.c b/lib/util/charset/CP850.c new file mode 100644 index 0000000000..87a76f4cdf --- /dev/null +++ b/lib/util/charset/CP850.c @@ -0,0 +1,121 @@ +/* + * Conversion table for CP850 charset also known as IBM850. + * + * Copyright (C) Alexander Bokovoy 2003 + * + * Conversion tables are generated using GNU libc 2.2.5's + * localedata/charmaps/IBM850 table and source/script/gen-8bit-gap.sh script + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "includes.h" + +static const uint16_t to_ucs2[256] = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, + 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, + 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, + 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, + 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192, + 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, + 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0, + 0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510, + 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3, + 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4, + 0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE, + 0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580, + 0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE, + 0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4, + 0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8, + 0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0, +}; + +static const struct charset_gap_table from_idx[] = { + /* start, end, idx */ + { 0x0000, 0x007f, 0 }, + { 0x00a0, 0x00ff, -32 }, + { 0x0131, 0x0131, -81 }, + { 0x0192, 0x0192, -177 }, + { 0x2017, 0x2017, -7989 }, + { 0x2500, 0x2502, -9245 }, + { 0x250c, 0x251c, -9254 }, + { 0x2524, 0x2524, -9261 }, + { 0x252c, 0x252c, -9268 }, + { 0x2534, 0x2534, -9275 }, + { 0x253c, 0x253c, -9282 }, + { 0x2550, 0x256c, -9301 }, + { 0x2580, 0x2588, -9320 }, + { 0x2591, 0x2593, -9328 }, + { 0x25a0, 0x25a0, -9340 }, + { 0xffff, 0xffff, 0 } +}; +static const unsigned char from_ucs2[] = { + + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0xff, 0xad, 0xbd, 0x9c, 0xcf, 0xbe, 0xdd, 0xf5, + 0xf9, 0xb8, 0xa6, 0xae, 0xaa, 0xf0, 0xa9, 0xee, + 0xf8, 0xf1, 0xfd, 0xfc, 0xef, 0xe6, 0xf4, 0xfa, + 0xf7, 0xfb, 0xa7, 0xaf, 0xac, 0xab, 0xf3, 0xa8, + 0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80, + 0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8, + 0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0x9e, + 0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0xe1, + 0x85, 0xa0, 0x83, 0xc6, 0x84, 0x86, 0x91, 0x87, + 0x8a, 0x82, 0x88, 0x89, 0x8d, 0xa1, 0x8c, 0x8b, + 0xd0, 0xa4, 0x95, 0xa2, 0x93, 0xe4, 0x94, 0xf6, + 0x9b, 0x97, 0xa3, 0x96, 0x81, 0xec, 0xe7, 0x98, + 0xd5, 0x9f, 0xf2, 0xc4, 0x00, 0xb3, 0xda, 0x00, + 0x00, 0x00, 0xbf, 0x00, 0x00, 0x00, 0xc0, 0x00, + 0x00, 0x00, 0xd9, 0x00, 0x00, 0x00, 0xc3, 0xb4, + 0xc2, 0xc1, 0xc5, 0xcd, 0xba, 0x00, 0x00, 0xc9, + 0x00, 0x00, 0xbb, 0x00, 0x00, 0xc8, 0x00, 0x00, + 0xbc, 0x00, 0x00, 0xcc, 0x00, 0x00, 0xb9, 0x00, + 0x00, 0xcb, 0x00, 0x00, 0xca, 0x00, 0x00, 0xce, + 0xdf, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, + 0xdb, 0xb0, 0xb1, 0xb2, 0xfe, +}; + +SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP(CP850) + diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h index e5fd596f8f..a7e554204b 100644 --- a/lib/util/charset/charset.h +++ b/lib/util/charset/charset.h @@ -279,7 +279,7 @@ static size_t CHARSETNAME ## _push(void *cd, const char **inbuf, size_t *inbytes int i; \ int done = 0; \ \ - uint16 ch = SVAL(*inbuf,0); \ + uint16_t ch = SVAL(*inbuf,0); \ \ for (i=0; from_idx[i].start != 0xffff; i++) { \ if ((from_idx[i].start <= ch) && (from_idx[i].end >= ch)) { \ diff --git a/lib/util/charset/charset_macosxfs.c b/lib/util/charset/charset_macosxfs.c new file mode 100644 index 0000000000..8c2fdc7776 --- /dev/null +++ b/lib/util/charset/charset_macosxfs.c @@ -0,0 +1,604 @@ +/* + Unix SMB/CIFS implementation. + Samba charset module for Mac OS X/Darwin + Copyright (C) Benjamin Riefenstahl 2003 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/* + * modules/charset_macosxfs.c + * + * A Samba charset module to use on Mac OS X/Darwin as the filesystem + * and display encoding. + * + * Actually two implementations are provided here. The default + * implementation is based on the official CFString API. The other is + * based on internal CFString APIs as defined in the OpenDarwin + * source. + */ + +#include "includes.h" + +/* + * Include OS frameworks. These are only needed in this module. + */ +#include + +/* + * See if autoconf has found us the internal headers in some form. + */ +#if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H +# include +# include +# define USE_INTERNAL_API 1 +#elif HAVE_CFSTRINGENCODINGCONVERTER_H +# include +# include +# define USE_INTERNAL_API 1 +#endif + +/* + * Compile time configuration: Do we want debug output? + */ +/* #define DEBUG_STRINGS 1 */ + +/* + * A simple, but efficient memory provider for our buffers. + */ +static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize) +{ + if (newsize > *size) { + *size = newsize + 128; + buffer = SMB_REALLOC(buffer, *size); + } + return buffer; +} + +/* + * While there is a version of OpenDarwin for intel, the usual case is + * big-endian PPC. So we need byte swapping to handle the + * little-endian byte order of the network protocol. We also need an + * additional dynamic buffer to do this work for incoming data blocks, + * because we have to consider the original data as constant. + * + * We abstract the differences away by providing a simple facade with + * these functions/macros: + * + * le_to_native(dst,src,len) + * native_to_le(cp,len) + * set_ucbuffer_with_le(buffer,bufsize,data,size) + * set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve) + */ +#ifdef WORDS_BIGENDIAN + +static inline void swap_bytes (char * dst, const char * src, size_t len) +{ + const char *srcend = src + len; + while (src < srcend) { + dst[0] = src[1]; + dst[1] = src[0]; + dst += 2; + src += 2; + } +} +static inline void swap_bytes_inplace (char * cp, size_t len) +{ + char temp; + char *end = cp + len; + while (cp < end) { + temp = cp[1]; + cp[1] = cp[0]; + cp[0] = temp; + cp += 2; + } +} + +#define le_to_native(dst,src,len) swap_bytes(dst,src,len) +#define native_to_le(cp,len) swap_bytes_inplace(cp,len) +#define set_ucbuffer_with_le(buffer,bufsize,data,size) \ + set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0) + +#else /* ! WORDS_BIGENDIAN */ + +#define le_to_native(dst,src,len) memcpy(dst,src,len) +#define native_to_le(cp,len) /* nothing */ +#define set_ucbuffer_with_le(buffer,bufsize,data,size) \ + (((void)(bufsize)),(UniChar*)(data)) + +#endif + +static inline UniChar *set_ucbuffer_with_le_copy ( + UniChar *buffer, size_t *bufsize, + const void *data, size_t size, size_t reserve) +{ + buffer = resize_buffer(buffer, bufsize, size+reserve); + le_to_native((char*)buffer,data,size); + return buffer; +} + + +/* + * A simple hexdump function for debugging error conditions. + */ +#define debug_out(s) DEBUG(0,(s)) + +#ifdef DEBUG_STRINGS + +static void hexdump( const char * label, const char * s, size_t len ) +{ + size_t restlen = len; + debug_out("<<<<<<<\n"); + debug_out(label); + debug_out("\n"); + while (restlen > 0) { + char line[100]; + size_t i, j; + char * d = line; +#undef sprintf + d += sprintf(d, "%04X ", (unsigned)(len-restlen)); + *d++ = ' '; + for( i = 0; i= 0x7F || !isprint(s[i])) + *d++ = '.'; + else + *d++ = s[i]; + } + *d++ = '\n'; + *d = 0; + restlen -= i; + s += i; + debug_out(line); + } + debug_out(">>>>>>>\n"); +} + +#else /* !DEBUG_STRINGS */ + +#define hexdump(label,s,len) /* nothing */ + +#endif + + +#if !USE_INTERNAL_API + +/* + * An implementation based on documented Mac OS X APIs. + * + * This does a certain amount of memory management, creating and + * manipulating CFString objects. We try to minimize the impact by + * keeping those objects around and re-using them. We also use + * external backing store for the CFStrings where this is possible and + * benficial. + * + * The Unicode normalizations forms available at this level are + * generic, not specifically for the file system. So they may not be + * perfect fits. + */ +static size_t macosxfs_encoding_pull( + void *cd, /* Encoder handle */ + char **inbuf, size_t *inbytesleft, /* Script string */ + char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */ +{ + static const int script_code = kCFStringEncodingUTF8; + static CFMutableStringRef cfstring = NULL; + size_t outsize; + CFRange range; + + (void) cd; /* UNUSED */ + + if (0 == *inbytesleft) { + return 0; + } + + if (NULL == cfstring) { + /* + * A version with an external backing store as in the + * push function should have been more efficient, but + * testing shows, that it is actually slower (!). + * Maybe kCFAllocatorDefault gets shortcut evaluation + * internally, while kCFAllocatorNull doesn't. + */ + cfstring = CFStringCreateMutable(kCFAllocatorDefault,0); + } + + /* + * Three methods of appending to a CFString, choose the most + * efficient. + */ + if (0 == (*inbuf)[*inbytesleft-1]) { + CFStringAppendCString(cfstring, *inbuf, script_code); + } else if (*inbytesleft <= 255) { + Str255 buffer; + buffer[0] = *inbytesleft; + memcpy(buffer+1, *inbuf, buffer[0]); + CFStringAppendPascalString(cfstring, buffer, script_code); + } else { + /* + * We would like to use a fixed buffer and a loop + * here, but than we can't garantee that the input is + * well-formed UTF-8, as we are supposed to do. + */ + static char *buffer = NULL; + static size_t buflen = 0; + buffer = resize_buffer(buffer, &buflen, *inbytesleft+1); + memcpy(buffer, *inbuf, *inbytesleft); + buffer[*inbytesleft] = 0; + CFStringAppendCString(cfstring, *inbuf, script_code); + } + + /* + * Compose characters, using the non-canonical composition + * form. + */ + CFStringNormalize(cfstring, kCFStringNormalizationFormC); + + outsize = CFStringGetLength(cfstring); + range = CFRangeMake(0,outsize); + + if (outsize == 0) { + /* + * HACK: smbd/mangle_hash2.c:is_legal_name() expects + * errors here. That function will always pass 2 + * characters. smbd/open.c:check_for_pipe() cuts a + * patchname to 10 characters blindly. Suppress the + * debug output in those cases. + */ + if(2 != *inbytesleft && 10 != *inbytesleft) { + debug_out("String conversion: " + "An unknown error occurred\n"); + hexdump("UTF8->UTF16LE (old) input", + *inbuf, *inbytesleft); + } + errno = EILSEQ; /* Not sure, but this is what we have + * actually seen. */ + return -1; + } + if (outsize*2 > *outbytesleft) { + CFStringDelete(cfstring, range); + debug_out("String conversion: " + "Output buffer too small\n"); + hexdump("UTF8->UTF16LE (old) input", + *inbuf, *inbytesleft); + errno = E2BIG; + return -1; + } + + CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf); + CFStringDelete(cfstring, range); + + native_to_le(*outbuf, outsize*2); + + /* + * Add a converted null byte, if the CFString conversions + * prevented that until now. + */ + if (0 == (*inbuf)[*inbytesleft-1] && + (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) { + + if ((outsize*2+2) > *outbytesleft) { + debug_out("String conversion: " + "Output buffer too small\n"); + hexdump("UTF8->UTF16LE (old) input", + *inbuf, *inbytesleft); + errno = E2BIG; + return -1; + } + + (*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0; + outsize += 2; + } + + *inbuf += *inbytesleft; + *inbytesleft = 0; + *outbuf += outsize*2; + *outbytesleft -= outsize*2; + + return 0; +} + +static size_t macosxfs_encoding_push( + void *cd, /* Encoder handle */ + char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */ + char **outbuf, size_t *outbytesleft) /* Script string */ +{ + static const int script_code = kCFStringEncodingUTF8; + static CFMutableStringRef cfstring = NULL; + static UniChar *buffer = NULL; + static size_t buflen = 0; + CFIndex outsize, cfsize, charsconverted; + + (void) cd; /* UNUSED */ + + if (0 == *inbytesleft) { + return 0; + } + + /* + * We need a buffer that can hold 4 times the original data, + * because that is the theoretical maximum that decomposition + * can create currently (in Unicode 4.0). + */ + buffer = set_ucbuffer_with_le_copy( + buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft); + + if (NULL == cfstring) { + cfstring = CFStringCreateMutableWithExternalCharactersNoCopy( + kCFAllocatorDefault, + buffer, *inbytesleft/2, buflen/2, + kCFAllocatorNull); + } else { + CFStringSetExternalCharactersNoCopy( + cfstring, + buffer, *inbytesleft/2, buflen/2); + } + + /* + * Decompose characters, using the non-canonical decomposition + * form. + * + * NB: This isn't exactly what HFS+ wants (see note on + * kCFStringEncodingUseHFSPlusCanonical in + * CFStringEncodingConverter.h), but AFAIK it's the best that + * the official API can do. + */ + CFStringNormalize(cfstring, kCFStringNormalizationFormD); + + cfsize = CFStringGetLength(cfstring); + charsconverted = CFStringGetBytes( + cfstring, CFRangeMake(0,cfsize), + script_code, 0, False, + *outbuf, *outbytesleft, &outsize); + + if (0 == charsconverted) { + debug_out("String conversion: " + "Buffer too small or not convertable\n"); + hexdump("UTF16LE->UTF8 (old) input", + *inbuf, *inbytesleft); + errno = EILSEQ; /* Probably more likely. */ + return -1; + } + + /* + * Add a converted null byte, if the CFString conversions + * prevented that until now. + */ + if (0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] && + (0 != (*outbuf)[outsize-1])) { + + if (((size_t)outsize+1) > *outbytesleft) { + debug_out("String conversion: " + "Output buffer too small\n"); + hexdump("UTF16LE->UTF8 (old) input", + *inbuf, *inbytesleft); + errno = E2BIG; + return -1; + } + + (*outbuf)[outsize] = 0; + ++outsize; + } + + *inbuf += *inbytesleft; + *inbytesleft = 0; + *outbuf += outsize; + *outbytesleft -= outsize; + + return 0; +} + +#else /* USE_INTERNAL_API */ + +/* + * An implementation based on internal code as known from the + * OpenDarwin CVS. + * + * This code doesn't need much memory management because it uses + * functions that operate on the raw memory directly. + * + * The push routine here is faster and more compatible with HFS+ than + * the other implementation above. The pull routine is only faster + * for some strings, slightly slower for others. The pull routine + * looses because it has to iterate over the data twice, once to + * decode UTF-8 and than to do the character composition required by + * Windows. + */ +static size_t macosxfs_encoding_pull( + void *cd, /* Encoder handle */ + char **inbuf, size_t *inbytesleft, /* Script string */ + char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */ +{ + static const int script_code = kCFStringEncodingUTF8; + UInt32 srcCharsUsed = 0; + UInt32 dstCharsUsed = 0; + UInt32 result; + uint32_t dstDecomposedUsed = 0; + uint32_t dstPrecomposedUsed = 0; + + (void) cd; /* UNUSED */ + + if (0 == *inbytesleft) { + return 0; + } + + result = CFStringEncodingBytesToUnicode( + script_code, kCFStringEncodingComposeCombinings, + *inbuf, *inbytesleft, &srcCharsUsed, + (UniChar*)*outbuf, *outbytesleft, &dstCharsUsed); + + switch(result) { + case kCFStringEncodingConversionSuccess: + if (*inbytesleft == srcCharsUsed) + break; + else + ; /*fall through*/ + case kCFStringEncodingInsufficientOutputBufferLength: + debug_out("String conversion: " + "Output buffer too small\n"); + hexdump("UTF8->UTF16LE (new) input", + *inbuf, *inbytesleft); + errno = E2BIG; + return -1; + case kCFStringEncodingInvalidInputStream: + /* + * HACK: smbd/mangle_hash2.c:is_legal_name() expects + * errors here. That function will always pass 2 + * characters. smbd/open.c:check_for_pipe() cuts a + * patchname to 10 characters blindly. Suppress the + * debug output in those cases. + */ + if(2 != *inbytesleft && 10 != *inbytesleft) { + debug_out("String conversion: " + "Invalid input sequence\n"); + hexdump("UTF8->UTF16LE (new) input", + *inbuf, *inbytesleft); + } + errno = EILSEQ; + return -1; + case kCFStringEncodingConverterUnavailable: + debug_out("String conversion: " + "Unknown encoding\n"); + hexdump("UTF8->UTF16LE (new) input", + *inbuf, *inbytesleft); + errno = EINVAL; + return -1; + } + + /* + * It doesn't look like CFStringEncodingBytesToUnicode() can + * produce precomposed characters (flags=ComposeCombinings + * doesn't do it), so we need another pass over the data here. + * We can do this in-place, as the string can only get + * shorter. + * + * (Actually in theory there should be an internal + * decomposition and reordering before the actual composition + * step. But we should be able to rely on that we always get + * fully decomposed strings for input, so this can't create + * problems in reality.) + */ + CFUniCharPrecompose( + (const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed, + (UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed); + + native_to_le(*outbuf, dstPrecomposedUsed*2); + + *inbuf += srcCharsUsed; + *inbytesleft -= srcCharsUsed; + *outbuf += dstPrecomposedUsed*2; + *outbytesleft -= dstPrecomposedUsed*2; + + return 0; +} + +static size_t macosxfs_encoding_push( + void *cd, /* Encoder handle */ + char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */ + char **outbuf, size_t *outbytesleft) /* Script string */ +{ + static const int script_code = kCFStringEncodingUTF8; + static UniChar *buffer = NULL; + static size_t buflen = 0; + UInt32 srcCharsUsed=0, dstCharsUsed=0, result; + + (void) cd; /* UNUSED */ + + if (0 == *inbytesleft) { + return 0; + } + + buffer = set_ucbuffer_with_le( + buffer, &buflen, *inbuf, *inbytesleft); + + result = CFStringEncodingUnicodeToBytes( + script_code, kCFStringEncodingUseHFSPlusCanonical, + buffer, *inbytesleft/2, &srcCharsUsed, + *outbuf, *outbytesleft, &dstCharsUsed); + + switch(result) { + case kCFStringEncodingConversionSuccess: + if (*inbytesleft/2 == srcCharsUsed) + break; + else + ; /*fall through*/ + case kCFStringEncodingInsufficientOutputBufferLength: + debug_out("String conversion: " + "Output buffer too small\n"); + hexdump("UTF16LE->UTF8 (new) input", + *inbuf, *inbytesleft); + errno = E2BIG; + return -1; + case kCFStringEncodingInvalidInputStream: + /* + * HACK: smbd/open.c:check_for_pipe():is_legal_name() + * cuts a pathname to 10 characters blindly. Suppress + * the debug output in those cases. + */ + if(10 != *inbytesleft) { + debug_out("String conversion: " + "Invalid input sequence\n"); + hexdump("UTF16LE->UTF8 (new) input", + *inbuf, *inbytesleft); + } + errno = EILSEQ; + return -1; + case kCFStringEncodingConverterUnavailable: + debug_out("String conversion: " + "Unknown encoding\n"); + hexdump("UTF16LE->UTF8 (new) input", + *inbuf, *inbytesleft); + errno = EINVAL; + return -1; + } + + *inbuf += srcCharsUsed*2; + *inbytesleft -= srcCharsUsed*2; + *outbuf += dstCharsUsed; + *outbytesleft -= dstCharsUsed; + + return 0; +} + +#endif /* USE_INTERNAL_API */ + +/* + * For initialization, actually install the encoding as "macosxfs". + */ +static struct charset_functions macosxfs_encoding_functions = { + "MACOSXFS", macosxfs_encoding_pull, macosxfs_encoding_push +}; + +NTSTATUS charset_macosxfs_init(void) +{ + if (!smb_register_charset(&macosxfs_encoding_functions)) { + return NT_STATUS_INTERNAL_ERROR; + } + return NT_STATUS_OK; +} + +/* eof */ diff --git a/lib/util/charset/weird.c b/lib/util/charset/weird.c new file mode 100644 index 0000000000..5db8cdcecd --- /dev/null +++ b/lib/util/charset/weird.c @@ -0,0 +1,134 @@ +/* + Unix SMB/CIFS implementation. + Samba module with developer tools + Copyright (C) Andrew Tridgell 2001 + Copyright (C) Jelmer Vernooij 2002 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "includes.h" + +static struct { + char from; + const char *to; + int len; +} weird_table[] = { + {'q', "^q^", 3}, + {'Q', "^Q^", 3}, + {0, NULL} +}; + +static size_t weird_pull(void *cd, const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + while (*inbytesleft >= 1 && *outbytesleft >= 2) { + int i; + int done = 0; + for (i=0;weird_table[i].from;i++) { + if (strncmp((*inbuf), + weird_table[i].to, + weird_table[i].len) == 0) { + if (*inbytesleft < weird_table[i].len) { + DEBUG(0,("ERROR: truncated weird string\n")); + /* smb_panic("weird_pull"); */ + + } else { + (*outbuf)[0] = weird_table[i].from; + (*outbuf)[1] = 0; + (*inbytesleft) -= weird_table[i].len; + (*outbytesleft) -= 2; + (*inbuf) += weird_table[i].len; + (*outbuf) += 2; + done = 1; + break; + } + } + } + if (done) continue; + (*outbuf)[0] = (*inbuf)[0]; + (*outbuf)[1] = 0; + (*inbytesleft) -= 1; + (*outbytesleft) -= 2; + (*inbuf) += 1; + (*outbuf) += 2; + } + + if (*inbytesleft > 0) { + errno = E2BIG; + return -1; + } + + return 0; +} + +static size_t weird_push(void *cd, const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + int ir_count=0; + + while (*inbytesleft >= 2 && *outbytesleft >= 1) { + int i; + int done=0; + for (i=0;weird_table[i].from;i++) { + if ((*inbuf)[0] == weird_table[i].from && + (*inbuf)[1] == 0) { + if (*outbytesleft < weird_table[i].len) { + DEBUG(0,("No room for weird character\n")); + /* smb_panic("weird_push"); */ + } else { + memcpy(*outbuf, weird_table[i].to, + weird_table[i].len); + (*inbytesleft) -= 2; + (*outbytesleft) -= weird_table[i].len; + (*inbuf) += 2; + (*outbuf) += weird_table[i].len; + done = 1; + break; + } + } + } + if (done) continue; + + (*outbuf)[0] = (*inbuf)[0]; + if ((*inbuf)[1]) ir_count++; + (*inbytesleft) -= 2; + (*outbytesleft) -= 1; + (*inbuf) += 2; + (*outbuf) += 1; + } + + if (*inbytesleft == 1) { + errno = EINVAL; + return -1; + } + + if (*inbytesleft > 1) { + errno = E2BIG; + return -1; + } + + return ir_count; +} + +struct charset_functions weird_functions = {"WEIRD", weird_pull, weird_push}; + +NTSTATUS charset_weird_init(void); +NTSTATUS charset_weird_init(void) +{ + if (!smb_register_charset(&weird_functions)) { + return NT_STATUS_INTERNAL_ERROR; + } + return NT_STATUS_OK; +} diff --git a/lib/util/charset/wscript_build b/lib/util/charset/wscript_build index 7623131146..3b29ace6d3 100644 --- a/lib/util/charset/wscript_build +++ b/lib/util/charset/wscript_build @@ -9,3 +9,36 @@ bld.SAMBA_SUBSYSTEM('CHARSET', source='codepoints.c convert_string.c util_str.c util_unistr_w.c charcnv.c pull_push.c util_unistr.c', deps='DYNCONFIG ICONV_WRAPPER' ) + +bld.SAMBA_MODULE('charset_weird', + subsystem='CHARSET', + source='weird.c', + init_function='', + deps='samba-util', + internal_module=bld.SAMBA3_IS_STATIC_MODULE('charset_weird'), + enabled=bld.SAMBA3_IS_ENABLED_MODULE('charset_weird')) + +bld.SAMBA_MODULE('charset_CP850', + subsystem='CHARSET', + source='CP850.c', + init_function='', + deps='samba-util', + internal_module=bld.SAMBA3_IS_STATIC_MODULE('charset_CP850'), + enabled=bld.SAMBA3_IS_ENABLED_MODULE('charset_CP850')) + +bld.SAMBA_MODULE('charset_CP437', + subsystem='CHARSET', + source='CP437.c', + init_function='', + deps='samba-util', + internal_module=bld.SAMBA3_IS_STATIC_MODULE('charset_CP437'), + enabled=bld.SAMBA3_IS_ENABLED_MODULE('charset_CP437')) + +bld.SAMBA_MODULE('charset_macosxfs', + subsystem='CHARSET', + source='charset_macosxfs.c', + init_function='', + internal_module=bld.SAMBA3_IS_STATIC_MODULE('charset_macosxfs'), + enabled=bld.SAMBA3_IS_ENABLED_MODULE('charset_macosxfs')) + + -- cgit