summaryrefslogtreecommitdiff
path: root/lib/util/charset
diff options
context:
space:
mode:
authorAndrew Bartlett <abartlet@samba.org>2011-06-24 16:26:23 +1000
committerAndrew Bartlett <abartlet@samba.org>2011-06-24 16:26:23 +1000
commit6da26870e0ae5acd6ff49a30ec2f6886b44d095e (patch)
tree850c71039563c16a5d563c47e7ba2ab645baf198 /lib/util/charset
parent6925a799d04c6fa59dd2ddef1f5510f9bb7d17d1 (diff)
parent2610c05b5b95cc7036b3d6dfb894c6cfbdb68483 (diff)
downloadsamba-6da26870e0ae5acd6ff49a30ec2f6886b44d095e.tar.gz
samba-6da26870e0ae5acd6ff49a30ec2f6886b44d095e.tar.bz2
samba-6da26870e0ae5acd6ff49a30ec2f6886b44d095e.zip
Merge 2610c05b5b95cc7036b3d6dfb894c6cfbdb68483 as Samba-4.0alpha16
Diffstat (limited to 'lib/util/charset')
-rw-r--r--lib/util/charset/CP437.c135
-rw-r--r--lib/util/charset/CP850.c121
-rw-r--r--lib/util/charset/charcnv.c135
-rw-r--r--lib/util/charset/charset.h16
-rw-r--r--lib/util/charset/charset_macosxfs.c605
-rw-r--r--lib/util/charset/codepoints.c50
-rw-r--r--lib/util/charset/convert_string.c54
-rw-r--r--lib/util/charset/pull_push.c150
-rw-r--r--lib/util/charset/tests/convert_string.c547
-rw-r--r--lib/util/charset/util_str.c88
-rw-r--r--lib/util/charset/util_unistr.c248
-rw-r--r--lib/util/charset/util_unistr_w.c42
-rw-r--r--lib/util/charset/weird.c134
-rw-r--r--lib/util/charset/wscript_build50
14 files changed, 1821 insertions, 554 deletions
diff --git a/lib/util/charset/CP437.c b/lib/util/charset/CP437.c
new file mode 100644
index 0000000000..1e478d678f
--- /dev/null
+++ b/lib/util/charset/CP437.c
@@ -0,0 +1,135 @@
+/*
+ * Conversion table for CP437 charset also known as IBM437
+ *
+ * Copyright (C) Alexander Bokovoy 2003
+ *
+ * Conversion tables are generated using GNU libc 2.2.5's
+ * localedata/charmaps/IBM437 table and source/script/gen-8bit-gap.sh script
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "includes.h"
+
+static const uint16_t to_ucs2[256] = {
+ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+ 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
+ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
+ 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
+ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+ 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+ 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
+ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+ 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
+ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+ 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
+ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+ 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+ 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
+ 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
+ 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
+ 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
+ 0x00FF, 0x00D6, 0x00DC, 0x00A2, 0x00A3, 0x00A5, 0x20A7, 0x0192,
+ 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
+ 0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
+ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
+ 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
+ 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F,
+ 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
+ 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
+ 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
+ 0x03B1, 0x00DF, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4,
+ 0x03A6, 0x0398, 0x03A9, 0x03B4, 0x221E, 0x03C6, 0x03B5, 0x2229,
+ 0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248,
+ 0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x25A0, 0x00A0,
+};
+
+static const struct charset_gap_table from_idx[] = { /* start, end, idx */
+ { 0x0000, 0x007f, 0 },
+ { 0x00a0, 0x00c9, -32 },
+ { 0x00d1, 0x00ff, -39 },
+ { 0x0192, 0x0192, -185 },
+ { 0x0393, 0x0398, -697 },
+ { 0x03a3, 0x03a9, -707 },
+ { 0x03b1, 0x03b5, -714 },
+ { 0x03c0, 0x03c6, -724 },
+ { 0x207f, 0x207f, -8076 },
+ { 0x20a7, 0x20a7, -8115 },
+ { 0x2219, 0x221e, -8484 },
+ { 0x2229, 0x2229, -8494 },
+ { 0x2248, 0x2248, -8524 },
+ { 0x2261, 0x2265, -8548 },
+ { 0x2310, 0x2310, -8718 },
+ { 0x2320, 0x2321, -8733 },
+ { 0x2500, 0x2502, -9211 },
+ { 0x250c, 0x251c, -9220 },
+ { 0x2524, 0x2524, -9227 },
+ { 0x252c, 0x252c, -9234 },
+ { 0x2534, 0x2534, -9241 },
+ { 0x253c, 0x253c, -9248 },
+ { 0x2550, 0x256c, -9267 },
+ { 0x2580, 0x2593, -9286 },
+ { 0x25a0, 0x25a0, -9298 },
+ { 0xffff, 0xffff, 0 } /* start == 0xffff ends the table */
+};
+
+static const unsigned char from_ucs2[] = {
+
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+ 0xff, 0xad, 0x9b, 0x9c, 0x00, 0x9d, 0x00, 0x00,
+ 0x00, 0x00, 0xa6, 0xae, 0xaa, 0x00, 0x00, 0x00,
+ 0xf8, 0xf1, 0xfd, 0x00, 0x00, 0xe6, 0x00, 0xfa,
+ 0x00, 0x00, 0xa7, 0xaf, 0xac, 0xab, 0x00, 0xa8,
+ 0x00, 0x00, 0x00, 0x00, 0x8e, 0x8f, 0x92, 0x80,
+ 0x00, 0x90, 0xa5, 0x00, 0x00, 0x00, 0x00, 0x99,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x9a, 0x00, 0x00,
+ 0xe1, 0x85, 0xa0, 0x83, 0x00, 0x84, 0x86, 0x91,
+ 0x87, 0x8a, 0x82, 0x88, 0x89, 0x8d, 0xa1, 0x8c,
+ 0x8b, 0x00, 0xa4, 0x95, 0xa2, 0x93, 0x00, 0x94,
+ 0xf6, 0x00, 0x97, 0xa3, 0x96, 0x81, 0x00, 0x00,
+ 0x98, 0x9f, 0xe2, 0x00, 0x00, 0x00, 0x00, 0xe9,
+ 0xe4, 0x00, 0x00, 0xe8, 0x00, 0x00, 0xea, 0xe0,
+ 0x00, 0x00, 0xeb, 0xee, 0xe3, 0x00, 0x00, 0xe5,
+ 0xe7, 0x00, 0xed, 0xfc, 0x9e, 0xf9, 0xfb, 0x00,
+ 0x00, 0x00, 0xec, 0xef, 0xf7, 0xf0, 0x00, 0x00,
+ 0xf3, 0xf2, 0xa9, 0xf4, 0xf5, 0xc4, 0x00, 0xb3,
+ 0xda, 0x00, 0x00, 0x00, 0xbf, 0x00, 0x00, 0x00,
+ 0xc0, 0x00, 0x00, 0x00, 0xd9, 0x00, 0x00, 0x00,
+ 0xc3, 0xb4, 0xc2, 0xc1, 0xc5, 0xcd, 0xba, 0xd5,
+ 0xd6, 0xc9, 0xb8, 0xb7, 0xbb, 0xd4, 0xd3, 0xc8,
+ 0xbe, 0xbd, 0xbc, 0xc6, 0xc7, 0xcc, 0xb5, 0xb6,
+ 0xb9, 0xd1, 0xd2, 0xcb, 0xcf, 0xd0, 0xca, 0xd8,
+ 0xd7, 0xce, 0xdf, 0x00, 0x00, 0x00, 0xdc, 0x00,
+ 0x00, 0x00, 0xdb, 0x00, 0x00, 0x00, 0xdd, 0x00,
+ 0x00, 0x00, 0xde, 0xb0, 0xb1, 0xb2, 0xfe,
+};
+
+SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP(CP437)
diff --git a/lib/util/charset/CP850.c b/lib/util/charset/CP850.c
new file mode 100644
index 0000000000..87a76f4cdf
--- /dev/null
+++ b/lib/util/charset/CP850.c
@@ -0,0 +1,121 @@
+/*
+ * Conversion table for CP850 charset also known as IBM850.
+ *
+ * Copyright (C) Alexander Bokovoy 2003
+ *
+ * Conversion tables are generated using GNU libc 2.2.5's
+ * localedata/charmaps/IBM850 table and source/script/gen-8bit-gap.sh script
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "includes.h"
+
+static const uint16_t to_ucs2[256] = {
+ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+ 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
+ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
+ 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
+ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+ 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+ 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
+ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+ 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
+ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+ 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
+ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+ 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+ 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
+ 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
+ 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
+ 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
+ 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
+ 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
+ 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
+ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
+ 0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
+ 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
+ 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
+ 0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
+ 0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
+ 0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
+ 0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
+ 0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
+ 0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0,
+};
+
+static const struct charset_gap_table from_idx[] = {
+ /* start, end, idx */
+ { 0x0000, 0x007f, 0 },
+ { 0x00a0, 0x00ff, -32 },
+ { 0x0131, 0x0131, -81 },
+ { 0x0192, 0x0192, -177 },
+ { 0x2017, 0x2017, -7989 },
+ { 0x2500, 0x2502, -9245 },
+ { 0x250c, 0x251c, -9254 },
+ { 0x2524, 0x2524, -9261 },
+ { 0x252c, 0x252c, -9268 },
+ { 0x2534, 0x2534, -9275 },
+ { 0x253c, 0x253c, -9282 },
+ { 0x2550, 0x256c, -9301 },
+ { 0x2580, 0x2588, -9320 },
+ { 0x2591, 0x2593, -9328 },
+ { 0x25a0, 0x25a0, -9340 },
+ { 0xffff, 0xffff, 0 }
+};
+static const unsigned char from_ucs2[] = {
+
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+ 0xff, 0xad, 0xbd, 0x9c, 0xcf, 0xbe, 0xdd, 0xf5,
+ 0xf9, 0xb8, 0xa6, 0xae, 0xaa, 0xf0, 0xa9, 0xee,
+ 0xf8, 0xf1, 0xfd, 0xfc, 0xef, 0xe6, 0xf4, 0xfa,
+ 0xf7, 0xfb, 0xa7, 0xaf, 0xac, 0xab, 0xf3, 0xa8,
+ 0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80,
+ 0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8,
+ 0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0x9e,
+ 0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0xe1,
+ 0x85, 0xa0, 0x83, 0xc6, 0x84, 0x86, 0x91, 0x87,
+ 0x8a, 0x82, 0x88, 0x89, 0x8d, 0xa1, 0x8c, 0x8b,
+ 0xd0, 0xa4, 0x95, 0xa2, 0x93, 0xe4, 0x94, 0xf6,
+ 0x9b, 0x97, 0xa3, 0x96, 0x81, 0xec, 0xe7, 0x98,
+ 0xd5, 0x9f, 0xf2, 0xc4, 0x00, 0xb3, 0xda, 0x00,
+ 0x00, 0x00, 0xbf, 0x00, 0x00, 0x00, 0xc0, 0x00,
+ 0x00, 0x00, 0xd9, 0x00, 0x00, 0x00, 0xc3, 0xb4,
+ 0xc2, 0xc1, 0xc5, 0xcd, 0xba, 0x00, 0x00, 0xc9,
+ 0x00, 0x00, 0xbb, 0x00, 0x00, 0xc8, 0x00, 0x00,
+ 0xbc, 0x00, 0x00, 0xcc, 0x00, 0x00, 0xb9, 0x00,
+ 0x00, 0xcb, 0x00, 0x00, 0xca, 0x00, 0x00, 0xce,
+ 0xdf, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00,
+ 0xdb, 0xb0, 0xb1, 0xb2, 0xfe,
+};
+
+SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP(CP850)
+
diff --git a/lib/util/charset/charcnv.c b/lib/util/charset/charcnv.c
index 998bb08fd7..076795a0b2 100644
--- a/lib/util/charset/charcnv.c
+++ b/lib/util/charset/charcnv.c
@@ -113,138 +113,3 @@ convert:
return destlen;
}
-
-/**
- * Convert string from one encoding to another, making error checking etc
- *
- * @param src pointer to source string (multibyte or singlebyte)
- * @param srclen length of the source string in bytes
- * @param dest pointer to destination string (multibyte or singlebyte)
- * @param destlen maximal length allowed for string
- * @returns the number of bytes occupied in the destination
- * on error, returns -1, and sets errno
- **/
-_PUBLIC_ bool convert_string_error_handle(struct smb_iconv_handle *ic,
- charset_t from, charset_t to,
- void const *src, size_t srclen,
- void *dest, size_t destlen,
- size_t *converted_size)
-{
- size_t i_len, o_len;
- ssize_t retval;
- const char* inbuf = (const char*)src;
- char* outbuf = (char*)dest;
- smb_iconv_t descriptor;
-
- if (srclen == (size_t)-1)
- srclen = strlen(inbuf)+1;
-
- descriptor = get_conv_handle(ic, from, to);
- if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
- if (converted_size) {
- *converted_size = 0;
- }
- errno = EINVAL;
- return -1;
- }
-
- i_len=srclen;
- o_len=destlen;
-
- retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
-
- if (converted_size != NULL)
- *converted_size = destlen-o_len;
- return (retval != (ssize_t)-1);
-}
-
-
-/**
- * Convert string from one encoding to another, making error checking etc
- *
- * @param src pointer to source string (multibyte or singlebyte)
- * @param srclen length of the source string in bytes
- * @param dest pointer to destination string (multibyte or singlebyte)
- * @param destlen maximal length allowed for string
- * @returns the number of bytes occupied in the destination
- **/
-_PUBLIC_ bool convert_string_handle(struct smb_iconv_handle *ic,
- charset_t from, charset_t to,
- void const *src, size_t srclen,
- void *dest, size_t destlen, size_t *converted_size)
-{
- bool retval;
-
- retval = convert_string_error_handle(ic, from, to, src, srclen, dest, destlen, converted_size);
- if(retval==false) {
- const char *reason;
- switch(errno) {
- case EINVAL:
- reason="Incomplete multibyte sequence";
- return false;
- case E2BIG:
- reason="No more room";
- if (from == CH_UNIX) {
- DEBUG(0,("E2BIG: convert_string_handle(%s,%s): srclen=%d destlen=%d - '%s'\n",
- charset_name(ic, from), charset_name(ic, to),
- (int)srclen, (int)destlen,
- (const char *)src));
- } else {
- DEBUG(0,("E2BIG: convert_string_handle(%s,%s): srclen=%d destlen=%d\n",
- charset_name(ic, from), charset_name(ic, to),
- (int)srclen, (int)destlen));
- }
- return false;
- case EILSEQ:
- reason="Illegal multibyte sequence";
- return false;
- default:
- return false;
- }
- }
- return true;
-}
-
-/**
- * Convert between character sets, allocating a new buffer using talloc for the result.
- *
- * @param srclen length of source buffer.
- * @param dest always set at least to NULL
- * @note -1 is not accepted for srclen.
- *
- * @returns Size in bytes of the converted string; or -1 in case of error.
- **/
-
-_PUBLIC_ bool convert_string_talloc_handle(TALLOC_CTX *ctx,
- struct smb_iconv_handle *ic,
- charset_t from, charset_t to,
- void const *src, size_t srclen,
- void *dst, size_t *converted_size)
-{
- void **dest = (void **)dst;
- smb_iconv_t descriptor;
- ssize_t ret;
-
- *dest = NULL;
-
- if (src == NULL || srclen == (size_t)-1 || srclen == 0)
- return false;
-
- descriptor = get_conv_handle(ic, from, to);
-
- if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
- /* conversion not supported, return -1*/
- DEBUG(3, ("convert_string_talloc_handle: conversion from %s to %s not supported!\n",
- charset_name(ic, from),
- charset_name(ic, to)));
- return false;
- }
-
- ret = iconv_talloc(ctx, descriptor, src, srclen, dest);
- if (ret == -1)
- return false;
- if (converted_size != NULL)
- *converted_size = ret;
- return true;
-}
-
diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h
index 1078035592..b36c461003 100644
--- a/lib/util/charset/charset.h
+++ b/lib/util/charset/charset.h
@@ -28,7 +28,7 @@
#include <talloc.h>
/* this defines the charset types used in samba */
-typedef enum {CH_UTF16LE=0, CH_UTF16=0, CH_UNIX, CH_DISPLAY, CH_DOS, CH_UTF8, CH_UTF16BE, CH_UTF16MUNGED} charset_t;
+typedef enum {CH_UTF16LE=0, CH_UTF16=0, CH_UNIX, CH_DOS, CH_UTF8, CH_UTF16BE, CH_UTF16MUNGED} charset_t;
#define NUM_CHARSETS 7
@@ -105,11 +105,6 @@ typedef struct smb_iconv_s {
struct loadparm_context;
struct smb_iconv_handle;
-/* replace some string functions with multi-byte
- versions */
-#define strlower(s) strlower_m(s)
-#define strupper(s) strupper_m(s)
-
char *strchr_m(const char *s, char c);
/**
* Calculate the number of units (8 or 16-bit, depending on the
@@ -137,8 +132,6 @@ int strcasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
const char *s1, const char *s2);
int strcasecmp_m(const char *s1, const char *s2);
size_t count_chars_m(const char *s, char c);
-void strupper_m(char *s);
-void strlower_m(char *s);
char *strupper_talloc(TALLOC_CTX *ctx, const char *src);
char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src);
char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle,
@@ -155,6 +148,7 @@ bool strhasupper_handle(struct smb_iconv_handle *ic,
const char *string);
char *strrchr_m(const char *s, char c);
char *strchr_m(const char *s, char c);
+char *strstr_m(const char *src, const char *findstr);
bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size);
@@ -188,8 +182,7 @@ extern struct smb_iconv_handle *global_iconv_handle;
struct smb_iconv_handle *get_iconv_handle(void);
struct smb_iconv_handle *get_iconv_testing_handle(TALLOC_CTX *mem_ctx,
const char *dos_charset,
- const char *unix_charset,
- const char *display_charset);
+ const char *unix_charset);
smb_iconv_t get_conv_handle(struct smb_iconv_handle *ic,
charset_t from, charset_t to);
const char *charset_name(struct smb_iconv_handle *ic, charset_t ch);
@@ -218,7 +211,6 @@ int codepoint_cmpi(codepoint_t c1, codepoint_t c2);
struct smb_iconv_handle *smb_iconv_handle_reinit(TALLOC_CTX *mem_ctx,
const char *dos_charset,
const char *unix_charset,
- const char *display_charset,
bool native_iconv,
struct smb_iconv_handle *old_ic);
@@ -285,7 +277,7 @@ static size_t CHARSETNAME ## _push(void *cd, const char **inbuf, size_t *inbytes
int i; \
int done = 0; \
\
- uint16 ch = SVAL(*inbuf,0); \
+ uint16_t ch = SVAL(*inbuf,0); \
\
for (i=0; from_idx[i].start != 0xffff; i++) { \
if ((from_idx[i].start <= ch) && (from_idx[i].end >= ch)) { \
diff --git a/lib/util/charset/charset_macosxfs.c b/lib/util/charset/charset_macosxfs.c
new file mode 100644
index 0000000000..4d2ba5b6ff
--- /dev/null
+++ b/lib/util/charset/charset_macosxfs.c
@@ -0,0 +1,605 @@
+/*
+ Unix SMB/CIFS implementation.
+ Samba charset module for Mac OS X/Darwin
+ Copyright (C) Benjamin Riefenstahl 2003
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ * modules/charset_macosxfs.c
+ *
+ * A Samba charset module to use on Mac OS X/Darwin as the filesystem
+ * and display encoding.
+ *
+ * Actually two implementations are provided here. The default
+ * implementation is based on the official CFString API. The other is
+ * based on internal CFString APIs as defined in the OpenDarwin
+ * source.
+ */
+
+#include "includes.h"
+#undef realloc
+
+/*
+ * Include OS frameworks. These are only needed in this module.
+ */
+#include <CoreFoundation/CFString.h>
+
+/*
+ * See if autoconf has found us the internal headers in some form.
+ */
+#if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H
+# include <CoreFoundation/CFStringEncodingConverter.h>
+# include <CoreFoundation/CFUnicodePrecomposition.h>
+# define USE_INTERNAL_API 1
+#elif HAVE_CFSTRINGENCODINGCONVERTER_H
+# include <CFStringEncodingConverter.h>
+# include <CFUnicodePrecomposition.h>
+# define USE_INTERNAL_API 1
+#endif
+
+/*
+ * Compile time configuration: Do we want debug output?
+ */
+/* #define DEBUG_STRINGS 1 */
+
+/*
+ * A simple, grow-only memory provider for our buffers.
+ *
+ * buffer:  current allocation, or NULL if there is none yet
+ * size:    in/out, capacity of buffer in bytes
+ * newsize: number of bytes the caller needs
+ *
+ * Returns buffer unchanged when it already holds newsize bytes.
+ * Otherwise the block is reallocated with 128 bytes of headroom, so
+ * slowly growing requests do not reallocate every time.
+ *
+ * If the request cannot be satisfied the old block is released, *size
+ * is reset to 0 and NULL is returned.  Callers overwrite their pointer
+ * with the return value, so this neither leaks the old block nor
+ * leaves *size claiming a capacity that does not exist.
+ */
+static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
+{
+	if (newsize > *size) {
+		size_t wanted = newsize + 128;
+		void *grown = NULL;
+
+		/* wanted < newsize means the headroom wrapped around */
+		if (wanted > newsize) {
+			grown = realloc(buffer, wanted);
+		}
+		if (grown == NULL) {
+			free(buffer);
+			*size = 0;
+			return NULL;
+		}
+		buffer = grown;
+		*size = wanted;
+	}
+	return buffer;
+}
+
+/*
+ * While there is a version of OpenDarwin for intel, the usual case is
+ * big-endian PPC. So we need byte swapping to handle the
+ * little-endian byte order of the network protocol. We also need an
+ * additional dynamic buffer to do this work for incoming data blocks,
+ * because we have to consider the original data as constant.
+ *
+ * We abstract the differences away by providing a simple facade with
+ * these functions/macros:
+ *
+ * le_to_native(dst,src,len)
+ * native_to_le(cp,len)
+ * set_ucbuffer_with_le(buffer,bufsize,data,size)
+ * set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
+ */
+#ifdef WORDS_BIGENDIAN
+
+/*
+ * Copy 16-bit units from src to dst, exchanging the two bytes of each
+ * unit.  len is a byte count.  src and dst must not overlap.
+ *
+ * Only complete units are processed: with an odd len the trailing
+ * byte is neither read nor written, so neither buffer is overrun.
+ */
+static inline void swap_bytes (char * dst, const char * src, size_t len)
+{
+	/* Round down to whole units so that src[1]/dst[1] stay in bounds. */
+	const char *srcend = src + (len - (len % 2));
+	while (src < srcend) {
+		dst[0] = src[1];
+		dst[1] = src[0];
+		dst += 2;
+		src += 2;
+	}
+}
+/*
+ * Exchange the two bytes of every 16-bit unit in the len bytes at cp.
+ *
+ * Only complete units are processed: with an odd len the trailing
+ * byte is left untouched, so the buffer is never overrun.
+ */
+static inline void swap_bytes_inplace (char * cp, size_t len)
+{
+	char temp;
+	/* Round down to whole units so that cp[1] stays in bounds. */
+	char *end = cp + (len - (len % 2));
+	while (cp < end) {
+		temp = cp[1];
+		cp[1] = cp[0];
+		cp[0] = temp;
+		cp += 2;
+	}
+}
+
+#define le_to_native(dst,src,len) swap_bytes(dst,src,len)
+#define native_to_le(cp,len) swap_bytes_inplace(cp,len)
+#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
+ set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0)
+
+#else /* ! WORDS_BIGENDIAN */
+
+#define le_to_native(dst,src,len) memcpy(dst,src,len)
+#define native_to_le(cp,len) /* nothing */
+#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
+ (((void)(bufsize)),(UniChar*)(data))
+
+#endif
+
+/*
+ * Make buffer hold a native-byte-order copy of the little-endian
+ * UTF-16 data.
+ *
+ * buffer/bufsize: reusable allocation and its capacity in bytes; it is
+ *                 grown through resize_buffer() to size+reserve bytes
+ * data/size:      little-endian input and its length in bytes
+ * reserve:        extra bytes to keep available behind the copy
+ *
+ * Returns the (possibly moved) buffer, or NULL if it could not be
+ * grown; nothing is copied in that case.
+ */
+static inline UniChar *set_ucbuffer_with_le_copy (
+	UniChar *buffer, size_t *bufsize,
+	const void *data, size_t size, size_t reserve)
+{
+	buffer = resize_buffer(buffer, bufsize, size+reserve);
+	if (buffer == NULL) {
+		/* Allocation failed: do not copy through a NULL pointer. */
+		return NULL;
+	}
+	le_to_native((char*)buffer,data,size);
+	return buffer;
+}
+
+
+/*
+ * A simple hexdump function for debugging error conditions.
+ */
+#define debug_out(s) DEBUG(0,(s))
+
+#ifdef DEBUG_STRINGS
+
+/*
+ * Write a labelled hex/ASCII dump of the len bytes at s to the debug
+ * log, 16 bytes per line, framed by "<<<<<<<" and ">>>>>>>" lines.
+ *
+ * Every line holds the offset of its first byte, two groups of up to
+ * eight hex bytes and the same bytes as text; bytes outside printable
+ * ASCII are shown as '.'.
+ */
+static void hexdump( const char * label, const char * s, size_t len )
+{
+	size_t restlen = len;
+	debug_out("<<<<<<<\n");
+	/*
+	 * The label and the dump lines are data, not format strings: log
+	 * them through "%s" so that a '%' in them is printed literally.
+	 */
+	DEBUG(0,("%s", label));
+	debug_out("\n");
+	while (restlen > 0) {
+		char line[100];
+		size_t i, j;
+		char * d = line;
+#undef sprintf
+		d += sprintf(d, "%04X ", (unsigned)(len-restlen));
+		*d++ = ' ';
+		for( i = 0; i<restlen && i<8; ++i ) {
+			d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF);
+		}
+		/* Pad absent bytes with the width of one "%02X " cell. */
+		for( j = i; j<8; ++j ) {
+			d += sprintf(d, "   ");
+		}
+		*d++ = ' ';
+		for( i = 8; i<restlen && i<16; ++i ) {
+			d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF);
+		}
+		for( j = i; j<16; ++j ) {
+			d += sprintf(d, "   ");
+		}
+		*d++ = ' ';
+		/* After this loop i is the number of bytes on this line. */
+		for( i = 0; i<restlen && i<16; ++i ) {
+			if(s[i] < ' ' || s[i] >= 0x7F || !isprint(s[i]))
+				*d++ = '.';
+			else
+				*d++ = s[i];
+		}
+		*d++ = '\n';
+		*d = 0;
+		restlen -= i;
+		s += i;
+		DEBUG(0,("%s", line));
+	}
+	debug_out(">>>>>>>\n");
+}
+
+#else /* !DEBUG_STRINGS */
+
+#define hexdump(label,s,len) /* nothing */
+
+#endif
+
+
+#if !USE_INTERNAL_API
+
+/*
+ * An implementation based on documented Mac OS X APIs.
+ *
+ * This does a certain amount of memory management, creating and
+ * manipulating CFString objects. We try to minimize the impact by
+ * keeping those objects around and re-using them. We also use
+ * external backing store for the CFStrings where this is possible and
+ * beneficial.
+ *
+ * The Unicode normalization forms available at this level are
+ * generic, not specifically for the file system. So they may not be
+ * perfect fits.
+ */
+
+/*
+ * Convert a UTF-8 ("script") string to composed UTF-16-LE.
+ *
+ * The input is always converted as a whole.  On success 0 is returned,
+ * *inbuf/*inbytesleft are advanced past all of the input and
+ * *outbuf/*outbytesleft past the bytes written.  If the input ends in
+ * a NUL byte, the output ends in a NUL UTF-16 unit.
+ *
+ * On failure (size_t)-1 is returned, the four cursors are left
+ * unchanged and errno is EILSEQ (nothing could be converted), E2BIG
+ * (output buffer too small) or ENOMEM (allocation failed).
+ *
+ * The work string and the copy buffer are function-local statics that
+ * are kept and re-used between calls.
+ */
+static size_t macosxfs_encoding_pull(
+	void *cd,				/* Encoder handle */
+	char **inbuf, size_t *inbytesleft,	/* Script string */
+	char **outbuf, size_t *outbytesleft)	/* UTF-16-LE string */
+{
+	static const int script_code = kCFStringEncodingUTF8;
+	static CFMutableStringRef cfstring = NULL;
+	size_t outsize;		/* length of the result in UniChars */
+	CFRange range;
+
+	(void) cd; /* UNUSED */
+
+	if (0 == *inbytesleft) {
+		return 0;
+	}
+
+	if (NULL == cfstring) {
+		/*
+		 * A version with an external backing store as in the
+		 * push function should have been more efficient, but
+		 * testing shows, that it is actually slower (!).
+		 * Maybe kCFAllocatorDefault gets shortcut evaluation
+		 * internally, while kCFAllocatorNull doesn't.
+		 */
+		cfstring = CFStringCreateMutable(kCFAllocatorDefault,0);
+		if (NULL == cfstring) {
+			errno = ENOMEM;
+			return -1;
+		}
+	}
+
+	/*
+	 * Three methods of appending to a CFString, choose the most
+	 * efficient.
+	 */
+	if (0 == (*inbuf)[*inbytesleft-1]) {
+		CFStringAppendCString(cfstring, *inbuf, script_code);
+	} else if (*inbytesleft <= 255) {
+		Str255 buffer;
+		buffer[0] = *inbytesleft;
+		memcpy(buffer+1, *inbuf, buffer[0]);
+		CFStringAppendPascalString(cfstring, buffer, script_code);
+	} else {
+		/*
+		 * We would like to use a fixed buffer and a loop
+		 * here, but then we can't guarantee that the input is
+		 * well-formed UTF-8, as we are supposed to do.
+		 */
+		static char *buffer = NULL;
+		static size_t buflen = 0;
+		buffer = resize_buffer(buffer, &buflen, *inbytesleft+1);
+		if (NULL == buffer) {
+			/* Nothing is allocated any more; retry next time. */
+			buflen = 0;
+			errno = ENOMEM;
+			return -1;
+		}
+		memcpy(buffer, *inbuf, *inbytesleft);
+		buffer[*inbytesleft] = 0;
+		/*
+		 * Append the terminated copy: *inbuf has no NUL at its
+		 * end, so reading it as a C string would run past the
+		 * input.
+		 */
+		CFStringAppendCString(cfstring, buffer, script_code);
+	}
+
+	/*
+	 * Compose characters, using canonical composition
+	 * (normalization form C).
+	 */
+	CFStringNormalize(cfstring, kCFStringNormalizationFormC);
+
+	outsize = CFStringGetLength(cfstring);
+	range = CFRangeMake(0,outsize);
+
+	if (outsize == 0) {
+		/*
+		 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
+		 * errors here. That function will always pass 2
+		 * characters. smbd/open.c:check_for_pipe() cuts a
+		 * pathname to 10 characters blindly. Suppress the
+		 * debug output in those cases.
+		 */
+		if(2 != *inbytesleft && 10 != *inbytesleft) {
+			debug_out("String conversion: "
+				  "An unknown error occurred\n");
+			hexdump("UTF8->UTF16LE (old) input",
+				*inbuf, *inbytesleft);
+		}
+		errno = EILSEQ; /* Not sure, but this is what we have
+				 * actually seen. */
+		return -1;
+	}
+	if (outsize*2 > *outbytesleft) {
+		/* Empty the work string so the next call starts clean. */
+		CFStringDelete(cfstring, range);
+		debug_out("String conversion: "
+			  "Output buffer too small\n");
+		hexdump("UTF8->UTF16LE (old) input",
+			*inbuf, *inbytesleft);
+		errno = E2BIG;
+		return -1;
+	}
+
+	CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf);
+	CFStringDelete(cfstring, range);
+
+	native_to_le(*outbuf, outsize*2);
+
+	/*
+	 * Add a converted null byte, if the CFString conversions
+	 * prevented that until now.
+	 */
+	if (0 == (*inbuf)[*inbytesleft-1] &&
+	    (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) {
+
+		if ((outsize*2+2) > *outbytesleft) {
+			debug_out("String conversion: "
+				  "Output buffer too small\n");
+			hexdump("UTF8->UTF16LE (old) input",
+				*inbuf, *inbytesleft);
+			errno = E2BIG;
+			return -1;
+		}
+
+		(*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0;
+		/* outsize counts UniChars: the terminator is one of them. */
+		outsize += 1;
+	}
+
+	*inbuf += *inbytesleft;
+	*inbytesleft = 0;
+	*outbuf += outsize*2;
+	*outbytesleft -= outsize*2;
+
+	return 0;
+}
+
+/*
+ * Convert a UTF-16-LE string to decomposed UTF-8 ("script") bytes.
+ *
+ * The input is always converted as a whole.  On success 0 is returned,
+ * *inbuf/*inbytesleft are advanced past all of the input and
+ * *outbuf/*outbytesleft past the bytes written.  If the input ends in
+ * a NUL UTF-16 unit, the output ends in a NUL byte.
+ *
+ * On failure (size_t)-1 is returned, the four cursors are left
+ * unchanged and errno is EILSEQ (nothing could be converted), E2BIG
+ * (no room for the terminating NUL) or ENOMEM (allocation failed).
+ *
+ * The work string and its backing buffer are function-local statics
+ * that are kept and re-used between calls.
+ */
+static size_t macosxfs_encoding_push(
+	void *cd,				/* Encoder handle */
+	char **inbuf, size_t *inbytesleft,	/* UTF-16-LE string */
+	char **outbuf, size_t *outbytesleft)	/* Script string */
+{
+	static const int script_code = kCFStringEncodingUTF8;
+	static CFMutableStringRef cfstring = NULL;
+	static UniChar *buffer = NULL;
+	static size_t buflen = 0;
+	CFIndex outsize = 0, cfsize, charsconverted;
+
+	(void) cd; /* UNUSED */
+
+	if (0 == *inbytesleft) {
+		return 0;
+	}
+
+	/*
+	 * We need a buffer that can hold 4 times the original data,
+	 * because that is the theoretical maximum that decomposition
+	 * can create currently (in Unicode 4.0).
+	 */
+	buffer = set_ucbuffer_with_le_copy(
+		buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft);
+	if (NULL == buffer) {
+		/* Nothing is allocated any more; retry next time. */
+		buflen = 0;
+		errno = ENOMEM;
+		return -1;
+	}
+
+	if (NULL == cfstring) {
+		cfstring = CFStringCreateMutableWithExternalCharactersNoCopy(
+			kCFAllocatorDefault,
+			buffer, *inbytesleft/2, buflen/2,
+			kCFAllocatorNull);
+		if (NULL == cfstring) {
+			errno = ENOMEM;
+			return -1;
+		}
+	} else {
+		CFStringSetExternalCharactersNoCopy(
+			cfstring,
+			buffer, *inbytesleft/2, buflen/2);
+	}
+
+	/*
+	 * Decompose characters, using canonical decomposition
+	 * (normalization form D).
+	 *
+	 * NB: This isn't exactly what HFS+ wants (see note on
+	 * kCFStringEncodingUseHFSPlusCanonical in
+	 * CFStringEncodingConverter.h), but AFAIK it's the best that
+	 * the official API can do.
+	 */
+	CFStringNormalize(cfstring, kCFStringNormalizationFormD);
+
+	cfsize = CFStringGetLength(cfstring);
+	charsconverted = CFStringGetBytes(
+		cfstring, CFRangeMake(0,cfsize),
+		script_code, 0, false,
+		*outbuf, *outbytesleft, &outsize);
+
+	/*
+	 * NOTE(review): only a completely failed conversion is detected
+	 * here.  0 < charsconverted < cfsize is still reported as
+	 * success although the output is shorter than the input asked
+	 * for - confirm whether that should become an error.
+	 */
+	if (0 == charsconverted) {
+		debug_out("String conversion: "
+			  "Buffer too small or not convertable\n");
+		hexdump("UTF16LE->UTF8 (old) input",
+			*inbuf, *inbytesleft);
+		errno = EILSEQ; /* Probably more likely. */
+		return -1;
+	}
+
+	/*
+	 * Add a converted null byte, if the CFString conversions
+	 * prevented that until now.  The length/size guards keep the
+	 * probes for the terminator inside the two buffers.
+	 */
+	if (*inbytesleft >= 2 && outsize > 0 &&
+	    0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] &&
+	    (0 != (*outbuf)[outsize-1])) {
+
+		if (((size_t)outsize+1) > *outbytesleft) {
+			debug_out("String conversion: "
+				  "Output buffer too small\n");
+			hexdump("UTF16LE->UTF8 (old) input",
+				*inbuf, *inbytesleft);
+			errno = E2BIG;
+			return -1;
+		}
+
+		(*outbuf)[outsize] = 0;
+		++outsize;
+	}
+
+	*inbuf += *inbytesleft;
+	*inbytesleft = 0;
+	*outbuf += outsize;
+	*outbytesleft -= outsize;
+
+	return 0;
+}
+
+#else /* USE_INTERNAL_API */
+
+/*
+ * An implementation based on internal code as known from the
+ * OpenDarwin CVS.
+ *
+ * This code doesn't need much memory management because it uses
+ * functions that operate on the raw memory directly.
+ *
+ * The push routine here is faster and more compatible with HFS+ than
+ * the other implementation above. The pull routine is only faster
+ * for some strings, slightly slower for others. The pull routine
+ * loses because it has to iterate over the data twice, once to
+ * decode UTF-8 and then to do the character composition required by
+ * Windows.
+ */
+/*
+ * Pull: convert a UTF-8 ("script") string into precomposed UTF-16.
+ *
+ * The whole input must be converted in one call; a partial conversion
+ * is reported as E2BIG.  On success the in/out pointers and the
+ * remaining-byte counters are advanced and 0 is returned.  On failure
+ * -1 is returned with errno set to E2BIG, EILSEQ or EINVAL and the
+ * pointers/counters are left untouched.
+ */
+static size_t macosxfs_encoding_pull(
+	void *cd,				/* Encoder handle */
+	char **inbuf, size_t *inbytesleft,	/* Script string */
+	char **outbuf, size_t *outbytesleft)	/* UTF-16-LE string */
+{
+	static const int script_code = kCFStringEncodingUTF8;
+	UInt32 srcCharsUsed = 0;
+	UInt32 dstCharsUsed = 0;
+	UInt32 result;
+	uint32_t dstDecomposedUsed = 0;
+	uint32_t dstPrecomposedUsed = 0;
+
+	(void) cd; /* UNUSED */
+
+	if (0 == *inbytesleft) {
+		return 0;
+	}
+
+	/*
+	 * The destination capacity is counted in UniChars (dstCharsUsed
+	 * comes back in the same unit, see the "*2" conversions below),
+	 * so the byte count of the output buffer has to be halved.
+	 * Passing the raw byte count would allow the converter to write
+	 * up to twice as much as the caller provided.
+	 */
+	result = CFStringEncodingBytesToUnicode(
+		script_code, kCFStringEncodingComposeCombinings,
+		*inbuf, *inbytesleft, &srcCharsUsed,
+		(UniChar*)*outbuf, *outbytesleft/2, &dstCharsUsed);
+
+	switch(result) {
+	case kCFStringEncodingConversionSuccess:
+		if (*inbytesleft == srcCharsUsed)
+			break;
+		/* input only partially consumed: fall through */
+	case kCFStringEncodingInsufficientOutputBufferLength:
+		debug_out("String conversion: "
+			  "Output buffer too small\n");
+		hexdump("UTF8->UTF16LE (new) input",
+			*inbuf, *inbytesleft);
+		errno = E2BIG;
+		return -1;
+	case kCFStringEncodingInvalidInputStream:
+		/*
+		 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
+		 * errors here.  That function will always pass 2
+		 * characters.  smbd/open.c:check_for_pipe() cuts a
+		 * pathname to 10 characters blindly.  Suppress the
+		 * debug output in those cases.
+		 */
+		if(2 != *inbytesleft && 10 != *inbytesleft) {
+			debug_out("String conversion: "
+				  "Invalid input sequence\n");
+			hexdump("UTF8->UTF16LE (new) input",
+				*inbuf, *inbytesleft);
+		}
+		errno = EILSEQ;
+		return -1;
+	case kCFStringEncodingConverterUnavailable:
+		debug_out("String conversion: "
+			  "Unknown encoding\n");
+		hexdump("UTF8->UTF16LE (new) input",
+			*inbuf, *inbytesleft);
+		errno = EINVAL;
+		return -1;
+	}
+
+	/*
+	 * It doesn't look like CFStringEncodingBytesToUnicode() can
+	 * produce precomposed characters (flags=ComposeCombinings
+	 * doesn't do it), so we need another pass over the data here.
+	 * We can do this in-place, as the string can only get
+	 * shorter.
+	 *
+	 * (Actually in theory there should be an internal
+	 * decomposition and reordering before the actual composition
+	 * step.  But we should be able to rely on that we always get
+	 * fully decomposed strings for input, so this can't create
+	 * problems in reality.)
+	 */
+	CFUniCharPrecompose(
+		(const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed,
+		(UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed);
+
+	/*
+	 * NOTE(review): native_to_le() is defined outside this view;
+	 * presumably it byte-swaps host-order UTF-16 into little-endian
+	 * in place -- confirm.
+	 */
+	native_to_le(*outbuf, dstPrecomposedUsed*2);
+
+	/* Input is counted in bytes, output in 2-byte UTF-16 units. */
+	*inbuf += srcCharsUsed;
+	*inbytesleft -= srcCharsUsed;
+	*outbuf += dstPrecomposedUsed*2;
+	*outbytesleft -= dstPrecomposedUsed*2;
+
+	return 0;
+}
+
+/*
+ * Push: convert a UTF-16-LE string into the UTF-8 "script" encoding,
+ * requesting the HFS+ canonical form from the converter.
+ *
+ * Returns 0 after advancing the in/out pointers and counters, or -1
+ * with errno set to E2BIG, EILSEQ or EINVAL.
+ */
+static size_t macosxfs_encoding_push(
+	void *cd,				/* Encoder handle */
+	char **inbuf, size_t *inbytesleft,	/* UTF-16-LE string */
+	char **outbuf, size_t *outbytesleft)	/* Script string */
+{
+	static const int script_code = kCFStringEncodingUTF8;
+	/* Scratch buffer kept across calls and handed back to the helper. */
+	static UniChar *ucbuf = NULL;
+	static size_t ucbuf_len = 0;
+	UInt32 consumed = 0;
+	UInt32 produced = 0;
+	UInt32 status;
+
+	(void) cd; /* UNUSED */
+
+	if (*inbytesleft == 0) {
+		return 0;
+	}
+
+	/*
+	 * NOTE(review): set_ucbuffer_with_le() lives outside this view;
+	 * presumably it copies the little-endian input into a (re)sized
+	 * host-order buffer -- confirm.
+	 */
+	ucbuf = set_ucbuffer_with_le(
+		ucbuf, &ucbuf_len, *inbuf, *inbytesleft);
+
+	status = CFStringEncodingUnicodeToBytes(
+		script_code, kCFStringEncodingUseHFSPlusCanonical,
+		ucbuf, *inbytesleft/2, &consumed,
+		*outbuf, *outbytesleft, &produced);
+
+	/*
+	 * A "successful" conversion that did not consume all of the
+	 * input is handled exactly like a too-small output buffer.
+	 */
+	if (kCFStringEncodingConversionSuccess == status &&
+	    *inbytesleft/2 != consumed) {
+		status = kCFStringEncodingInsufficientOutputBufferLength;
+	}
+
+	switch (status) {
+	case kCFStringEncodingInsufficientOutputBufferLength:
+		debug_out("String conversion: "
+			  "Output buffer too small\n");
+		hexdump("UTF16LE->UTF8 (new) input",
+			*inbuf, *inbytesleft);
+		errno = E2BIG;
+		return -1;
+
+	case kCFStringEncodingInvalidInputStream:
+		/*
+		 * HACK: smbd/open.c:check_for_pipe():is_legal_name()
+		 * blindly truncates a pathname to 10 characters, so
+		 * stay quiet for inputs of that size.
+		 */
+		if (*inbytesleft != 10) {
+			debug_out("String conversion: "
+				  "Invalid input sequence\n");
+			hexdump("UTF16LE->UTF8 (new) input",
+				*inbuf, *inbytesleft);
+		}
+		errno = EILSEQ;
+		return -1;
+
+	case kCFStringEncodingConverterUnavailable:
+		debug_out("String conversion: "
+			  "Unknown encoding\n");
+		hexdump("UTF16LE->UTF8 (new) input",
+			*inbuf, *inbytesleft);
+		errno = EINVAL;
+		return -1;
+
+	default:
+		/* Complete success (or an unlisted code): carry on. */
+		break;
+	}
+
+	/* Input was consumed in 2-byte units, output produced in bytes. */
+	*inbuf += consumed*2;
+	*inbytesleft -= consumed*2;
+	*outbuf += produced;
+	*outbytesleft -= produced;
+
+	return 0;
+}
+
+#endif /* USE_INTERNAL_API */
+
+/*
+ * For initialization, actually install the encoding as "macosxfs".
+ */
+/*
+ * Registration record handed to smb_register_charset() below: the
+ * charset name followed by the pull and push routines of whichever
+ * implementation was selected by USE_INTERNAL_API.
+ * NOTE(review): positional initializer -- presumably the leading
+ * fields of struct charset_functions are name, pull, push; confirm
+ * against the struct declaration.
+ */
+static struct charset_functions macosxfs_encoding_functions = {
+ "MACOSXFS", macosxfs_encoding_pull, macosxfs_encoding_push
+};
+
+/*
+ * Module entry point: register the MACOSXFS charset.
+ *
+ * Returns NT_STATUS_OK when smb_register_charset() accepts the
+ * record, NT_STATUS_INTERNAL_ERROR otherwise.
+ */
+NTSTATUS charset_macosxfs_init(void)
+{
+	return smb_register_charset(&macosxfs_encoding_functions)
+		? NT_STATUS_OK
+		: NT_STATUS_INTERNAL_ERROR;
+}
+
+/* eof */
diff --git a/lib/util/charset/codepoints.c b/lib/util/charset/codepoints.c
index cd54420e8e..8cc33a9782 100644
--- a/lib/util/charset/codepoints.c
+++ b/lib/util/charset/codepoints.c
@@ -23,7 +23,7 @@
#include "includes.h"
#include "lib/util/charset/charset.h"
#include "system/locale.h"
-#include "dynconfig.h"
+#include "dynconfig/dynconfig.h"
#ifdef strcasecmp
#undef strcasecmp
@@ -168,17 +168,16 @@ struct smb_iconv_handle *get_iconv_handle(void)
{
if (global_iconv_handle == NULL)
global_iconv_handle = smb_iconv_handle_reinit(talloc_autofree_context(),
- "ASCII", "UTF-8", "ASCII", true, NULL);
+ "ASCII", "UTF-8", true, NULL);
return global_iconv_handle;
}
struct smb_iconv_handle *get_iconv_testing_handle(TALLOC_CTX *mem_ctx,
const char *dos_charset,
- const char *unix_charset,
- const char *display_charset)
+ const char *unix_charset)
{
return smb_iconv_handle_reinit(mem_ctx,
- dos_charset, unix_charset, display_charset, true, NULL);
+ dos_charset, unix_charset, true, NULL);
}
/**
@@ -190,7 +189,6 @@ const char *charset_name(struct smb_iconv_handle *ic, charset_t ch)
case CH_UTF16: return "UTF-16LE";
case CH_UNIX: return ic->unix_charset;
case CH_DOS: return ic->dos_charset;
- case CH_DISPLAY: return ic->display_charset;
case CH_UTF8: return "UTF8";
case CH_UTF16BE: return "UTF-16BE";
case CH_UTF16MUNGED: return "UTF16_MUNGED";
@@ -219,37 +217,6 @@ static int close_iconv_handle(struct smb_iconv_handle *data)
return 0;
}
-static const char *map_locale(const char *charset)
-{
- if (strcmp(charset, "LOCALE") != 0) {
- return charset;
- }
-#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
- {
- const char *ln;
- smb_iconv_t handle;
-
- ln = nl_langinfo(CODESET);
- if (ln == NULL) {
- DEBUG(1,("Unable to determine charset for LOCALE - using ASCII\n"));
- return "ASCII";
- }
- /* Check whether the charset name is supported
- by iconv */
- handle = smb_iconv_open(ln, "UCS-2LE");
- if (handle == (smb_iconv_t) -1) {
- DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
- return "ASCII";
- } else {
- DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
- smb_iconv_close(handle);
- }
- return ln;
- }
-#endif
- return "ASCII";
-}
-
/*
the old_ic is passed in here as the smb_iconv_handle structure
is used as a global pointer in some places (eg. python modules). We
@@ -261,14 +228,11 @@ static const char *map_locale(const char *charset)
_PUBLIC_ struct smb_iconv_handle *smb_iconv_handle_reinit(TALLOC_CTX *mem_ctx,
const char *dos_charset,
const char *unix_charset,
- const char *display_charset,
bool native_iconv,
struct smb_iconv_handle *old_ic)
{
struct smb_iconv_handle *ret;
- display_charset = map_locale(display_charset);
-
if (old_ic != NULL) {
ret = old_ic;
close_iconv_handle(ret);
@@ -290,9 +254,13 @@ _PUBLIC_ struct smb_iconv_handle *smb_iconv_handle_reinit(TALLOC_CTX *mem_ctx,
talloc_set_destructor(ret, close_iconv_handle);
+ if (strcasecmp(dos_charset, "UTF8") == 0 || strcasecmp(dos_charset, "UTF-8") == 0) {
+ DEBUG(0,("ERROR: invalid DOS charset: 'dos charset' must not be UTF8, using (default value) CP850 instead\n"));
+ dos_charset = "CP850";
+ }
+
ret->dos_charset = talloc_strdup(ret->child_ctx, dos_charset);
ret->unix_charset = talloc_strdup(ret->child_ctx, unix_charset);
- ret->display_charset = talloc_strdup(ret->child_ctx, display_charset);
ret->native_iconv = native_iconv;
return ret;
diff --git a/lib/util/charset/convert_string.c b/lib/util/charset/convert_string.c
index e51add2aaf..51f9fec137 100644
--- a/lib/util/charset/convert_string.c
+++ b/lib/util/charset/convert_string.c
@@ -2,7 +2,8 @@
Unix SMB/CIFS implementation.
Character set conversion Extensions
Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
- Copyright (C) Andrew Tridgell 2001
+ Copyright (C) Andrew Tridgell 2001-2011
+ Copyright (C) Andrew Bartlett 2011
Copyright (C) Simo Sorce 2001
Copyright (C) Martin Pool 2003
@@ -21,6 +22,7 @@
*/
#include "includes.h"
+#include "system/iconv.h"
/**
* @file
@@ -177,28 +179,29 @@ bool convert_string_error_handle(struct smb_iconv_handle *ic,
size_t slen = srclen;
size_t dlen = destlen;
unsigned char lastp = '\0';
+ bool ret;
- /* If all characters are ascii, fast path here. */
- while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
- if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
+ if (slen == (size_t)-1) {
+ while (dlen &&
+ ((lastp = *p) <= 0x7f) && (p[1] == 0)) {
*q++ = *p;
- if (slen != (size_t)-1) {
- slen -= 2;
- }
p += 2;
dlen--;
retval++;
if (!lastp)
break;
- } else {
-#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
- goto general_case;
-#else
- bool ret = convert_string_internal(ic, from, to, p, slen, q, dlen, converted_size);
- *converted_size += retval;
- return ret;
-#endif
}
+ if (lastp != 0) goto slow_path;
+ } else {
+ while (slen >= 2 && dlen &&
+ (*p <= 0x7f) && (p[1] == 0)) {
+ *q++ = *p;
+ slen -= 2;
+ p += 2;
+ dlen--;
+ retval++;
+ }
+ if (slen != 0) goto slow_path;
}
*converted_size = retval;
@@ -212,6 +215,19 @@ bool convert_string_error_handle(struct smb_iconv_handle *ic,
}
}
return true;
+
+ slow_path:
+ /* come here when we hit a character we can't deal
+ * with in the fast path
+ */
+#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
+ goto general_case;
+#else
+ ret = convert_string_internal(ic, from, to, p, slen, q, dlen, converted_size);
+ *converted_size += retval;
+ return ret;
+#endif
+
} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
const unsigned char *p = (const unsigned char *)src;
unsigned char *q = (unsigned char *)dest;
@@ -221,8 +237,8 @@ bool convert_string_error_handle(struct smb_iconv_handle *ic,
unsigned char lastp = '\0';
/* If all characters are ascii, fast path here. */
- while (slen && (dlen >= 2)) {
- if ((lastp = *p) <= 0x7F) {
+ while (slen && (dlen >= 1)) {
+ if (dlen >=2 && (lastp = *p) <= 0x7F) {
*q++ = *p++;
*q++ = '\0';
if (slen != (size_t)-1) {
@@ -387,7 +403,7 @@ bool convert_string_talloc_handle(TALLOC_CTX *ctx, struct smb_iconv_handle *ic,
}
/* +2 is for ucs2 null termination. */
- ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
+ ob = talloc_realloc(ctx, ob, char, destlen + 2);
if (!ob) {
DEBUG(0, ("convert_string_talloc: realloc failed!\n"));
@@ -428,7 +444,7 @@ bool convert_string_talloc_handle(TALLOC_CTX *ctx, struct smb_iconv_handle *ic,
*/
if (o_len > 1024) {
/* We're shrinking here so we know the +2 is safe from wrap. */
- ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
+ ob = talloc_realloc(ctx,ob, char, destlen + 2);
}
if (destlen && !ob) {
diff --git a/lib/util/charset/pull_push.c b/lib/util/charset/pull_push.c
new file mode 100644
index 0000000000..b7a5bcdc65
--- /dev/null
+++ b/lib/util/charset/pull_push.c
@@ -0,0 +1,150 @@
+/*
+ Unix SMB/CIFS implementation.
+ Character set conversion Extensions
+ Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
+ Copyright (C) Andrew Tridgell 2001
+ Copyright (C) Simo Sorce 2001
+ Copyright (C) Martin Pool 2003
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+#include "includes.h"
+#include "system/locale.h"
+
+/**
+ * Copy a string from a unix char* src to a UCS2 destination,
+ * allocating a buffer using talloc().
+ *
+ * @param dest always set at least to NULL
+ * @param converted_size set to the number of bytes occupied by the string in
+ * the destination on success.
+ *
+ * @return true if new buffer was correctly allocated, and string was
+ * converted.
+ **/
+bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src,
+		      size_t *converted_size)
+{
+	/* Convert the terminating NUL as well, so the result is terminated. */
+	const size_t nbytes = strlen(src) + 1;
+	void **out = (void **)dest;
+
+	/* Callers may rely on *dest being NULL when the conversion fails. */
+	*dest = NULL;
+
+	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE,
+				     src, nbytes,
+				     out, converted_size);
+}
+
+/**
+ * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
+ *
+ * @param dest always set at least to NULL
+ * @param converted_size set to the number of bytes occupied by the string in
+ * the destination on success.
+ *
+ * @return true if new buffer was correctly allocated, and string was
+ * converted.
+ **/
+
+bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
+		      size_t *converted_size)
+{
+	/* Convert the terminating NUL as well, so the result is terminated. */
+	const size_t nbytes = strlen(src) + 1;
+	void **out = (void **)dest;
+
+	/* Callers may rely on *dest being NULL when the conversion fails. */
+	*dest = NULL;
+
+	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8,
+				     src, nbytes,
+				     out, converted_size);
+}
+
+/**
+ * Copy a string from a unix char* src to an ASCII destination,
+ * allocating a buffer using talloc().
+ *
+ * @param dest always set at least to NULL
+ *
+ * @param converted_size The number of bytes occupied by the string in the destination
+ * @returns boolean indicating if the conversion was successful
+ **/
+bool push_ascii_talloc(TALLOC_CTX *mem_ctx, char **dest, const char *src,
+		       size_t *converted_size)
+{
+	/* Convert the terminating NUL as well, so the result is terminated. */
+	const size_t nbytes = strlen(src) + 1;
+	void **out = (void **)dest;
+
+	/* Callers may rely on *dest being NULL when the conversion fails. */
+	*dest = NULL;
+
+	return convert_string_talloc(mem_ctx, CH_UNIX, CH_DOS,
+				     src, nbytes,
+				     out, converted_size);
+}
+
+/**
+ * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
+ *
+ * @param dest always set at least to NULL
+ * @param converted_size set to the number of bytes occupied by the string in
+ * the destination on success.
+ *
+ * @return true if new buffer was correctly allocated, and string was
+ * converted.
+ **/
+
+bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src,
+		      size_t *converted_size)
+{
+	/* Count the terminating 16-bit NUL, then scale to a byte length. */
+	const size_t nchars = strlen_w(src) + 1;
+	const size_t nbytes = nchars * sizeof(smb_ucs2_t);
+	void **out = (void **)dest;
+
+	/* Callers may rely on *dest being NULL when the conversion fails. */
+	*dest = NULL;
+
+	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX,
+				     src, nbytes,
+				     out, converted_size);
+}
+
+
+/**
+ * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
+ *
+ * @param dest always set at least to NULL
+ * @param converted_size set to the number of bytes occupied by the string in
+ * the destination on success.
+ *
+ * @return true if new buffer was correctly allocated, and string was
+ * converted.
+ **/
+
+bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
+		      size_t *converted_size)
+{
+	/* Convert the terminating NUL as well, so the result is terminated. */
+	const size_t nbytes = strlen(src) + 1;
+	void **out = (void **)dest;
+
+	/* Callers may rely on *dest being NULL when the conversion fails. */
+	*dest = NULL;
+
+	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX,
+				     src, nbytes,
+				     out, converted_size);
+}
+
+
+/**
+ * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
+ *
+ * @param dest always set at least to NULL
+ * @param converted_size set to the number of bytes occupied by the string in
+ * the destination on success.
+ *
+ * @return true if new buffer was correctly allocated, and string was
+ * converted.
+ **/
+
+bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
+		       size_t *converted_size)
+{
+	/* Convert the terminating NUL as well, so the result is terminated. */
+	const size_t nbytes = strlen(src) + 1;
+	void **out = (void **)dest;
+
+	/* Callers may rely on *dest being NULL when the conversion fails. */
+	*dest = NULL;
+
+	return convert_string_talloc(ctx, CH_DOS, CH_UNIX,
+				     src, nbytes,
+				     out, converted_size);
+}
diff --git a/lib/util/charset/tests/convert_string.c b/lib/util/charset/tests/convert_string.c
index 32fc11f527..9a5d974fe3 100644
--- a/lib/util/charset/tests/convert_string.c
+++ b/lib/util/charset/tests/convert_string.c
@@ -105,7 +105,7 @@ static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx)
talloc_steal(tctx, gd_iso8859_1.data);
talloc_steal(tctx, gd_utf16le.data);
- iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850", "UTF8");
+ iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850");
torture_assert(tctx, iconv_handle, "getting iconv handle");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
@@ -199,11 +199,11 @@ static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx)
torture_assert_data_blob_equal(tctx, gd_output, gd_cp850, "conversion from UTF8 to (unix charset) CP850 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
- CH_UTF8, CH_DISPLAY,
+ CH_UTF8, CH_UTF8,
gd_utf8.data, gd_utf8.length,
(void *)&gd_output.data, &gd_output.length),
- "conversion from UTF8 to (display charset) UTF8");
- torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF8 to (display charset) UTF8 incorrect");
+ "conversion from UTF8 to UTF8");
+ torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF8 to UTF8 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
CH_UTF16LE, CH_DOS,
@@ -227,11 +227,11 @@ static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx)
torture_assert_data_blob_equal(tctx, gd_output, gd_cp850, "conversion from UTF16LE to (unix charset) CP850 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
- CH_UTF16LE, CH_DISPLAY,
+ CH_UTF16LE, CH_UTF8,
gd_utf16le.data, gd_utf16le.length,
(void *)&gd_output.data, &gd_output.length),
- "conversion from UTF16LE to (display charset) UTF8");
- torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect");
+ "conversion from UTF16LE to UTF8");
+ torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to UTF8 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
CH_DOS, CH_DOS,
@@ -248,11 +248,11 @@ static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx)
torture_assert_data_blob_equal(tctx, gd_output, gd_cp850, "conversion from UTF16LE to (unix charset) CP850 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
- CH_DOS, CH_DISPLAY,
+ CH_DOS, CH_UTF8,
gd_iso8859_1.data, gd_iso8859_1.length,
(void *)&gd_output.data, &gd_output.length),
- "conversion from (dos charset) ISO8859-1 to (display charset) UTF8");
- torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect");
+ "conversion from (dos charset) ISO8859-1 to UTF8");
+ torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to UTF8 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
CH_DOS, CH_UTF16LE,
@@ -265,7 +265,7 @@ static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx)
(const char *)gd_iso8859_1.data,
CH_DOS, CH_UTF16LE),
gd_output.length / 2,
- "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again");
+ "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to UTF8 and back again");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
CH_DOS, CH_UTF8,
@@ -282,6 +282,191 @@ static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx)
return true;
}
+/*
+ * Exercise convert_string_error_handle() with srclen == -1, i.e. with
+ * input whose length is given only by its NUL terminator.
+ *
+ * Three things are checked, using an iconv handle whose dos and unix
+ * charsets are both CP850:
+ *  - with enough room, the output equals the terminated reference blob;
+ *  - with a destination too small for the terminator (or smaller),
+ *    the call fails and errno is E2BIG;
+ *  - when a NUL is planted early in the input, conversion stops there
+ *    and nothing after it is converted.
+ */
+static bool test_gd_minus_1_handle(struct torture_context *tctx)
+{
+	struct smb_iconv_handle *iconv_handle;
+	DATA_BLOB gd_utf8 = base64_decode_data_blob(gd_utf8_base64);
+	DATA_BLOB gd_cp850 = base64_decode_data_blob(gd_cp850_base64);
+	DATA_BLOB gd_utf16le = base64_decode_data_blob(gd_utf16le_base64);
+	DATA_BLOB gd_output;
+	DATA_BLOB gd_utf8_terminated;
+	DATA_BLOB gd_cp850_terminated;
+	DATA_BLOB gd_utf16le_terminated;
+
+	talloc_steal(tctx, gd_utf8.data);
+	talloc_steal(tctx, gd_cp850.data);
+	talloc_steal(tctx, gd_utf16le.data);
+
+	iconv_handle = get_iconv_testing_handle(tctx, "CP850", "CP850");
+	torture_assert(tctx, iconv_handle, "getting iconv handle");
+
+	/* Build explicitly NUL-terminated copies of the reference blobs. */
+	gd_utf8_terminated = data_blob_talloc(tctx, NULL, gd_utf8.length + 1);
+	memcpy(gd_utf8_terminated.data, gd_utf8.data, gd_utf8.length);
+	gd_utf8_terminated.data[gd_utf8.length] = '\0';
+
+	gd_cp850_terminated = data_blob_talloc(tctx, NULL, gd_cp850.length + 1);
+	memcpy(gd_cp850_terminated.data, gd_cp850.data, gd_cp850.length);
+	gd_cp850_terminated.data[gd_cp850.length] = '\0';
+
+	/* UTF-16 needs a two-byte terminator. */
+	gd_utf16le_terminated = data_blob_talloc(tctx, NULL, gd_utf16le.length + 2);
+	memcpy(gd_utf16le_terminated.data, gd_utf16le.data, gd_utf16le.length);
+	gd_utf16le_terminated.data[gd_utf16le.length] = '\0';
+	gd_utf16le_terminated.data[gd_utf16le.length + 1] = '\0';
+
+	/* UTF8 -> UTF16LE: full room, then progressively too little room. */
+	gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10);
+
+	torture_assert(tctx, convert_string_error_handle(iconv_handle,
+			CH_UTF8, CH_UTF16LE,
+			gd_utf8_terminated.data, -1,
+			(void *)gd_output.data, gd_output.length, &gd_output.length),
+		       "conversion from UTF8 to UTF16LE null terminated");
+	torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated");
+
+	gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10);
+	torture_assert(tctx, convert_string_error_handle(iconv_handle,
+			CH_UTF8, CH_UTF16LE,
+			gd_utf8_terminated.data, -1,
+			(void *)gd_output.data, gd_utf16le.length, &gd_output.length) == false,
+		       "conversion from UTF8 to UTF16LE null terminated should fail");
+	torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG");
+	torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le, "conversion from UTF8 to UTF16LE null terminated");
+
+	gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10);
+	torture_assert(tctx, convert_string_error_handle(iconv_handle,
+			CH_UTF8, CH_UTF16LE,
+			gd_utf8_terminated.data, -1,
+			(void *)gd_output.data, gd_utf16le.length - 1, &gd_output.length) == false,
+		       "conversion from UTF8 to UTF16LE null terminated should fail");
+	torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG");
+
+	gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10);
+	torture_assert(tctx, convert_string_error_handle(iconv_handle,
+			CH_UTF8, CH_UTF16LE,
+			gd_utf8_terminated.data, -1,
+			(void *)gd_output.data, gd_utf16le.length - 2, &gd_output.length) == false,
+		       "conversion from UTF8 to UTF16LE null terminated should fail");
+	torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG");
+
+	/* UTF16LE -> UTF8: full room, then progressively too little room. */
+	gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10);
+	torture_assert(tctx, convert_string_error_handle(iconv_handle,
+			CH_UTF16LE, CH_UTF8,
+			gd_utf16le_terminated.data, -1,
+			(void *)gd_output.data, gd_output.length, &gd_output.length),
+		       "conversion from UTF16LE to UTF8 null terminated");
+	torture_assert_data_blob_equal(tctx, gd_output, gd_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated");
+
+	gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10);
+
+	torture_assert(tctx, convert_string_error_handle(iconv_handle,
+			CH_UTF16LE, CH_UTF8,
+			gd_utf16le_terminated.data, -1,
+			(void *)gd_output.data, gd_utf8.length, &gd_output.length) == false,
+		       "conversion from UTF16LE to UTF8 null terminated should fail");
+	torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG");
+	torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to UTF8 null terminated");
+
+	gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10);
+
+	torture_assert(tctx, convert_string_error_handle(iconv_handle,
+			CH_UTF16LE, CH_UTF8,
+			gd_utf16le_terminated.data, -1,
+			(void *)gd_output.data, gd_utf8.length - 1, &gd_output.length) == false,
+		       "conversion from UTF16LE to UTF8 null terminated should fail");
+	torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG");
+
+	gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10);
+
+	torture_assert(tctx, convert_string_error_handle(iconv_handle,
+			CH_UTF16LE, CH_UTF8,
+			gd_utf16le_terminated.data, -1,
+			(void *)gd_output.data, gd_utf8.length - 2, &gd_output.length) == false,
+		       "conversion from UTF16LE to UTF8 null terminated should fail");
+	torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG");
+
+	/* UTF16LE -> CP850 (the handle's dos charset). */
+	gd_output = data_blob_talloc(tctx, NULL, gd_cp850.length + 10);
+
+	torture_assert(tctx, convert_string_error_handle(iconv_handle,
+			CH_UTF16LE, CH_DOS,
+			gd_utf16le_terminated.data, -1,
+			(void *)gd_output.data, gd_output.length, &gd_output.length),
+		       "conversion from UTF16LE to CP850 (dos) null terminated");
+	torture_assert_data_blob_equal(tctx, gd_output, gd_cp850_terminated, "conversion from UTF16LE to CP850 (dos) null terminated");
+
+	/* Now null terminate the string early, then confirm we don't skip the NULL and convert any further */
+	gd_utf8_terminated.data[3] = '\0';
+	gd_utf8_terminated.length = 4; /* used for the comparison only */
+
+	gd_cp850_terminated.data[2] = '\0';
+	gd_cp850_terminated.length = 3; /* used for the comparison only */
+
+	gd_utf16le_terminated.data[4] = '\0';
+	gd_utf16le_terminated.data[5] = '\0';
+	gd_utf16le_terminated.length = 6; /* used for the comparison only */
+
+	gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10);
+
+	torture_assert(tctx, convert_string_error_handle(iconv_handle,
+			CH_UTF8, CH_UTF16LE,
+			gd_utf8_terminated.data, -1,
+			(void *)gd_output.data, gd_output.length, &gd_output.length),
+		       "conversion from UTF8 to UTF16LE null terminated");
+	torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated early");
+
+	gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10);
+
+	torture_assert(tctx, convert_string_error_handle(iconv_handle,
+			CH_UTF16LE, CH_UTF8,
+			gd_utf16le_terminated.data, -1,
+			(void *)gd_output.data, gd_output.length, &gd_output.length),
+		       "conversion from UTF16LE to UTF8 null terminated");
+	torture_assert_data_blob_equal(tctx, gd_output, gd_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated early");
+
+	gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10);
+
+	torture_assert(tctx, convert_string_error_handle(iconv_handle,
+			CH_DOS, CH_UTF16LE,
+			gd_cp850_terminated.data, -1,
+			(void *)gd_output.data, gd_output.length, &gd_output.length),
+		       "conversion from CP850 to UTF16LE null terminated");
+	/* Message names the conversion actually performed (CP850, not UTF8). */
+	torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le_terminated, "conversion from CP850 to UTF16LE null terminated early");
+
+	gd_output = data_blob_talloc(tctx, NULL, gd_cp850.length + 10);
+
+	/* Messages name the conversion actually performed (to CP850, not UTF8). */
+	torture_assert(tctx, convert_string_error_handle(iconv_handle,
+			CH_UTF16LE, CH_DOS,
+			gd_utf16le_terminated.data, -1,
+			(void *)gd_output.data, gd_output.length, &gd_output.length),
+		       "conversion from UTF16LE to CP850 (dos) null terminated");
+	torture_assert_data_blob_equal(tctx, gd_output, gd_cp850_terminated, "conversion from UTF16LE to CP850 (dos) null terminated early");
+
+	/* Now null terminate the string particularly early, then confirm we don't skip the NULL and convert any further */
+	gd_utf8_terminated.data[1] = '\0';
+	gd_utf8_terminated.length = 2; /* used for the comparison only */
+
+	gd_utf16le_terminated.data[2] = '\0';
+	gd_utf16le_terminated.data[3] = '\0';
+	gd_utf16le_terminated.length = 4; /* used for the comparison only */
+
+	gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10);
+
+	torture_assert(tctx, convert_string_error_handle(iconv_handle, CH_UTF8, CH_UTF16LE,
+			gd_utf8_terminated.data, -1,
+			(void *)gd_output.data, gd_output.length, &gd_output.length),
+		       "conversion from UTF8 to UTF16LE null terminated");
+	torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated very early");
+
+	gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10);
+
+	torture_assert(tctx, convert_string_error_handle(iconv_handle,
+			CH_UTF16LE, CH_UTF8,
+			gd_utf16le_terminated.data, -1,
+			(void *)gd_output.data, gd_output.length, &gd_output.length),
+		       "conversion from UTF16LE to UTF8 null terminated");
+	torture_assert_data_blob_equal(tctx, gd_output, gd_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated very early");
+
+	return true;
+}
+
static bool test_gd_ascii_handle(struct torture_context *tctx)
{
struct smb_iconv_handle *iconv_handle;
@@ -296,7 +481,7 @@ static bool test_gd_ascii_handle(struct torture_context *tctx)
talloc_steal(tctx, gd_iso8859_1.data);
talloc_steal(tctx, gd_utf16le.data);
- iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8", "UTF8");
+ iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8");
torture_assert(tctx, iconv_handle, "getting iconv handle");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
@@ -365,7 +550,7 @@ static bool test_plato_english_iso8859_cp850_handle(struct torture_context *tctx
talloc_steal(tctx, plato_english_utf16le.data);
- iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850", "UTF8");
+ iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850");
torture_assert(tctx, iconv_handle, "getting iconv handle");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
@@ -383,11 +568,11 @@ static bool test_plato_english_iso8859_cp850_handle(struct torture_context *tctx
torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_cp850, "conversion from UTF8 to (unix charset) CP850 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
- CH_UTF8, CH_DISPLAY,
+ CH_UTF8, CH_UTF8,
plato_english_utf8.data, plato_english_utf8.length,
(void *)&plato_english_output.data, &plato_english_output.length),
- "conversion from UTF8 to (display charset) UTF8");
- torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF8 to (display charset) UTF8 incorrect");
+ "conversion from UTF8 to UTF8");
+ torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF8 to UTF8 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
CH_UTF16LE, CH_DOS,
@@ -436,11 +621,11 @@ static bool test_plato_english_iso8859_cp850_handle(struct torture_context *tctx
torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_cp850, "conversion from UTF16LE to (unix charset) CP850 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
- CH_UTF16LE, CH_DISPLAY,
+ CH_UTF16LE, CH_UTF8,
plato_english_utf16le.data, plato_english_utf16le.length,
(void *)&plato_english_output.data, &plato_english_output.length),
- "conversion from UTF16LE to (display charset) UTF8");
- torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect");
+ "conversion from UTF16LE to UTF8");
+ torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to UTF8 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
CH_DOS, CH_DOS,
@@ -457,11 +642,11 @@ static bool test_plato_english_iso8859_cp850_handle(struct torture_context *tctx
torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_cp850, "conversion from UTF16LE to (unix charset) CP850 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
- CH_DOS, CH_DISPLAY,
+ CH_DOS, CH_UTF8,
plato_english_iso8859_1.data, plato_english_iso8859_1.length,
(void *)&plato_english_output.data, &plato_english_output.length),
- "conversion from (dos charset) ISO8859-1 to (display charset) UTF8");
- torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect");
+ "conversion from (dos charset) ISO8859-1 to UTF8");
+ torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to UTF8 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
CH_DOS, CH_UTF16LE,
@@ -472,6 +657,261 @@ static bool test_plato_english_iso8859_cp850_handle(struct torture_context *tctx
return true;
}
+static bool test_plato_english_minus_1_handle(struct torture_context *tctx)
+{ /* Exercises convert_string_error_handle() with a source length of -1,
+   * which the assertion messages describe as the "null terminated" case,
+   * on the English Plato sample, in both the UTF-8 -> UTF-16LE and the
+   * UTF-16LE -> UTF-8 directions.  Covers: a destination with spare room,
+   * destinations too small by the terminator or more (expected to fail
+   * with E2BIG), and sources cut short by an earlier terminator.
+   * Returns true when every assertion holds. */
+ struct smb_iconv_handle *iconv_handle;
+ DATA_BLOB plato_english_utf8 = data_blob_string_const(plato_english_ascii);
+ DATA_BLOB plato_english_utf16le = base64_decode_data_blob(plato_english_utf16le_base64);
+ DATA_BLOB plato_english_output;
+ DATA_BLOB plato_english_utf8_terminated;
+ DATA_BLOB plato_english_utf16le_terminated;
+
+ talloc_steal(tctx, plato_english_utf16le.data);
+
+ iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850");
+ torture_assert(tctx, iconv_handle, "getting iconv handle");
+ /*
+  * Build copies of both strings with an explicit terminator appended:
+  * one zero byte for UTF-8, two zero bytes for UTF-16LE.
+  */
+ plato_english_utf8_terminated = data_blob_talloc(tctx, NULL, plato_english_utf8.length + 1);
+ memcpy(plato_english_utf8_terminated.data, plato_english_utf8.data, plato_english_utf8.length);
+ plato_english_utf8_terminated.data[plato_english_utf8.length] = '\0';
+
+ plato_english_utf16le_terminated = data_blob_talloc(tctx, NULL, plato_english_utf16le.length + 2);
+ memcpy(plato_english_utf16le_terminated.data, plato_english_utf16le.data, plato_english_utf16le.length);
+ plato_english_utf16le_terminated.data[plato_english_utf16le.length] = '\0';
+ plato_english_utf16le_terminated.data[plato_english_utf16le.length + 1] = '\0';
+
+ plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf16le.length + 10);
+ /*
+  * Destination has 10 bytes to spare: expect success, and the output
+  * (with the length reported by the call) to equal the terminated
+  * UTF-16LE string.
+  */
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF8, CH_UTF16LE,
+ plato_english_utf8_terminated.data, -1,
+ (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length),
+ "conversion from UTF8 to UTF16LE null terminated");
+ torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated");
+ /*
+  * Destination holds the text but not the terminator: expect failure
+  * with E2BIG, and the reported output to equal the unterminated string.
+  */
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF8, CH_UTF16LE,
+ plato_english_utf8_terminated.data, -1,
+ (void *)plato_english_output.data, plato_english_utf16le.length, &plato_english_output.length) == false,
+ "conversion from UTF8 to UTF16LE null terminated should fail");
+ torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG");
+ torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf16le, "conversion from UTF8 to UTF16LE null terminated");
+ /*
+  * Destinations one and two bytes smaller still must also fail with E2BIG.
+  */
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF8, CH_UTF16LE,
+ plato_english_utf8_terminated.data, -1,
+ (void *)plato_english_output.data, plato_english_utf16le.length - 1, &plato_english_output.length) == false,
+ "conversion from UTF8 to UTF16LE null terminated should fail");
+ torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG");
+
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF8, CH_UTF16LE,
+ plato_english_utf8_terminated.data, -1,
+ (void *)plato_english_output.data, plato_english_utf16le.length - 2, &plato_english_output.length) == false,
+ "conversion from UTF8 to UTF16LE null terminated should fail");
+ torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG");
+ /*
+  * Repeat the same sequence in the opposite direction, UTF-16LE to UTF-8,
+  * with a fresh output buffer sized for the UTF-8 text plus 10 bytes.
+  */
+ plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf8.length + 10);
+
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF16LE, CH_UTF8,
+ plato_english_utf16le_terminated.data, -1,
+ (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length),
+ "conversion from UTF16LE to UTF8 null terminated");
+ torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated");
+
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF16LE, CH_UTF8,
+ plato_english_utf16le_terminated.data, -1,
+ (void *)plato_english_output.data, plato_english_utf8.length, &plato_english_output.length) == false,
+ "conversion from UTF16LE to UTF8 null terminated should fail");
+ torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG");
+ torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to UTF8 null terminated");
+
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF16LE, CH_UTF8,
+ plato_english_utf16le_terminated.data, -1,
+ (void *)plato_english_output.data, plato_english_utf8.length - 1, &plato_english_output.length) == false,
+ "conversion from UTF16LE to UTF8 null terminated should fail");
+ torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG");
+
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF16LE, CH_UTF8,
+ plato_english_utf16le_terminated.data, -1,
+ (void *)plato_english_output.data, plato_english_utf8.length - 2, &plato_english_output.length) == false,
+ "conversion from UTF16LE to UTF8 null terminated should fail");
+ torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG");
+
+ /* Now NUL-terminate the string early (after 3 characters), then confirm
+  * we don't skip the terminator and convert any further */
+ plato_english_utf8_terminated.data[3] = '\0';
+ plato_english_utf8_terminated.length = 4; /* used for the comparison only */
+
+ plato_english_utf16le_terminated.data[6] = '\0';
+ plato_english_utf16le_terminated.data[7] = '\0';
+ plato_english_utf16le_terminated.length = 8; /* used for the comparison only */
+
+ plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf16le.length + 10);
+
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF8, CH_UTF16LE,
+ plato_english_utf8_terminated.data, -1,
+ (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length),
+ "conversion from UTF8 to UTF16LE null terminated");
+ torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated early");
+
+ plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf8.length + 10);
+
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF16LE, CH_UTF8,
+ plato_english_utf16le_terminated.data, -1,
+ (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length),
+ "conversion from UTF16LE to UTF8 null terminated");
+ torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated early");
+
+
+ /* Now NUL-terminate the string particularly early (after 1 character),
+  * then confirm we don't skip the terminator and convert any further */
+ plato_english_utf8_terminated.data[1] = '\0';
+ plato_english_utf8_terminated.length = 2; /* used for the comparison only */
+
+ plato_english_utf16le_terminated.data[2] = '\0';
+ plato_english_utf16le_terminated.data[3] = '\0';
+ plato_english_utf16le_terminated.length = 4; /* used for the comparison only */
+
+ plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf16le.length + 10);
+
+ torture_assert(tctx, convert_string_error_handle(iconv_handle, CH_UTF8, CH_UTF16LE,
+ plato_english_utf8_terminated.data, -1,
+ (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length),
+ "conversion from UTF8 to UTF16LE null terminated");
+ torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated very early");
+
+ plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf8.length + 10);
+
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF16LE, CH_UTF8,
+ plato_english_utf16le_terminated.data, -1,
+ (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length),
+ "conversion from UTF16LE to UTF8 null terminated");
+ torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated very early");
+
+ return true;
+}
+
+static bool test_plato_minus_1_handle(struct torture_context *tctx)
+{ /* Same checks as the English variant of this test, but on the Plato
+   * sample decoded from plato_utf8_base64 / plato_utf16le_base64:
+   * convert_string_error_handle() is called with a source length of -1
+   * (the "null terminated" case per the assertion messages) in both the
+   * UTF-8 -> UTF-16LE and UTF-16LE -> UTF-8 directions, with a roomy
+   * destination, with destinations too small by the terminator or more
+   * (expected to fail with E2BIG), and with a source cut short by an
+   * earlier terminator.  Returns true when every assertion holds. */
+ struct smb_iconv_handle *iconv_handle;
+ DATA_BLOB plato_utf8 = base64_decode_data_blob(plato_utf8_base64);
+ DATA_BLOB plato_utf16le = base64_decode_data_blob(plato_utf16le_base64);
+ DATA_BLOB plato_output;
+ DATA_BLOB plato_utf8_terminated;
+ DATA_BLOB plato_utf16le_terminated;
+
+ talloc_steal(tctx, plato_utf8.data);
+ talloc_steal(tctx, plato_utf16le.data);
+
+ iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850");
+ torture_assert(tctx, iconv_handle, "getting iconv handle");
+ /*
+  * Build copies of both strings with an explicit terminator appended:
+  * one zero byte for UTF-8, two zero bytes for UTF-16LE.
+  */
+ plato_utf8_terminated = data_blob_talloc(tctx, NULL, plato_utf8.length + 1);
+ memcpy(plato_utf8_terminated.data, plato_utf8.data, plato_utf8.length);
+ plato_utf8_terminated.data[plato_utf8.length] = '\0';
+
+ plato_utf16le_terminated = data_blob_talloc(tctx, NULL, plato_utf16le.length + 2);
+ memcpy(plato_utf16le_terminated.data, plato_utf16le.data, plato_utf16le.length);
+ plato_utf16le_terminated.data[plato_utf16le.length] = '\0';
+ plato_utf16le_terminated.data[plato_utf16le.length + 1] = '\0';
+
+ plato_output = data_blob_talloc(tctx, NULL, plato_utf16le.length + 10);
+ /*
+  * Destination has 10 bytes to spare: expect success, and the output
+  * (with the length reported by the call) to equal the terminated
+  * UTF-16LE string.
+  */
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF8, CH_UTF16LE,
+ plato_utf8_terminated.data, -1,
+ (void *)plato_output.data, plato_output.length, &plato_output.length),
+ "conversion from UTF8 to UTF16LE null terminated");
+ torture_assert_data_blob_equal(tctx, plato_output, plato_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated");
+ /*
+  * Destination holds the text but not the terminator: expect failure
+  * with E2BIG, and the reported output to equal the unterminated string.
+  */
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF8, CH_UTF16LE,
+ plato_utf8_terminated.data, -1,
+ (void *)plato_output.data, plato_utf16le.length, &plato_output.length) == false,
+ "conversion from UTF8 to UTF16LE null terminated should fail");
+ torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG");
+ torture_assert_data_blob_equal(tctx, plato_output, plato_utf16le, "conversion from UTF8 to UTF16LE null terminated");
+ /*
+  * Destinations one and two bytes smaller still must also fail with E2BIG.
+  */
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF8, CH_UTF16LE,
+ plato_utf8_terminated.data, -1,
+ (void *)plato_output.data, plato_utf16le.length - 1, &plato_output.length) == false,
+ "conversion from UTF8 to UTF16LE null terminated should fail");
+ torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG");
+
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF8, CH_UTF16LE,
+ plato_utf8_terminated.data, -1,
+ (void *)plato_output.data, plato_utf16le.length - 2, &plato_output.length) == false,
+ "conversion from UTF8 to UTF16LE null terminated should fail");
+ torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG");
+ /*
+  * Repeat the same sequence in the opposite direction, UTF-16LE to UTF-8,
+  * with a fresh output buffer sized for the UTF-8 text plus 10 bytes.
+  */
+ plato_output = data_blob_talloc(tctx, NULL, plato_utf8.length + 10);
+
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF16LE, CH_UTF8,
+ plato_utf16le_terminated.data, -1,
+ (void *)plato_output.data, plato_output.length, &plato_output.length),
+ "conversion from UTF16LE to UTF8 null terminated");
+ torture_assert_data_blob_equal(tctx, plato_output, plato_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated");
+
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF16LE, CH_UTF8,
+ plato_utf16le_terminated.data, -1,
+ (void *)plato_output.data, plato_utf8.length, &plato_output.length) == false,
+ "conversion from UTF16LE to UTF8 null terminated should fail");
+ torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG");
+ torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to UTF8 null terminated");
+
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF16LE, CH_UTF8,
+ plato_utf16le_terminated.data, -1,
+ (void *)plato_output.data, plato_utf8.length - 1, &plato_output.length) == false,
+ "conversion from UTF16LE to UTF8 null terminated should fail");
+ torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG");
+
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF16LE, CH_UTF8,
+ plato_utf16le_terminated.data, -1,
+ (void *)plato_output.data, plato_utf8.length - 2, &plato_output.length) == false,
+ "conversion from UTF16LE to UTF8 null terminated should fail");
+ torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG");
+
+ /* Now NUL-terminate the string early, then confirm we don't skip the
+  * terminator and convert any further.
+  * NOTE(review): the cut points (byte 5 in UTF-8, byte 4 in UTF-16LE)
+  * presumably both fall after the second character of the sample;
+  * confirm against the decoded plato_utf8_base64 data. */
+ plato_utf8_terminated.data[5] = '\0';
+ plato_utf8_terminated.length = 6; /* used for the comparison only */
+
+ plato_utf16le_terminated.data[4] = '\0';
+ plato_utf16le_terminated.data[5] = '\0';
+ plato_utf16le_terminated.length = 6; /* used for the comparison only */
+
+ plato_output = data_blob_talloc(tctx, NULL, plato_utf16le.length + 10);
+
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF8, CH_UTF16LE,
+ plato_utf8_terminated.data, -1,
+ (void *)plato_output.data, plato_output.length, &plato_output.length),
+ "conversion from UTF8 to UTF16LE null terminated");
+ torture_assert_data_blob_equal(tctx, plato_output, plato_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated early");
+
+ plato_output = data_blob_talloc(tctx, NULL, plato_utf8.length + 10);
+
+ torture_assert(tctx, convert_string_error_handle(iconv_handle,
+ CH_UTF16LE, CH_UTF8,
+ plato_utf16le_terminated.data, -1,
+ (void *)plato_output.data, plato_output.length, &plato_output.length),
+ "conversion from UTF16LE to UTF8 null terminated");
+ torture_assert_data_blob_equal(tctx, plato_output, plato_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated early");
+
+ return true;
+}
+
static bool test_plato_cp850_utf8_handle(struct torture_context *tctx)
{
struct smb_iconv_handle *iconv_handle;
@@ -483,7 +923,7 @@ static bool test_plato_cp850_utf8_handle(struct torture_context *tctx)
talloc_steal(tctx, plato_utf8.data);
talloc_steal(tctx, plato_utf16le.data);
- iconv_handle = get_iconv_testing_handle(tctx, "CP850", "UTF8", "UTF8");
+ iconv_handle = get_iconv_testing_handle(tctx, "CP850", "UTF8");
torture_assert(tctx, iconv_handle, "creating iconv handle");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
@@ -568,11 +1008,11 @@ static bool test_plato_cp850_utf8_handle(struct torture_context *tctx)
torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF8 to (unix charset) UTF8 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
- CH_UTF8, CH_DISPLAY,
+ CH_UTF8, CH_UTF8,
plato_utf8.data, plato_utf8.length,
(void *)&plato_output.data, &plato_output.length),
"conversion of UTF16 ancient greek to unix charset UTF8 failed");
- torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF8 to (display charset) UTF8 incorrect");
+ torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF8 to UTF8 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
CH_UTF16LE, CH_DOS,
@@ -627,39 +1067,39 @@ static bool test_plato_cp850_utf8_handle(struct torture_context *tctx)
"conversion of UTF16 ancient greek to UTF8 failed");
torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to UTF8 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
- CH_UTF16LE, CH_DISPLAY,
+ CH_UTF16LE, CH_UTF8,
plato_utf16le.data, plato_utf16le.length,
(void *)&plato_output.data, &plato_output.length),
- "conversion of UTF16 ancient greek to display charset UTF8 failed");
- torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect");
+ "conversion of UTF16 ancient greek to UTF8 failed");
+ torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to UTF8 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
- CH_DISPLAY, CH_UTF16LE,
+ CH_UTF8, CH_UTF16LE,
plato_output.data, plato_output.length,
(void *)&plato_output2.data, &plato_output2.length),
- "round trip conversion of UTF16 ancient greek to display charset UTF8 and back again failed");
+ "round trip conversion of UTF16 ancient greek to UTF8 and back again failed");
torture_assert_data_blob_equal(tctx, plato_output2, plato_utf16le,
- "round trip conversion of UTF16 ancient greek to display charset UTF8 and back again failed");
+ "round trip conversion of UTF16 ancient greek to UTF8 and back again failed");
torture_assert_int_equal(tctx,
strlen_m_ext_handle(iconv_handle,
(const char *)plato_output.data,
- CH_DISPLAY, CH_UTF16LE),
+ CH_UTF8, CH_UTF16LE),
plato_output2.length / 2,
- "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again");
+ "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to UTF8 and back again");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
- CH_DISPLAY, CH_UTF8,
+ CH_UTF8, CH_UTF8,
plato_output.data, plato_output.length,
(void *)&plato_output2.data, &plato_output2.length),
- "conversion of display charset UTF8 to UTF8");
+ "conversion of UTF8 to UTF8");
torture_assert_data_blob_equal(tctx, plato_output2, plato_utf8,
- "conversion of display charset UTF8 to UTF8");
+ "conversion of UTF8 to UTF8");
torture_assert_int_equal(tctx,
strlen_m_ext_handle(iconv_handle,
(const char *)plato_output.data,
- CH_DISPLAY, CH_UTF8),
+ CH_UTF8, CH_UTF8),
plato_output2.length,
- "checking strlen_m_ext of conversion of display charset UTF8 to UTF8");
+ "checking strlen_m_ext of conversion of UTF8 to UTF8");
return true;
}
@@ -674,7 +1114,7 @@ static bool test_plato_latin_cp850_utf8_handle(struct torture_context *tctx)
talloc_steal(tctx, plato_latin_utf8.data);
talloc_steal(tctx, plato_latin_utf16le.data);
- iconv_handle = get_iconv_testing_handle(tctx, "CP850", "UTF8", "UTF8");
+ iconv_handle = get_iconv_testing_handle(tctx, "CP850", "UTF8");
torture_assert(tctx, iconv_handle, "creating iconv handle");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
@@ -691,11 +1131,11 @@ static bool test_plato_latin_cp850_utf8_handle(struct torture_context *tctx)
torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF8 to (unix charset) UTF8 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
- CH_UTF8, CH_DISPLAY,
+ CH_UTF8, CH_UTF8,
plato_latin_utf8.data, plato_latin_utf8.length,
(void *)&plato_latin_output.data, &plato_latin_output.length),
"conversion of UTF16 latin charset greek to unix charset UTF8 failed");
- torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF8 to (display charset) UTF8 incorrect");
+ torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF8 to UTF8 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
CH_UTF16LE, CH_DOS,
@@ -711,25 +1151,25 @@ static bool test_plato_latin_cp850_utf8_handle(struct torture_context *tctx)
torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF16LE to (unix charset) CP850 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
- CH_UTF16LE, CH_DISPLAY,
+ CH_UTF16LE, CH_UTF8,
plato_latin_utf16le.data, plato_latin_utf16le.length,
(void *)&plato_latin_output.data, &plato_latin_output.length),
- "conversion of UTF16 latin charset greek to display charset UTF8 failed");
- torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect");
+ "conversion of UTF16 latin charset greek to UTF8 failed");
+ torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF16LE to UTF8 incorrect");
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle,
- CH_DISPLAY, CH_UTF16LE,
+ CH_UTF8, CH_UTF16LE,
plato_latin_output.data, plato_latin_output.length,
(void *)&plato_latin_output2.data, &plato_latin_output2.length),
- "round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again failed");
+ "round trip conversion of UTF16 latin charset greek to UTF8 and back again failed");
torture_assert_data_blob_equal(tctx, plato_latin_output2, plato_latin_utf16le,
- "round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again failed");
+ "round trip conversion of UTF16 latin charset greek to UTF8 and back again failed");
torture_assert_int_equal(tctx,
strlen_m_ext_handle(iconv_handle,
(const char *)plato_latin_output.data,
- CH_DISPLAY, CH_UTF16LE),
+ CH_UTF8, CH_UTF16LE),
plato_latin_output2.length / 2,
- "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again");
+ "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to UTF8 and back again");
return true;
}
@@ -742,7 +1182,7 @@ static bool test_gd_case_utf8_handle(struct torture_context *tctx)
char *gd_lower, *gd_upper;
talloc_steal(tctx, gd_utf8.data);
- iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8", "UTF8");
+ iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8");
torture_assert(tctx, iconv_handle, "getting utf8 iconv handle");
torture_assert(tctx,
@@ -805,7 +1245,7 @@ static bool test_gd_case_cp850_handle(struct torture_context *tctx)
char *gd_lower, *gd_upper;
talloc_steal(tctx, gd_cp850.data);
- iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "CP850", "CP850");
+ iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "CP850");
torture_assert(tctx, iconv_handle, "getting cp850 iconv handle");
torture_assert(tctx,
@@ -866,7 +1306,7 @@ static bool test_plato_case_utf8_handle(struct torture_context *tctx)
char *plato_lower, *plato_upper;
talloc_steal(tctx, plato_utf8.data);
- iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8", "UTF8");
+ iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8");
torture_assert(tctx, iconv_handle, "getting utf8 iconv handle");
torture_assert(tctx,
@@ -1248,9 +1688,12 @@ struct torture_suite *torture_local_convert_string_handle(TALLOC_CTX *mem_ctx)
struct torture_suite *suite = torture_suite_create(mem_ctx, "convert_string_handle");
torture_suite_add_simple_test(suite, "gd_ascii", test_gd_ascii_handle);
+ torture_suite_add_simple_test(suite, "gd_minus_1", test_gd_minus_1_handle);
torture_suite_add_simple_test(suite, "gd_iso8859_cp850", test_gd_iso8859_cp850_handle);
torture_suite_add_simple_test(suite, "plato_english_iso8859_cp850", test_plato_english_iso8859_cp850_handle);
+ torture_suite_add_simple_test(suite, "plato_english_minus_1", test_plato_english_minus_1_handle);
torture_suite_add_simple_test(suite, "plato_cp850_utf8", test_plato_cp850_utf8_handle);
+ torture_suite_add_simple_test(suite, "plato_minus_1", test_plato_minus_1_handle);
torture_suite_add_simple_test(suite, "plato_latin_cp850_utf8", test_plato_latin_cp850_utf8_handle);
return suite;
}
diff --git a/lib/util/charset/util_str.c b/lib/util/charset/util_str.c
index e8f0b788b1..688ab5a0a1 100644
--- a/lib/util/charset/util_str.c
+++ b/lib/util/charset/util_str.c
@@ -5,6 +5,8 @@
Copyright (C) Simo Sorce 2001
Copyright (C) Andrew Bartlett 2011
Copyright (C) Jeremy Allison 1992-2007
+ Copyright (C) Martin Pool 2003
+ Copyright (C) James Peach 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -167,7 +169,6 @@ _PUBLIC_ size_t strlen_m_ext_handle(struct smb_iconv_handle *ic,
switch (dst_charset) {
case CH_DOS:
case CH_UNIX:
- case CH_DISPLAY:
smb_panic("cannot call strlen_m_ext() with a variable dest charset (must be UTF16* or UTF8)");
default:
break;
@@ -327,7 +328,7 @@ _PUBLIC_ char *strchr_m(const char *src, char c)
for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
if (*s == c)
- return (char *)s;
+ return discard_const_p(char, s);
}
if (!*s)
@@ -395,7 +396,7 @@ _PUBLIC_ char *strrchr_m(const char *s, char c)
break;
}
/* No - we have a match ! */
- return (char *)cp;
+ return discard_const_p(char , cp);
}
} while (cp-- != s);
if (!got_mb)
@@ -473,3 +474,84 @@ _PUBLIC_ bool strhasupper(const char *string)
struct smb_iconv_handle *ic = get_iconv_handle();
return strhasupper_handle(ic, string);
}
+
+/***********************************************************************
+ strstr_m - We convert via ucs2 for now.
+
+ Multi-byte aware substring search.
+
+ Returns a pointer into src at the first occurrence of findstr, with
+ const discarded.  An empty findstr returns src itself; a one-byte
+ findstr is delegated to strchr_m().  Returns NULL when findstr is not
+ found, and also when any of the charset conversions fails (a level 0
+ DEBUG message is logged in that case).
+
+ While src contains only bytes below 0x80 the search is done directly
+ on the bytes.  Once a byte with the high bit set is met, both strings
+ are converted with push_ucs2_talloc(), searched with strstr_w(), and
+ the part of src before the match is converted back with
+ pull_ucs2_talloc() to find the match offset in src.
+
+ Neither src nor findstr is checked for NULL.
+***********************************************************************/
+
+char *strstr_m(const char *src, const char *findstr)
+{
+ smb_ucs2_t *p;
+ smb_ucs2_t *src_w, *find_w;
+ const char *s;
+ char *s2;
+ char *retp;
+ size_t converted_size, findstr_len = 0;
+
+ TALLOC_CTX *frame; /* Only set up in the iconv case */
+
+ /* for correctness */
+ if (!findstr[0]) {
+ return discard_const_p(char, src);
+ }
+
+ /* Samba does single character findstr calls a *lot*. */
+ if (findstr[1] == '\0')
+ return strchr_m(src, *findstr);
+
+ /* We optimise for the ascii case, knowing that all our
+ supported multi-byte character sets are ascii-compatible
+ (ie. they match for the first 128 chars) */
+
+ for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
+ if (*s == *findstr) {
+ if (!findstr_len) /* computed lazily, on the first candidate only */
+ findstr_len = strlen(findstr);
+
+ if (strncmp(s, findstr, findstr_len) == 0) {
+ return discard_const_p(char, s);
+ }
+ }
+ }
+
+ if (!*s) /* reached the end of src without meeting a high-bit byte */
+ return NULL;
+
+#if 1 /* def BROKEN_UNICODE_COMPOSE_CHARACTERS */
+ /* 'make check' fails unless we do this */
+
+ /* With compose characters we must restart from the beginning. JRA. */
+ s = src;
+#endif
+
+ frame = talloc_stackframe();
+
+ if (!push_ucs2_talloc(frame, &src_w, src, &converted_size)) {
+ DEBUG(0,("strstr_m: src malloc fail\n"));
+ TALLOC_FREE(frame);
+ return NULL;
+ }
+
+ if (!push_ucs2_talloc(frame, &find_w, findstr, &converted_size)) {
+ DEBUG(0,("strstr_m: find malloc fail\n"));
+ TALLOC_FREE(frame);
+ return NULL;
+ }
+
+ p = strstr_w(src_w, find_w);
+
+ if (!p) {
+ TALLOC_FREE(frame);
+ return NULL;
+ }
+
+ *p = 0; /* cut src_w at the match so only the preceding text is converted back */
+ if (!pull_ucs2_talloc(frame, &s2, src_w, &converted_size)) {
+ TALLOC_FREE(frame);
+ DEBUG(0,("strstr_m: dest malloc fail\n"));
+ return NULL;
+ }
+ retp = discard_const_p(char, (s+strlen(s2))); /* s was reset to src above; strlen(s2) is used as the byte offset of the match */
+ TALLOC_FREE(frame);
+ return retp;
+}
diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c
index a1be501c7c..e4ae65053c 100644
--- a/lib/util/charset/util_unistr.c
+++ b/lib/util/charset/util_unistr.c
@@ -161,85 +161,6 @@ _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
}
/**
- Convert a string to lower case.
-**/
-_PUBLIC_ void strlower_m(char *s)
-{
- char *d;
- struct smb_iconv_handle *iconv_handle;
-
- /* this is quite a common operation, so we want it to be
- fast. We optimise for the ascii case, knowing that all our
- supported multi-byte character sets are ascii-compatible
- (ie. they match for the first 128 chars) */
- while (*s && !(((uint8_t)*s) & 0x80)) {
- *s = tolower((uint8_t)*s);
- s++;
- }
-
- if (!*s)
- return;
-
- iconv_handle = get_iconv_handle();
-
- d = s;
-
- while (*s) {
- size_t c_size, c_size2;
- codepoint_t c = next_codepoint_handle(iconv_handle, s, &c_size);
- c_size2 = push_codepoint_handle(iconv_handle, d, tolower_m(c));
- if (c_size2 > c_size) {
- DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
- c, tolower_m(c), (int)c_size, (int)c_size2));
- smb_panic("codepoint expansion in strlower_m\n");
- }
- s += c_size;
- d += c_size2;
- }
- *d = 0;
-}
-
-/**
- Convert a string to UPPER case.
-**/
-_PUBLIC_ void strupper_m(char *s)
-{
- char *d;
- struct smb_iconv_handle *iconv_handle;
-
- /* this is quite a common operation, so we want it to be
- fast. We optimise for the ascii case, knowing that all our
- supported multi-byte character sets are ascii-compatible
- (ie. they match for the first 128 chars) */
- while (*s && !(((uint8_t)*s) & 0x80)) {
- *s = toupper((uint8_t)*s);
- s++;
- }
-
- if (!*s)
- return;
-
- iconv_handle = get_iconv_handle();
-
- d = s;
-
- while (*s) {
- size_t c_size, c_size2;
- codepoint_t c = next_codepoint_handle(iconv_handle, s, &c_size);
- c_size2 = push_codepoint_handle(iconv_handle, d, toupper_m(c));
- if (c_size2 > c_size) {
- DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
- c, toupper_m(c), (int)c_size, (int)c_size2));
- smb_panic("codepoint expansion in strupper_m\n");
- }
- s += c_size;
- d += c_size2;
- }
- *d = 0;
-}
-
-
-/**
Find the number of 'c' chars in a string
**/
_PUBLIC_ size_t count_chars_m(const char *s, char c)
@@ -273,7 +194,7 @@ _PUBLIC_ size_t count_chars_m(const char *s, char c)
* @param dest_len the maximum length in bytes allowed in the
* destination. If @p dest_len is -1 then no maximum is used.
**/
-static bool push_ascii(void *dest, const char *src, size_t dest_len, int flags, size_t *converted_size)
+static bool push_ascii_string(void *dest, const char *src, size_t dest_len, int flags, size_t *converted_size)
{
size_t src_len;
bool ret;
@@ -283,7 +204,7 @@ static bool push_ascii(void *dest, const char *src, size_t dest_len, int flags,
if (tmpbuf == NULL) {
return false;
}
- ret = push_ascii(dest, tmpbuf, dest_len, flags & ~STR_UPPER, converted_size);
+ ret = push_ascii_string(dest, tmpbuf, dest_len, flags & ~STR_UPPER, converted_size);
talloc_free(tmpbuf);
return ret;
}
@@ -297,23 +218,6 @@ static bool push_ascii(void *dest, const char *src, size_t dest_len, int flags,
}
/**
- * Copy a string from a unix char* src to an ASCII destination,
- * allocating a buffer using talloc().
- *
- * @param dest always set at least to NULL
- *
- * @returns The number of bytes occupied by the string in the destination
- * or -1 in case of error.
- **/
-_PUBLIC_ bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
-{
- size_t src_len = strlen(src)+1;
- *dest = NULL;
- return convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest, converted_size);
-}
-
-
-/**
* Copy a string from a dos codepage source to a unix char* destination.
*
* The resulting string in "dest" is always null terminated.
@@ -328,7 +232,7 @@ _PUBLIC_ bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, s
* @param src_len is the length of the source area in bytes.
* @returns the number of bytes occupied by the string in @p src.
**/
-static ssize_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
+static ssize_t pull_ascii_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
{
size_t size = 0;
@@ -411,38 +315,6 @@ static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags
/**
- * Copy a string from a unix char* src to a UCS2 destination,
- * allocating a buffer using talloc().
- *
- * @param dest always set at least to NULL
- *
- * @returns The number of bytes occupied by the string in the destination
- * or -1 in case of error.
- **/
-_PUBLIC_ bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size)
-{
- size_t src_len = strlen(src)+1;
- *dest = NULL;
- return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (void **)dest, converted_size);
-}
-
-
-/**
- * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
- *
- * @param dest always set at least to NULL
- *
- * @returns The number of bytes occupied by the string in the destination
- **/
-
-_PUBLIC_ bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
-{
- size_t src_len = strlen(src)+1;
- *dest = NULL;
- return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, converted_size);
-}
-
-/**
Copy a string from a ucs2 source to a unix char* destination.
Flags can have:
STR_TERMINATE means the string in src is null terminated.
@@ -484,51 +356,6 @@ static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src
}
/**
- * Copy a string from a ASCII src to a unix char * destination, allocating a buffer using talloc
- *
- * @param dest always set at least to NULL
- *
- * @returns The number of bytes occupied by the string in the destination
- **/
-
-_PUBLIC_ bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
-{
- size_t src_len = strlen(src)+1;
- *dest = NULL;
- return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, converted_size);
-}
-
-/**
- * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
- *
- * @param dest always set at least to NULL
- *
- * @returns The number of bytes occupied by the string in the destination
- **/
-
-_PUBLIC_ bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size)
-{
- size_t src_len = utf16_len(src);
- *dest = NULL;
- return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest, converted_size);
-}
-
-/**
- * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
- *
- * @param dest always set at least to NULL
- *
- * @returns The number of bytes occupied by the string in the destination
- **/
-
-_PUBLIC_ bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
-{
- size_t src_len = strlen(src)+1;
- *dest = NULL;
- return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, converted_size);
-}
-
-/**
Copy a string from a char* src to a unicode or ascii
dos codepage destination choosing unicode or ascii based on the
flags in the SMB buffer starting at base_ptr.
@@ -546,7 +373,7 @@ _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int f
{
if (flags & STR_ASCII) {
size_t size = 0;
- if (push_ascii(dest, src, dest_len, flags, &size)) {
+ if (push_ascii_string(dest, src, dest_len, flags, &size)) {
return (ssize_t)size;
} else {
return (ssize_t)-1;
@@ -577,7 +404,7 @@ _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int f
_PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
{
if (flags & STR_ASCII) {
- return pull_ascii(dest, src, dest_len, src_len, flags);
+ return pull_ascii_string(dest, src, dest_len, src_len, flags);
} else if (flags & STR_UNICODE) {
return pull_ucs2(dest, src, dest_len, src_len, flags);
} else {
@@ -585,68 +412,3 @@ _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_
return -1;
}
}
-
-
-/**
- * Convert string from one encoding to another, making error checking etc
- *
- * @param src pointer to source string (multibyte or singlebyte)
- * @param srclen length of the source string in bytes
- * @param dest pointer to destination string (multibyte or singlebyte)
- * @param destlen maximal length allowed for string
- * @param converted_size the number of bytes occupied in the destination
- *
- * @returns true on success, false on fail.
- **/
-_PUBLIC_ bool convert_string(charset_t from, charset_t to,
- void const *src, size_t srclen,
- void *dest, size_t destlen,
- size_t *converted_size)
-{
- return convert_string_handle(get_iconv_handle(), from, to,
- src, srclen,
- dest, destlen, converted_size);
-}
-
-/**
- * Convert string from one encoding to another, making error checking etc
- *
- * @param src pointer to source string (multibyte or singlebyte)
- * @param srclen length of the source string in bytes
- * @param dest pointer to destination string (multibyte or singlebyte)
- * @param destlen maximal length allowed for string
- * @param converted_size the number of bytes occupied in the destination
- *
- * @returns true on success, false on fail.
- **/
-_PUBLIC_ bool convert_string_error(charset_t from, charset_t to,
- void const *src, size_t srclen,
- void *dest, size_t destlen,
- size_t *converted_size)
-{
- return convert_string_error_handle(get_iconv_handle(), from, to,
- src, srclen,
- dest, destlen, converted_size);
-}
-
-/**
- * Convert between character sets, allocating a new buffer using talloc for the result.
- *
- * @param srclen length of source buffer.
- * @param dest always set at least to NULL
- * @param converted_size Size in bytes of the converted string
- * @note -1 is not accepted for srclen.
- *
- * @returns boolean indication whether the conversion succeeded
- **/
-
-_PUBLIC_ bool convert_string_talloc(TALLOC_CTX *ctx,
- charset_t from, charset_t to,
- void const *src, size_t srclen,
- void *dest, size_t *converted_size)
-{
- return convert_string_talloc_handle(ctx, get_iconv_handle(),
- from, to, src, srclen, dest,
- converted_size);
-}
-
diff --git a/lib/util/charset/util_unistr_w.c b/lib/util/charset/util_unistr_w.c
index a550e52776..3fbed7f67c 100644
--- a/lib/util/charset/util_unistr_w.c
+++ b/lib/util/charset/util_unistr_w.c
@@ -22,8 +22,8 @@
#include "includes.h"
/* Copy into a smb_ucs2_t from a possibly unaligned buffer. Return the copied smb_ucs2_t */
-#define COPY_UCS2_CHAR(dest,src) (((unsigned char *)(dest))[0] = ((unsigned char *)(src))[0],\
- ((unsigned char *)(dest))[1] = ((unsigned char *)(src))[1], (dest))
+#define COPY_UCS2_CHAR(dest,src) (((unsigned char *)(dest))[0] = ((const unsigned char *)(src))[0],\
+ ((unsigned char *)(dest))[1] = ((const unsigned char *)(src))[1], (dest))
/* return an ascii version of a ucs2 character */
@@ -72,12 +72,12 @@ smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
smb_ucs2_t cp;
while (*(COPY_UCS2_CHAR(&cp,s))) {
if (c == cp) {
- return (smb_ucs2_t *)s;
+ return discard_const_p(smb_ucs2_t, s);
}
s++;
}
if (c == cp) {
- return (smb_ucs2_t *)s;
+ return discard_const_p(smb_ucs2_t, s);
}
return NULL;
@@ -104,7 +104,7 @@ smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
p += (len - 1);
do {
if (c == *(COPY_UCS2_CHAR(&cp,p))) {
- return (smb_ucs2_t *)p;
+ return discard_const_p(smb_ucs2_t, p);
}
} while (p-- != s);
return NULL;
@@ -234,38 +234,6 @@ static int strncmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
return (len - n)?(*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b))):0;
}
-/*******************************************************************
- Case insensitive string comparison.
-********************************************************************/
-
-int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
-{
- smb_ucs2_t cpa, cpb;
-
- while ((*COPY_UCS2_CHAR(&cpb,b)) && toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb)) {
- a++;
- b++;
- }
- return (tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b))));
-}
-
-/*******************************************************************
- Case insensitive string comparison, length limited.
-********************************************************************/
-
-int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
-{
- smb_ucs2_t cpa, cpb;
- size_t n = 0;
-
- while ((n < len) && *COPY_UCS2_CHAR(&cpb,b) && (toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb))) {
- a++;
- b++;
- n++;
- }
- return (len - n)?(tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))):0;
-}
-
/*
The *_wa() functions take a combination of 7 bit ascii
and wide characters They are used so that you can use string
diff --git a/lib/util/charset/weird.c b/lib/util/charset/weird.c
new file mode 100644
index 0000000000..5db8cdcecd
--- /dev/null
+++ b/lib/util/charset/weird.c
@@ -0,0 +1,134 @@
+/*
+ Unix SMB/CIFS implementation.
+ Samba module with developer tools
+ Copyright (C) Andrew Tridgell 2001
+ Copyright (C) Jelmer Vernooij 2002
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "includes.h"
+
+/*
+ * Substitution table for the "WEIRD" charset: the character `from` is
+ * represented in encoded form by the byte sequence `to`, which is `len`
+ * bytes long.  The entry whose `from` is 0 terminates the table.
+ *
+ * The table is read-only, and `len` is a size_t because it is compared
+ * against and subtracted from size_t byte counts.
+ */
+static const struct {
+	char from;
+	const char *to;
+	size_t len;
+} weird_table[] = {
+	{'q', "^q^", 3},
+	{'Q', "^Q^", 3},
+	{0, NULL, 0}
+};
+
+/**
+ * Decode "WEIRD"-encoded bytes into 2-byte output characters.
+ *
+ * Each byte sequence listed in weird_table[].to is replaced by the matching
+ * `from` character; any other input byte is copied as-is.  Every output
+ * character is written as that byte followed by a zero byte.
+ *
+ * @param cd unused
+ * @param inbuf in/out: read position, advanced past the consumed bytes
+ * @param inbytesleft in/out: number of bytes remaining at *inbuf
+ * @param outbuf in/out: write position, advanced past the produced bytes
+ * @param outbytesleft in/out: number of bytes of space remaining at *outbuf
+ *
+ * @returns 0 once all input has been consumed, or (size_t)-1 with errno set
+ * to E2BIG when input remains but fewer than 2 bytes of output space are left.
+ **/
+static size_t weird_pull(void *cd, const char **inbuf, size_t *inbytesleft,
+			 char **outbuf, size_t *outbytesleft)
+{
+	while (*inbytesleft >= 1 && *outbytesleft >= 2) {
+		/* Default: pass the next input byte through unchanged. */
+		char decoded = (*inbuf)[0];
+		size_t consumed = 1;
+		int i;
+
+		for (i = 0; weird_table[i].from; i++) {
+			size_t seq_len = (size_t)weird_table[i].len;
+
+			/*
+			 * Check the length before comparing: comparing
+			 * first would read past the end of the caller's
+			 * input.  A sequence cut off by the end of the input
+			 * therefore never matches and is copied byte for
+			 * byte.
+			 */
+			if (*inbytesleft >= seq_len &&
+			    memcmp(*inbuf, weird_table[i].to, seq_len) == 0) {
+				decoded = weird_table[i].from;
+				consumed = seq_len;
+				break;
+			}
+		}
+
+		(*outbuf)[0] = decoded;
+		(*outbuf)[1] = 0;
+		(*inbytesleft) -= consumed;
+		(*outbytesleft) -= 2;
+		(*inbuf) += consumed;
+		(*outbuf) += 2;
+	}
+
+	if (*inbytesleft > 0) {
+		/* Ran out of output space before the input was used up. */
+		errno = E2BIG;
+		return (size_t)-1;
+	}
+
+	return 0;
+}
+
+/**
+ * Encode 2-byte input characters into "WEIRD" bytes.
+ *
+ * A character whose first byte equals a weird_table[].from entry and whose
+ * second byte is zero is written as the matching `to` sequence.  For any
+ * other character only the first byte is written; when its second byte is
+ * non-zero that byte is dropped and the character is counted.
+ *
+ * @param cd unused
+ * @param inbuf in/out: read position, advanced past the consumed bytes
+ * @param inbytesleft in/out: number of bytes remaining at *inbuf
+ * @param outbuf in/out: write position, advanced past the produced bytes
+ * @param outbytesleft in/out: number of bytes of space remaining at *outbuf
+ *
+ * @returns the number of characters whose second byte was dropped, or
+ * (size_t)-1 with errno set to EINVAL (a single trailing input byte is left)
+ * or E2BIG (the output space ran out before the input was consumed).
+ **/
+static size_t weird_push(void *cd, const char **inbuf, size_t *inbytesleft,
+			 char **outbuf, size_t *outbytesleft)
+{
+	size_t ir_count = 0;
+
+	while (*inbytesleft >= 2 && *outbytesleft >= 1) {
+		const char *seq = NULL;
+		size_t seq_len = 0;
+		int i;
+
+		/* Only characters with a zero second byte can be in the table. */
+		if ((*inbuf)[1] == 0) {
+			for (i = 0; weird_table[i].from; i++) {
+				if ((*inbuf)[0] == weird_table[i].from) {
+					seq = weird_table[i].to;
+					seq_len = (size_t)weird_table[i].len;
+					break;
+				}
+			}
+		}
+
+		if (seq != NULL) {
+			if (*outbytesleft < seq_len) {
+				/*
+				 * Stop without consuming the character rather
+				 * than writing its raw byte: the encoded form
+				 * must not depend on how much output space
+				 * happened to be left.
+				 */
+				DEBUG(0,("No room for weird character\n"));
+				errno = E2BIG;
+				return (size_t)-1;
+			}
+			memcpy(*outbuf, seq, seq_len);
+			(*outbuf) += seq_len;
+			(*outbytesleft) -= seq_len;
+		} else {
+			(*outbuf)[0] = (*inbuf)[0];
+			if ((*inbuf)[1]) ir_count++;
+			(*outbuf) += 1;
+			(*outbytesleft) -= 1;
+		}
+
+		(*inbuf) += 2;
+		(*inbytesleft) -= 2;
+	}
+
+	if (*inbytesleft == 1) {
+		/* Half of a 2-byte character is left over. */
+		errno = EINVAL;
+		return (size_t)-1;
+	}
+
+	if (*inbytesleft > 1) {
+		errno = E2BIG;
+		return (size_t)-1;
+	}
+
+	return ir_count;
+}
+
+/*
+ * Descriptor passed to smb_register_charset() by charset_weird_init():
+ * the name "WEIRD" followed by the weird_pull and weird_push converters.
+ */
+struct charset_functions weird_functions = {"WEIRD", weird_pull, weird_push};
+
+/*
+ * Module entry point: hand weird_functions to smb_register_charset().
+ * Returns NT_STATUS_OK when registration succeeds and
+ * NT_STATUS_INTERNAL_ERROR when it does not.
+ */
+NTSTATUS charset_weird_init(void);
+NTSTATUS charset_weird_init(void)
+{
+	if (smb_register_charset(&weird_functions)) {
+		return NT_STATUS_OK;
+	}
+
+	return NT_STATUS_INTERNAL_ERROR;
+}
diff --git a/lib/util/charset/wscript_build b/lib/util/charset/wscript_build
index 29e168dce1..1f2c8dfa7a 100644
--- a/lib/util/charset/wscript_build
+++ b/lib/util/charset/wscript_build
@@ -1,18 +1,44 @@
#!/usr/bin/env python
-
-if bld.env._SAMBA_BUILD_ == 4:
- bld.SAMBA_SUBSYSTEM('CHARSET',
- source='charcnv.c util_unistr.c',
- public_deps='CODEPOINTS',
- public_headers='charset.h',
- )
-
bld.SAMBA_SUBSYSTEM('ICONV_WRAPPER',
source='iconv.c',
public_deps='iconv replace talloc')
-bld.SAMBA_SUBSYSTEM('CODEPOINTS',
- source='codepoints.c util_str.c util_unistr_w.c',
- deps='DYNCONFIG ICONV_WRAPPER'
- )
+# CHARSET subsystem: the listed charset sources of this directory declared as
+# a single subsystem, with charset.h as its public header.
+bld.SAMBA_SUBSYSTEM('CHARSET',
+ public_headers='charset.h',
+ source='codepoints.c convert_string.c util_str.c util_unistr_w.c charcnv.c pull_push.c util_unistr.c',
+ deps='DYNCONFIG ICONV_WRAPPER',
+ public_deps='talloc')
+
+# charset_weird: module of the CHARSET subsystem built from weird.c.
+# NOTE(review): internal_module/enabled presumably select static linking and
+# whether the module is built at all - confirm against the SAMBA3_IS_* helpers.
+bld.SAMBA_MODULE('charset_weird',
+ subsystem='CHARSET',
+ source='weird.c',
+ init_function='',
+ deps='samba-util',
+ internal_module=bld.SAMBA3_IS_STATIC_MODULE('charset_weird'),
+ enabled=bld.SAMBA3_IS_ENABLED_MODULE('charset_weird'))
+
+# charset_CP850: module of the CHARSET subsystem built from CP850.c.
+bld.SAMBA_MODULE('charset_CP850',
+ subsystem='CHARSET',
+ source='CP850.c',
+ init_function='',
+ deps='samba-util',
+ internal_module=bld.SAMBA3_IS_STATIC_MODULE('charset_CP850'),
+ enabled=bld.SAMBA3_IS_ENABLED_MODULE('charset_CP850'))
+
+# charset_CP437: module of the CHARSET subsystem built from CP437.c.
+bld.SAMBA_MODULE('charset_CP437',
+ subsystem='CHARSET',
+ source='CP437.c',
+ init_function='',
+ deps='samba-util',
+ internal_module=bld.SAMBA3_IS_STATIC_MODULE('charset_CP437'),
+ enabled=bld.SAMBA3_IS_ENABLED_MODULE('charset_CP437'))
+
+# charset_macosxfs: module of the CHARSET subsystem built from
+# charset_macosxfs.c.  No deps= argument is passed for this module.
+bld.SAMBA_MODULE('charset_macosxfs',
+ subsystem='CHARSET',
+ source='charset_macosxfs.c',
+ init_function='',
+ internal_module=bld.SAMBA3_IS_STATIC_MODULE('charset_macosxfs'),
+ enabled=bld.SAMBA3_IS_ENABLED_MODULE('charset_macosxfs'))
+
+