summaryrefslogtreecommitdiff
path: root/source4/torture/local
diff options
context:
space:
mode:
authorAndrew Tridgell <tridge@samba.org>2004-09-01 04:39:06 +0000
committerGerald (Jerry) Carter <jerry@samba.org>2007-10-10 12:58:27 -0500
commit31c1c7846f6b6e5848bc39a28a65118bfa98e35d (patch)
tree7387bbf9234320ca2c60011b34c76c08e2b274b0 /source4/torture/local
parent48f3df41bb4c854b1049591e1c315c4c3b980984 (diff)
downloadsamba-31c1c7846f6b6e5848bc39a28a65118bfa98e35d.tar.gz
samba-31c1c7846f6b6e5848bc39a28a65118bfa98e35d.tar.bz2
samba-31c1c7846f6b6e5848bc39a28a65118bfa98e35d.zip
r2159: converted samba4 over to UTF-16.
I had previously thought this was unnecessary, as windows doesn't use standards compliant UTF-16, and for filesystem operations treats bytes as UCS-2, but Bjoern Jacke has pointed out to me that this means we don't correctly store extended UTF-16 characters as UTF-8 on disk. This can be seen with (for example) the gothic characters with codepoints above 64k. This commit also adds a LOCAL-ICONV torture test that tests the first 1 million codepoints against the system iconv library, and tests 5 million random UTF-16LE buffers for identical error handling to the system iconv library. the lib/iconv.c changes need backporting to samba3 (This used to be commit 756f28ac95feaa84b42402723d5f7286865c78db)
Diffstat (limited to 'source4/torture/local')
-rw-r--r--source4/torture/local/iconv.c298
1 files changed, 298 insertions, 0 deletions
diff --git a/source4/torture/local/iconv.c b/source4/torture/local/iconv.c
new file mode 100644
index 0000000000..0867be0bee
--- /dev/null
+++ b/source4/torture/local/iconv.c
@@ -0,0 +1,298 @@
+/*
+ Unix SMB/CIFS implementation.
+
+ local testing of iconv routines. This tests the system iconv code against
+ the built-in iconv code
+
+ Copyright (C) Andrew Tridgell 2004
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "includes.h"
+
+/*
+ generate a UTF-16LE buffer for a given unicode codepoint
+*/
+static int gen_codepoint(unsigned int codepoint,
+ char *buf, size_t *size)
+{
+ static iconv_t cd;
+ uint8_t in[4];
+ char *ptr_in;
+ size_t size_in, size_out, ret;
+ if (!cd) {
+ cd = iconv_open("UTF-16LE", "UCS-4LE");
+ }
+
+ in[0] = codepoint & 0xFF;
+ in[1] = (codepoint>>8) & 0xFF;
+ in[2] = (codepoint>>16) & 0xFF;
+ in[3] = (codepoint>>24) & 0xFF;
+
+ ptr_in = in;
+ size_in = 4;
+ size_out = 8;
+
+ ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);
+
+ *size = 8 - size_out;
+
+ return ret;
+}
+
+
+/*
+ work out the unicode codepoint of the first UTF-8 character in the buffer
+*/
+static unsigned int get_codepoint(char *buf, size_t size)
+{
+ static iconv_t cd;
+ uint8_t out[4];
+ char *ptr_out;
+ size_t size_out, size_in, ret;
+ if (!cd) {
+ cd = iconv_open("UCS-4LE", "UTF-8");
+ }
+
+ size_in = size;
+ ptr_out = out;
+ size_out = sizeof(out);
+ memset(out, 0, sizeof(out));
+
+ ret = iconv(cd, &buf, &size_in, &ptr_out, &size_out);
+
+ return out[0] | (out[1]<<8) | (out[2]<<16) | (out[3]<<24);
+}
+
+/*
+ display a buffer with name prefix
+*/
+static void show_buf(const char *name, uint8_t *buf, size_t size)
+{
+ int i;
+ printf("%s ", name);
+ for (i=0;i<size;i++) {
+ printf("%02x ", buf[i]);
+ }
+ printf("\n");
+}
+
+/*
+ given a UTF-16LE buffer, test the system and built-in iconv code
+ to make sure they do exactly the same thing in converting the buffer
+ to UTF-8, then convert it back again and ensure we get the same buffer
+ back
+*/
+static int test_buffer(uint8_t *inbuf, size_t size)
+{
+ uint8_t buf1[1000], buf2[1000], buf3[1000];
+ size_t outsize1, outsize2, outsize3;
+ const char *ptr_in;
+ char *ptr_out;
+ size_t size_in1, size_in2, size_in3;
+ size_t ret1, ret2, ret3, len1, len2;
+ int ok = 1;
+ int errno1, errno2;
+ static iconv_t cd;
+ static smb_iconv_t cd2, cd3;
+
+ if (!cd) {
+ cd = iconv_open("UTF-8", "UTF-16LE");
+ cd2 = smb_iconv_open("UTF-8", "UTF-16LE");
+ cd3 = smb_iconv_open("UTF-16LE", "UTF-8");
+ }
+
+#if 0
+ int i;
+ for (i=0;i<50;i++) {
+ ptr_in = inbuf;
+ ptr_out = buf1;
+ size_in1 = size;
+ outsize1 = sizeof(buf1);
+
+ memset(ptr_out, 0, outsize1);
+ errno = 0;
+ ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
+ errno1 = errno;
+ }
+#endif
+
+ ptr_in = inbuf;
+ ptr_out = buf1;
+ size_in1 = size;
+ outsize1 = sizeof(buf1);
+
+ memset(ptr_out, 0, outsize1);
+ errno = 0;
+ ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
+ errno1 = errno;
+
+ ptr_in = inbuf;
+ ptr_out = buf2;
+ size_in2 = size;
+ outsize2 = sizeof(buf2);
+
+ memset(ptr_out, 0, outsize2);
+ errno = 0;
+ ret2 = iconv(cd, &ptr_in, &size_in2, &ptr_out, &outsize2);
+ errno2 = errno;
+
+ len1 = sizeof(buf1) - outsize1;
+ len2 = sizeof(buf2) - outsize2;
+
+ /* codepoints above 1M are not interesting for now */
+ if (len2 > len1 &&
+ memcmp(buf1, buf2, len1) == 0 &&
+ get_codepoint(buf2+len1, len2-len1) >= (1<<20)) {
+ return ok;
+ }
+ if (len1 > len2 &&
+ memcmp(buf1, buf2, len2) == 0 &&
+ get_codepoint(buf1+len2, len1-len2) >= (1<<20)) {
+ return ok;
+ }
+
+ if (ret1 != ret2) {
+ printf("ret1=%d ret2=%d\n", ret1, ret2);
+ ok = 0;
+ }
+
+ if (errno1 != errno2) {
+ printf("e1=%s e2=%s\n", strerror(errno1), strerror(errno2));
+ show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
+ show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
+ ok = 0;
+ }
+
+ if (outsize1 != outsize2) {
+ printf("\noutsize mismatch outsize1=%d outsize2=%d\n",
+ outsize1, outsize2);
+ ok = 0;
+ }
+
+ if (size_in1 != size_in2) {
+ printf("\nsize_in mismatch size_in1=%d size_in2=%d\n",
+ size_in1, size_in2);
+ ok = 0;
+ }
+
+ if (!ok ||
+ len1 != len2 ||
+ memcmp(buf1, buf2, len1) != 0) {
+ printf("\nsize=%d ret1=%d ret2=%d\n", size, ret1, ret2);
+ show_buf(" IN1:", inbuf, size-size_in1);
+ show_buf(" IN2:", inbuf, size-size_in2);
+ show_buf("OUT1:", buf1, len1);
+ show_buf("OUT2:", buf2, len2);
+ if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
+ printf("next codepoint is %u\n", get_codepoint(buf2+len1, len2-len1));
+ }
+ if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
+ printf("next codepoint is %u\n", get_codepoint(buf1+len2,len1-len2));
+ }
+
+ ok = 0;
+ }
+
+ if (!ok) return ok;
+
+ size = size - size_in1;
+ ptr_in = buf1;
+ ptr_out = buf3;
+ size_in3 = len1;
+ outsize3 = sizeof(buf3);
+
+ memset(ptr_out, 0, outsize3);
+ ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
+
+ if (ret3 != 0) {
+ printf("pull failed - %s\n", strerror(errno));
+ ok = 0;
+ }
+
+ if (outsize3 != sizeof(buf3) - size) {
+ printf("wrong outsize3 - %d should be %d\n",
+ outsize3, sizeof(buf3) - size);
+ ok = 0;
+ }
+
+ if (memcmp(buf3, inbuf, size) != 0) {
+ int i;
+ printf("pull bytes mismatch:\n");
+ for (i=0;i<size;i++) {
+ printf("%02x ", inbuf[i]);
+ }
+ printf("\n");
+ for (i=0;i<size;i++) {
+ printf("%02x ", buf3[i]);
+ }
+ printf("\n");
+ ok = 0;
+ }
+
+ return ok;
+}
+
+BOOL torture_local_iconv(int dummy)
+{
+ size_t size;
+ unsigned char inbuf[1000];
+ int ok = 1;
+ unsigned int codepoint, i, c;
+
+ srandom(time(NULL));
+
+ printf("Testing first 1M codepoints\n");
+ for (codepoint=0;ok && codepoint<(1<<20);codepoint++) {
+ if (gen_codepoint(codepoint, inbuf, &size) != 0) {
+ continue;
+ }
+
+ if (codepoint % 100 == 0) {
+ printf("codepoint=%u \r", codepoint);
+ }
+
+ ok = test_buffer(inbuf, size);
+ }
+
+
+ printf("Testing 5M random UTF-16LE sequences\n");
+ for (i=0;ok && i<500000;i++) {
+ if (i % 100 == 0) {
+ printf("i=%u \r", i);
+ }
+
+ size = random() % 100;
+ for (c=0;c<size;c++) {
+ if (random() % 100 < 80) {
+ inbuf[c] = random() % 128;
+ } else {
+ inbuf[c] = random();
+ }
+ if (random() % 10 == 0) {
+ inbuf[c] |= 0xd8;
+ }
+ if (random() % 10 == 0) {
+ inbuf[c] |= 0xdc;
+ }
+ }
+ ok = test_buffer(inbuf, size);
+ }
+
+ return ok == 1;
+}
+
+