/*
   Unix SMB/CIFS implementation.

   local testing of iconv routines. This tests the system iconv code against
   the built-in iconv code

   Copyright (C) Andrew Tridgell 2004

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include "includes.h"
#include "torture/torture.h"
#include "system/iconv.h"
#include "system/time.h"
#include "libcli/raw/libcliraw.h"
#include "torture/util.h"

#if HAVE_NATIVE_ICONV

/*
  generate a UTF-16LE buffer for a given unicode codepoint

  codepoint: value to encode; it is serialised as 4 little-endian bytes and
             fed to the system iconv as UCS-4LE
  buf:       receives the UTF-16LE bytes; iconv is told 8 bytes are available
  size:      set to the number of bytes written to buf

  Returns -1 if the converter cannot be opened (*size is then left
  untouched), otherwise the iconv() result converted to int.

  The conversion descriptor is opened on first use and kept in a static for
  all later calls; it is never closed.
*/
static int gen_codepoint_utf16(unsigned int codepoint,
			       char *buf, size_t *size)
{
	static iconv_t cd;
	uint8_t in[4];
	char *ptr_in;
	size_t size_in, size_out, ret;

	if (!cd) {
		cd = iconv_open("UTF-16LE", "UCS-4LE");
		if (cd == (iconv_t)-1) {
			/* reset so that the next call retries the open */
			cd = NULL;
			return -1;
		}
	}

	in[0] = codepoint & 0xFF;
	in[1] = (codepoint>>8) & 0xFF;
	in[2] = (codepoint>>16) & 0xFF;
	in[3] = (codepoint>>24) & 0xFF;

	ptr_in = (char *)in;
	size_in = 4;
	size_out = 8;

	ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);

	*size = 8 - size_out;

	return (int)ret;
}


/*
  work out the unicode codepoint of the first character in the buffer

  buf/size: bytes encoded in "charset"
  charset:  name of the source encoding, handed to iconv_open()

  Returns the first character as a UCS-4 value. Returns 0 when nothing could
  be converted, which includes the case where the system iconv cannot open
  a converter for "charset".
*/
static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
{
	iconv_t cd;
	uint8_t out[4];
	char *ptr_out;
	size_t size_out, size_in;

	/* stays all-zero (=> result 0) unless iconv writes a character */
	memset(out, 0, sizeof(out));

	cd = iconv_open("UCS-4LE", charset);
	if (cd == (iconv_t)-1) {
		/* never hand an invalid descriptor to iconv()/iconv_close() */
		return 0;
	}

	size_in = size;
	ptr_out = (char *)out;
	size_out = sizeof(out);

	/*
	  The return value is deliberately ignored: the output only has room
	  for a single UCS-4 character, so iconv() is expected to stop with
	  an error whenever the input holds more than one character.
	*/
	(void)iconv(cd, &buf, &size_in, &ptr_out, &size_out);

	iconv_close(cd);

	/* assemble in unsigned arithmetic; out[3]<<24 would overflow an int */
	return (unsigned int)out[0] |
		((unsigned int)out[1]<<8) |
		((unsigned int)out[2]<<16) |
		((unsigned int)out[3]<<24);
}

/*
  display a buffer with name prefix
*/
static void show_buf(const char *name, uint8_t *buf, size_t size)
{
	int i;
	printf("%s ", name);
	/*
	  NOTE(review): this file is damaged from here on. Everything between
	  the '<' of the loop condition below and a later '>' is missing: the
	  remainder of show_buf() and the whole head of test_buffer()
	  (signature, declarations, the two conversions being compared).
	  The surviving text is kept exactly as found; restore the missing
	  part from version control rather than guessing at it.
	*/
	for (i=0;i len1 && memcmp(buf1, buf2, len1) == 0 &&
	    get_codepoint(buf2+len1, len2-len1, charset) >= (1<<20)) {
		return ok;
	}

	/*
	  Tail of test_buffer(). From the visible code: buf1/len1 and
	  buf2/len2 hold the two conversion results under comparison, and a
	  length difference is tolerated when the first extra character is
	  a codepoint >= 1<<20.
	*/
	if (len1 > len2 &&
	    memcmp(buf1, buf2, len2) == 0 &&
	    get_codepoint(buf1+len2, len1-len2, charset) >= (1<<20)) {
		return ok;
	}

	/* both conversions must agree on return value, errno and sizes */
	if (ret1 != ret2) {
		torture_fail(test, "ret1=%d ret2=%d", (int)ret1, (int)ret2);
		ok = 0;
	}

	if (errno1 != errno2) {
		torture_fail(test, "e1=%s e2=%s", strerror(errno1), strerror(errno2));
		show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
		show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
		ok = 0;
	}

	if (outsize1 != outsize2) {
		torture_fail(test, "outsize mismatch outsize1=%d outsize2=%d",
			     (int)outsize1, (int)outsize2);
		ok = 0;
	}

	if (size_in1 != size_in2) {
		torture_fail(test, "size_in mismatch size_in1=%d size_in2=%d",
			     (int)size_in1, (int)size_in2);
		ok = 0;
	}

	/* ... and they must have produced identical output bytes */
	if (!ok ||
	    len1 != len2 ||
	    memcmp(buf1, buf2, len1) != 0) {
		torture_fail(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
		show_buf(" IN1:", inbuf, size-size_in1);
		show_buf(" IN2:", inbuf, size-size_in2);
		show_buf("OUT1:", buf1, len1);
		show_buf("OUT2:", buf2, len2);
		if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
			torture_comment(test, "next codepoint is %u",
					get_codepoint(buf2+len1, len2-len1, charset));
		}
		if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
			torture_comment(test, "next codepoint is %u",
					get_codepoint(buf1+len2,len1-len2, charset));
		}
		ok = 0;
	}

	/* convert back to UTF-16, putting result in buf3 */
	size = size - size_in1;	/* only the input bytes that were consumed */
	ptr_in = buf1;
	ptr_out = buf3;
	size_in3 = len1;
	outsize3 = sizeof(buf3);

	memset(ptr_out, 0, outsize3);
	ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);

	/* we only internally support the first 1M codepoints */
	if (outsize3 != sizeof(buf3) - size &&
	    get_codepoint(inbuf+sizeof(buf3) - outsize3,
			  size - (sizeof(buf3) - outsize3),
			  "UTF-16LE") >= (1<<20)) {
		return ok;
	}

	if (ret3 != 0) {
		torture_fail(test, "pull failed - %s", strerror(errno));
		ok = 0;
	}

	if (strncmp(charset, "UTF", 3) != 0) {
		/* don't expect perfect mappings for non UTF charsets */
		return ok;
	}

	/* for UTF charsets the round trip must reproduce the input exactly */
	if (outsize3 != sizeof(buf3) - size) {
		torture_fail(test, "wrong outsize3 - %d should be %d",
			     (int)outsize3, (int)(sizeof(buf3) - size));
		ok = 0;
	}

	if (memcmp(buf3, inbuf, size) != 0) {
		torture_fail(test, "pull bytes mismatch:");
		show_buf("inbuf", inbuf, size);
		show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
		ok = 0;
		torture_comment(test, "next codepoint is %u\n",
				get_codepoint(inbuf+sizeof(buf3) - outsize3,
					      size - (sizeof(buf3) - outsize3),
					      "UTF-16LE"));
	}

	if (!ok) {
		torture_fail(test, "test_buffer failed for charset %s", charset);
	}

	return ok;
}


/*
  test the push_codepoint() and next_codepoint() functions for a given
  codepoint

  data points at the unsigned int codepoint to test. The codepoint is
  encoded with push_codepoint() and decoded again with next_codepoint();
  both the decoded value and the reported length must match.

  Returns True on success, False on a mismatch.
*/
static int test_codepoint(struct torture_context *test, const void *data)
{
	uint8_t buf[10];
	size_t size, size2;
	unsigned int codepoint = *((const unsigned int *)data);
	codepoint_t c;

	size = push_codepoint(buf, codepoint);
	if (size == (size_t)-1) {
		/* a push failure is only accepted inside this range */
		torture_assert(test, codepoint >= 0xd800 && codepoint <= 0x10000, NULL);
		return True;
	}

	/*
	  put random bytes directly after the encoded character -
	  presumably so that the decode cannot depend on trailing data
	*/
	buf[size] = random();
	buf[size+1] = random();
	buf[size+2] = random();
	buf[size+3] = random();

	c = next_codepoint(buf, &size2);

	if (c != codepoint) {
		torture_fail(test, "next_codepoint(%u) failed - gave %u", codepoint, c);
		return False;
	}

	if (size2 != size) {
		torture_fail(test, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
			     codepoint, (int)size2, (int)size);
		return False;
	}

	return True;
}

/*
  run test_codepoint() over every codepoint below 1<<20, stopping at the
  first failure
*/
static BOOL test_next_codepoint(struct torture_context *test, const void *data)
{
	unsigned int codepoint;

	for (codepoint=0;codepoint<(1<<20);codepoint++) {
		if (!test_codepoint(test, &codepoint))
			return False;
	}

	return True;
}

/*
  run test_buffer() against "UTF-8" for every codepoint below 1<<20 that
  the system iconv can encode as UTF-16LE; codepoints for which
  gen_codepoint_utf16() reports non-zero are skipped
*/
static BOOL test_first_1m(struct torture_context *test, const void *data)
{
	unsigned int codepoint;
	size_t size;
	unsigned char inbuf[1000];

	for (codepoint=0;codepoint<(1<<20);codepoint++) {
		if (gen_codepoint_utf16(codepoint, (char *)inbuf, &size) != 0) {
			continue;
		}

		if (codepoint % 1000 == 0) {
			/*
			  NOTE(review): this prints only when the "progress"
			  parameter is false, which looks inverted - confirm
			  against lp_parm_bool() before changing
			*/
			if (!lp_parm_bool(-1, "torture", "progress", True)) {
				printf("codepoint=%u \r", codepoint);
			}
		}

		if (!test_buffer(test, inbuf, size, "UTF-8"))
			return False;
	}

	return True;
}

/*
  test buffers of random data; registered below as
  "5M random UTF-16LE sequences"

  NOTE(review): the loop bound is 500000, not the 5M the name suggests.
*/
static BOOL test_random_5m(struct torture_context *test, const void *data)
{
	unsigned char inbuf[1000];
	unsigned int i;

	for (i=0;i<500000;i++) {
		size_t size;
		unsigned int c;

		if (i % 1000 == 0) {
			/* NOTE(review): same apparently inverted check as in test_first_1m() */
			if (!lp_parm_bool(-1, "torture", "progress", True)) {
				torture_comment(test, "i=%u \r", i);
			}
		}

		size = random() % 100;
		/*
		  NOTE(review): the file is damaged here as well. Everything
		  between the '<' of the loop condition below and the '>' of
		  a later message string is missing: the remainder of
		  test_random_5m() and the head of torture_local_iconv().
		  The surviving text is kept exactly as found; restore the
		  missing part from version control.
		*/
		for (c=0;c UCS-4LE\n");
		return suite;
	}
	iconv_close(cd);

	/* tail of torture_local_iconv(): register the three test cases */
	torture_suite_add_simple_tcase(suite, "next_codepoint()",
				       test_next_codepoint, NULL);

	torture_suite_add_simple_tcase(suite, "first 1M codepoints",
				       test_first_1m, NULL);

	torture_suite_add_simple_tcase(suite, "5M random UTF-16LE sequences",
				       test_random_5m, NULL);
	return suite;
}

#else /* !HAVE_NATIVE_ICONV */

/*
  without a native iconv there is nothing to compare the built-in
  implementation against: print a notice and return no suite
*/
struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
{
	printf("No native iconv library - can't run iconv test\n");
	return NULL;
}

#endif /* HAVE_NATIVE_ICONV */