From 09a63accb8863a883ef8aba3a44776ac92ae5764 Mon Sep 17 00:00:00 2001 From: Jelmer Vernooij Date: Fri, 24 Oct 2008 00:06:35 +0200 Subject: Move charset library to top level. --- lib/util/charset/tests/charset.c | 272 +++++++++++++++++++++++++ lib/util/charset/tests/iconv.c | 424 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 696 insertions(+) create mode 100644 lib/util/charset/tests/charset.c create mode 100644 lib/util/charset/tests/iconv.c (limited to 'lib/util/charset/tests') diff --git a/lib/util/charset/tests/charset.c b/lib/util/charset/tests/charset.c new file mode 100644 index 0000000000..5e42ca2932 --- /dev/null +++ b/lib/util/charset/tests/charset.c @@ -0,0 +1,272 @@ +/* + Unix SMB/CIFS implementation. + test suite for the charcnv functions + + Copyright (C) Jelmer Vernooij 2007 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "includes.h" +#include "torture/torture.h" + +static bool test_toupper_w(struct torture_context *tctx) +{ + torture_assert_int_equal(tctx, toupper_w('c'), 'C', "c"); + torture_assert_int_equal(tctx, toupper_w('Z'), 'Z', "z"); + torture_assert_int_equal(tctx, toupper_w(0xFFFF4565), 0xFFFF4565, "0xFFFF4565"); + return true; +} + +static bool test_tolower_w(struct torture_context *tctx) +{ + torture_assert_int_equal(tctx, tolower_w('C'), 'c', "c"); + torture_assert_int_equal(tctx, tolower_w('z'), 'z', "z"); + torture_assert_int_equal(tctx, tolower_w(0xFFFF4565), 0xFFFF4565, "0xFFFF4565"); + return true; +} + +static bool test_codepoint_cmpi(struct torture_context *tctx) +{ + torture_assert_int_equal(tctx, codepoint_cmpi('a', 'a'), 0, "same char"); + torture_assert_int_equal(tctx, codepoint_cmpi('A', 'a'), 0, "upcase version"); + torture_assert_int_equal(tctx, codepoint_cmpi('b', 'a'), 1, "right diff"); + torture_assert_int_equal(tctx, codepoint_cmpi('a', 'b'), -1, "right diff"); + return true; +} + +static bool test_strcasecmp_m(struct torture_context *tctx) +{ + torture_assert(tctx, strcasecmp_m("foo", "bar") != 0, "different strings"); + torture_assert(tctx, strcasecmp_m("foo", "foo") == 0, "same case strings"); + torture_assert(tctx, strcasecmp_m("foo", "Foo") == 0, "different case strings"); + torture_assert(tctx, strcasecmp_m(NULL, "Foo") != 0, "one NULL"); + torture_assert(tctx, strcasecmp_m("foo", NULL) != 0, "other NULL"); + torture_assert(tctx, strcasecmp_m(NULL, NULL) == 0, "both NULL"); + return true; +} + + +static bool test_strequal_w(struct torture_context *tctx) +{ + torture_assert(tctx, !strequal_w("foo", "bar"), "different strings"); + torture_assert(tctx, strequal_w("foo", "foo"), "same case strings"); + torture_assert(tctx, strequal_w("foo", "Foo"), "different case strings"); + torture_assert(tctx, !strequal_w(NULL, "Foo"), "one NULL"); + torture_assert(tctx, !strequal_w("foo", NULL), "other NULL"); + torture_assert(tctx, 
strequal_w(NULL, NULL), "both NULL"); + return true; +} + +static bool test_strcsequal_w(struct torture_context *tctx) +{ + torture_assert(tctx, !strcsequal_w("foo", "bar"), "different strings"); + torture_assert(tctx, strcsequal_w("foo", "foo"), "same case strings"); + torture_assert(tctx, !strcsequal_w("foo", "Foo"), "different case strings"); + torture_assert(tctx, !strcsequal_w(NULL, "Foo"), "one NULL"); + torture_assert(tctx, !strcsequal_w("foo", NULL), "other NULL"); + torture_assert(tctx, strcsequal_w(NULL, NULL), "both NULL"); + return true; +} + +static bool test_string_replace_w(struct torture_context *tctx) +{ + char data[6] = "bla"; + string_replace_w(data, 'b', 'c'); + torture_assert_str_equal(tctx, data, "cla", "first char replaced"); + memcpy(data, "bab", 4); + string_replace_w(data, 'b', 'c'); + torture_assert_str_equal(tctx, data, "cac", "other chars replaced"); + memcpy(data, "bba", 4); + string_replace_w(data, 'b', 'c'); + torture_assert_str_equal(tctx, data, "cca", "other chars replaced"); + memcpy(data, "blala", 6); + string_replace_w(data, 'o', 'c'); + torture_assert_str_equal(tctx, data, "blala", "no chars replaced"); + string_replace_w(NULL, 'b', 'c'); + return true; +} + +static bool test_strncasecmp_m(struct torture_context *tctx) +{ + torture_assert(tctx, strncasecmp_m("foo", "bar", 3) != 0, "different strings"); + torture_assert(tctx, strncasecmp_m("foo", "foo", 3) == 0, "same case strings"); + torture_assert(tctx, strncasecmp_m("foo", "Foo", 3) == 0, "different case strings"); + torture_assert(tctx, strncasecmp_m("fool", "Foo", 3) == 0, "different case strings"); + torture_assert(tctx, strncasecmp_m("fool", "Fool", 40) == 0, "over size"); + torture_assert(tctx, strncasecmp_m("BLA", "Fool", 0) == 0, "empty"); + torture_assert(tctx, strncasecmp_m(NULL, "Foo", 3) != 0, "one NULL"); + torture_assert(tctx, strncasecmp_m("foo", NULL, 3) != 0, "other NULL"); + torture_assert(tctx, strncasecmp_m(NULL, NULL, 3) == 0, "both NULL"); + return true; 
+} + +static bool test_next_token_null(struct torture_context *tctx) +{ + char buf[20]; + torture_assert(tctx, !next_token(NULL, buf, " ", 20), "null ptr works"); + return true; +} + +static bool test_next_token(struct torture_context *tctx) +{ + const char *teststr = "foo bar bla"; + char buf[20]; + torture_assert(tctx, next_token(&teststr, buf, " ", 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "foo", "token matches"); + torture_assert_str_equal(tctx, teststr, "bar bla", "ptr modified correctly"); + + torture_assert(tctx, next_token(&teststr, buf, " ", 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "bar", "token matches"); + torture_assert_str_equal(tctx, teststr, "bla", "ptr modified correctly"); + + torture_assert(tctx, next_token(&teststr, buf, " ", 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "bla", "token matches"); + torture_assert_str_equal(tctx, teststr, "", "ptr modified correctly"); + + torture_assert(tctx, !next_token(&teststr, buf, " ", 20), "finding token doesn't work"); + return true; +} + +static bool test_next_token_implicit_sep(struct torture_context *tctx) +{ + const char *teststr = "foo\tbar\n bla"; + char buf[20]; + torture_assert(tctx, next_token(&teststr, buf, NULL, 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "foo", "token matches"); + torture_assert_str_equal(tctx, teststr, "bar\n bla", "ptr modified correctly"); + + torture_assert(tctx, next_token(&teststr, buf, NULL, 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "bar", "token matches"); + torture_assert_str_equal(tctx, teststr, " bla", "ptr modified correctly"); + + torture_assert(tctx, next_token(&teststr, buf, NULL, 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "bla", "token matches"); + torture_assert_str_equal(tctx, teststr, "", "ptr modified correctly"); + + torture_assert(tctx, !next_token(&teststr, buf, NULL, 20), "finding token doesn't work"); + return 
true; +} + +static bool test_next_token_seps(struct torture_context *tctx) +{ + const char *teststr = ",foo bla"; + char buf[20]; + torture_assert(tctx, next_token(&teststr, buf, ",", 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "foo bla", "token matches"); + torture_assert_str_equal(tctx, teststr, "", "ptr modified correctly"); + + torture_assert(tctx, !next_token(&teststr, buf, ",", 20), "finding token doesn't work"); + return true; +} + +static bool test_next_token_quotes(struct torture_context *tctx) +{ + const char *teststr = "\"foo bar\" bla"; + char buf[20]; + torture_assert(tctx, next_token(&teststr, buf, " ", 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "foo bar", "token matches"); + torture_assert_str_equal(tctx, teststr, "bla", "ptr modified correctly"); + + torture_assert(tctx, next_token(&teststr, buf, " ", 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "bla", "token matches"); + torture_assert_str_equal(tctx, teststr, "", "ptr modified correctly"); + + torture_assert(tctx, !next_token(&teststr, buf, " ", 20), "finding token doesn't work"); + return true; +} + +static bool test_next_token_quote_wrong(struct torture_context *tctx) +{ + const char *teststr = "\"foo bar bla"; + char buf[20]; + torture_assert(tctx, next_token(&teststr, buf, " ", 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "foo bar bla", "token matches"); + torture_assert_str_equal(tctx, teststr, "", "ptr modified correctly"); + + torture_assert(tctx, !next_token(&teststr, buf, " ", 20), "finding token doesn't work"); + return true; +} + +static bool test_strlen_m(struct torture_context *tctx) +{ + torture_assert_int_equal(tctx, strlen_m("foo"), 3, "simple len"); + torture_assert_int_equal(tctx, strlen_m("foo\x83l"), 6, "extended len"); + torture_assert_int_equal(tctx, strlen_m(NULL), 0, "NULL"); + return true; +} + +static bool test_strlen_m_term(struct torture_context *tctx) +{ + 
torture_assert_int_equal(tctx, strlen_m_term("foo"), 4, "simple len"); + torture_assert_int_equal(tctx, strlen_m_term("foo\x83l"), 7, "extended len"); + torture_assert_int_equal(tctx, strlen_m(NULL), 0, "NULL"); + return true; +} + +static bool test_strhaslower(struct torture_context *tctx) +{ + torture_assert(tctx, strhaslower("a"), "one low char"); + torture_assert(tctx, strhaslower("aB"), "one low, one up char"); + torture_assert(tctx, !strhaslower("B"), "one up char"); + torture_assert(tctx, !strhaslower(""), "empty string"); + torture_assert(tctx, !strhaslower("3"), "one digit"); + return true; +} + +static bool test_strhasupper(struct torture_context *tctx) +{ + torture_assert(tctx, strhasupper("B"), "one up char"); + torture_assert(tctx, strhasupper("aB"), "one low, one up char"); + torture_assert(tctx, !strhasupper("a"), "one low char"); + torture_assert(tctx, !strhasupper(""), "empty string"); + torture_assert(tctx, !strhasupper("3"), "one digit"); + return true; +} + +static bool test_count_chars_w(struct torture_context *tctx) +{ + torture_assert_int_equal(tctx, count_chars_w("foo", 'o'), 2, "simple"); + torture_assert_int_equal(tctx, count_chars_w("", 'o'), 0, "empty"); + torture_assert_int_equal(tctx, count_chars_w("bla", 'o'), 0, "none"); + torture_assert_int_equal(tctx, count_chars_w("bla", '\0'), 0, "null"); + return true; +} + +struct torture_suite *torture_local_charset(TALLOC_CTX *mem_ctx) +{ + struct torture_suite *suite = torture_suite_create(mem_ctx, "CHARSET"); + + torture_suite_add_simple_test(suite, "toupper_w", test_toupper_w); + torture_suite_add_simple_test(suite, "tolower_w", test_tolower_w); + torture_suite_add_simple_test(suite, "codepoint_cmpi", test_codepoint_cmpi); + torture_suite_add_simple_test(suite, "strcasecmp_m", test_strcasecmp_m); + torture_suite_add_simple_test(suite, "strequal_w", test_strequal_w); + torture_suite_add_simple_test(suite, "strcsequal_w", test_strcsequal_w); + torture_suite_add_simple_test(suite, 
"string_replace_w", test_string_replace_w); + torture_suite_add_simple_test(suite, "strncasecmp_m", test_strncasecmp_m); + torture_suite_add_simple_test(suite, "next_token", test_next_token); + torture_suite_add_simple_test(suite, "next_token_null", test_next_token_null); + torture_suite_add_simple_test(suite, "next_token_implicit_sep", test_next_token_implicit_sep); + torture_suite_add_simple_test(suite, "next_token_quotes", test_next_token_quotes); + torture_suite_add_simple_test(suite, "next_token_seps", test_next_token_seps); + torture_suite_add_simple_test(suite, "next_token_quote_wrong", test_next_token_quote_wrong); + torture_suite_add_simple_test(suite, "strlen_m", test_strlen_m); + torture_suite_add_simple_test(suite, "strlen_m_term", test_strlen_m_term); + torture_suite_add_simple_test(suite, "strhaslower", test_strhaslower); + torture_suite_add_simple_test(suite, "strhasupper", test_strhasupper); + torture_suite_add_simple_test(suite, "count_chars_w", test_count_chars_w); + + return suite; +} diff --git a/lib/util/charset/tests/iconv.c b/lib/util/charset/tests/iconv.c new file mode 100644 index 0000000000..aeb42c2fa1 --- /dev/null +++ b/lib/util/charset/tests/iconv.c @@ -0,0 +1,424 @@ +/* + Unix SMB/CIFS implementation. + + local testing of iconv routines. This tests the system iconv code against + the built-in iconv code + + Copyright (C) Andrew Tridgell 2004 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include "includes.h" +#include "torture/torture.h" +#include "system/iconv.h" +#include "system/time.h" +#include "libcli/raw/libcliraw.h" +#include "param/param.h" +#include "torture/util.h" + +#if HAVE_NATIVE_ICONV + +/* Decide whether the system iconv comparison tests can run. torture_skip is invoked when the iconv:native parameter is off, or when iconv_open cannot provide a converter to UTF-16LE from UCS-4LE or from CP850 (iconv_open takes the target charset first). Returns false when everything is available. Callers treat a true result as skipped, so torture_skip presumably returns true from this function - TODO confirm in torture.h. */ static bool iconv_untestable(struct torture_context *tctx) +{ + iconv_t cd; + + if (!lp_parm_bool(tctx->lp_ctx, NULL, "iconv", "native", true)) + torture_skip(tctx, "system iconv disabled - skipping test"); + + cd = iconv_open("UTF-16LE", "UCS-4LE"); + if (cd == (iconv_t)-1) + torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> UCS-4LE"); + iconv_close(cd); + + cd = iconv_open("UTF-16LE", "CP850"); + if (cd == (iconv_t)-1) + torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> CP850\n"); + iconv_close(cd); + + return false; +} + +/* + generate a UTF-16LE buffer for a given unicode codepoint + + The UCS-4LE to UTF-16LE converter is opened on first use and kept in a + static handle. The codepoint is fed to iconv as 4 little-endian bytes with + room for at most 8 output bytes; *size receives the number of bytes written + to buf. Returns the iconv result, or -1 when the converter cannot be opened + (in which case *size is not written). +*/ +static int gen_codepoint_utf16(unsigned int codepoint, + char *buf, size_t *size) +{ + static iconv_t cd; + uint8_t in[4]; + char *ptr_in; + size_t size_in, size_out, ret; + if (!cd) { + cd = iconv_open("UTF-16LE", "UCS-4LE"); + if (cd == (iconv_t)-1) { + /* reset the handle so that the next call tries to open it again */ cd = NULL; + return -1; + } + } + + in[0] = codepoint & 0xFF; + in[1] = (codepoint>>8) & 0xFF; + in[2] = (codepoint>>16) & 0xFF; + in[3] = (codepoint>>24) & 0xFF; + + ptr_in = (char *)in; + size_in = 4; + size_out = 8; + + ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out); + + *size = 8 - size_out; + + return ret; +} + + +/* + work out the unicode codepoint of the first character in the buffer, + where the buffer holds size bytes encoded in the given charset (not + necessarily UTF-8) + + The buffer is converted to UCS-4LE into a 4 byte output that is zeroed + beforehand, and those 4 bytes are assembled little-endian into the result. + Neither the iconv_open nor the iconv result is checked; a conversion that + writes nothing presumably yields 0 - TODO confirm. +*/ +static unsigned int get_codepoint(char *buf, size_t size, const char *charset) +{ + iconv_t cd; + uint8_t out[4]; + char *ptr_out; + size_t size_out, size_in, ret; + + cd = iconv_open("UCS-4LE", charset); + + size_in = size; + ptr_out = (char *)out; + size_out = sizeof(out); + memset(out, 0, sizeof(out)); + + ret = iconv(cd, &buf, &size_in, &ptr_out, &size_out); + + iconv_close(cd); + + return out[0] | (out[1]<<8) | (out[2]<<16) 
| (out[3]<<24); +} + +/* + display a buffer with name prefix +*/ +static void show_buf(const char *name, uint8_t *buf, size_t size) +{ + int i; + printf("%s ", name); + /* NOTE(review): this copy of the file has lost the text between the loop header below and the smb_iconv_open_ex arguments that follow it, i.e. the rest of show_buf and the head of test_buffer with its declarations. Recover that span from the upstream file before changing anything here. */ for (i=0;ilp_ctx, NULL, "iconv", "native", true)); + cd3 = smb_iconv_open_ex(test, "UTF-16LE", charset, lp_parm_bool(test->lp_ctx, NULL, "iconv", "native", true)); + last_charset = charset; + } + + /* internal convert to charset - placing result in buf1 */ + ptr_in = (const char *)inbuf; + ptr_out = (char *)buf1; + size_in1 = size; + outsize1 = sizeof(buf1); + + memset(ptr_out, 0, outsize1); + /* errno is cleared before and saved right after each conversion so the two implementations can be compared on it */ errno = 0; + ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1); + errno1 = errno; + + /* system convert to charset - placing result in buf2 */ + ptr_in = (const char *)inbuf; + ptr_out = (char *)buf2; + size_in2 = size; + outsize2 = sizeof(buf2); + + memset(ptr_out, 0, outsize2); + errno = 0; + ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2); + errno2 = errno; + + /* number of bytes each converter actually produced */ len1 = sizeof(buf1) - outsize1; + len2 = sizeof(buf2) - outsize2; + + /* codepoints above 1M are not interesting for now */ + /* when one output is a strict prefix of the other and the first extra character decodes to 1<<20 or more, the difference is accepted */ if (len2 > len1 && + memcmp(buf1, buf2, len1) == 0 && + get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) { + return true; + } + if (len1 > len2 && + memcmp(buf1, buf2, len2) == 0 && + get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) { + return true; + } + + torture_assert_int_equal(test, ret1, ret2, "ret mismatch"); + + if (errno1 != errno2) { + show_buf(" rem1:", inbuf+(size-size_in1), size_in1); + show_buf(" rem2:", inbuf+(size-size_in2), size_in2); + torture_fail(test, talloc_asprintf(test, + "e1=%d/%s e2=%d/%s", + errno1, strerror(errno1), + errno2, strerror(errno2))); + } + + torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch"); + + torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch"); + + if (len1 != len2 || + memcmp(buf1, buf2, len1) != 0) { + torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, 
(int)ret1, (int)ret2); + show_buf(" IN1:", inbuf, size-size_in1); + show_buf(" IN2:", inbuf, size-size_in2); + show_buf("OUT1:", buf1, len1); + show_buf("OUT2:", buf2, len2); + if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) { + torture_comment(test, "next codepoint is %u", + get_codepoint((char *)(buf2+len1), len2-len1, charset)); + } + if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) { + torture_comment(test, "next codepoint is %u", + get_codepoint((char *)(buf1+len2),len1-len2, charset)); + } + + torture_fail(test, "failed"); + } + + /* convert back to UTF-16, putting result in buf3 */ + /* from here on size is reduced by size_in1, the input byte count left over after the first conversion */ size = size - size_in1; + ptr_in = (const char *)buf1; + ptr_out = (char *)buf3; + size_in3 = len1; + outsize3 = sizeof(buf3); + + memset(ptr_out, 0, outsize3); + ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3); + + /* we only internally support the first 1M codepoints */ + if (outsize3 != sizeof(buf3) - size && + get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3), + size - (sizeof(buf3) - outsize3), + "UTF-16LE") >= (1<<20)) { + return true; + } + + torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test, + "pull failed - %s", strerror(errno))); + + if (strncmp(charset, "UTF", 3) != 0) { + /* don't expect perfect mappings for non UTF charsets */ + return true; + } + + + torture_assert_int_equal(test, outsize3, sizeof(buf3) - size, + "wrong outsize3"); + + if (memcmp(buf3, inbuf, size) != 0) { + torture_comment(test, "pull bytes mismatch:"); + show_buf("inbuf", inbuf, size); + show_buf(" buf3", buf3, sizeof(buf3) - outsize3); + torture_comment(test, "next codepoint is %u\n", + get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3), + size - (sizeof(buf3) - outsize3), + "UTF-16LE")); + torture_fail(test, ""); + } + + return true; +} + + +/* + test the push_codepoint() and next_codepoint() functions for a given + codepoint + + push_codepoint() writes the codepoint into buf; a result of -1 is only + tolerated for codepoints from 0xd800 to 0x10000 and ends the test + successfully. Otherwise next_codepoint() must read back the same + codepoint and report the same byte count. +*/ +static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint) +{ + uint8_t buf[10]; + size_t size, size2; + 
codepoint_t c; + + size = push_codepoint(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, codepoint); + /* size is a size_t, so the comparisons with -1 rely on -1 being converted to the largest size_t value */ torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000), + "Invalid Codepoint range"); + + if (size == -1) return true; + + /* four random bytes follow the encoded character; presumably so that next_codepoint has to work out the length on its own - TODO confirm */ buf[size] = random(); + buf[size+1] = random(); + buf[size+2] = random(); + buf[size+3] = random(); + + c = next_codepoint(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, &size2); + + torture_assert(tctx, c == codepoint, + talloc_asprintf(tctx, + "next_codepoint(%u) failed - gave %u", codepoint, c)); + + torture_assert(tctx, size2 == size, + talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n", + codepoint, (int)size2, (int)size)); + + return true; +} + +/* Run test_codepoint for every codepoint below 1<<20, stopping at the first failure; returns true early when iconv_untestable reports true. */ static bool test_next_codepoint(struct torture_context *tctx) +{ + unsigned int codepoint; + if (iconv_untestable(tctx)) + return true; + + for (codepoint=0;codepoint<(1<<20);codepoint++) { + if (!test_codepoint(tctx, codepoint)) + return false; + } + return true; +} + +/* For every codepoint below 1<<20 that gen_codepoint_utf16 converts with a zero result, run test_buffer on its UTF-16LE bytes with UTF-8 as the charset. A progress line is printed every 1000 codepoints when the progress setting is on. */ static bool test_first_1m(struct torture_context *tctx) +{ + unsigned int codepoint; + size_t size; + unsigned char inbuf[1000]; + + if (iconv_untestable(tctx)) + return true; + + for (codepoint=0;codepoint<(1<<20);codepoint++) { + /* codepoints the system iconv does not convert cleanly are skipped, not failed */ if (gen_codepoint_utf16(codepoint, (char *)inbuf, &size) != 0) { + continue; + } + + if (codepoint % 1000 == 0) { + if (torture_setting_bool(tctx, "progress", true)) { + torture_comment(tctx, "codepoint=%u \r", codepoint); + fflush(stdout); + } + } + + if (!test_buffer(tctx, inbuf, size, "UTF-8")) + return false; + } + return true; +} + +static bool test_random_5m(struct torture_context *tctx) +{ + unsigned char inbuf[1000]; + unsigned int i; + + if (iconv_untestable(tctx)) + return true; + + for (i=0;i<500000;i++) { + size_t size; + unsigned int c; + + if (i % 1000 == 0) { + if (torture_setting_bool(tctx, "progress", true)) { + torture_comment(tctx, "i=%u \r", i); + fflush(stdout); + } + } + + size = random() % 100; + for (c=0;c