diff options
author | Jakub Hrozek <jhrozek@redhat.com> | 2011-12-06 14:57:58 +0100 |
---|---|---|
committer | Stephen Gallagher <sgallagh@redhat.com> | 2011-12-16 14:46:16 -0500 |
commit | dbea04f585a30d001b574317c068cd03a4fa332b (patch) | |
tree | f192eea6f1777f8442b625485f788bf2b411b5bc | |
parent | f7255e5d207800eb86003ec926951c0a6397aa04 (diff) | |
download | sssd-dbea04f585a30d001b574317c068cd03a4fa332b.tar.gz sssd-dbea04f585a30d001b574317c068cd03a4fa332b.tar.bz2 sssd-dbea04f585a30d001b574317c068cd03a4fa332b.zip |
sss_utf8_tolower utility function+unit tests
-rw-r--r-- | Makefile.am | 3 | ||||
-rw-r--r-- | src/tests/util-tests.c | 96 | ||||
-rw-r--r-- | src/util/sss_tc_utf8.c | 57 | ||||
-rw-r--r-- | src/util/sss_utf8.c | 54 | ||||
-rw-r--r-- | src/util/sss_utf8.h | 5 | ||||
-rw-r--r-- | src/util/util.c | 1 | ||||
-rw-r--r-- | src/util/util.h | 6 |
7 files changed, 221 insertions, 1 deletions
diff --git a/Makefile.am b/Makefile.am
index 8d75b876..b8fb9e09 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -382,7 +382,8 @@ libsss_util_la_SOURCES = \
     src/util/strtonum.c \
     src/util/check_and_open.c \
     src/util/refcount.c \
-    src/util/sss_utf8.c
+    src/util/sss_utf8.c \
+    src/util/sss_tc_utf8.c
 libsss_util_la_LIBADD = \
     $(SSSD_LIBS) \
     $(UNICODE_LIBS) \
diff --git a/src/tests/util-tests.c b/src/tests/util-tests.c
index 52d30277..33406dac 100644
--- a/src/tests/util-tests.c
+++ b/src/tests/util-tests.c
@@ -26,6 +26,7 @@
 #include <talloc.h>
 #include <check.h>
 #include "util/util.h"
+#include "util/sss_utf8.h"
 #include "tests/common.h"
 
 START_TEST(test_parse_args)
@@ -312,6 +313,91 @@ START_TEST(test_size_t_overflow)
 }
 END_TEST
 
+START_TEST(test_utf8_lowercase)
+{
+    const uint8_t munchen_utf8_upcase[] = { 'M', 0xC3, 0x9C, 'N', 'C', 'H', 'E', 'N', 0x0 };
+    const uint8_t munchen_utf8_lowcase[] = { 'm', 0xC3, 0xBC, 'n', 'c', 'h', 'e', 'n', 0x0 };
+    uint8_t *lcase;
+    size_t nlen;
+
+    lcase = sss_utf8_tolower(munchen_utf8_upcase,
+                             strlen((const char *)munchen_utf8_upcase),
+                             &nlen);
+    fail_if(strlen((const char *) munchen_utf8_upcase) != nlen); /* This is not true for utf8 strings in general */
+    fail_if(memcmp(lcase, munchen_utf8_lowcase, nlen));
+    sss_utf8_free(lcase);
+}
+END_TEST
+
+START_TEST(test_utf8_talloc_lowercase)
+{
+    const uint8_t munchen_utf8_upcase[] = { 'M', 0xC3, 0x9C, 'N', 'C', 'H', 'E', 'N', 0x0 };
+    const uint8_t munchen_utf8_lowcase[] = { 'm', 0xC3, 0xBC, 'n', 'c', 'h', 'e', 'n', 0x0 };
+    uint8_t *lcase;
+    size_t nsize;
+
+    TALLOC_CTX *test_ctx;
+    test_ctx = talloc_new(NULL);
+    fail_if(test_ctx == NULL);
+
+    lcase = sss_tc_utf8_tolower(test_ctx, munchen_utf8_upcase,
+                                strlen((const char *) munchen_utf8_upcase),
+                                &nsize);
+    fail_if(memcmp(lcase, munchen_utf8_lowcase, nsize));
+    talloc_free(test_ctx);
+}
+END_TEST
+
+START_TEST(test_utf8_talloc_str_lowercase)
+{
+    const uint8_t munchen_utf8_upcase[] = { 'M', 0xC3, 0x9C, 'N', 'C', 'H', 'E', 'N', 0x0 };
+    const uint8_t munchen_utf8_lowcase[] = { 'm', 0xC3, 0xBC, 'n', 'c', 'h', 'e', 'n', 0x0 };
+    char *lcase;
+
+    TALLOC_CTX *test_ctx;
+    test_ctx = talloc_new(NULL);
+    fail_if(test_ctx == NULL);
+
+    lcase = sss_tc_utf8_str_tolower(test_ctx, (const char *) munchen_utf8_upcase);
+    fail_if(memcmp(lcase, munchen_utf8_lowcase, strlen(lcase)));
+    talloc_free(test_ctx);
+}
+END_TEST
+
+START_TEST(test_utf8_caseeq)
+{
+    const uint8_t munchen_utf8_upcase[] = { 'M', 0xC3, 0x9C, 'N', 'C', 'H', 'E', 'N', 0x0 };
+    const uint8_t munchen_utf8_lowcase[] = { 'm', 0xC3, 0xBC, 'n', 'c', 'h', 'e', 'n', 0x0 };
+    const uint8_t czech_utf8_lowcase[] = { 0xC4, 0x8D, 'e', 'c', 'h', 0x0 };
+    const uint8_t czech_utf8_upcase[] = { 0xC4, 0x8C, 'e', 'c', 'h', 0x0 };
+    const uint8_t czech_utf8_lowcase_neg[] = { 0xC4, 0x8E, 'e', 'c', 'h', 0x0 };
+    errno_t ret;
+
+    ret = sss_utf8_case_eq(munchen_utf8_upcase, munchen_utf8_lowcase);
+    fail_unless(ret == EOK, "Latin 1 Supplement comparison failed\n");
+
+    ret = sss_utf8_case_eq(czech_utf8_upcase, czech_utf8_lowcase);
+    fail_unless(ret == EOK, "Latin Extended A comparison failed\n");
+
+    ret = sss_utf8_case_eq(czech_utf8_upcase, czech_utf8_lowcase_neg);
+    fail_if(ret == EOK, "Negative test succeeded\n");
+}
+END_TEST
+
+START_TEST(test_utf8_check)
+{
+    const char *invalid = "ad\351la\357d";
+    const uint8_t valid[] = { 'M', 0xC3, 0x9C, 'N', 'C', 'H', 'E', 'N', 0x0 };
+    bool ret;
+
+    ret = sss_utf8_check(valid, strlen((const char *) valid));
+    fail_unless(ret == true, "Positive test failed\n");
+
+    ret = sss_utf8_check((const uint8_t *) invalid, strlen(invalid));
+    fail_unless(ret == false, "Negative test succeeded\n");
+}
+END_TEST
+
 Suite *util_suite(void)
 {
     Suite *s = suite_create("util");
@@ -324,7 +410,17 @@ Suite *util_suite(void)
     tcase_add_test (tc_util, test_parse_args);
     tcase_set_timeout(tc_util, 60);
 
+    TCase *tc_utf8 = tcase_create("utf8");
+    tcase_add_test (tc_utf8, test_utf8_lowercase);
+    tcase_add_test (tc_utf8, test_utf8_talloc_lowercase);
+    tcase_add_test (tc_utf8, test_utf8_talloc_str_lowercase);
+    tcase_add_test (tc_utf8, test_utf8_caseeq);
+    tcase_add_test (tc_utf8, test_utf8_check);
+
+    tcase_set_timeout(tc_utf8, 60);
+
     suite_add_tcase (s, tc_util);
+    suite_add_tcase (s, tc_utf8);
 
     return s;
 }
diff --git a/src/util/sss_tc_utf8.c b/src/util/sss_tc_utf8.c
new file mode 100644
index 00000000..6a976211
--- /dev/null
+++ b/src/util/sss_tc_utf8.c
@@ -0,0 +1,57 @@
+/*
+    Authors:
+        Jakub Hrozek <jhrozek@redhat.com>
+
+    Copyright (C) 2011 Red Hat
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <talloc.h>
+#include "util/sss_utf8.h"
+
+char *
+sss_tc_utf8_str_tolower(TALLOC_CTX *mem_ctx, const char *s)
+{
+    size_t nlen;
+    uint8_t *ret, *tmp;
+
+    ret = sss_tc_utf8_tolower(mem_ctx, (const uint8_t *) s, strlen(s), &nlen);
+    if (!ret) return NULL;
+
+    /* talloc_realloc() keeps the old chunk on failure, so free it explicitly */
+    tmp = talloc_realloc(mem_ctx, ret, uint8_t, nlen+1);
+    if (!tmp) { talloc_free(ret); return NULL; }
+    tmp[nlen] = '\0';
+    return (char *) tmp;
+}
+
+uint8_t *
+sss_tc_utf8_tolower(TALLOC_CTX *mem_ctx, const uint8_t *s, size_t len, size_t *_nlen)
+{
+    uint8_t *lower;
+    uint8_t *ret;
+    size_t nlen;
+
+    lower = sss_utf8_tolower(s, len, &nlen);
+    if (!lower) return NULL;
+
+    ret = talloc_memdup(mem_ctx, lower, nlen);
+    sss_utf8_free(lower);
+    if (!ret) return NULL;
+
+    if (_nlen) *_nlen = nlen; /* optional, as in sss_utf8_tolower() */
+    return ret;
+}
+
diff --git a/src/util/sss_utf8.c b/src/util/sss_utf8.c
index 4a98233b..7997a6df 100644
--- a/src/util/sss_utf8.c
+++ b/src/util/sss_utf8.c
@@ -24,6 +24,60 @@
 #include "sss_utf8.h"
 
 #ifdef HAVE_LIBUNISTRING
+void sss_utf8_free(void *ptr)
+{
+    free(ptr);
+}
+#elif HAVE_GLIB2
+void sss_utf8_free(void *ptr)
+{
+    g_free(ptr);
+}
+#else
+#error No unicode library
+#endif
+
+#ifdef HAVE_LIBUNISTRING
+uint8_t *sss_utf8_tolower(const uint8_t *s, size_t len, size_t *_nlen)
+{
+    size_t llen;
+    uint8_t *lower;
+
+    lower = u8_tolower(s, len, NULL, NULL, NULL, &llen);
+    if (!lower) return NULL;
+
+    if (_nlen) *_nlen = llen;
+    return lower;
+}
+#elif HAVE_GLIB2
+uint8_t *sss_utf8_tolower(const uint8_t *s, size_t len, size_t *_nlen)
+{
+    gchar *glower;
+    size_t nlen;
+    uint8_t *lower;
+
+    glower = g_utf8_strdown((const gchar *) s, len);
+    if (!glower) return NULL;
+
+    /* strlen() is safe here because g_utf8_strdown() always null-terminates */
+    nlen = strlen(glower);
+
+    lower = g_malloc(nlen + 1); /* g_malloc(0) returns NULL; keep "" valid */
+    if (!lower) {
+        g_free(glower);
+        return NULL;
+    }
+
+    memcpy(lower, glower, nlen);
+    g_free(glower);
+    if (_nlen) *_nlen = nlen;
+    return (uint8_t *) lower;
+}
+#else
+#error No unicode library
+#endif
+
+#ifdef HAVE_LIBUNISTRING
 bool sss_utf8_check(const uint8_t *s, size_t n)
 {
     if (u8_check(s, n) == NULL) {
diff --git a/src/util/sss_utf8.h b/src/util/sss_utf8.h
index 37dcff95..b7da7621 100644
--- a/src/util/sss_utf8.h
+++ b/src/util/sss_utf8.h
@@ -35,6 +35,11 @@
 #define ENOMATCH -1
 #endif
 
+void sss_utf8_free(void *ptr);
+
+/* The result must be freed with sss_utf8_free() */
+uint8_t *sss_utf8_tolower(const uint8_t *s, size_t len, size_t *nlen);
+
 bool sss_utf8_check(const uint8_t *s, size_t n);
 
 errno_t sss_utf8_case_eq(const uint8_t *s1, const uint8_t *s2);
diff --git a/src/util/util.c b/src/util/util.c
index b4b1b124..f525c915 100644
--- a/src/util/util.c
+++ b/src/util/util.c
@@ -23,6 +23,7 @@
 
 #include "talloc.h"
 #include "util/util.h"
+#include "util/sss_utf8.h"
 #include "dhash.h"
 
 /* split a string into an allocated array of strings.
diff --git a/src/util/util.h b/src/util/util.h
index 9a006471..4ff112b7 100644
--- a/src/util/util.h
+++ b/src/util/util.h
@@ -502,4 +502,10 @@ errno_t sss_filter_sanitize(TALLOC_CTX *mem_ctx,
 char *
 sss_escape_ip_address(TALLOC_CTX *mem_ctx, int family, const char *addr);
 
+/* from sss_tc_utf8.c */
+char *
+sss_tc_utf8_str_tolower(TALLOC_CTX *mem_ctx, const char *s);
+uint8_t *
+sss_tc_utf8_tolower(TALLOC_CTX *mem_ctx, const uint8_t *s, size_t len, size_t *_nlen);
+
 #endif /* __SSSD_UTIL_H__ */