summary refs log tree commit diff
diff options
context:
space:
mode:
author Jakub Hrozek <jhrozek@redhat.com> 2011-12-06 14:57:58 +0100
committer Stephen Gallagher <sgallagh@redhat.com> 2011-12-16 14:46:16 -0500
commit dbea04f585a30d001b574317c068cd03a4fa332b (patch)
tree f192eea6f1777f8442b625485f788bf2b411b5bc
parent f7255e5d207800eb86003ec926951c0a6397aa04 (diff)
download sssd-dbea04f585a30d001b574317c068cd03a4fa332b.tar.gz
sssd-dbea04f585a30d001b574317c068cd03a4fa332b.tar.bz2
sssd-dbea04f585a30d001b574317c068cd03a4fa332b.zip
sss_utf8_tolower utility function+unit tests
-rw-r--r-- Makefile.am 3
-rw-r--r-- src/tests/util-tests.c 96
-rw-r--r-- src/util/sss_tc_utf8.c 57
-rw-r--r-- src/util/sss_utf8.c 54
-rw-r--r-- src/util/sss_utf8.h 5
-rw-r--r-- src/util/util.c 1
-rw-r--r-- src/util/util.h 6
7 files changed, 221 insertions, 1 deletions
diff --git a/Makefile.am b/Makefile.am
index 8d75b876..b8fb9e09 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -382,7 +382,8 @@ libsss_util_la_SOURCES = \
src/util/strtonum.c \
src/util/check_and_open.c \
src/util/refcount.c \
- src/util/sss_utf8.c
+ src/util/sss_utf8.c \
+ src/util/sss_tc_utf8.c
libsss_util_la_LIBADD = \
$(SSSD_LIBS) \
$(UNICODE_LIBS) \
diff --git a/src/tests/util-tests.c b/src/tests/util-tests.c
index 52d30277..33406dac 100644
--- a/src/tests/util-tests.c
+++ b/src/tests/util-tests.c
@@ -26,6 +26,7 @@
#include <talloc.h>
#include <check.h>
#include "util/util.h"
+#include "util/sss_utf8.h"
#include "tests/common.h"
START_TEST(test_parse_args)
@@ -312,6 +313,91 @@ START_TEST(test_size_t_overflow)
}
END_TEST
+/* sss_utf8_tolower() must turn "M<U+00DC>NCHEN" into "m<U+00FC>nchen". */
+START_TEST(test_utf8_lowercase)
+{
+    const uint8_t munchen_utf8_upcase[] = { 'M', 0xC3, 0x9C, 'N', 'C', 'H', 'E', 'N', 0x0 };
+    const uint8_t munchen_utf8_lowcase[] = { 'm', 0xC3, 0xBC, 'n', 'c', 'h', 'e', 'n', 0x0 };
+    uint8_t *lcase;
+    size_t nlen;
+
+    lcase = sss_utf8_tolower(munchen_utf8_upcase,
+                             strlen((const char *) munchen_utf8_upcase),
+                             &nlen);
+    /* Fail cleanly instead of handing NULL to memcmp() below */
+    fail_if(lcase == NULL, "sss_utf8_tolower failed\n");
+    /* Compare against the expected result's length; input and output byte
+     * lengths match for this string, but not for UTF-8 strings in general */
+    fail_if(strlen((const char *) munchen_utf8_lowcase) != nlen);
+    fail_if(memcmp(lcase, munchen_utf8_lowcase, nlen));
+    sss_utf8_free(lcase);
+}
+END_TEST
+
+
+/* sss_tc_utf8_tolower() must return the lowercased bytes on a talloc context. */
+START_TEST(test_utf8_talloc_lowercase)
+{
+    const uint8_t munchen_utf8_upcase[] = { 'M', 0xC3, 0x9C, 'N', 'C', 'H', 'E', 'N', 0x0 };
+    const uint8_t munchen_utf8_lowcase[] = { 'm', 0xC3, 0xBC, 'n', 'c', 'h', 'e', 'n', 0x0 };
+    uint8_t *lcase;
+    size_t nsize;
+
+    TALLOC_CTX *test_ctx;
+    test_ctx = talloc_new(NULL);
+    fail_if(test_ctx == NULL);
+
+    lcase = sss_tc_utf8_tolower(test_ctx, munchen_utf8_upcase,
+                                strlen((const char *) munchen_utf8_upcase),
+                                &nsize);
+    /* Fail cleanly instead of handing NULL to memcmp() below */
+    fail_if(lcase == NULL, "sss_tc_utf8_tolower failed\n");
+    /* Without a length check a zero nsize would make memcmp() pass vacuously */
+    fail_if(nsize != strlen((const char *) munchen_utf8_lowcase),
+            "Unexpected length of the lowercased string\n");
+    fail_if(memcmp(lcase, munchen_utf8_lowcase, nsize));
+    talloc_free(test_ctx);
+}
+END_TEST
+
+
+/* sss_tc_utf8_str_tolower() must return a NUL-terminated lowercased string. */
+START_TEST(test_utf8_talloc_str_lowercase)
+{
+    const uint8_t munchen_utf8_upcase[] = { 'M', 0xC3, 0x9C, 'N', 'C', 'H', 'E', 'N', 0x0 };
+    const uint8_t munchen_utf8_lowcase[] = { 'm', 0xC3, 0xBC, 'n', 'c', 'h', 'e', 'n', 0x0 };
+    char *lcase;
+
+    TALLOC_CTX *test_ctx;
+    test_ctx = talloc_new(NULL);
+    fail_if(test_ctx == NULL);
+
+    lcase = sss_tc_utf8_str_tolower(test_ctx, (const char *) munchen_utf8_upcase);
+    /* Fail cleanly instead of dereferencing NULL below */
+    fail_if(lcase == NULL, "sss_tc_utf8_str_tolower failed\n");
+    /* strcmp() also checks the length and the terminator; comparing only
+     * strlen(lcase) bytes would accept an empty or truncated result */
+    fail_if(strcmp(lcase, (const char *) munchen_utf8_lowcase) != 0,
+            "Unexpected lowercased string\n");
+    talloc_free(test_ctx);
+}
+END_TEST
+
+
+/* Case-insensitive comparison of UTF-8 strings: two matching pairs, one mismatch. */
+START_TEST(test_utf8_caseeq)
+{
+    /* U+00DC vs U+00FC (U with diaeresis), Latin-1 Supplement */
+    const uint8_t upper_de[] = { 'M', 0xC3, 0x9C, 'N', 'C', 'H', 'E', 'N', 0x0 };
+    const uint8_t lower_de[] = { 'm', 0xC3, 0xBC, 'n', 'c', 'h', 'e', 'n', 0x0 };
+    /* U+010C vs U+010D (C with caron), Latin Extended-A */
+    const uint8_t upper_cz[] = { 0xC4, 0x8C, 'e', 'c', 'h', 0x0 };
+    const uint8_t lower_cz[] = { 0xC4, 0x8D, 'e', 'c', 'h', 0x0 };
+    /* U+010E (D with caron): a different letter, must not compare equal */
+    const uint8_t other_cz[] = { 0xC4, 0x8E, 'e', 'c', 'h', 0x0 };
+    errno_t cmp;
+
+    cmp = sss_utf8_case_eq(upper_de, lower_de);
+    fail_if(cmp != EOK, "Latin 1 Supplement comparison failed\n");
+
+    cmp = sss_utf8_case_eq(upper_cz, lower_cz);
+    fail_if(cmp != EOK, "Latin Extended A comparison failed\n");
+
+    cmp = sss_utf8_case_eq(upper_cz, other_cz);
+    fail_unless(cmp != EOK, "Negative test succeeded\n");
+}
+END_TEST
+
+
+/* sss_utf8_check() must accept well-formed UTF-8 and reject stray high bytes. */
+START_TEST(test_utf8_check)
+{
+    const uint8_t well_formed[] = { 'M', 0xC3, 0x9C, 'N', 'C', 'H', 'E', 'N', 0x0 };
+    /* bytes 0xE9 and 0xEF appear without the continuation bytes UTF-8 requires */
+    const char *malformed = "ad\351la\357d";
+    bool accepted;
+
+    accepted = sss_utf8_check(well_formed, strlen((const char *) well_formed));
+    fail_if(!accepted, "Positive test failed\n");
+
+    accepted = sss_utf8_check((const uint8_t *) malformed, strlen(malformed));
+    fail_if(accepted, "Negative test succeeded\n");
+}
+END_TEST
+
+
Suite *util_suite(void)
{
Suite *s = suite_create("util");
@@ -324,7 +410,17 @@ Suite *util_suite(void)
tcase_add_test (tc_util, test_parse_args);
tcase_set_timeout(tc_util, 60);
+    /* The UTF-8 tests belong to their own test case; registering them on
+     * tc_util would leave tc_utf8 empty */
+    TCase *tc_utf8 = tcase_create("utf8");
+    tcase_add_test (tc_utf8, test_utf8_lowercase);
+    tcase_add_test (tc_utf8, test_utf8_talloc_lowercase);
+    tcase_add_test (tc_utf8, test_utf8_talloc_str_lowercase);
+    tcase_add_test (tc_utf8, test_utf8_caseeq);
+    tcase_add_test (tc_utf8, test_utf8_check);
+
+    tcase_set_timeout(tc_utf8, 60);
+
+
suite_add_tcase (s, tc_util);
+ suite_add_tcase (s, tc_utf8);
return s;
}
diff --git a/src/util/sss_tc_utf8.c b/src/util/sss_tc_utf8.c
new file mode 100644
index 00000000..6a976211
--- /dev/null
+++ b/src/util/sss_tc_utf8.c
@@ -0,0 +1,57 @@
+/*
+ Authors:
+ Jakub Hrozek <jhrozek@redhat.com>
+
+ Copyright (C) 2011 Red Hat
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <string.h>
+#include <talloc.h>
+#include "util/util.h"
+#include "util/sss_utf8.h"
+
+/*
+ * Lowercase the NUL-terminated UTF-8 string s.
+ *
+ * Returns a NUL-terminated copy allocated on mem_ctx, or NULL if s is NULL,
+ * the conversion fails or memory runs out.
+ */
+char *
+sss_tc_utf8_str_tolower(TALLOC_CTX *mem_ctx, const char *s)
+{
+    size_t nlen;
+    uint8_t *lower;
+    uint8_t *terminated;
+
+    if (s == NULL) return NULL;
+
+    lower = sss_tc_utf8_tolower(mem_ctx, (const uint8_t *) s, strlen(s), &nlen);
+    if (!lower) return NULL;
+
+    /* The converted buffer has no terminator; grow it by one byte. Keep the
+     * old pointer until the realloc succeeds so it can be released on failure
+     * instead of staying attached to mem_ctx */
+    terminated = talloc_realloc(mem_ctx, lower, uint8_t, nlen + 1);
+    if (!terminated) {
+        talloc_free(lower);
+        return NULL;
+    }
+
+    terminated[nlen] = '\0';
+    return (char *) terminated;
+}
+
+
+/*
+ * Lowercase len bytes of UTF-8 text at s.
+ *
+ * Returns a copy of the converted bytes allocated on mem_ctx, or NULL on
+ * failure. The copy is exactly as long as the converted text and carries no
+ * NUL terminator. Its byte length is stored in *_nlen when _nlen is not NULL.
+ */
+uint8_t *
+sss_tc_utf8_tolower(TALLOC_CTX *mem_ctx, const uint8_t *s, size_t len, size_t *_nlen)
+{
+    uint8_t *lower;
+    uint8_t *ret;
+    size_t nlen;
+
+    lower = sss_utf8_tolower(s, len, &nlen);
+    if (!lower) return NULL;
+
+    /* Move the result under talloc ownership and drop the library buffer */
+    ret = talloc_memdup(mem_ctx, lower, nlen);
+    sss_utf8_free(lower);
+    if (!ret) return NULL;
+
+    /* The length is optional, matching sss_utf8_tolower() */
+    if (_nlen) *_nlen = nlen;
+    return ret;
+}
+
+
diff --git a/src/util/sss_utf8.c b/src/util/sss_utf8.c
index 4a98233b..7997a6df 100644
--- a/src/util/sss_utf8.c
+++ b/src/util/sss_utf8.c
@@ -24,6 +24,60 @@
#include "sss_utf8.h"
#ifdef HAVE_LIBUNISTRING
+/* Release a buffer returned by sss_utf8_tolower(); this backend uses free() */
+void sss_utf8_free(void *ptr)
+{
+    /* no "return <expr>;" here: ISO C forbids it in a function returning void */
+    free(ptr);
+}
+#elif HAVE_GLIB2
+/* Release a buffer returned by sss_utf8_tolower(); this backend uses g_free() */
+void sss_utf8_free(void *ptr)
+{
+    g_free(ptr);
+}
+#else
+#error No unicode library
+#endif
+
+/*
+ * Lowercase len bytes of UTF-8 text at s.
+ *
+ * Returns a newly allocated buffer holding the converted text, or NULL on
+ * failure. If _nlen is not NULL, the byte length of the result is stored
+ * there; callers should rely on that length rather than on a terminator.
+ * The buffer must be released with sss_utf8_free().
+ */
+#ifdef HAVE_LIBUNISTRING
+uint8_t *sss_utf8_tolower(const uint8_t *s, size_t len, size_t *_nlen)
+{
+    size_t llen;
+    uint8_t *lower;
+
+    /* No language, no normalization form and no caller-supplied result buffer */
+    lower = u8_tolower(s, len, NULL, NULL, NULL, &llen);
+    if (!lower) return NULL;
+
+    if (_nlen) *_nlen = llen;
+    return lower;
+}
+#elif HAVE_GLIB2
+uint8_t *sss_utf8_tolower(const uint8_t *s, size_t len, size_t *_nlen)
+{
+    gchar *glower;
+
+    glower = g_utf8_strdown((const gchar *) s, len);
+    if (!glower) return NULL;
+
+    /* strlen() is safe here because g_utf8_strdown() always null-terminates */
+    if (_nlen) *_nlen = strlen(glower);
+
+    /* Hand the GLib buffer straight to the caller: sss_utf8_free() releases
+     * it with g_free(). Copying it into g_malloc(nlen) made an empty result
+     * look like a failure, because g_malloc(0) returns NULL */
+    return (uint8_t *) glower;
+}
+#else
+#error No unicode library
+#endif
+
+
+#ifdef HAVE_LIBUNISTRING
bool sss_utf8_check(const uint8_t *s, size_t n)
{
if (u8_check(s, n) == NULL) {
diff --git a/src/util/sss_utf8.h b/src/util/sss_utf8.h
index 37dcff95..b7da7621 100644
--- a/src/util/sss_utf8.h
+++ b/src/util/sss_utf8.h
@@ -35,6 +35,11 @@
#define ENOMATCH -1
#endif
+void sss_utf8_free(void *ptr); /* releases buffers returned by sss_utf8_tolower() */
+
+/* Returns a newly allocated lowercase copy of the len bytes of UTF-8 text at
+ * s, or NULL on failure. If nlen is not NULL, the byte length of the result
+ * is stored there; callers should use that length rather than rely on a NUL
+ * terminator.
+ * The result must be freed with sss_utf8_free() */
+uint8_t *sss_utf8_tolower(const uint8_t *s, size_t len, size_t *nlen);
+
bool sss_utf8_check(const uint8_t *s, size_t n);
errno_t sss_utf8_case_eq(const uint8_t *s1, const uint8_t *s2);
diff --git a/src/util/util.c b/src/util/util.c
index b4b1b124..f525c915 100644
--- a/src/util/util.c
+++ b/src/util/util.c
@@ -23,6 +23,7 @@
#include "talloc.h"
#include "util/util.h"
+#include "util/sss_utf8.h"
#include "dhash.h"
/* split a string into an allocated array of strings.
diff --git a/src/util/util.h b/src/util/util.h
index 9a006471..4ff112b7 100644
--- a/src/util/util.h
+++ b/src/util/util.h
@@ -502,4 +502,10 @@ errno_t sss_filter_sanitize(TALLOC_CTX *mem_ctx,
char *
sss_escape_ip_address(TALLOC_CTX *mem_ctx, int family, const char *addr);
+/* from sss_tc_utf8.c
+ *
+ * Both helpers return a lowercased copy allocated on mem_ctx, or NULL on
+ * failure. sss_tc_utf8_str_tolower() takes a NUL-terminated string and
+ * returns a NUL-terminated result. sss_tc_utf8_tolower() converts len bytes,
+ * returns an unterminated buffer and reports its byte length through _nlen. */
+char *
+sss_tc_utf8_str_tolower(TALLOC_CTX *mem_ctx, const char *s);
+uint8_t *
+sss_tc_utf8_tolower(TALLOC_CTX *mem_ctx, const uint8_t *s, size_t len, size_t *_nlen);
+
#endif /* __SSSD_UTIL_H__ */