lib/util/charset Move source3/lib/util_unistr.c to the common code.

This file (largely) contains functions to deal with UTF16 strings. Andrew Bartlett Signed-off-by: Andrew Tridgell <tridge@samba.org>
author: Andrew Bartlett <abartlet@samba.org> 2011-04-12 16:31:08 +1000
committer: Andrew Tridgell <tridge@samba.org> 2011-04-13 14:47:07 +1000
commit: 9941dfe9f6532ecbc317685046d74e6f90c41695 (patch)
tree: ab591eead8e1d38e0167f005730a4ac76dd4fa74 /lib
parent: ce2f217bd2402ada76c13bf3c170c8f55752fb11 (diff)
download: samba-9941dfe9f6532ecbc317685046d74e6f90c41695.tar.gz
samba-9941dfe9f6532ecbc317685046d74e6f90c41695.tar.bz2
samba-9941dfe9f6532ecbc317685046d74e6f90c41695.zip
3 files changed, 345 insertions, 1 deletions
diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h
index 16bb9c62fb..3a6e6a3216 100644
--- a/lib/util/charset/charset.h
+++ b/lib/util/charset/charset.h
@@ -240,6 +240,26 @@ void load_case_tables(void);
 void load_case_tables_library(void);
 bool smb_register_charset(const struct charset_functions *funcs_in);
 
+/* The following definitions come from util_unistr_w.c  */
+
+size_t strlen_w(const smb_ucs2_t *src);
+size_t strnlen_w(const smb_ucs2_t *src, size_t max);
+smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c);
+smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c);
+smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c);
+smb_ucs2_t *strnrchr_w(const smb_ucs2_t *s, smb_ucs2_t c, unsigned int n);
+smb_ucs2_t *strstr_w(const smb_ucs2_t *s, const smb_ucs2_t *ins);
+bool strlower_w(smb_ucs2_t *s);
+bool strupper_w(smb_ucs2_t *s);
+int strcmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b);
+int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b);
+int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len);
+int strcmp_wa(const smb_ucs2_t *a, const char *b);
+int toupper_ascii(int c);
+int tolower_ascii(int c);
+int isupper_ascii(int c);
+int islower_ascii(int c);
+
 /*
  *   Define stub for charset module which implements 8-bit encoding with gaps.
  *   Encoding tables for such module should be produced from glibc's CHARMAPs
diff --git a/lib/util/charset/util_unistr_w.c b/lib/util/charset/util_unistr_w.c
new file mode 100644
index 0000000000..a550e52776
--- /dev/null
+++ b/lib/util/charset/util_unistr_w.c
@@ -0,0 +1,324 @@
+/*
+   Unix SMB/CIFS implementation.
+   Samba utility functions
+   Copyright (C) Andrew Tridgell 1992-2001
+   Copyright (C) Simo Sorce 2001
+   Copyright (C) Jeremy Allison 2005
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "includes.h"
+
+/* Copy the two bytes of one smb_ucs2_t from src to dest, byte by byte, so either pointer may be unaligned. Evaluates to dest (a pointer), which callers dereference to get the copied value. */
+#define COPY_UCS2_CHAR(dest,src) (((unsigned char *)(dest))[0] = ((unsigned char *)(src))[0],\
+				((unsigned char *)(dest))[1] = ((unsigned char *)(src))[1], (dest))
+
+
+/* return an ascii version of a ucs2 character */
+#define UCS2_TO_CHAR(c) (((c) >> UCS2_SHIFT) & 0xff)
+
+static int strncmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len);
+
+/*******************************************************************
+ Count the smb_ucs2_t units before the terminating zero of a UTF16 string.
+********************************************************************/
+
+size_t strlen_w(const smb_ucs2_t *src)
+{
+	size_t len;
+	smb_ucs2_t c;
+
+	for(len = 0; *(COPY_UCS2_CHAR(&c,src)); src++, len++) { /* each unit is fetched bytewise into c before being tested */
+		;
+	}
+
+	return len;
+}
+
+/*******************************************************************
+ Like strlen_w(), but stop counting once max units have been counted.
+********************************************************************/
+
+size_t strnlen_w(const smb_ucs2_t *src, size_t max)
+{
+	size_t len;
+	smb_ucs2_t c;
+
+	for(len = 0; (len < max) && *(COPY_UCS2_CHAR(&c,src)); src++, len++) { /* len < max is tested first, so the unit at index max is never read */
+		;
+	}
+
+	return len;
+}
+
+/*******************************************************************
+ Wide strchr(): first occurrence of c in s, or NULL. c == 0 finds the terminator.
+********************************************************************/
+
+smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
+{
+	smb_ucs2_t cp;
+	while (*(COPY_UCS2_CHAR(&cp,s))) {
+		if (c == cp) {
+			return (smb_ucs2_t *)s; /* the cast drops const from the caller's pointer */
+		}
+		s++;
+	}
+	if (c == cp) { /* cp is 0 here, so this only matches a search for the terminator */
+		return (smb_ucs2_t *)s;
+	}
+
+	return NULL;
+}
+
+smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c)
+{
+	return strchr_w(s, UCS2_CHAR(c)); /* widen c with UCS2_CHAR() and defer to strchr_w() */
+}
+
+/*******************************************************************
+ Wide strrchr(): last occurrence of c in s, or NULL. The terminator is never matched.
+********************************************************************/
+
+smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
+{
+	smb_ucs2_t cp;
+	const smb_ucs2_t *p = s;
+	int len = strlen_w(s); /* NOTE(review): the size_t result is narrowed to int */
+
+	if (len == 0) {
+		return NULL;
+	}
+	p += (len - 1); /* start at the last unit before the terminator */
+	do {
+		if (c == *(COPY_UCS2_CHAR(&cp,p))) {
+			return (smb_ucs2_t *)p;
+		}
+	} while (p-- != s); /* NOTE(review): on the final test p is decremented to one before s */
+	return NULL;
+}
+
+/*******************************************************************
+ Wide strrchr() variant: return the n'th occurrence of c counting from the end, or NULL.
+********************************************************************/
+
+smb_ucs2_t *strnrchr_w(const smb_ucs2_t *s, smb_ucs2_t c, unsigned int n)
+{
+	smb_ucs2_t cp;
+	const smb_ucs2_t *p = s;
+	int len = strlen_w(s); /* NOTE(review): the size_t result is narrowed to int */
+
+	if (len == 0 || !n) { /* empty string or n == 0: nothing to find */
+		return NULL;
+	}
+	p += (len - 1); /* start at the last unit before the terminator */
+	do {
+		if (c == *(COPY_UCS2_CHAR(&cp,p))) {
+			n--; /* one more match seen while walking backwards */
+		}
+
+		if (!n) {
+			return (smb_ucs2_t *)p;
+		}
+	} while (p-- != s); /* NOTE(review): on the final test p is decremented to one before s */
+	return NULL;
+}
+
+/*******************************************************************
+ Wide strstr(): first occurrence of ins in s. A NULL or empty s or ins yields NULL.
+********************************************************************/
+
+smb_ucs2_t *strstr_w(const smb_ucs2_t *s, const smb_ucs2_t *ins)
+{
+	smb_ucs2_t *r;
+	size_t inslen;
+
+	if (!s || !*s || !ins || !*ins) { /* NOTE(review): *s and *ins are read directly here, not via COPY_UCS2_CHAR */
+		return NULL;
+	}
+
+	inslen = strlen_w(ins);
+	r = (smb_ucs2_t *)s;
+
+	while ((r = strchr_w(r, *ins))) { /* jump to the next candidate: a unit equal to the first unit of ins */
+		if (strncmp_w(r, ins, inslen) == 0) {
+			return r;
+		}
+		r++;
+	}
+
+	return NULL;
+}
+
+/*******************************************************************
+ Convert a string to lower case, in place, one smb_ucs2_t at a time.
+ Returns true if at least one character was changed.
+
+ This is unsafe for any string involving a UTF16 character
+********************************************************************/
+
+bool strlower_w(smb_ucs2_t *s)
+{
+	smb_ucs2_t cp;
+	bool ret = false;
+
+	while (*(COPY_UCS2_CHAR(&cp,s))) {
+		smb_ucs2_t v = tolower_m(cp);
+		if (v != cp) {
+			COPY_UCS2_CHAR(s,&v); /* store only when the mapping changed the unit; written back bytewise */
+			ret = true;
+		}
+		s++;
+	}
+	return ret;
+}
+
+/*******************************************************************
+ Convert a string to upper case, in place, one smb_ucs2_t at a time.
+ Returns true if at least one character was changed.
+
+ This is unsafe for any string involving a UTF16 character
+********************************************************************/
+
+bool strupper_w(smb_ucs2_t *s)
+{
+	smb_ucs2_t cp;
+	bool ret = false;
+	while (*(COPY_UCS2_CHAR(&cp,s))) {
+		smb_ucs2_t v = toupper_m(cp);
+		if (v != cp) {
+			COPY_UCS2_CHAR(s,&v); /* store only when the mapping changed the unit; written back bytewise */
+			ret = true;
+		}
+		s++;
+	}
+	return ret;
+}
+
+int strcmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
+{
+	smb_ucs2_t cpa, cpb;
+
+	while ((*(COPY_UCS2_CHAR(&cpb,b))) && (*(COPY_UCS2_CHAR(&cpa,a)) == cpb)) {
+		a++;
+		b++;
+	}
+	return (*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b)));
+	/* Note: the value is the difference between the units at the
+		first position where the strings differ (0 if they are equal);
+		it is not related to which string is longer */
+}
+
+static int strncmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
+{
+	smb_ucs2_t cpa, cpb;
+	size_t n = 0; /* number of leading units found equal so far */
+
+	while ((n < len) && (*(COPY_UCS2_CHAR(&cpb,b))) && (*(COPY_UCS2_CHAR(&cpa,a)) == cpb)) {
+		a++;
+		b++;
+		n++;
+	}
+	return (len - n)?(*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b))):0; /* 0 when all len units matched, else the difference at the stop position */
+}
+
+/*******************************************************************
+ Case insensitive string comparison (toupper_m() to match, tolower_m() for the result).
+********************************************************************/
+
+int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
+{
+	smb_ucs2_t cpa, cpb;
+
+	while ((*COPY_UCS2_CHAR(&cpb,b)) && toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb)) {
+		a++;
+		b++;
+	}
+	return (tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))); /* difference of the lower-cased units at the stop position */
+}
+
+/*******************************************************************
+ Case insensitive string comparison, limited to the first len characters.
+********************************************************************/
+
+int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
+{
+	smb_ucs2_t cpa, cpb;
+	size_t n = 0; /* number of leading units found equal so far */
+
+	while ((n < len) && *COPY_UCS2_CHAR(&cpb,b) && (toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb))) {
+		a++;
+		b++;
+		n++;
+	}
+	return (len - n)?(tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))):0; /* 0 when all len units matched */
+}
+
+/*
+  The *_wa() functions take a combination of 7 bit ascii
+  and wide characters. They are used so that you can use string
+  functions combining C string constants with ucs2 strings.
+
+  The char* arguments must NOT be multibyte - to be completely sure
+  of this only pass string constants */
+
+int strcmp_wa(const smb_ucs2_t *a, const char *b)
+{
+	smb_ucs2_t cp = 0;
+
+	while (*b && *(COPY_UCS2_CHAR(&cp,a)) == UCS2_CHAR(*b)) { /* stop at the end of b or at the first mismatch */
+		a++;
+		b++;
+	}
+	return (*(COPY_UCS2_CHAR(&cp,a)) - UCS2_CHAR(*b)); /* each char of b is widened with UCS2_CHAR() before comparing */
+}
+
+/*************************************************************
+ ascii only toupper - saves the need for smbd to be in C locale.
+*************************************************************/
+
+int toupper_ascii(int c)
+{
+	smb_ucs2_t uc = toupper_m(UCS2_CHAR(c)); /* widen c, then upper-case it with toupper_m() */
+	return UCS2_TO_CHAR(uc); /* narrow back: UCS2_TO_CHAR() keeps 8 bits of the result */
+}
+
+/*************************************************************
+ ascii only tolower - saves the need for smbd to be in C locale.
+*************************************************************/
+
+int tolower_ascii(int c)
+{
+	smb_ucs2_t uc = tolower_m(UCS2_CHAR(c)); /* widen c, then lower-case it with tolower_m() */
+	return UCS2_TO_CHAR(uc); /* narrow back: UCS2_TO_CHAR() keeps 8 bits of the result */
+}
+
+/*************************************************************
+ ascii only isupper - saves the need for smbd to be in C locale.
+*************************************************************/
+
+int isupper_ascii(int c)
+{
+	return isupper_m(UCS2_CHAR(c)); /* widen c with UCS2_CHAR() and ask isupper_m() */
+}
+
+/*************************************************************
+ ascii only islower - saves the need for smbd to be in C locale.
+*************************************************************/
+
+int islower_ascii(int c)
+{
+	return islower_m(UCS2_CHAR(c)); /* widen c with UCS2_CHAR() and ask islower_m() */
+}
diff --git a/lib/util/charset/wscript_build b/lib/util/charset/wscript_build
index a245ef1b0c..29e168dce1 100644
--- a/lib/util/charset/wscript_build
+++ b/lib/util/charset/wscript_build
@@ -13,6 +13,6 @@ bld.SAMBA_SUBSYSTEM('ICONV_WRAPPER',
                     public_deps='iconv replace talloc')
 
 bld.SAMBA_SUBSYSTEM('CODEPOINTS',
-	source='codepoints.c util_str.c',
+	source='codepoints.c util_str.c util_unistr_w.c',
 	deps='DYNCONFIG ICONV_WRAPPER'
 	)
author	Andrew Bartlett <abartlet@samba.org>	2011-04-12 16:31:08 +1000
committer	Andrew Tridgell <tridge@samba.org>	2011-04-13 14:47:07 +1000
commit	9941dfe9f6532ecbc317685046d74e6f90c41695 (patch)
tree	ab591eead8e1d38e0167f005730a4ac76dd4fa74 /lib
parent	ce2f217bd2402ada76c13bf3c170c8f55752fb11 (diff)
download	samba-9941dfe9f6532ecbc317685046d74e6f90c41695.tar.gz samba-9941dfe9f6532ecbc317685046d74e6f90c41695.tar.bz2 samba-9941dfe9f6532ecbc317685046d74e6f90c41695.zip