From 8177fc778b02d9f61ef482fc60d32f353be77ba4 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra@samba.org>
Date: Tue, 21 Dec 1999 23:14:01 +0000
Subject: Added new unicode functions - not used yet, but are the basis for the
 internal unicode conversion of Samba. Jeremy. (This used to be commit
 302412df64aa4b6572b13ef61dfd68c3f8ebbb8b)

---
 source3/lib/util_unistr.c | 210 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 209 insertions(+), 1 deletion(-)

(limited to 'source3/lib/util_unistr.c')

diff --git a/source3/lib/util_unistr.c b/source3/lib/util_unistr.c
index 185e7c3547..fca9d8bfda 100644
--- a/source3/lib/util_unistr.c
+++ b/source3/lib/util_unistr.c
@@ -345,8 +345,9 @@ int unistrcpy(char *dst, char *src)
 
 
 /*******************************************************************
- free any existing maps
+ Free any existing maps.
 ********************************************************************/
+
 static void free_maps(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
 {
 	/* this handles identity mappings where we share the pointer */
@@ -556,3 +557,210 @@ BOOL load_unix_unicode_map(const char *unix_char_set)
   strupper(upper_unix_char_set);
   return load_unicode_map(upper_unix_char_set, &unixcp_to_ucs2, &ucs2_to_unixcp);
 }
+
+/*******************************************************************
+ The following functions reproduce many of the non-UNICODE standard
+ string functions in Samba.
+********************************************************************/
+
+/*******************************************************************
+ Convert a UNICODE string to multibyte format. Note that the 'src' is in
+ native byte order, not little endian. dst_len is in bytes. The output is
+ always zero terminated (unless dst_len is 0, when dst is left untouched)
+ and is truncated at a whole character if it would not fit. Returns dst.
+********************************************************************/
+
+static char *unicode_to_multibyte(char *dst, const smb_ucs2_t *src,
+                                  size_t dst_len, const uint16 *ucs2_to_cp)
+{
+	size_t i = 0;	/* Bytes written to dst so far. */
+
+	if (dst_len == 0)
+		return dst;
+
+	for (; *src; src++) {
+		smb_ucs2_t val = ucs2_to_cp[*src];
+		size_t need = (val < 256) ? 1 : 2;
+
+		/* Stop when the character plus the terminator won't fit. */
+		if (need >= dst_len - i)
+			break;
+		/* A 2 byte value is always written high/low into the stream. */
+		if (need == 2)
+			dst[i++] = (char)((val >> 8) & 0xff);
+		dst[i++] = (char)(val & 0xff);
+	}
+
+	dst[i] = '\0';
+	return dst;
+}
+
+/*******************************************************************
+ Convert a multibyte string to UNICODE format. Note that the 'dst' is in
+ native byte order, not little endian. dst_len is in bytes. The output
+ is always zero terminated, unless dst_len is too small to hold even one
+ smb_ucs2_t, in which case dst is left untouched. Returns dst.
+********************************************************************/
+
+smb_ucs2_t *multibyte_to_unicode(smb_ucs2_t *dst, const char *src,
+                                 size_t dst_len, smb_ucs2_t *cp_to_ucs2)
+{
+	size_t i = 0;
+
+	dst_len /= sizeof(smb_ucs2_t); /* Convert to smb_ucs2_t units. */
+	if (dst_len == 0)
+		return dst;
+
+	/* src advances as input is consumed; i only indexes dst. */
+	while ((i < (dst_len - 1)) && *src) {
+		size_t skip = skip_multibyte_char(*src);
+		smb_ucs2_t val = (*src & 0xff);
+
+		if (skip == 2) {
+			/* A lead byte cut off by the terminator ends the input. */
+			if (src[1] == '\0')
+				break;
+			/* Two byte characters index the map as lead/trail. */
+			val = ((val << 8) | (src[1] & 0xff));
+		}
+
+		dst[i++] = cp_to_ucs2[val];
+		src += (skip ? skip : 1);
+	}
+
+	dst[i] = 0;
+	return dst;
+}
+
+/*******************************************************************
+ Convert a UNICODE string to the UNIX codepage via the ucs2_to_unixcp
+ table. 'src' is in native byte order, not little endian. Always zero
+ terminates. dst_len is in bytes. May be replaced if the MB codepage is
+ an encoded one (ie. utf8, hex) - see the code in lib/kanji.c.
+********************************************************************/
+
+char *unicode_to_unix(char *dst, const smb_ucs2_t *src, size_t dst_len)
+{
+	const uint16 *ucs2_to_cp = ucs2_to_unixcp;
+	return unicode_to_multibyte(dst, src, dst_len, ucs2_to_cp);
+}
+
+/*******************************************************************
+ Convert a UNIX codepage string to UNICODE via the unixcp_to_ucs2 table.
+ 'dst' is in native byte order, not little endian. Always zero terminates.
+ dst_len is in bytes, not ucs2 units. May be replaced if the UNIX codepage
+ is a multi-byte one (ie. JIS, SJIS or utf8) - see lib/kanji.c.
+********************************************************************/
+
+smb_ucs2_t *unix_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
+{
+	smb_ucs2_t *cp_to_ucs2 = unixcp_to_ucs2;
+	return multibyte_to_unicode(dst, src, dst_len, cp_to_ucs2);
+}
+
+/*******************************************************************
+ Convert a UNICODE string to the DOS codepage via the ucs2_to_doscp table.
+ 'src' is in native byte order. Always zero terminates. dst_len is in bytes.
+********************************************************************/
+
+char *unicode_to_dos(char *dst, const smb_ucs2_t *src, size_t dst_len)
+{
+	const uint16 *ucs2_to_cp = ucs2_to_doscp;
+	return unicode_to_multibyte(dst, src, dst_len, ucs2_to_cp);
+}
+
+/*******************************************************************
+ Convert a DOS codepage string to UNICODE via the doscp_to_ucs2 table.
+ 'dst' is in native byte order, not little endian. Always zero terminates.
+ dst_len is in bytes, not ucs2 units. May be replaced if the DOS codepage
+ is a multi-byte one (ie. JIS, SJIS or utf8) - see lib/kanji.c.
+********************************************************************/
+
+smb_ucs2_t *dos_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
+{
+	smb_ucs2_t *cp_to_ucs2 = doscp_to_ucs2;
+	return multibyte_to_unicode(dst, src, dst_len, cp_to_ucs2);
+}
+
+/*******************************************************************
+ Count the number of characters in a smb_ucs2_t string, not counting
+ the terminating zero. src must be non-NULL and zero terminated.
+********************************************************************/
+
+size_t wstrlen(const smb_ucs2_t *src)
+{
+  size_t len = 0;
+
+  while (src[len])	/* Index by len so the scan actually advances. */
+    len++;
+  return len;
+}
+
+/*******************************************************************
+ Safe wstring copy into a known length string. maxlength is in bytes and
+ includes the terminating zero. Truncates (and logs) if src is too long.
+********************************************************************/
+
+smb_ucs2_t *safe_wstrcpy(smb_ucs2_t *dest,const smb_ucs2_t *src, size_t maxlength)
+{
+	size_t max_units = maxlength/sizeof(smb_ucs2_t);
+	size_t ucs2_len;
+
+	if (!dest) {
+		DEBUG(0,("ERROR: NULL dest in safe_wstrcpy\n"));
+		return NULL;
+	}
+	/* No room for even the terminating zero: leave dest untouched. */
+	if (max_units == 0)
+		return dest;
+
+	ucs2_len = src ? wstrlen(src) : 0;
+	if (ucs2_len >= max_units) {
+		fstring out;
+		/* The shortfall reported includes the terminating zero. */
+		DEBUG(0,("ERROR: string overflow by %u bytes in safe_wstrcpy [%.50s]\n",
+			(unsigned int)(((ucs2_len+1)*sizeof(smb_ucs2_t))-maxlength),
+			unicode_to_unix(out,src,sizeof(out))) );
+		ucs2_len = max_units - 1;
+	}
+
+	if (ucs2_len)
+		memcpy(dest, src, ucs2_len*sizeof(smb_ucs2_t));
+	dest[ucs2_len] = 0;
+	return dest;
+}
+
+/*******************************************************************
+ Safe string cat into a string. maxlength is in bytes and includes the
+ terminating zero. Truncates src (and logs the overflow) if needed.
+********************************************************************/
+
+smb_ucs2_t *safe_wstrcat(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength)
+{
+	size_t max_units = maxlength/sizeof(smb_ucs2_t);
+	size_t ucs2_src_len, ucs2_dest_len;
+
+	if (!dest) {
+		DEBUG(0,("ERROR: NULL dest in safe_wstrcat\n"));
+		return NULL;
+	}
+
+	if (!src)
+		return dest;
+
+	ucs2_src_len = wstrlen(src);
+	ucs2_dest_len = wstrlen(dest);
+
+	if (ucs2_src_len + ucs2_dest_len >= max_units) {
+		fstring out;
+		DEBUG(0,("ERROR: string overflow by %u bytes in safe_wstrcat [%.50s]\n",
+			(unsigned int)((sizeof(smb_ucs2_t)*(ucs2_src_len + ucs2_dest_len + 1)) - maxlength),
+			unicode_to_unix(out,src,sizeof(out))) );
+		/* Room left after dest and its terminator, never negative. */
+		ucs2_src_len = (ucs2_dest_len + 1 < max_units) ? (max_units - ucs2_dest_len - 1) : 0;
+	}
+
+	memcpy(&dest[ucs2_dest_len], src, ucs2_src_len*sizeof(smb_ucs2_t));
+	dest[ucs2_dest_len + ucs2_src_len] = 0;
+	return dest;
+}
-- 
cgit