1 files changed, 445 insertions, 194 deletions
diff --git a/source3/lib/util_unistr.c b/source3/lib/util_unistr.c
index 6f90528bf4..aee2a21fc0 100644
--- a/source3/lib/util_unistr.c
+++ b/source3/lib/util_unistr.c
@@ -21,287 +21,538 @@
 
 #include "includes.h"
 
-/*******************************************************************
- Put an ASCII string into a UNICODE buffer (little endian).
- ********************************************************************/
+extern int DEBUGLEVEL;
 
-char *ascii_to_unibuf(char *dest, const char *src, int maxlen)
-{
-	char *destend = dest + maxlen;
-	register char c;
-
-	while (dest < destend)
-	{
-		c = *(src++);
-		if (c == 0)
-		{
-			break;
-		}
+/*
+ * The following are the codepage to ucs2 and vica versa maps.
+ * These are dynamically loaded from a unicode translation file.
+ */
 
-		*(dest++) = c;
-		*(dest++) = 0;
-	}
+static smb_ucs2_t *doscp_to_ucs2;
+static uint16 *ucs2_to_doscp;
 
-	*dest++ = 0;
-	*dest++ = 0;
-	return dest;
-}
+static smb_ucs2_t *unixcp_to_ucs2;
+static uint16 *ucs2_to_unixcp;
 
+#ifndef MAXUNI
+#define MAXUNI 1024
+#endif
 
 /*******************************************************************
- Pull an ASCII string out of a UNICODE buffer (little endian).
- ********************************************************************/
+ Write a string in (little-endian) unicode format. src is in
+ the current DOS codepage. len is the length in bytes of the
+ string pointed to by dst.
 
-const char* unibuf_to_ascii(char *dest, const char *src, int maxlen)
-{
-	char *destend = dest + maxlen;
-	register char c;
-
-	while (dest < destend)
-	{
-		c = *(src++);
-		if ((c == 0) && (*src == 0))
-		{
-			break;
-		}
+ the return value is the length of the string *without* the trailing 
+ two bytes of zero
+********************************************************************/
 
-		*dest++ = c;
-		src++;
+int dos_PutUniCode(char *dst,const char *src, ssize_t len)
+{
+	int ret = 0;
+	while (*src && (len > 2)) {
+		size_t skip = skip_multibyte_char(*src);
+		smb_ucs2_t val = (*src & 0xff);
+
+		/*
+		 * If this is a multibyte character (and all DOS/Windows
+		 * codepages have at maximum 2 byte multibyte characters)
+		 * then work out the index value for the unicode conversion.
+		 */
+
+		if (skip == 2)
+			val = ((val << 8) | src[1]);
+
+		SSVAL(dst,ret,doscp_to_ucs2[val]);
+		ret += 2;
+		len -= 2;
+		if (skip)
+			src += skip;
+		else
+			src++;
 	}
+	SSVAL(dst,ret,0);
+	return(ret);
+}
 
-	*dest = 0;
+/*******************************************************************
+ Skip past some unicode strings in a buffer.
+********************************************************************/
 
-	return src;
+char *skip_unicode_string(char *buf,int n)
+{
+	while (n--) {
+		while (*buf)
+			buf += 2;
+		buf += 2;
+	}
+	return(buf);
 }
 
-
 /*******************************************************************
- Put an ASCII string into a UNICODE array (uint16's).
- ********************************************************************/
+ Return a DOS codepage version of a little-endian unicode string.
+ Hack alert: uses fixed buffer(s).
+********************************************************************/
 
-void ascii_to_unistr(uint16 *dest, const char *src, int maxlen)
+char *dos_unistrn2(uint16 *src, int len)
 {
-	uint16 *destend = dest + maxlen;
-	register char c;
-
-	while (dest < destend)
-	{
-		c = *(src++);
-		if (c == 0)
-		{
-			break;
+	static char lbufs[8][MAXUNI];
+	static int nexti;
+	char *lbuf = lbufs[nexti];
+	char *p;
+
+	nexti = (nexti+1)%8;
+
+	for (p = lbuf; (len > 0) && (p-lbuf < MAXUNI-3) && *src; len--, src++) {
+		uint16 ucs2_val = SVAL(src,0);
+		uint16 cp_val = ucs2_to_doscp[ucs2_val];
+
+		if (cp_val < 256)
+			*p++ = (char)cp_val;
+		else {
+			*p++ = (cp_val >> 8) & 0xff;
+			*p++ = (cp_val & 0xff);
 		}
-
-		*(dest++) = (uint16)c;
 	}
 
-	*dest = 0;
+	*p = 0;
+	return lbuf;
 }
 
+static char lbufs[8][MAXUNI];
+static int nexti;
 
 /*******************************************************************
- Pull an ASCII string out of a UNICODE array (uint16's).
- ********************************************************************/
+ Return a DOS codepage version of a little-endian unicode string.
+ Hack alert: uses fixed buffer(s).
+********************************************************************/
 
-void unistr_to_ascii(char *dest, const uint16 *src, int len)
+char *dos_unistr2(uint16 *src)
 {
-	char *destend = dest + len;
-	register uint16 c;
-
-	while (dest < destend)
-	{
-		c = *(src++);
-		if (c == 0)
-		{
-			break;
-		}
+	char *lbuf = lbufs[nexti];
+	char *p;
+
+	nexti = (nexti+1)%8;
+
+	for (p = lbuf; *src && (p-lbuf < MAXUNI-3); src++) {
+		uint16 ucs2_val = SVAL(src,0);
+		uint16 cp_val = ucs2_to_doscp[ucs2_val];
 
-		*(dest++) = (char)c;
+		if (cp_val < 256)
+			*p++ = (char)cp_val;
+		else {
+			*p++ = (cp_val >> 8) & 0xff;
+			*p++ = (cp_val & 0xff);
+		}
 	}
 
-	*dest = 0;
+	*p = 0;
+	return lbuf;
 }
 
-
 /*******************************************************************
- Convert a UNISTR2 structure to an ASCII string
- ********************************************************************/
+Return a DOS codepage version of a little-endian unicode string
+********************************************************************/
 
-void unistr2_to_ascii(char *dest, const UNISTR2 *str, size_t maxlen)
+char *dos_unistr2_to_str(UNISTR2 *str)
 {
-	char *destend;
-	const uint16 *src;
-	size_t len;
-	register uint16 c;
-
-	src = str->buffer;
-	len = MIN(str->uni_str_len, maxlen);
-	destend = dest + len;
-
-	while (dest < destend)
-	{
-		c = *(src++);
-		if (c == 0)
-		{
-			break;
+	char *lbuf = lbufs[nexti];
+	char *p;
+	uint16 *src = str->buffer;
+	int max_size = MIN(sizeof(str->buffer)-3, str->uni_str_len);
+
+	nexti = (nexti+1)%8;
+
+	for (p = lbuf; *src && p-lbuf < max_size; src++) {
+		uint16 ucs2_val = SVAL(src,0);
+		uint16 cp_val = ucs2_to_doscp[ucs2_val];
+
+		if (cp_val < 256)
+			*p++ = (char)cp_val;
+		else {
+			*p++ = (cp_val >> 8) & 0xff;
+			*p++ = (cp_val & 0xff);
 		}
-
-		*(dest++) = (char)c;
 	}
 
-	*dest = 0;
+	*p = 0;
+	return lbuf;
 }
 
-
 /*******************************************************************
- Skip a UNICODE string in a little endian buffer.
- ********************************************************************/
+Return a number stored in a buffer
+********************************************************************/
 
-char *skip_unibuf(char *srcbuf, int len)
+uint32 buffer2_to_uint32(BUFFER2 *str)
 {
-	uint16 *src = (uint16 *)srcbuf;
-	uint16 *srcend = src + len/2;
+	if (str->buf_len == 4)
+		return IVAL(str->buffer, 0);
+	else
+		return 0;
+}
 
-	while ((src < srcend) && (*(src++) != 0))
-	{
+/*******************************************************************
+Return a DOS codepage version of a NOTunicode string
+********************************************************************/
+
+char *dos_buffer2_to_str(BUFFER2 *str)
+{
+	char *lbuf = lbufs[nexti];
+	char *p;
+	uint16 *src = str->buffer;
+	int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2);
+
+	nexti = (nexti+1)%8;
+
+	for (p = lbuf; *src && p-lbuf < max_size; src++) {
+		uint16 ucs2_val = SVAL(src,0);
+		uint16 cp_val = ucs2_to_doscp[ucs2_val];
+
+		if (cp_val < 256)
+			*p++ = (char)cp_val;
+		else {
+			*p++ = (cp_val >> 8) & 0xff;
+			*p++ = (cp_val & 0xff);
+		}
 	}
 
-	return (char *)src;
+	*p = 0;
+	return lbuf;
 }
 
-
 /*******************************************************************
- UNICODE strcpy between buffers.
- ********************************************************************/
+ Return a dos codepage version of a NOTunicode string
+********************************************************************/
 
-char *uni_strncpy(char *destbuf, const char *srcbuf, int len)
+char *dos_buffer2_to_multistr(BUFFER2 *str)
 {
-	const uint16 *src = (const uint16 *)srcbuf;
-	uint16 *dest = (uint16 *)destbuf;
-	uint16 *destend = dest + len/2;
-	register uint16 c;
-
-	while (dest < destend)
-	{
-		c = *(src++);
-		if (c == 0)
-		{
-			break;
+	char *lbuf = lbufs[nexti];
+	char *p;
+	uint16 *src = str->buffer;
+	int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2);
+
+	nexti = (nexti+1)%8;
+
+	for (p = lbuf; p-lbuf < max_size; src++) {
+		if (*src == 0) {
+			*p++ = ' ';
+		} else {
+			uint16 ucs2_val = SVAL(src,0);
+			uint16 cp_val = ucs2_to_doscp[ucs2_val];
+
+			if (cp_val < 256)
+				*p++ = (char)cp_val;
+			else {
+				*p++ = (cp_val >> 8) & 0xff;
+				*p++ = (cp_val & 0xff);
+			}
 		}
-
-		*(dest++) = c;
 	}
 
-	*dest++ = 0;
-	return (char *)dest;
+	*p = 0;
+	return lbuf;
 }
 
-
 /*******************************************************************
- Return a number stored in a buffer
- ********************************************************************/
+ Create a null-terminated unicode string from a null-terminated DOS
+ codepage string.
+ Return number of unicode chars copied, excluding the null character.
+ Unicode strings created are in little-endian format.
+********************************************************************/
 
-uint32 buffer2_to_uint32(const BUFFER2 *str)
+size_t dos_struni2(char *dst, const char *src, size_t max_len)
 {
-	if (str->buf_len == 4)
-	{
-		const uchar *src = str->buffer;
-		return IVAL(src, 0);
-	}
-	else
-	{
+	size_t len = 0;
+
+	if (dst == NULL)
 		return 0;
+
+	if (src != NULL) {
+		for (; *src && len < max_len-2; len++, dst +=2) {
+			size_t skip = skip_multibyte_char(*src);
+			smb_ucs2_t val = (*src & 0xff);
+
+			/*
+			 * If this is a multibyte character (and all DOS/Windows
+			 * codepages have at maximum 2 byte multibyte characters)
+			 * then work out the index value for the unicode conversion.
+			 */
+
+			if (skip == 2)
+				val = ((val << 8) | src[1]);
+
+			SSVAL(dst,0,doscp_to_ucs2[val]);
+			if (skip)
+				src += skip;
+			else
+				src++;
+		}
 	}
-}
 
+	SSVAL(dst,0,0);
+
+	return len;
+}
 
 /*******************************************************************
-  Convert a 'multi-string' buffer to space-separated ASCII.
- ********************************************************************/
-void buffer2_to_multistr(char *dest, const BUFFER2 *str, size_t maxlen)
+ Return a DOS codepage version of a little-endian unicode string.
+ Hack alert: uses fixed buffer(s).
+********************************************************************/
+
+char *dos_unistr(char *buf)
 {
-	char *destend;
-	const uchar *src;
-	size_t len;
-	register uint16 c;
-
-	src = str->buffer;
-	len = MIN(str->buf_len/2, maxlen);
-	destend = dest + len;
-
-	while (dest < destend)
-	{
-		c = *(src++);
-		*(dest++) = (c == 0) ? ' ' : (char)c;
-		src++;
+	char *lbuf = lbufs[nexti];
+	uint16 *src = (uint16 *)buf;
+	char *p;
+
+	nexti = (nexti+1)%8;
+
+	for (p = lbuf; *src && p-lbuf < MAXUNI-3; src++) {
+		uint16 ucs2_val = SVAL(src,0);
+		uint16 cp_val = ucs2_to_doscp[ucs2_val];
+
+		if (cp_val < 256)
+			*p++ = (char)cp_val;
+		else {
+			*p++ = (cp_val >> 8) & 0xff;
+			*p++ = (cp_val & 0xff);
+		}
 	}
 
-	*dest = 0;
+	*p = 0;
+	return lbuf;
 }
 
 /*******************************************************************
-  Convert a buffer4 to space-separated ASCII.
- ********************************************************************/
-void buffer4_to_str(char *dest, const BUFFER4 *str, size_t maxlen)
+ Strcpy for unicode strings.  returns length (in num of wide chars)
+********************************************************************/
+
+int unistrcpy(char *dst, char *src)
 {
-	char *destend;
-	const uchar *src;
-	size_t len;
-	register uint16 c;
-
-	src = str->buffer;
-	len = MIN(str->buf_len, maxlen);
-	destend = dest + len;
-
-	while (dest < destend)
-	{
-		c = *(src++);
-		*(dest++) = (char)c;
+	int num_wchars = 0;
+	uint16 *wsrc = (uint16 *)src;
+	uint16 *wdst = (uint16 *)dst;
+
+	while (*wsrc) {
+		*wdst++ = *wsrc++;
+		num_wchars++;
 	}
+	*wdst = 0;
 
-	*dest = 0;
+	return num_wchars;
 }
 
+
+
 /*******************************************************************
-copies a UNISTR2 structure.
+ free any existing maps
 ********************************************************************/
-BOOL copy_unistr2(UNISTR2 *str, const UNISTR2 *from)
+static void free_maps(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
 {
-	if (from != NULL)
-	{
-		/* set up string lengths. add one if string is not null-terminated */
-		str->uni_max_len = from->uni_max_len;
-		str->undoc       = from->undoc;
-		str->uni_str_len = from->uni_str_len;
-
-		/* copy the string */
-		memcpy(str->buffer, from->buffer, sizeof(from->buffer));
+	/* this handles identity mappings where we share the pointer */
+	if (*pp_ucs2_to_cp == *pp_cp_to_ucs2) {
+		*pp_ucs2_to_cp = NULL;
 	}
-	else
-	{
-		str->uni_max_len = 1;
-		str->undoc = 0;
-		str->uni_str_len = 1;
-		str->buffer[0] = 0;
+
+	if (*pp_cp_to_ucs2) {
+		free(*pp_cp_to_ucs2);
+		*pp_cp_to_ucs2 = NULL;
 	}
 
-	return True;
+	if (*pp_ucs2_to_cp) {
+		free(*pp_ucs2_to_cp);
+		*pp_ucs2_to_cp = NULL;
+	}
 }
 
+
 /*******************************************************************
-duplicates a UNISTR2 structure.
+ Build a default (null) codepage to unicode map.
 ********************************************************************/
-UNISTR2 *unistr2_dup(const UNISTR2 *name)
+
+void default_unicode_map(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
 {
-	UNISTR2 *copy = (UNISTR2*)malloc(sizeof(*copy));
-	copy_unistr2(copy, name);
-	return copy;
+  int i;
+
+  free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
+
+  if ((*pp_ucs2_to_cp = (uint16 *)malloc(2*65536)) == NULL) {
+    DEBUG(0,("default_unicode_map: malloc fail for ucs2_to_cp size %u.\n", 2*65536));
+    abort();
+  }
+
+  *pp_cp_to_ucs2 = *pp_ucs2_to_cp; /* Default map is an identity. */
+  for (i = 0; i < 65536; i++)
+    (*pp_cp_to_ucs2)[i] = i;
 }
 
 /*******************************************************************
-frees a UNISTR2 structure.
+ Load a codepage to unicode and vica-versa map.
 ********************************************************************/
-void unistr2_free(UNISTR2 *name)
+
+BOOL load_unicode_map(const char *codepage, smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
 {
-	free(name);
+  pstring unicode_map_file_name;
+  FILE *fp = NULL;
+  SMB_STRUCT_STAT st;
+  smb_ucs2_t *cp_to_ucs2 = *pp_cp_to_ucs2;
+  uint16 *ucs2_to_cp = *pp_ucs2_to_cp;
+  size_t cp_to_ucs2_size;
+  size_t ucs2_to_cp_size;
+  size_t i;
+  size_t size;
+  char buf[UNICODE_MAP_HEADER_SIZE];
+
+  DEBUG(5, ("load_unicode_map: loading unicode map for codepage %s.\n", codepage));
+
+  if (*codepage == '\0')
+    goto clean_and_exit;
+
+  if(strlen(CODEPAGEDIR) + 13 + strlen(codepage) > sizeof(unicode_map_file_name)) {
+    DEBUG(0,("load_unicode_map: filename too long to load\n"));
+    goto clean_and_exit;
+  }
+
+  pstrcpy(unicode_map_file_name, CODEPAGEDIR);
+  pstrcat(unicode_map_file_name, "/");
+  pstrcat(unicode_map_file_name, "unicode_map.");
+  pstrcat(unicode_map_file_name, codepage);
+
+  if(sys_stat(unicode_map_file_name,&st)!=0) {
+    DEBUG(0,("load_unicode_map: filename %s does not exist.\n",
+              unicode_map_file_name));
+    goto clean_and_exit;
+  }
+
+  size = st.st_size;
+
+  if ((size != UNICODE_MAP_HEADER_SIZE + 4*65536) && (size != UNICODE_MAP_HEADER_SIZE +(2*256 + 2*65536))) {
+    DEBUG(0,("load_unicode_map: file %s is an incorrect size for a \
+unicode map file (size=%d).\n", unicode_map_file_name, (int)size));
+    goto clean_and_exit;
+  }
+
+  if((fp = sys_fopen( unicode_map_file_name, "r")) == NULL) {
+    DEBUG(0,("load_unicode_map: cannot open file %s. Error was %s\n",
+              unicode_map_file_name, strerror(errno)));
+    goto clean_and_exit;
+  }
+
+  if(fread( buf, 1, UNICODE_MAP_HEADER_SIZE, fp)!=UNICODE_MAP_HEADER_SIZE) {
+    DEBUG(0,("load_unicode_map: cannot read header from file %s. Error was %s\n",
+              unicode_map_file_name, strerror(errno)));
+    goto clean_and_exit;
+  }
+
+  /* Check the version value */
+  if(SVAL(buf,UNICODE_MAP_VERSION_OFFSET) != UNICODE_MAP_FILE_VERSION_ID) {
+    DEBUG(0,("load_unicode_map: filename %s has incorrect version id. \
+Needed %hu, got %hu.\n",
+          unicode_map_file_name, (uint16)UNICODE_MAP_FILE_VERSION_ID,
+          SVAL(buf,UNICODE_MAP_VERSION_OFFSET)));
+    goto clean_and_exit;
+  }
+
+  /* Check the codepage value */
+  if(!strequal(&buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], codepage)) {
+    DEBUG(0,("load_unicode_map: codepage %s in file %s is not the same as that \
+requested (%s).\n", &buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], unicode_map_file_name, codepage ));
+    goto clean_and_exit;
+  }
+
+  ucs2_to_cp_size = 2*65536;
+  if (size == UNICODE_MAP_HEADER_SIZE + 4*65536) {
+    /* 
+     * This is a multibyte code page.
+     */
+    cp_to_ucs2_size = 2*65536;
+  } else {
+    /*
+     * Single byte code page.
+     */
+    cp_to_ucs2_size = 2*256;
+  }
+
+  /* 
+   * Free any old translation tables.
+   */
+
+  free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
+
+  if ((cp_to_ucs2 = (smb_ucs2_t *)malloc(cp_to_ucs2_size)) == NULL) {
+    DEBUG(0,("load_unicode_map: malloc fail for cp_to_ucs2 size %u.\n", cp_to_ucs2_size ));
+    goto clean_and_exit;
+  }
+
+  if ((ucs2_to_cp = (uint16 *)malloc(ucs2_to_cp_size)) == NULL) {
+    DEBUG(0,("load_unicode_map: malloc fail for ucs2_to_cp size %u.\n", ucs2_to_cp_size ));
+    goto clean_and_exit;
+  }
+
+  if(fread( (char *)cp_to_ucs2, 1, cp_to_ucs2_size, fp)!=cp_to_ucs2_size) {
+    DEBUG(0,("load_unicode_map: cannot read cp_to_ucs2 from file %s. Error was %s\n",
+              unicode_map_file_name, strerror(errno)));
+    goto clean_and_exit;
+  }
+
+  if(fread( (char *)ucs2_to_cp, 1, ucs2_to_cp_size, fp)!=ucs2_to_cp_size) {
+    DEBUG(0,("load_unicode_map: cannot read ucs2_to_cp from file %s. Error was %s\n",
+              unicode_map_file_name, strerror(errno)));
+    goto clean_and_exit;
+  }
+
+  /*
+   * Now ensure the 16 bit values are in the correct endianness.
+   */
+
+  for (i = 0; i < cp_to_ucs2_size/2; i++)
+    cp_to_ucs2[i] = SVAL(cp_to_ucs2,i*2);
+
+  for (i = 0; i < ucs2_to_cp_size/2; i++)
+    ucs2_to_cp[i] = SVAL(ucs2_to_cp,i*2);
+
+  fclose(fp);
+
+  *pp_cp_to_ucs2 = cp_to_ucs2;
+  *pp_ucs2_to_cp = ucs2_to_cp;
+
+  return True;
+
+clean_and_exit:
+
+  /* pseudo destructor :-) */
+
+  if(fp != NULL)
+    fclose(fp);
+
+  free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
+
+  default_unicode_map(pp_cp_to_ucs2, pp_ucs2_to_cp);
+
+  return False;
+}
+
+/*******************************************************************
+ Load a dos codepage to unicode and vica-versa map.
+********************************************************************/
+
+BOOL load_dos_unicode_map(int codepage)
+{
+  fstring codepage_str;
+
+  slprintf(codepage_str, sizeof(fstring)-1, "%03d", codepage);
+  return load_unicode_map(codepage_str, &doscp_to_ucs2, &ucs2_to_doscp);
+}
+
+/*******************************************************************
+ Load a UNIX codepage to unicode and vica-versa map.
+********************************************************************/
+
+BOOL load_unix_unicode_map(const char *unix_char_set)
+{
+  fstring upper_unix_char_set;
+
+  fstrcpy(upper_unix_char_set, unix_char_set);
+  strupper(upper_unix_char_set);
+  return load_unicode_map(upper_unix_char_set, &unixcp_to_ucs2, &ucs2_to_unixcp);
 }