summaryrefslogtreecommitdiff
path: root/source3/lib/util_unistr.c
diff options
context:
space:
mode:
Diffstat (limited to 'source3/lib/util_unistr.c')
-rw-r--r--source3/lib/util_unistr.c639
1 files changed, 445 insertions, 194 deletions
diff --git a/source3/lib/util_unistr.c b/source3/lib/util_unistr.c
index 6f90528bf4..aee2a21fc0 100644
--- a/source3/lib/util_unistr.c
+++ b/source3/lib/util_unistr.c
@@ -21,287 +21,538 @@
#include "includes.h"
-/*******************************************************************
- Put an ASCII string into a UNICODE buffer (little endian).
- ********************************************************************/
+extern int DEBUGLEVEL;
-char *ascii_to_unibuf(char *dest, const char *src, int maxlen)
-{
- char *destend = dest + maxlen;
- register char c;
-
- while (dest < destend)
- {
- c = *(src++);
- if (c == 0)
- {
- break;
- }
+/*
+ * The following are the codepage to ucs2 and vice versa maps.
+ * These are dynamically loaded from a unicode translation file.
+ */
- *(dest++) = c;
- *(dest++) = 0;
- }
+static smb_ucs2_t *doscp_to_ucs2;
+static uint16 *ucs2_to_doscp;
- *dest++ = 0;
- *dest++ = 0;
- return dest;
-}
+static smb_ucs2_t *unixcp_to_ucs2;
+static uint16 *ucs2_to_unixcp;
+#ifndef MAXUNI
+#define MAXUNI 1024
+#endif
/*******************************************************************
- Pull an ASCII string out of a UNICODE buffer (little endian).
- ********************************************************************/
+ Write a string in (little-endian) unicode format. src is in
+ the current DOS codepage. len is the length in bytes of the
+ string pointed to by dst.
-const char* unibuf_to_ascii(char *dest, const char *src, int maxlen)
-{
- char *destend = dest + maxlen;
- register char c;
-
- while (dest < destend)
- {
- c = *(src++);
- if ((c == 0) && (*src == 0))
- {
- break;
- }
+ the return value is the length of the string *without* the trailing
+ two bytes of zero
+********************************************************************/
- *dest++ = c;
- src++;
+int dos_PutUniCode(char *dst,const char *src, ssize_t len)
+{
+	/*
+	 * Converts the DOS codepage string src into little-endian UCS2
+	 * in dst, looking each character up in doscp_to_ucs2. len is
+	 * the size of dst in bytes; conversion stops at the end of src
+	 * or once no more than two bytes of len remain. A two-byte zero
+	 * terminator is then written. Returns the number of bytes
+	 * written, not counting that terminator.
+	 */
+	int ret = 0;
+	while (*src && (len > 2)) {
+		size_t skip = skip_multibyte_char(*src);
+		smb_ucs2_t val = (*src & 0xff);
+
+		/*
+		 * If this is a multibyte character (and all DOS/Windows
+		 * codepages have at maximum 2 byte multibyte characters)
+		 * then work out the index value for the unicode conversion.
+		 *
+		 * The trail byte is masked with 0xff: src is plain char, so
+		 * where char is signed a trail byte >= 0x80 would otherwise
+		 * sign-extend and overwrite the lead byte in the index.
+		 *
+		 * A lead byte followed directly by the string terminator is
+		 * converted as a single byte, so that src is never advanced
+		 * beyond the end of the string.
+		 */
+
+		if (skip == 2) {
+			if (src[1])
+				val = ((val << 8) | (src[1] & 0xff));
+			else
+				skip = 1;
+		}
+
+		SSVAL(dst,ret,doscp_to_ucs2[val]);
+		ret += 2;
+		len -= 2;
+		if (skip)
+			src += skip;
+		else
+			src++;
+	}
+	SSVAL(dst,ret,0);
+	return(ret);
+}
- *dest = 0;
+/*******************************************************************
+ Skip past some unicode strings in a buffer.
+
+ buf points at consecutive strings of two-byte characters, each ended
+ by a two-byte zero. Returns a pointer to the byte following the
+ terminator of the n'th string; buf is returned unchanged when n is
+ zero or negative. No length is passed in, so the caller must
+ guarantee that n terminated strings really are present.
+********************************************************************/
- return src;
+char *skip_unicode_string(char *buf,int n)
+{
+	/* n-- > 0 rather than n--: a negative count must not loop. */
+	while (n-- > 0) {
+		/*
+		 * A character terminates the string only when BOTH of its
+		 * bytes are zero. Testing the first byte alone would stop
+		 * early on any character whose low byte is 0 (U+0100 ...).
+		 */
+		while (buf[0] || buf[1])
+			buf += 2;
+		buf += 2;
+	}
+	return(buf);
}
-
/*******************************************************************
- Put an ASCII string into a UNICODE array (uint16's).
- ********************************************************************/
+ Return a DOS codepage version of a little-endian unicode string.
+ Hack alert: uses fixed buffer(s).
+********************************************************************/
-void ascii_to_unistr(uint16 *dest, const char *src, int maxlen)
+char *dos_unistrn2(uint16 *src, int len)
{
- uint16 *destend = dest + maxlen;
- register char c;
-
- while (dest < destend)
- {
- c = *(src++);
- if (c == 0)
- {
- break;
+ static char lbufs[8][MAXUNI];
+ static int nexti;
+ char *lbuf = lbufs[nexti];
+ char *p;
+
+ nexti = (nexti+1)%8;
+
+ for (p = lbuf; (len > 0) && (p-lbuf < MAXUNI-3) && *src; len--, src++) {
+ uint16 ucs2_val = SVAL(src,0);
+ uint16 cp_val = ucs2_to_doscp[ucs2_val];
+
+ if (cp_val < 256)
+ *p++ = (char)cp_val;
+ else {
+ *p++ = (cp_val >> 8) & 0xff;
+ *p++ = (cp_val & 0xff);
}
-
- *(dest++) = (uint16)c;
}
- *dest = 0;
+ *p = 0;
+ return lbuf;
}
+static char lbufs[8][MAXUNI];
+static int nexti;
/*******************************************************************
- Pull an ASCII string out of a UNICODE array (uint16's).
- ********************************************************************/
+ Return a DOS codepage version of a little-endian unicode string.
+ Hack alert: uses fixed buffer(s).
+********************************************************************/
-void unistr_to_ascii(char *dest, const uint16 *src, int len)
+char *dos_unistr2(uint16 *src)
{
- char *destend = dest + len;
- register uint16 c;
-
- while (dest < destend)
- {
- c = *(src++);
- if (c == 0)
- {
- break;
- }
+ char *lbuf = lbufs[nexti];
+ char *p;
+
+ nexti = (nexti+1)%8;
+
+ for (p = lbuf; *src && (p-lbuf < MAXUNI-3); src++) {
+ uint16 ucs2_val = SVAL(src,0);
+ uint16 cp_val = ucs2_to_doscp[ucs2_val];
- *(dest++) = (char)c;
+ if (cp_val < 256)
+ *p++ = (char)cp_val;
+ else {
+ *p++ = (cp_val >> 8) & 0xff;
+ *p++ = (cp_val & 0xff);
+ }
}
- *dest = 0;
+ *p = 0;
+ return lbuf;
}
-
/*******************************************************************
- Convert a UNISTR2 structure to an ASCII string
- ********************************************************************/
+Return a DOS codepage version of a little-endian unicode string
+********************************************************************/
-void unistr2_to_ascii(char *dest, const UNISTR2 *str, size_t maxlen)
+char *dos_unistr2_to_str(UNISTR2 *str)
{
- char *destend;
- const uint16 *src;
- size_t len;
- register uint16 c;
-
- src = str->buffer;
- len = MIN(str->uni_str_len, maxlen);
- destend = dest + len;
-
- while (dest < destend)
- {
- c = *(src++);
- if (c == 0)
- {
- break;
+ char *lbuf = lbufs[nexti];
+ char *p;
+ uint16 *src = str->buffer;
+ int max_size = MIN(sizeof(str->buffer)-3, str->uni_str_len);
+
+ nexti = (nexti+1)%8;
+
+ for (p = lbuf; *src && p-lbuf < max_size; src++) {
+ uint16 ucs2_val = SVAL(src,0);
+ uint16 cp_val = ucs2_to_doscp[ucs2_val];
+
+ if (cp_val < 256)
+ *p++ = (char)cp_val;
+ else {
+ *p++ = (cp_val >> 8) & 0xff;
+ *p++ = (cp_val & 0xff);
}
-
- *(dest++) = (char)c;
}
- *dest = 0;
+ *p = 0;
+ return lbuf;
}
-
/*******************************************************************
- Skip a UNICODE string in a little endian buffer.
- ********************************************************************/
+Return a number stored in a buffer
+********************************************************************/
-char *skip_unibuf(char *srcbuf, int len)
+uint32 buffer2_to_uint32(BUFFER2 *str)
{
- uint16 *src = (uint16 *)srcbuf;
- uint16 *srcend = src + len/2;
+ if (str->buf_len == 4)
+ return IVAL(str->buffer, 0);
+ else
+ return 0;
+}
- while ((src < srcend) && (*(src++) != 0))
- {
+/*******************************************************************
+Return a DOS codepage version of a NOTunicode string
+********************************************************************/
+
+char *dos_buffer2_to_str(BUFFER2 *str)
+{
+ char *lbuf = lbufs[nexti];
+ char *p;
+ uint16 *src = str->buffer;
+ int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2);
+
+ nexti = (nexti+1)%8;
+
+ for (p = lbuf; *src && p-lbuf < max_size; src++) {
+ uint16 ucs2_val = SVAL(src,0);
+ uint16 cp_val = ucs2_to_doscp[ucs2_val];
+
+ if (cp_val < 256)
+ *p++ = (char)cp_val;
+ else {
+ *p++ = (cp_val >> 8) & 0xff;
+ *p++ = (cp_val & 0xff);
+ }
}
- return (char *)src;
+ *p = 0;
+ return lbuf;
}
-
/*******************************************************************
- UNICODE strcpy between buffers.
- ********************************************************************/
+ Return a dos codepage version of a NOTunicode string
+********************************************************************/
-char *uni_strncpy(char *destbuf, const char *srcbuf, int len)
+char *dos_buffer2_to_multistr(BUFFER2 *str)
{
- const uint16 *src = (const uint16 *)srcbuf;
- uint16 *dest = (uint16 *)destbuf;
- uint16 *destend = dest + len/2;
- register uint16 c;
-
- while (dest < destend)
- {
- c = *(src++);
- if (c == 0)
- {
- break;
+ char *lbuf = lbufs[nexti];
+ char *p;
+ uint16 *src = str->buffer;
+ int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2);
+
+ nexti = (nexti+1)%8;
+
+ for (p = lbuf; p-lbuf < max_size; src++) {
+ if (*src == 0) {
+ *p++ = ' ';
+ } else {
+ uint16 ucs2_val = SVAL(src,0);
+ uint16 cp_val = ucs2_to_doscp[ucs2_val];
+
+ if (cp_val < 256)
+ *p++ = (char)cp_val;
+ else {
+ *p++ = (cp_val >> 8) & 0xff;
+ *p++ = (cp_val & 0xff);
+ }
}
-
- *(dest++) = c;
}
- *dest++ = 0;
- return (char *)dest;
+ *p = 0;
+ return lbuf;
}
-
/*******************************************************************
- Return a number stored in a buffer
- ********************************************************************/
+ Create a null-terminated unicode string from a null-terminated DOS
+ codepage string.
+ Return number of unicode chars copied, excluding the null character.
+ Unicode strings created are in little-endian format.
+********************************************************************/
-uint32 buffer2_to_uint32(const BUFFER2 *str)
+size_t dos_struni2(char *dst, const char *src, size_t max_len)
{
- if (str->buf_len == 4)
- {
- const uchar *src = str->buffer;
- return IVAL(src, 0);
- }
- else
- {
+	size_t len = 0;
+
+	/* Nowhere to write to, not even a terminator: report 0 chars. */
+	if (dst == NULL)
return 0;
+
+	if (src != NULL) {
+		/*
+		 * The bound is written as len + 2 < max_len instead of
+		 * len < max_len-2. max_len is unsigned, so for a max_len
+		 * of 0 or 1 the subtraction would wrap round to a huge
+		 * value and the limit would vanish. For max_len >= 2 the
+		 * two forms are identical.
+		 */
+		for (; *src && len + 2 < max_len; len++, dst +=2) {
+			size_t skip = skip_multibyte_char(*src);
+			smb_ucs2_t val = (*src & 0xff);
+
+			/*
+			 * If this is a multibyte character (and all DOS/Windows
+			 * codepages have at maximum 2 byte multibyte characters)
+			 * then work out the index value for the unicode conversion.
+			 *
+			 * The trail byte is masked so that a signed char >= 0x80
+			 * cannot sign-extend over the lead byte, and a lead byte
+			 * followed directly by the terminator is converted as a
+			 * single byte so src never moves past the end of the string.
+			 */
+
+			if (skip == 2) {
+				if (src[1])
+					val = ((val << 8) | (src[1] & 0xff));
+				else
+					skip = 1;
+			}
+
+			SSVAL(dst,0,doscp_to_ucs2[val]);
+			if (skip)
+				src += skip;
+			else
+				src++;
+		}
}
-}
+	/* Always terminate, including when src is NULL (empty result). */
+	SSVAL(dst,0,0);
+
+	return len;
+}
/*******************************************************************
- Convert a 'multi-string' buffer to space-separated ASCII.
- ********************************************************************/
-void buffer2_to_multistr(char *dest, const BUFFER2 *str, size_t maxlen)
+ Return a DOS codepage version of a little-endian unicode string.
+ Hack alert: uses fixed buffer(s).
+********************************************************************/
+
+char *dos_unistr(char *buf)
{
- char *destend;
- const uchar *src;
- size_t len;
- register uint16 c;
-
- src = str->buffer;
- len = MIN(str->buf_len/2, maxlen);
- destend = dest + len;
-
- while (dest < destend)
- {
- c = *(src++);
- *(dest++) = (c == 0) ? ' ' : (char)c;
- src++;
+ char *lbuf = lbufs[nexti];
+ uint16 *src = (uint16 *)buf;
+ char *p;
+
+ nexti = (nexti+1)%8;
+
+ for (p = lbuf; *src && p-lbuf < MAXUNI-3; src++) {
+ uint16 ucs2_val = SVAL(src,0);
+ uint16 cp_val = ucs2_to_doscp[ucs2_val];
+
+ if (cp_val < 256)
+ *p++ = (char)cp_val;
+ else {
+ *p++ = (cp_val >> 8) & 0xff;
+ *p++ = (cp_val & 0xff);
+ }
}
- *dest = 0;
+ *p = 0;
+ return lbuf;
}
/*******************************************************************
- Convert a buffer4 to space-separated ASCII.
- ********************************************************************/
-void buffer4_to_str(char *dest, const BUFFER4 *str, size_t maxlen)
+ Strcpy for unicode strings. returns length (in num of wide chars)
+********************************************************************/
+
+int unistrcpy(char *dst, char *src)
{
- char *destend;
- const uchar *src;
- size_t len;
- register uint16 c;
-
- src = str->buffer;
- len = MIN(str->buf_len, maxlen);
- destend = dest + len;
-
- while (dest < destend)
- {
- c = *(src++);
- *(dest++) = (char)c;
+ int num_wchars = 0;
+ uint16 *wsrc = (uint16 *)src;
+ uint16 *wdst = (uint16 *)dst;
+
+ while (*wsrc) {
+ *wdst++ = *wsrc++;
+ num_wchars++;
}
+ *wdst = 0;
- *dest = 0;
+ return num_wchars;
}
+
+
/*******************************************************************
-copies a UNISTR2 structure.
+ free any existing maps
********************************************************************/
-BOOL copy_unistr2(UNISTR2 *str, const UNISTR2 *from)
+static void free_maps(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
{
- if (from != NULL)
- {
- /* set up string lengths. add one if string is not null-terminated */
- str->uni_max_len = from->uni_max_len;
- str->undoc = from->undoc;
- str->uni_str_len = from->uni_str_len;
-
- /* copy the string */
- memcpy(str->buffer, from->buffer, sizeof(from->buffer));
+ /* this handles identity mappings where we share the pointer */
+ if (*pp_ucs2_to_cp == *pp_cp_to_ucs2) {
+ *pp_ucs2_to_cp = NULL;
}
- else
- {
- str->uni_max_len = 1;
- str->undoc = 0;
- str->uni_str_len = 1;
- str->buffer[0] = 0;
+
+ if (*pp_cp_to_ucs2) {
+ free(*pp_cp_to_ucs2);
+ *pp_cp_to_ucs2 = NULL;
}
- return True;
+ if (*pp_ucs2_to_cp) {
+ free(*pp_ucs2_to_cp);
+ *pp_ucs2_to_cp = NULL;
+ }
}
+
/*******************************************************************
-duplicates a UNISTR2 structure.
+ Build a default (null) codepage to unicode map.
********************************************************************/
-UNISTR2 *unistr2_dup(const UNISTR2 *name)
+
+void default_unicode_map(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
{
- UNISTR2 *copy = (UNISTR2*)malloc(sizeof(*copy));
- copy_unistr2(copy, name);
- return copy;
+ int i;
+
+ free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
+
+ if ((*pp_ucs2_to_cp = (uint16 *)malloc(2*65536)) == NULL) {
+ DEBUG(0,("default_unicode_map: malloc fail for ucs2_to_cp size %u.\n", 2*65536));
+ abort();
+ }
+
+ *pp_cp_to_ucs2 = *pp_ucs2_to_cp; /* Default map is an identity. */
+ for (i = 0; i < 65536; i++)
+ (*pp_cp_to_ucs2)[i] = i;
}
/*******************************************************************
-frees a UNISTR2 structure.
+ Load a codepage to unicode and vice versa map.
********************************************************************/
-void unistr2_free(UNISTR2 *name)
+
+BOOL load_unicode_map(const char *codepage, smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
{
- free(name);
+	/*
+	 * Reads CODEPAGEDIR/unicode_map.<codepage>. The file is a header of
+	 * UNICODE_MAP_HEADER_SIZE bytes, then the codepage->ucs2 table
+	 * (2*256 bytes for a single byte codepage, 2*65536 for a multibyte
+	 * one), then the 2*65536 byte ucs2->codepage table.
+	 *
+	 * On success the previous tables behind the two out-pointers are
+	 * freed, the new ones are stored there and True is returned.
+	 * On any failure (including an empty codepage name) the out-pointers
+	 * are reset to the identity map built by default_unicode_map() and
+	 * False is returned.
+	 *
+	 * The two locals below start out NULL and only ever hold memory
+	 * allocated in this function that has not yet been handed over to
+	 * the caller; the error path relies on that to free them safely.
+	 */
+	pstring unicode_map_file_name;
+	FILE *fp = NULL;
+	SMB_STRUCT_STAT st;
+	smb_ucs2_t *cp_to_ucs2 = NULL;
+	uint16 *ucs2_to_cp = NULL;
+	size_t cp_to_ucs2_size;
+	size_t ucs2_to_cp_size;
+	size_t i;
+	size_t size;
+	char buf[UNICODE_MAP_HEADER_SIZE];
+
+	DEBUG(5, ("load_unicode_map: loading unicode map for codepage %s.\n", codepage));
+
+	if (*codepage == '\0')
+		goto clean_and_exit;
+
+	/* 13 == strlen("/") + strlen("unicode_map."); >= leaves room for the NUL. */
+	if(strlen(CODEPAGEDIR) + 13 + strlen(codepage) >= sizeof(unicode_map_file_name)) {
+		DEBUG(0,("load_unicode_map: filename too long to load\n"));
+		goto clean_and_exit;
+	}
+
+	pstrcpy(unicode_map_file_name, CODEPAGEDIR);
+	pstrcat(unicode_map_file_name, "/");
+	pstrcat(unicode_map_file_name, "unicode_map.");
+	pstrcat(unicode_map_file_name, codepage);
+
+	if(sys_stat(unicode_map_file_name,&st)!=0) {
+		DEBUG(0,("load_unicode_map: filename %s does not exist.\n",
+			unicode_map_file_name));
+		goto clean_and_exit;
+	}
+
+	size = st.st_size;
+
+	/* Only the two exact layouts described above are accepted. */
+	if ((size != UNICODE_MAP_HEADER_SIZE + 4*65536) && (size != UNICODE_MAP_HEADER_SIZE +(2*256 + 2*65536))) {
+		DEBUG(0,("load_unicode_map: file %s is an incorrect size for a \
+unicode map file (size=%d).\n", unicode_map_file_name, (int)size));
+		goto clean_and_exit;
+	}
+
+	if((fp = sys_fopen( unicode_map_file_name, "r")) == NULL) {
+		DEBUG(0,("load_unicode_map: cannot open file %s. Error was %s\n",
+			unicode_map_file_name, strerror(errno)));
+		goto clean_and_exit;
+	}
+
+	if(fread( buf, 1, UNICODE_MAP_HEADER_SIZE, fp)!=UNICODE_MAP_HEADER_SIZE) {
+		DEBUG(0,("load_unicode_map: cannot read header from file %s. Error was %s\n",
+			unicode_map_file_name, strerror(errno)));
+		goto clean_and_exit;
+	}
+
+	/* Check the version value */
+	if(SVAL(buf,UNICODE_MAP_VERSION_OFFSET) != UNICODE_MAP_FILE_VERSION_ID) {
+		DEBUG(0,("load_unicode_map: filename %s has incorrect version id. \
+Needed %hu, got %hu.\n",
+			unicode_map_file_name, (uint16)UNICODE_MAP_FILE_VERSION_ID,
+			SVAL(buf,UNICODE_MAP_VERSION_OFFSET)));
+		goto clean_and_exit;
+	}
+
+	/* Check the codepage value */
+	if(!strequal(&buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], codepage)) {
+		DEBUG(0,("load_unicode_map: codepage %s in file %s is not the same as that \
+requested (%s).\n", &buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], unicode_map_file_name, codepage ));
+		goto clean_and_exit;
+	}
+
+	ucs2_to_cp_size = 2*65536;
+	if (size == UNICODE_MAP_HEADER_SIZE + 4*65536) {
+		/*
+		 * This is a multibyte code page.
+		 */
+		cp_to_ucs2_size = 2*65536;
+	} else {
+		/*
+		 * Single byte code page.
+		 */
+		cp_to_ucs2_size = 2*256;
+	}
+
+	/*
+	 * Free any old translation tables.
+	 */
+
+	free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
+
+	if ((cp_to_ucs2 = (smb_ucs2_t *)malloc(cp_to_ucs2_size)) == NULL) {
+		DEBUG(0,("load_unicode_map: malloc fail for cp_to_ucs2 size %u.\n", (unsigned int)cp_to_ucs2_size ));
+		goto clean_and_exit;
+	}
+
+	if ((ucs2_to_cp = (uint16 *)malloc(ucs2_to_cp_size)) == NULL) {
+		DEBUG(0,("load_unicode_map: malloc fail for ucs2_to_cp size %u.\n", (unsigned int)ucs2_to_cp_size ));
+		goto clean_and_exit;
+	}
+
+	if(fread( (char *)cp_to_ucs2, 1, cp_to_ucs2_size, fp)!=cp_to_ucs2_size) {
+		DEBUG(0,("load_unicode_map: cannot read cp_to_ucs2 from file %s. Error was %s\n",
+			unicode_map_file_name, strerror(errno)));
+		goto clean_and_exit;
+	}
+
+	if(fread( (char *)ucs2_to_cp, 1, ucs2_to_cp_size, fp)!=ucs2_to_cp_size) {
+		DEBUG(0,("load_unicode_map: cannot read ucs2_to_cp from file %s. Error was %s\n",
+			unicode_map_file_name, strerror(errno)));
+		goto clean_and_exit;
+	}
+
+	/*
+	 * Now ensure the 16 bit values are in the correct endianness.
+	 */
+
+	for (i = 0; i < cp_to_ucs2_size/2; i++)
+		cp_to_ucs2[i] = SVAL(cp_to_ucs2,i*2);
+
+	for (i = 0; i < ucs2_to_cp_size/2; i++)
+		ucs2_to_cp[i] = SVAL(ucs2_to_cp,i*2);
+
+	fclose(fp);
+
+	/* Hand ownership of both tables over to the caller's pointers. */
+	*pp_cp_to_ucs2 = cp_to_ucs2;
+	*pp_ucs2_to_cp = ucs2_to_cp;
+
+	return True;
+
+clean_and_exit:
+
+	/* pseudo destructor :-) */
+
+	if(fp != NULL)
+		fclose(fp);
+
+	/*
+	 * Tables allocated above but never published through the
+	 * out-pointers are not reachable by free_maps(), so release
+	 * them here or they leak on every failed load.
+	 */
+	if (cp_to_ucs2 != NULL)
+		free(cp_to_ucs2);
+	if (ucs2_to_cp != NULL)
+		free(ucs2_to_cp);
+
+	free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
+
+	default_unicode_map(pp_cp_to_ucs2, pp_ucs2_to_cp);
+
+	return False;
+}
+
+/*******************************************************************
+ Load a dos codepage to unicode and vice versa map.
+
+ The numeric codepage is formatted as a decimal string, zero padded
+ to at least three digits, and handed to load_unicode_map() together
+ with the addresses of the doscp_to_ucs2 and ucs2_to_doscp tables.
+ Returns whatever load_unicode_map() returns.
+********************************************************************/
+
+BOOL load_dos_unicode_map(int codepage)
+{
+ fstring codepage_str;
+
+ slprintf(codepage_str, sizeof(fstring)-1, "%03d", codepage);
+ return load_unicode_map(codepage_str, &doscp_to_ucs2, &ucs2_to_doscp);
+}
+
+/*******************************************************************
+ Load a UNIX codepage to unicode and vice versa map.
+
+ The character set name is copied into a local fstring and upper
+ cased with strupper() before it is handed to load_unicode_map()
+ together with the addresses of the unixcp_to_ucs2 and
+ ucs2_to_unixcp tables. The caller's string is not modified.
+ Returns whatever load_unicode_map() returns.
+********************************************************************/
+
+BOOL load_unix_unicode_map(const char *unix_char_set)
+{
+ fstring upper_unix_char_set;
+
+ fstrcpy(upper_unix_char_set, unix_char_set);
+ strupper(upper_unix_char_set);
+ return load_unicode_map(upper_unix_char_set, &unixcp_to_ucs2, &ucs2_to_unixcp);
}