From 3db52feb1f3b2c07ce0b06ad4a7099fa6efe3fc7 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Mon, 13 Dec 1999 13:27:58 +0000 Subject: first pass at updating head branch to be to be the same as the SAMBA_2_0 branch (This used to be commit 453a822a76780063dff23526c35408866d0c0154) --- source3/lib/util_unistr.c | 639 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 445 insertions(+), 194 deletions(-) (limited to 'source3/lib/util_unistr.c') diff --git a/source3/lib/util_unistr.c b/source3/lib/util_unistr.c index 6f90528bf4..aee2a21fc0 100644 --- a/source3/lib/util_unistr.c +++ b/source3/lib/util_unistr.c @@ -21,287 +21,538 @@ #include "includes.h" -/******************************************************************* - Put an ASCII string into a UNICODE buffer (little endian). - ********************************************************************/ +extern int DEBUGLEVEL; -char *ascii_to_unibuf(char *dest, const char *src, int maxlen) -{ - char *destend = dest + maxlen; - register char c; - - while (dest < destend) - { - c = *(src++); - if (c == 0) - { - break; - } +/* + * The following are the codepage to ucs2 and vica versa maps. + * These are dynamically loaded from a unicode translation file. + */ - *(dest++) = c; - *(dest++) = 0; - } +static smb_ucs2_t *doscp_to_ucs2; +static uint16 *ucs2_to_doscp; - *dest++ = 0; - *dest++ = 0; - return dest; -} +static smb_ucs2_t *unixcp_to_ucs2; +static uint16 *ucs2_to_unixcp; +#ifndef MAXUNI +#define MAXUNI 1024 +#endif /******************************************************************* - Pull an ASCII string out of a UNICODE buffer (little endian). - ********************************************************************/ + Write a string in (little-endian) unicode format. src is in + the current DOS codepage. len is the length in bytes of the + string pointed to by dst. -const char* unibuf_to_ascii(char *dest, const char *src, int maxlen) -{ - char *destend = dest + maxlen; - register char c; - - while (dest < destend) - { - c = *(src++); - if ((c == 0) && (*src == 0)) - { - break; - } + the return value is the length of the string *without* the trailing + two bytes of zero +********************************************************************/ - *dest++ = c; - src++; +int dos_PutUniCode(char *dst,const char *src, ssize_t len) +{ + int ret = 0; + while (*src && (len > 2)) { + size_t skip = skip_multibyte_char(*src); + smb_ucs2_t val = (*src & 0xff); + + /* + * If this is a multibyte character (and all DOS/Windows + * codepages have at maximum 2 byte multibyte characters) + * then work out the index value for the unicode conversion. + */ + + if (skip == 2) + val = ((val << 8) | src[1]); + + SSVAL(dst,ret,doscp_to_ucs2[val]); + ret += 2; + len -= 2; + if (skip) + src += skip; + else + src++; } + SSVAL(dst,ret,0); + return(ret); +} - *dest = 0; +/******************************************************************* + Skip past some unicode strings in a buffer. +********************************************************************/ - return src; +char *skip_unicode_string(char *buf,int n) +{ + while (n--) { + while (*buf) + buf += 2; + buf += 2; + } + return(buf); } - /******************************************************************* - Put an ASCII string into a UNICODE array (uint16's). - ********************************************************************/ + Return a DOS codepage version of a little-endian unicode string. + Hack alert: uses fixed buffer(s). +********************************************************************/ -void ascii_to_unistr(uint16 *dest, const char *src, int maxlen) +char *dos_unistrn2(uint16 *src, int len) { - uint16 *destend = dest + maxlen; - register char c; - - while (dest < destend) - { - c = *(src++); - if (c == 0) - { - break; + static char lbufs[8][MAXUNI]; + static int nexti; + char *lbuf = lbufs[nexti]; + char *p; + + nexti = (nexti+1)%8; + + for (p = lbuf; (len > 0) && (p-lbuf < MAXUNI-3) && *src; len--, src++) { + uint16 ucs2_val = SVAL(src,0); + uint16 cp_val = ucs2_to_doscp[ucs2_val]; + + if (cp_val < 256) + *p++ = (char)cp_val; + else { + *p++ = (cp_val >> 8) & 0xff; + *p++ = (cp_val & 0xff); } - - *(dest++) = (uint16)c; } - *dest = 0; + *p = 0; + return lbuf; } +static char lbufs[8][MAXUNI]; +static int nexti; /******************************************************************* - Pull an ASCII string out of a UNICODE array (uint16's). - ********************************************************************/ + Return a DOS codepage version of a little-endian unicode string. + Hack alert: uses fixed buffer(s). +********************************************************************/ -void unistr_to_ascii(char *dest, const uint16 *src, int len) +char *dos_unistr2(uint16 *src) { - char *destend = dest + len; - register uint16 c; - - while (dest < destend) - { - c = *(src++); - if (c == 0) - { - break; - } + char *lbuf = lbufs[nexti]; + char *p; + + nexti = (nexti+1)%8; + + for (p = lbuf; *src && (p-lbuf < MAXUNI-3); src++) { + uint16 ucs2_val = SVAL(src,0); + uint16 cp_val = ucs2_to_doscp[ucs2_val]; - *(dest++) = (char)c; + if (cp_val < 256) + *p++ = (char)cp_val; + else { + *p++ = (cp_val >> 8) & 0xff; + *p++ = (cp_val & 0xff); + } } - *dest = 0; + *p = 0; + return lbuf; } - /******************************************************************* - Convert a UNISTR2 structure to an ASCII string - ********************************************************************/ +Return a DOS codepage version of a little-endian unicode string +********************************************************************/ -void unistr2_to_ascii(char *dest, const UNISTR2 *str, size_t maxlen) +char *dos_unistr2_to_str(UNISTR2 *str) { - char *destend; - const uint16 *src; - size_t len; - register uint16 c; - - src = str->buffer; - len = MIN(str->uni_str_len, maxlen); - destend = dest + len; - - while (dest < destend) - { - c = *(src++); - if (c == 0) - { - break; + char *lbuf = lbufs[nexti]; + char *p; + uint16 *src = str->buffer; + int max_size = MIN(sizeof(str->buffer)-3, str->uni_str_len); + + nexti = (nexti+1)%8; + + for (p = lbuf; *src && p-lbuf < max_size; src++) { + uint16 ucs2_val = SVAL(src,0); + uint16 cp_val = ucs2_to_doscp[ucs2_val]; + + if (cp_val < 256) + *p++ = (char)cp_val; + else { + *p++ = (cp_val >> 8) & 0xff; + *p++ = (cp_val & 0xff); } - - *(dest++) = (char)c; } - *dest = 0; + *p = 0; + return lbuf; } - /******************************************************************* - Skip a UNICODE string in a little endian buffer. - ********************************************************************/ +Return a number stored in a buffer +********************************************************************/ -char *skip_unibuf(char *srcbuf, int len) +uint32 buffer2_to_uint32(BUFFER2 *str) { - uint16 *src = (uint16 *)srcbuf; - uint16 *srcend = src + len/2; + if (str->buf_len == 4) + return IVAL(str->buffer, 0); + else + return 0; +} - while ((src < srcend) && (*(src++) != 0)) - { +/******************************************************************* +Return a DOS codepage version of a NOTunicode string +********************************************************************/ + +char *dos_buffer2_to_str(BUFFER2 *str) +{ + char *lbuf = lbufs[nexti]; + char *p; + uint16 *src = str->buffer; + int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2); + + nexti = (nexti+1)%8; + + for (p = lbuf; *src && p-lbuf < max_size; src++) { + uint16 ucs2_val = SVAL(src,0); + uint16 cp_val = ucs2_to_doscp[ucs2_val]; + + if (cp_val < 256) + *p++ = (char)cp_val; + else { + *p++ = (cp_val >> 8) & 0xff; + *p++ = (cp_val & 0xff); + } } - return (char *)src; + *p = 0; + return lbuf; } - /******************************************************************* - UNICODE strcpy between buffers. - ********************************************************************/ + Return a dos codepage version of a NOTunicode string +********************************************************************/ -char *uni_strncpy(char *destbuf, const char *srcbuf, int len) +char *dos_buffer2_to_multistr(BUFFER2 *str) { - const uint16 *src = (const uint16 *)srcbuf; - uint16 *dest = (uint16 *)destbuf; - uint16 *destend = dest + len/2; - register uint16 c; - - while (dest < destend) - { - c = *(src++); - if (c == 0) - { - break; + char *lbuf = lbufs[nexti]; + char *p; + uint16 *src = str->buffer; + int max_size = MIN(sizeof(str->buffer)-3, str->buf_len/2); + + nexti = (nexti+1)%8; + + for (p = lbuf; p-lbuf < max_size; src++) { + if (*src == 0) { + *p++ = ' '; + } else { + uint16 ucs2_val = SVAL(src,0); + uint16 cp_val = ucs2_to_doscp[ucs2_val]; + + if (cp_val < 256) + *p++ = (char)cp_val; + else { + *p++ = (cp_val >> 8) & 0xff; + *p++ = (cp_val & 0xff); + } } - - *(dest++) = c; } - *dest++ = 0; - return (char *)dest; + *p = 0; + return lbuf; } - /******************************************************************* - Return a number stored in a buffer - ********************************************************************/ + Create a null-terminated unicode string from a null-terminated DOS + codepage string. + Return number of unicode chars copied, excluding the null character. + Unicode strings created are in little-endian format. +********************************************************************/ -uint32 buffer2_to_uint32(const BUFFER2 *str) +size_t dos_struni2(char *dst, const char *src, size_t max_len) { - if (str->buf_len == 4) - { - const uchar *src = str->buffer; - return IVAL(src, 0); - } - else - { + size_t len = 0; + + if (dst == NULL) return 0; + + if (src != NULL) { + for (; *src && len < max_len-2; len++, dst +=2) { + size_t skip = skip_multibyte_char(*src); + smb_ucs2_t val = (*src & 0xff); + + /* + * If this is a multibyte character (and all DOS/Windows + * codepages have at maximum 2 byte multibyte characters) + * then work out the index value for the unicode conversion. + */ + + if (skip == 2) + val = ((val << 8) | src[1]); + + SSVAL(dst,0,doscp_to_ucs2[val]); + if (skip) + src += skip; + else + src++; + } } -} + SSVAL(dst,0,0); + + return len; +} /******************************************************************* - Convert a 'multi-string' buffer to space-separated ASCII. - ********************************************************************/ -void buffer2_to_multistr(char *dest, const BUFFER2 *str, size_t maxlen) + Return a DOS codepage version of a little-endian unicode string. + Hack alert: uses fixed buffer(s). +********************************************************************/ + +char *dos_unistr(char *buf) { - char *destend; - const uchar *src; - size_t len; - register uint16 c; - - src = str->buffer; - len = MIN(str->buf_len/2, maxlen); - destend = dest + len; - - while (dest < destend) - { - c = *(src++); - *(dest++) = (c == 0) ? ' ' : (char)c; - src++; + char *lbuf = lbufs[nexti]; + uint16 *src = (uint16 *)buf; + char *p; + + nexti = (nexti+1)%8; + + for (p = lbuf; *src && p-lbuf < MAXUNI-3; src++) { + uint16 ucs2_val = SVAL(src,0); + uint16 cp_val = ucs2_to_doscp[ucs2_val]; + + if (cp_val < 256) + *p++ = (char)cp_val; + else { + *p++ = (cp_val >> 8) & 0xff; + *p++ = (cp_val & 0xff); + } } - *dest = 0; + *p = 0; + return lbuf; } /******************************************************************* - Convert a buffer4 to space-separated ASCII. - ********************************************************************/ -void buffer4_to_str(char *dest, const BUFFER4 *str, size_t maxlen) + Strcpy for unicode strings. returns length (in num of wide chars) +********************************************************************/ + +int unistrcpy(char *dst, char *src) { - char *destend; - const uchar *src; - size_t len; - register uint16 c; - - src = str->buffer; - len = MIN(str->buf_len, maxlen); - destend = dest + len; - - while (dest < destend) - { - c = *(src++); - *(dest++) = (char)c; + int num_wchars = 0; + uint16 *wsrc = (uint16 *)src; + uint16 *wdst = (uint16 *)dst; + + while (*wsrc) { + *wdst++ = *wsrc++; + num_wchars++; } + *wdst = 0; - *dest = 0; + return num_wchars; } + + /******************************************************************* -copies a UNISTR2 structure. + free any existing maps ********************************************************************/ -BOOL copy_unistr2(UNISTR2 *str, const UNISTR2 *from) +static void free_maps(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp) { - if (from != NULL) - { - /* set up string lengths. add one if string is not null-terminated */ - str->uni_max_len = from->uni_max_len; - str->undoc = from->undoc; - str->uni_str_len = from->uni_str_len; - - /* copy the string */ - memcpy(str->buffer, from->buffer, sizeof(from->buffer)); + /* this handles identity mappings where we share the pointer */ + if (*pp_ucs2_to_cp == *pp_cp_to_ucs2) { + *pp_ucs2_to_cp = NULL; } - else - { - str->uni_max_len = 1; - str->undoc = 0; - str->uni_str_len = 1; - str->buffer[0] = 0; + + if (*pp_cp_to_ucs2) { + free(*pp_cp_to_ucs2); + *pp_cp_to_ucs2 = NULL; } - return True; + if (*pp_ucs2_to_cp) { + free(*pp_ucs2_to_cp); + *pp_ucs2_to_cp = NULL; + } } + /******************************************************************* -duplicates a UNISTR2 structure. + Build a default (null) codepage to unicode map. ********************************************************************/ -UNISTR2 *unistr2_dup(const UNISTR2 *name) + +void default_unicode_map(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp) { - UNISTR2 *copy = (UNISTR2*)malloc(sizeof(*copy)); - copy_unistr2(copy, name); - return copy; + int i; + + free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp); + + if ((*pp_ucs2_to_cp = (uint16 *)malloc(2*65536)) == NULL) { + DEBUG(0,("default_unicode_map: malloc fail for ucs2_to_cp size %u.\n", 2*65536)); + abort(); + } + + *pp_cp_to_ucs2 = *pp_ucs2_to_cp; /* Default map is an identity. */ + for (i = 0; i < 65536; i++) + (*pp_cp_to_ucs2)[i] = i; } /******************************************************************* -frees a UNISTR2 structure. + Load a codepage to unicode and vica-versa map. ********************************************************************/ -void unistr2_free(UNISTR2 *name) + +BOOL load_unicode_map(const char *codepage, smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp) { - free(name); + pstring unicode_map_file_name; + FILE *fp = NULL; + SMB_STRUCT_STAT st; + smb_ucs2_t *cp_to_ucs2 = *pp_cp_to_ucs2; + uint16 *ucs2_to_cp = *pp_ucs2_to_cp; + size_t cp_to_ucs2_size; + size_t ucs2_to_cp_size; + size_t i; + size_t size; + char buf[UNICODE_MAP_HEADER_SIZE]; + + DEBUG(5, ("load_unicode_map: loading unicode map for codepage %s.\n", codepage)); + + if (*codepage == '\0') + goto clean_and_exit; + + if(strlen(CODEPAGEDIR) + 13 + strlen(codepage) > sizeof(unicode_map_file_name)) { + DEBUG(0,("load_unicode_map: filename too long to load\n")); + goto clean_and_exit; + } + + pstrcpy(unicode_map_file_name, CODEPAGEDIR); + pstrcat(unicode_map_file_name, "/"); + pstrcat(unicode_map_file_name, "unicode_map."); + pstrcat(unicode_map_file_name, codepage); + + if(sys_stat(unicode_map_file_name,&st)!=0) { + DEBUG(0,("load_unicode_map: filename %s does not exist.\n", + unicode_map_file_name)); + goto clean_and_exit; + } + + size = st.st_size; + + if ((size != UNICODE_MAP_HEADER_SIZE + 4*65536) && (size != UNICODE_MAP_HEADER_SIZE +(2*256 + 2*65536))) { + DEBUG(0,("load_unicode_map: file %s is an incorrect size for a \ +unicode map file (size=%d).\n", unicode_map_file_name, (int)size)); + goto clean_and_exit; + } + + if((fp = sys_fopen( unicode_map_file_name, "r")) == NULL) { + DEBUG(0,("load_unicode_map: cannot open file %s. Error was %s\n", + unicode_map_file_name, strerror(errno))); + goto clean_and_exit; + } + + if(fread( buf, 1, UNICODE_MAP_HEADER_SIZE, fp)!=UNICODE_MAP_HEADER_SIZE) { + DEBUG(0,("load_unicode_map: cannot read header from file %s. Error was %s\n", + unicode_map_file_name, strerror(errno))); + goto clean_and_exit; + } + + /* Check the version value */ + if(SVAL(buf,UNICODE_MAP_VERSION_OFFSET) != UNICODE_MAP_FILE_VERSION_ID) { + DEBUG(0,("load_unicode_map: filename %s has incorrect version id. \ +Needed %hu, got %hu.\n", + unicode_map_file_name, (uint16)UNICODE_MAP_FILE_VERSION_ID, + SVAL(buf,UNICODE_MAP_VERSION_OFFSET))); + goto clean_and_exit; + } + + /* Check the codepage value */ + if(!strequal(&buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], codepage)) { + DEBUG(0,("load_unicode_map: codepage %s in file %s is not the same as that \ +requested (%s).\n", &buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], unicode_map_file_name, codepage )); + goto clean_and_exit; + } + + ucs2_to_cp_size = 2*65536; + if (size == UNICODE_MAP_HEADER_SIZE + 4*65536) { + /* + * This is a multibyte code page. + */ + cp_to_ucs2_size = 2*65536; + } else { + /* + * Single byte code page. + */ + cp_to_ucs2_size = 2*256; + } + + /* + * Free any old translation tables. + */ + + free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp); + + if ((cp_to_ucs2 = (smb_ucs2_t *)malloc(cp_to_ucs2_size)) == NULL) { + DEBUG(0,("load_unicode_map: malloc fail for cp_to_ucs2 size %u.\n", cp_to_ucs2_size )); + goto clean_and_exit; + } + + if ((ucs2_to_cp = (uint16 *)malloc(ucs2_to_cp_size)) == NULL) { + DEBUG(0,("load_unicode_map: malloc fail for ucs2_to_cp size %u.\n", ucs2_to_cp_size )); + goto clean_and_exit; + } + + if(fread( (char *)cp_to_ucs2, 1, cp_to_ucs2_size, fp)!=cp_to_ucs2_size) { + DEBUG(0,("load_unicode_map: cannot read cp_to_ucs2 from file %s. Error was %s\n", + unicode_map_file_name, strerror(errno))); + goto clean_and_exit; + } + + if(fread( (char *)ucs2_to_cp, 1, ucs2_to_cp_size, fp)!=ucs2_to_cp_size) { + DEBUG(0,("load_unicode_map: cannot read ucs2_to_cp from file %s. Error was %s\n", + unicode_map_file_name, strerror(errno))); + goto clean_and_exit; + } + + /* + * Now ensure the 16 bit values are in the correct endianness. + */ + + for (i = 0; i < cp_to_ucs2_size/2; i++) + cp_to_ucs2[i] = SVAL(cp_to_ucs2,i*2); + + for (i = 0; i < ucs2_to_cp_size/2; i++) + ucs2_to_cp[i] = SVAL(ucs2_to_cp,i*2); + + fclose(fp); + + *pp_cp_to_ucs2 = cp_to_ucs2; + *pp_ucs2_to_cp = ucs2_to_cp; + + return True; + +clean_and_exit: + + /* pseudo destructor :-) */ + + if(fp != NULL) + fclose(fp); + + free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp); + + default_unicode_map(pp_cp_to_ucs2, pp_ucs2_to_cp); + + return False; +} + +/******************************************************************* + Load a dos codepage to unicode and vica-versa map. +********************************************************************/ + +BOOL load_dos_unicode_map(int codepage) +{ + fstring codepage_str; + + slprintf(codepage_str, sizeof(fstring)-1, "%03d", codepage); + return load_unicode_map(codepage_str, &doscp_to_ucs2, &ucs2_to_doscp); +} + +/******************************************************************* + Load a UNIX codepage to unicode and vica-versa map. +********************************************************************/ + +BOOL load_unix_unicode_map(const char *unix_char_set) +{ + fstring upper_unix_char_set; + + fstrcpy(upper_unix_char_set, unix_char_set); + strupper(upper_unix_char_set); + return load_unicode_map(upper_unix_char_set, &unixcp_to_ucs2, &ucs2_to_unixcp); } -- cgit