summaryrefslogtreecommitdiff
path: root/source3/lib/util_unistr.c
diff options
context:
space:
mode:
authorJeremy Allison <jra@samba.org>1999-12-21 23:14:01 +0000
committerJeremy Allison <jra@samba.org>1999-12-21 23:14:01 +0000
commit8177fc778b02d9f61ef482fc60d32f353be77ba4 (patch)
tree163966dad88e6a91cf063df5e0f3f7946392ea11 /source3/lib/util_unistr.c
parent44dc314fea03d23b137fb6f2f51a271e6c0ed1c2 (diff)
downloadsamba-8177fc778b02d9f61ef482fc60d32f353be77ba4.tar.gz
samba-8177fc778b02d9f61ef482fc60d32f353be77ba4.tar.bz2
samba-8177fc778b02d9f61ef482fc60d32f353be77ba4.zip
Added new unicode functions - not used yet, but are the basis for the
internal unicode conversion of Samba. Jeremy. (This used to be commit 302412df64aa4b6572b13ef61dfd68c3f8ebbb8b)
Diffstat (limited to 'source3/lib/util_unistr.c')
-rw-r--r--source3/lib/util_unistr.c210
1 files changed, 209 insertions, 1 deletions
diff --git a/source3/lib/util_unistr.c b/source3/lib/util_unistr.c
index 185e7c3547..fca9d8bfda 100644
--- a/source3/lib/util_unistr.c
+++ b/source3/lib/util_unistr.c
@@ -345,8 +345,9 @@ int unistrcpy(char *dst, char *src)
/*******************************************************************
- free any existing maps
+ Free any existing maps.
********************************************************************/
+
static void free_maps(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
{
/* this handles identity mappings where we share the pointer */
@@ -556,3 +557,210 @@ BOOL load_unix_unicode_map(const char *unix_char_set)
strupper(upper_unix_char_set);
return load_unicode_map(upper_unix_char_set, &unixcp_to_ucs2, &ucs2_to_unixcp);
}
+
+/*******************************************************************
+ The following functions reproduce many of the non-UNICODE standard
+ string functions in Samba.
+********************************************************************/
+
+/*******************************************************************
+ Convert a UNICODE string to multibyte format. Note that the 'src' is in
+ native byte order, not little endian.
+
+ dst        - output buffer, receives the multibyte string.
+ src        - zero terminated smb_ucs2_t string to convert.
+ dst_len    - size of dst in bytes, including room for the terminator.
+ ucs2_to_cp - lookup table indexed by the ucs2 value; an entry below
+              256 is emitted as one byte, anything else as two bytes.
+
+ Conversion stops at the end of src or as soon as the next character
+ (plus the terminating zero) would no longer fit into dst. The result
+ is always zero terminated, unless dst_len is 0 in which case dst is
+ not touched at all. Returns dst.
+********************************************************************/
+
+static char *unicode_to_multibyte(char *dst, const smb_ucs2_t *src,
+		size_t dst_len, const uint16 *ucs2_to_cp)
+{
+	size_t dst_pos = 0;
+
+	/* No room even for the terminator - do not write anything. */
+	if (dst_len == 0)
+		return dst;
+
+	/*
+	 * Walk the source one ucs2 unit at a time. The source position
+	 * and the destination offset are tracked separately because one
+	 * source unit may expand to two destination bytes.
+	 */
+
+	for (; *src; src++) {
+		smb_ucs2_t val = ucs2_to_cp[*src];
+
+		if (val < 256) {
+			/* Need one byte plus the terminator. */
+			if (dst_pos + 1 >= dst_len)
+				break;
+			dst[dst_pos++] = (char)val;
+		} else {
+			/*
+			 * Need two bytes plus the terminator. Stop rather
+			 * than emit half a character or spin forever.
+			 */
+			if (dst_pos + 2 >= dst_len)
+				break;
+
+			/*
+			 * A 2 byte value is always written as
+			 * high/low into the buffer stream.
+			 */
+
+			dst[dst_pos++] = (char)((val >> 8) & 0xff);
+			dst[dst_pos++] = (char)(val & 0xff);
+		}
+	}
+
+	dst[dst_pos] = '\0';
+
+	return dst;
+}
+
+/*******************************************************************
+ Convert a multibyte string to UNICODE format. Note that the 'dst' is in
+ native byte order, not little endian.
+
+ dst        - output buffer, receives the smb_ucs2_t string.
+ src        - zero terminated multibyte string to convert.
+ dst_len    - size of dst in bytes (not ucs2 units), including room
+              for the terminating zero unit.
+ cp_to_ucs2 - lookup table indexed by the one byte value, or by
+              (first byte << 8 | second byte) for a two byte character.
+
+ The result is always zero terminated, unless dst_len is smaller than
+ one smb_ucs2_t in which case dst is not touched at all. Returns dst.
+********************************************************************/
+
+smb_ucs2_t *multibyte_to_unicode(smb_ucs2_t *dst, const char *src,
+		size_t dst_len, smb_ucs2_t *cp_to_ucs2)
+{
+	size_t dst_pos = 0;
+
+	dst_len /= sizeof(smb_ucs2_t); /* Convert to smb_ucs2_t units. */
+
+	/* No room even for the terminator - do not write anything. */
+	if (dst_len == 0)
+		return dst;
+
+	/*
+	 * 'src' itself is advanced through the input, so the end of
+	 * string test must look at *src and not at src[dst_pos].
+	 */
+
+	while ((dst_pos < (dst_len - 1)) && *src) {
+		/*
+		 * NOTE(review): skip_multibyte_char() is defined elsewhere;
+		 * it is used here as "bytes occupied by this character, or
+		 * 0 if it is not a multibyte character" - confirm.
+		 */
+		size_t skip = skip_multibyte_char(*src);
+		size_t advance = skip ? skip : 1;
+		smb_ucs2_t val = (*src & 0xff);
+
+		/*
+		 * If this is a multibyte character
+		 * then work out the index value for the unicode conversion.
+		 */
+
+		if (skip == 2) {
+			if (src[1] != '\0') {
+				val = ((val << 8) | (src[1] & 0xff));
+			} else {
+				/*
+				 * Lead byte directly followed by the
+				 * terminator: treat it as a single byte so
+				 * we never step over the end of the string.
+				 */
+				advance = 1;
+			}
+		}
+
+		dst[dst_pos++] = cp_to_ucs2[val];
+		src += advance;
+	}
+
+	dst[dst_pos] = 0;
+
+	return dst;
+}
+
+/*******************************************************************
+ Convert a UNICODE string ('src', native byte order, not little
+ endian) into the UNIX multibyte representation, using the
+ ucs2_to_unixcp table. The work is done by unicode_to_multibyte().
+ dst_len is the size of 'dst' in bytes. Returns whatever
+ unicode_to_multibyte() returns.
+ This function may be replaced if the MB codepage format is an
+ encoded one (ie. utf8, hex). See the code in lib/kanji.c
+ for details.
+********************************************************************/
+
+char *unicode_to_unix(char *dst, const smb_ucs2_t *src, size_t dst_len)
+{
+	char *converted;
+
+	converted = unicode_to_multibyte(dst, src, dst_len, ucs2_to_unixcp);
+
+	return converted;
+}
+
+/*******************************************************************
+ Convert a UNIX string into UNICODE format ('dst' is in native byte
+ order, not little endian), using the unixcp_to_ucs2 table. The work
+ is done by multibyte_to_unicode(). dst_len is the size of 'dst' in
+ bytes, not ucs2 units. Returns whatever multibyte_to_unicode()
+ returns.
+ This function may be replaced if the UNIX codepage format is a
+ multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
+ for details.
+********************************************************************/
+
+smb_ucs2_t *unix_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
+{
+	smb_ucs2_t *converted;
+
+	converted = multibyte_to_unicode(dst, src, dst_len, unixcp_to_ucs2);
+
+	return converted;
+}
+
+/*******************************************************************
+ Convert a UNICODE string ('src', native byte order, not little
+ endian) into the DOS codepage representation, using the
+ ucs2_to_doscp table. The work is done by unicode_to_multibyte().
+ dst_len is the size of 'dst' in bytes. Returns whatever
+ unicode_to_multibyte() returns.
+********************************************************************/
+
+char *unicode_to_dos(char *dst, const smb_ucs2_t *src, size_t dst_len)
+{
+	char *converted;
+
+	converted = unicode_to_multibyte(dst, src, dst_len, ucs2_to_doscp);
+
+	return converted;
+}
+
+/*******************************************************************
+ Convert a DOS string into UNICODE format ('dst' is in native byte
+ order, not little endian), using the doscp_to_ucs2 table. The work
+ is done by multibyte_to_unicode(). dst_len is the size of 'dst' in
+ bytes, not ucs2 units. Returns whatever multibyte_to_unicode()
+ returns.
+ This function may be replaced if the DOS codepage format is a
+ multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
+ for details.
+********************************************************************/
+
+smb_ucs2_t *dos_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
+{
+	smb_ucs2_t *converted;
+
+	converted = multibyte_to_unicode(dst, src, dst_len, doscp_to_ucs2);
+
+	return converted;
+}
+
+/*******************************************************************
+ Count the number of characters in a smb_ucs2_t string.
+ 'src' must be zero terminated. The returned length is in
+ smb_ucs2_t units (not bytes) and excludes the terminating zero.
+********************************************************************/
+
+size_t wstrlen(const smb_ucs2_t *src)
+{
+	size_t len = 0;
+
+	/* Index by len so that every iteration looks at the next unit. */
+	while (src[len])
+		len++;
+
+	return len;
+}
+
+/*******************************************************************
+ Safe wstring copy into a known length string. maxlength includes
+ the terminating zero. maxlength is in bytes.
+
+ - A NULL dest is logged and NULL is returned.
+ - If maxlength cannot hold even one smb_ucs2_t the error is logged
+   and dest is returned untouched.
+ - A NULL src produces an empty string in dest.
+ - If src does not fit, the overflow is logged and the copy is
+   truncated so that dest is still zero terminated.
+
+ Returns dest.
+********************************************************************/
+
+smb_ucs2_t *safe_wstrcpy(smb_ucs2_t *dest,const smb_ucs2_t *src, size_t maxlength)
+{
+	/* Number of smb_ucs2_t units (terminator included) dest can hold. */
+	size_t capacity = maxlength/sizeof(smb_ucs2_t);
+	size_t ucs2_len;
+
+	if (!dest) {
+		DEBUG(0,("ERROR: NULL dest in safe_wstrcpy\n"));
+		return NULL;
+	}
+
+	/*
+	 * Without room for the terminator nothing may be written, and
+	 * "capacity - 1" below would wrap around to a huge length.
+	 */
+	if (capacity == 0) {
+		DEBUG(0,("ERROR: zero length dest in safe_wstrcpy\n"));
+		return dest;
+	}
+
+	if (!src) {
+		*dest = 0;
+		return dest;
+	}
+
+	ucs2_len = wstrlen(src);
+
+	if (ucs2_len >= capacity) {
+		fstring out;
+		/* Bytes needed (string plus terminator) beyond maxlength. */
+		DEBUG(0,("ERROR: string overflow by %u bytes in safe_wstrcpy [%.50s]\n",
+			(unsigned int)(((ucs2_len + 1)*sizeof(smb_ucs2_t))-maxlength),
+			unicode_to_unix(out,src,sizeof(out))) );
+		ucs2_len = capacity - 1;
+	}
+
+	memcpy(dest, src, ucs2_len*sizeof(smb_ucs2_t));
+	dest[ucs2_len] = 0;
+	return dest;
+}
+
+/*******************************************************************
+ Safe string cat into a string. maxlength includes the terminating zero.
+ maxlength is in bytes and describes the whole of dest, not just the
+ free space behind the existing contents.
+
+ - A NULL dest is logged and NULL is returned.
+ - A NULL src leaves dest unchanged.
+ - If maxlength cannot hold even one smb_ucs2_t the error is logged
+   and dest is returned untouched.
+ - If the combined string does not fit, the overflow is logged and
+   only as much of src as fits is appended. If dest already fills
+   the buffer, nothing is written.
+
+ Returns dest.
+********************************************************************/
+
+smb_ucs2_t *safe_wstrcat(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength)
+{
+	/* Number of smb_ucs2_t units (terminator included) dest can hold. */
+	size_t capacity = maxlength/sizeof(smb_ucs2_t);
+	size_t ucs2_src_len, ucs2_dest_len;
+
+	if (!dest) {
+		DEBUG(0,("ERROR: NULL dest in safe_wstrcat\n"));
+		return NULL;
+	}
+
+	if (!src) {
+		return dest;
+	}
+
+	/* With no room at all dest cannot even be scanned for its end. */
+	if (capacity == 0) {
+		DEBUG(0,("ERROR: zero length dest in safe_wstrcat\n"));
+		return dest;
+	}
+
+	ucs2_src_len = wstrlen(src);
+	ucs2_dest_len = wstrlen(dest);
+
+	if (ucs2_src_len + ucs2_dest_len >= capacity) {
+		fstring out;
+		/* Bytes needed (both strings plus terminator) beyond maxlength. */
+		DEBUG(0,("ERROR: string overflow by %u bytes in safe_wstrcat [%.50s]\n",
+			(unsigned int)((sizeof(smb_ucs2_t)*(ucs2_src_len + ucs2_dest_len + 1)) - maxlength),
+			unicode_to_unix(out,src,sizeof(out))) );
+
+		/*
+		 * dest already occupies the whole buffer (or more): there
+		 * is no room to append, and writing a terminator at
+		 * dest[ucs2_dest_len] would be out of bounds.
+		 */
+		if (ucs2_dest_len >= capacity)
+			return dest;
+
+		/* Keep one unit free for the terminator. */
+		ucs2_src_len = capacity - ucs2_dest_len - 1;
+	}
+
+	memcpy(&dest[ucs2_dest_len], src, ucs2_src_len*sizeof(smb_ucs2_t));
+	dest[ucs2_dest_len + ucs2_src_len] = 0;
+	return dest;
+}