summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jeremy Allison <jra@samba.org> 2006-09-21 17:00:07 +0000
committer: Gerald (Jerry) Carter <jerry@samba.org> 2007-10-10 12:00:57 -0500
commit: f18c9365caaad75c0f4c9e26b89327a75cfcb3e6 (patch)
tree: b5db870641dd49feb5d167b55350ecaeabd1f247
parent: b3d18b12c23670b02f4f98e1afeb32f829050fb3 (diff)
downloadsamba-f18c9365caaad75c0f4c9e26b89327a75cfcb3e6.tar.gz
samba-f18c9365caaad75c0f4c9e26b89327a75cfcb3e6.tar.bz2
samba-f18c9365caaad75c0f4c9e26b89327a75cfcb3e6.zip
r18787: Fix the strlen_m and strlen_m_term code by merging
in (and using elsewhere) next_codepoint from Samba4. Jerry please test. Jeremy. (This used to be commit ece00b70a4621633f1ac9e576c4bbe332031de09)
-rw-r--r-- source3/include/charset.h | 3
-rw-r--r-- source3/include/smb.h | 4
-rw-r--r-- source3/lib/charcnv.c | 99
-rw-r--r-- source3/lib/util_str.c | 52
-rw-r--r-- source3/script/mkproto.awk | 2
-rw-r--r-- source3/smbd/reply.c | 27
-rw-r--r-- source3/smbd/service.c | 11
7 files changed, 169 insertions, 29 deletions
diff --git a/source3/include/charset.h b/source3/include/charset.h
index a4dfef3a50..8a51a1876e 100644
--- a/source3/include/charset.h
+++ b/source3/include/charset.h
@@ -22,6 +22,7 @@
/* this defines the charset types used in samba */
typedef enum {CH_UCS2=0, CH_UTF16=0, CH_UNIX=1, CH_DISPLAY=2, CH_DOS=3, CH_UTF8=4} charset_t;
+#if 0
/* FIXME!!! Hack job for now to get the lsa ndr code compiling */
#ifndef strlen_m
#define strlen_m strlen
@@ -29,7 +30,7 @@ typedef enum {CH_UCS2=0, CH_UTF16=0, CH_UNIX=1, CH_DISPLAY=2, CH_DOS=3, CH_UTF8=
#ifndef strlen_m_term
#define strlen_m_term strlen
#endif
-
+#endif
#define NUM_CHARSETS 5
diff --git a/source3/include/smb.h b/source3/include/smb.h
index bba1621e8f..700dbcdf85 100644
--- a/source3/include/smb.h
+++ b/source3/include/smb.h
@@ -170,6 +170,10 @@ typedef smb_ucs2_t wfstring[FSTRING_LEN];
#define COPY_UCS2_CHAR(dest,src) (((unsigned char *)(dest))[0] = ((unsigned char *)(src))[0],\
((unsigned char *)(dest))[1] = ((unsigned char *)(src))[1], (dest))
+/* Large data type for manipulating uint32 unicode codepoints */
+typedef uint32 codepoint_t;
+#define INVALID_CODEPOINT ((codepoint_t)-1)
+
/* pipe string names */
#define PIPE_LANMAN "\\PIPE\\LANMAN"
#define PIPE_SRVSVC "\\PIPE\\srvsvc"
diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c
index fffdf010a0..c5ce3ca8c7 100644
--- a/source3/lib/charcnv.c
+++ b/source3/lib/charcnv.c
@@ -1374,33 +1374,86 @@ size_t align_string(const void *base_ptr, const char *p, int flags)
return 0;
}
-/****************************************************************
- Calculate the size (in bytes) of the next multibyte character in
- our internal character set. Note that p must be pointing to a
- valid mb char, not within one.
-****************************************************************/
+/*
+ Return the unicode codepoint for the next multi-byte CH_UNIX character
+ in the string. The unicode codepoint (codepoint_t) is an unsigned 32 bit value.
-size_t next_mb_char_size(const char *s)
+ Also return the number of bytes consumed (which tells the caller
+ how many bytes to skip to get to the next CH_UNIX character).
+
+ Return INVALID_CODEPOINT if the next character cannot be converted.
+*/
+
+codepoint_t next_codepoint(const char *str, size_t *size)
{
- size_t i;
+ /* It cannot occupy more than 4 bytes in UTF16 format */
+ uint8_t buf[4];
+ smb_iconv_t descriptor;
+ size_t ilen_orig;
+ size_t ilen;
+ size_t olen;
+ char *outbuf;
+
+ if ((str[0] & 0x80) == 0) {
+ *size = 1;
+ return (codepoint_t)str[0];
+ }
- if (!(*s & 0x80))
- return 1; /* ascii. */
+ /* We assume that no multi-byte character can take
+ more than 5 bytes. This is OK as we only
+ support codepoints up to 1M */
- conv_silent = True;
- for ( i = 1; i <=4; i++ ) {
- smb_ucs2_t uc;
- if (convert_string(CH_UNIX, CH_UCS2, s, i, &uc, 2, False) == 2) {
-#if 0 /* JRATEST */
- DEBUG(10,("next_mb_char_size: size %u at string %s\n",
- (unsigned int)i, s));
-#endif
- conv_silent = False;
- return i;
+ ilen_orig = strnlen(str, 5);
+ ilen = ilen_orig;
+
+ lazy_initialize_conv();
+
+ /* CH_UCS2 == UTF16-LE. */
+ descriptor = conv_handles[CH_UNIX][CH_UCS2];
+ if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
+ *size = 1;
+ return INVALID_CODEPOINT;
+ }
+
+ /* This looks a little strange, but it is needed to cope
+ with codepoints above 64k which are encoded as per RFC2781. */
+ olen = 2;
+ outbuf = (char *)buf;
+ smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
+ if (olen == 2) {
+ /* We failed to convert to a 2 byte character.
+ See if we can convert to a 4 UTF16-LE byte char encoding.
+ */
+ olen = 4;
+ outbuf = (char *)buf;
+ smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
+ if (olen == 4) {
+ /* We didn't convert any bytes */
+ *size = 1;
+ return INVALID_CODEPOINT;
}
+ olen = 4 - olen;
+ } else {
+ olen = 2 - olen;
}
- /* We're hosed - we don't know how big this is... */
- DEBUG(10,("next_mb_char_size: unknown size at string %s\n", s));
- conv_silent = False;
- return 1;
+
+ *size = ilen_orig - ilen;
+
+ if (olen == 2) {
+ /* 2 byte, UTF16-LE encoded value. */
+ return (codepoint_t)SVAL(buf, 0);
+ }
+ if (olen == 4) {
+ /* Decode a 4 byte UTF16-LE character manually.
+ See RFC2781 for the encoding mechanism.
+ */
+ codepoint_t w1 = SVAL(buf,0) & ~0xD800;
+ codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
+
+ return (codepoint_t)0x10000 +
+ (w1 << 10) + w2;
+ }
+
+ /* no other length is valid */
+ return INVALID_CODEPOINT;
}
diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c
index 4619d47388..414a87a562 100644
--- a/source3/lib/util_str.c
+++ b/source3/lib/util_str.c
@@ -1593,6 +1593,58 @@ void strupper_m(char *s)
}
/**
+ Count the number of UCS2 characters in a string. Normally this will
+ be the same as the number of bytes in a string for single byte strings,
+ but will be different for multibyte.
+**/
+
+size_t strlen_m(const char *s)
+{
+ size_t count = 0;
+
+ if (!s) {
+ return 0;
+ }
+
+ while (*s && !(((uint8_t)*s) & 0x80)) {
+ s++;
+ count++;
+ }
+
+ if (!*s) {
+ return count;
+ }
+
+ while (*s) {
+ size_t c_size;
+ codepoint_t c = next_codepoint(s, &c_size);
+ if (c < 0x10000) {
+ /* Unicode char fits into 16 bits. */
+ count += 1;
+ } else {
+ /* Double-width unicode char - 32 bits. */
+ count += 2;
+ }
+ s += c_size;
+ }
+
+ return count;
+}
+
+/**
+ Count the number of UCS2 characters in a string including the null
+ terminator.
+**/
+
+size_t strlen_m_term(const char *s)
+{
+ if (!s) {
+ return 0;
+ }
+ return strlen_m(s) + 1;
+}
+
+/**
Return a RFC2254 binary string representation of a buffer.
Used in LDAP filters.
Caller must free.
diff --git a/source3/script/mkproto.awk b/source3/script/mkproto.awk
index 30b5628b33..97578b046f 100644
--- a/source3/script/mkproto.awk
+++ b/source3/script/mkproto.awk
@@ -146,7 +146,7 @@ END {
gotstart = 1;
}
- if( $0 ~ /^NODE_STATUS_STRUCT|SMB_STRUCT_DIR|ELOG_TDB/ ) {
+ if( $0 ~ /^NODE_STATUS_STRUCT|SMB_STRUCT_DIR|ELOG_TDB|codepoint_t/ ) {
gotstart = 1;
}
diff --git a/source3/smbd/reply.c b/source3/smbd/reply.c
index e38edadee4..a0596643f8 100644
--- a/source3/smbd/reply.c
+++ b/source3/smbd/reply.c
@@ -132,13 +132,22 @@ NTSTATUS check_path_syntax(pstring destname, const pstring srcname)
break;
}
} else {
- switch(next_mb_char_size(s)) {
+ size_t siz;
+ /* Get the size of the next MB character. */
+ next_codepoint(s,&siz);
+ switch(siz) {
+ case 5:
+ *d++ = *s++;
+ /*fall through*/
case 4:
*d++ = *s++;
+ /*fall through*/
case 3:
*d++ = *s++;
+ /*fall through*/
case 2:
*d++ = *s++;
+ /*fall through*/
case 1:
*d++ = *s++;
break;
@@ -266,7 +275,13 @@ NTSTATUS check_path_syntax_wcard(pstring destname, const pstring srcname, BOOL *
}
*d++ = *s++;
} else {
- switch(next_mb_char_size(s)) {
+ size_t siz;
+ /* Get the size of the next MB character. */
+ next_codepoint(s,&siz);
+ switch(siz) {
+ case 5:
+ *d++ = *s++;
+ /*fall through*/
case 4:
*d++ = *s++;
/*fall through*/
@@ -374,7 +389,13 @@ NTSTATUS check_path_syntax_posix(pstring destname, const pstring srcname)
if (!(*s & 0x80)) {
*d++ = *s++;
} else {
- switch(next_mb_char_size(s)) {
+ size_t siz;
+ /* Get the size of the next MB character. */
+ next_codepoint(s,&siz);
+ switch(siz) {
+ case 5:
+ *d++ = *s++;
+ /*fall through*/
case 4:
*d++ = *s++;
/*fall through*/
diff --git a/source3/smbd/service.c b/source3/smbd/service.c
index 734feef4f7..9c341f19fd 100644
--- a/source3/smbd/service.c
+++ b/source3/smbd/service.c
@@ -95,13 +95,22 @@ void set_conn_connectpath(connection_struct *conn, const pstring connectpath)
if (!(*s & 0x80)) {
*d++ = *s++;
} else {
- switch(next_mb_char_size(s)) {
+ size_t siz;
+ /* Get the size of the next MB character. */
+ next_codepoint(s,&siz);
+ switch(siz) {
+ case 5:
+ *d++ = *s++;
+ /*fall through*/
case 4:
*d++ = *s++;
+ /*fall through*/
case 3:
*d++ = *s++;
+ /*fall through*/
case 2:
*d++ = *s++;
+ /*fall through*/
case 1:
*d++ = *s++;
break;