diff options
author | Jeremy Allison <jra@samba.org> | 2003-09-05 01:33:22 +0000 |
---|---|---|
committer | Jeremy Allison <jra@samba.org> | 2003-09-05 01:33:22 +0000 |
commit | ff78c21f51263ea7f6108acddb610bbd775efc87 (patch) | |
tree | f243c7dea59c2dee6d81a69cd70508d9e1c59085 /source3/lib | |
parent | 2dacf5e00ccfb04ffca977c35d86e3e0fa0b965d (diff) | |
download | samba-ff78c21f51263ea7f6108acddb610bbd775efc87.tar.gz samba-ff78c21f51263ea7f6108acddb610bbd775efc87.tar.bz2 samba-ff78c21f51263ea7f6108acddb610bbd775efc87.zip |
Hand optimisations for strrchr_m using the properties we know about MB
character sets and how we use this call.
Jeremy.
(This used to be commit a9709700eea3bb48ab4a79d74e0b8d22dc98576f)
Diffstat (limited to 'source3/lib')
-rw-r--r-- | source3/lib/util_str.c | 58 |
1 files changed, 47 insertions, 11 deletions
diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c index 34fdf75f63..4556405b04 100644 --- a/source3/lib/util_str.c +++ b/source3/lib/util_str.c @@ -1115,7 +1115,7 @@ char *strchr_m(const char *s, char c) supported multi-byte character sets are ascii-compatible (ie. they match for the first 128 chars) */ - while (*s && !(((unsigned char)s[0]) & 0x7F)) { + while (*s && (((unsigned char)s[0]) & 0x80)) { if (*s == c) return s; } @@ -1134,17 +1134,53 @@ char *strchr_m(const char *s, char c) char *strrchr_m(const char *s, char c) { - wpstring ws; - pstring s2; - smb_ucs2_t *p; + /* this is quite a common operation, so we want it to be + fast. We optimise for the ascii case, knowing that all our + supported multi-byte character sets are ascii-compatible + (ie. they match for the first 128 chars). Also, in Samba + we only search for ascii characters in 'c' and that + in all mb character sets with a compound character + containing c, if 'c' is not a match at position + p, then p[-1] > 0x7f. JRA. */ - push_ucs2(NULL, ws, s, sizeof(ws), STR_TERMINATE); - p = strrchr_w(ws, UCS2_CHAR(c)); - if (!p) - return NULL; - *p = 0; - pull_ucs2_pstring(s2, ws); - return (char *)(s+strlen(s2)); + { + size_t len = strlen(s); + const char *cp = s; + BOOL got_mb = False; + + if (len == 0) + return NULL; + cp += (len - 1); + do { + if (c == *cp) { + /* Could be a match. Part of a multibyte ? */ + if ((cp > s) && (((unsigned char)cp[-1]) & 0x80)) { + /* Yep - go slow :-( */ + got_mb = True; + break; + } + /* No - we have a match ! */ + return cp; + } + } while (cp-- != s); + if (!got_mb) + return NULL; + } + + /* String contained a non-ascii char. Slow path. */ + { + wpstring ws; + pstring s2; + smb_ucs2_t *p; + + push_ucs2(NULL, ws, s, sizeof(ws), STR_TERMINATE); + p = strrchr_w(ws, UCS2_CHAR(c)); + if (!p) + return NULL; + *p = 0; + pull_ucs2_pstring(s2, ws); + return (char *)(s+strlen(s2)); + } } /*********************************************************************** |