From 5a70b84cb66fb8c7a3fce0e3f2e4b61e0b2ea9d4 Mon Sep 17 00:00:00 2001 From: Simo Sorce Date: Tue, 27 Dec 2011 19:56:43 -0500 Subject: util: add murmurhash3 hash function --- src/tests/util-tests.c | 24 +++++++++++ src/util/murmurhash3.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++ src/util/murmurhash3.h | 10 +++++ 3 files changed, 144 insertions(+) create mode 100644 src/util/murmurhash3.c create mode 100644 src/util/murmurhash3.h (limited to 'src') diff --git a/src/tests/util-tests.c b/src/tests/util-tests.c index c6973d33..557be10e 100644 --- a/src/tests/util-tests.c +++ b/src/tests/util-tests.c @@ -27,6 +27,7 @@ #include #include "util/util.h" #include "util/sss_utf8.h" +#include "util/murmurhash3.h" #include "tests/common.h" START_TEST(test_parse_args) @@ -398,6 +399,24 @@ START_TEST(test_utf8_check) } END_TEST +START_TEST(test_murmurhash3_check) +{ + const char *tests[6] = { "1052800007", "1052800008", "1052800000", + "abcdefghijk", "abcdefghili", "abcdefgh000" }; + uint32_t results[6]; + int i, j; + + for (i = 0; i< 6; i++) { + results[i] = murmurhash3(tests[i], + strlen(tests[i]), + 0xdeadbeef); + for (j = 0; j < i; j++) { + fail_if(results[i] == results[j]); + } + } +} +END_TEST + Suite *util_suite(void) { Suite *s = suite_create("util"); @@ -419,8 +438,13 @@ Suite *util_suite(void) tcase_set_timeout(tc_utf8, 60); + TCase *tc_mh3 = tcase_create("murmurhash3"); + tcase_add_test (tc_mh3, test_murmurhash3_check); + tcase_set_timeout(tc_mh3, 60); + suite_add_tcase (s, tc_util); suite_add_tcase (s, tc_utf8); + suite_add_tcase (s, tc_mh3); return s; } diff --git a/src/util/murmurhash3.c b/src/util/murmurhash3.c new file mode 100644 index 00000000..c14cd7ce --- /dev/null +++ b/src/util/murmurhash3.c @@ -0,0 +1,110 @@ +/* This file is based on the public domain MurmurHash3 from Austin Appleby: + * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp + * + * We use only the 32 bit variant because the 2 produce different result while + * we need to produce the same result regardless of the architecture as + * clients can be both 64 or 32 bit at the same time. + */ + +#include +#include +#include +#include + +static uint32_t rotl(uint32_t x, int8_t r) +{ + return (x << r) | (x >> (32 - r)); +} + +/* slower than original but is endian neutral and handles platforms that + * do only aligned reads */ +__attribute__((always_inline)) +static uint32_t getblock(const uint8_t *p, int i) +{ + uint32_t r; + size_t size = sizeof(uint32_t); + + memcpy(&r, &p[i * size], size); + + return le32toh(r); +} + +/* + * Finalization mix - force all bits of a hash block to avalanche + */ + +__attribute__((always_inline)) +static uint32_t fmix(uint32_t h) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + + +uint32_t murmurhash3(const char *key, int len, uint32_t seed) +{ + const uint8_t *blocks; + const uint8_t *tail; + int nblocks; + uint32_t h1; + uint32_t k1; + uint32_t c1; + uint32_t c2; + int i; + + blocks = (const uint8_t *)key; + nblocks = len / 4; + h1 = seed; + c1 = 0xcc9e2d51; + c2 = 0x1b873593; + + /* body */ + + for (i = 0; i < nblocks; i++) { + + k1 = getblock(blocks, i); + + k1 *= c1; + k1 = rotl(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = rotl(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } + + /* tail */ + + tail = (const uint8_t *)key + nblocks * 4; + + k1 = 0; + + switch (len & 3) { + case 3: + k1 ^= tail[2] << 16; + case 2: + k1 ^= tail[1] << 8; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = rotl(k1, 15); + k1 *= c2; + h1 ^= k1; + default: + break; + } + + /* finalization */ + + h1 ^= len; + h1 = fmix(h1); + + return h1; +} + + diff --git a/src/util/murmurhash3.h b/src/util/murmurhash3.h new file mode 100644 index 00000000..9174554b --- /dev/null +++ b/src/util/murmurhash3.h @@ -0,0 +1,10 @@ +/* This file is based on the public domain MurmurHash3 from Austin Appleby: + * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp + * + * We use only the 32 bit variant because the 2 produce different result while + * we need to produce the same result regardless of the architecture as + * clients can be both 64 or 32 bit at the same time. + */ + +uint32_t murmurhash3(const char *key, int len, uint32_t seed); + -- cgit