From 1beb793664dba184892b23dced4a3676fb94ff9f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 5 Dec 2011 16:42:46 +1030 Subject: lib/ccan/htable, strset: benchmarking tools. This lets us compare hash table vs. strset vs. the example implementation of critbit trees. cbspeed 100 runs, min-max(avg): #01: Initial insert: 236-245(237) #02: Initial lookup (match): 180-186(180) #03: Initial lookup (miss): 171-185(172) #04: Initial lookup (random): 441-457(444) #05: Initial delete all: 127-132(128) #06: Initial re-inserting: 219-225(220) #07: Deleting first half: 101-104(102) #08: Adding (a different) half: 158-162(159) #09: Lookup after half-change (match): 202-207(203) #10: Lookup after half-change (miss): 217-222(218) #11: Churn 1: 297-302(299) #12: Churn 2: 297-305(300) #13: Churn 3: 301-308(303) #14: Post-Churn lookup (match): 189-195(190) #15: Post-Churn lookup (miss): 189-193(190) #16: Post-Churn lookup (random): 499-513(503) speed 100 runs, min-max(avg): #01: Initial insert: 211-218(212) #02: Initial lookup (match): 161-166(162) #03: Initial lookup (miss): 157-162(158) #04: Initial lookup (random): 452-460(454) #05: Initial delete all: 126-135(127) #06: Initial re-inserting: 193-201(194) #07: Deleting first half: 99-107(99) #08: Adding (a different) half: 143-190(144) #09: Lookup after half-change (match): 183-195(184) #10: Lookup after half-change (miss): 197-203(198) #11: Churn 1: 271-278(274) #12: Churn 2: 280-287(282) #13: Churn 3: 277-285(279) #14: Post-Churn lookup (match): 171-175(171) #15: Post-Churn lookup (miss): 174-178(175) #16: Post-Churn lookup (random): 525-552(528) stringspeed 100 runs, min-max(avg): #01: Initial insert: 300-343(308) #02: Initial lookup (match): 98-136(99) #03: Initial lookup (miss): 73-102(75) #04: Initial lookup (random): 230-282(233) #05: Initial delete all: 66-102(69) #06: Initial re-inserting: 62-99(64) #07: Deleting first half: 43-52(43) #08: Adding (a different) half: 101-156(106) #09: Lookup after half-change (match): 114-156(120) #10: Lookup after half-change (miss): 94-103(95) #11: Churn 1: 98-105(99) #12: Churn 2: 96-104(98) #13: Churn 3: 174-184(176) #14: Post-Churn lookup (match): 93-112(94) #15: Post-Churn lookup (miss): 77-107(79) #16: Post-Churn lookup (random): 229-265(232) Signed-off-by: Rusty Russell (Imported from CCAN commit 5c559e7df1d31b4c0ddf26451fac972dc8a0c2c9) --- lib/ccan/htable/tools/Makefile | 15 ++- lib/ccan/htable/tools/stringspeed.c | 246 ++++++++++++++++++++++++++++++++++++ 2 files changed, 260 insertions(+), 1 deletion(-) create mode 100644 lib/ccan/htable/tools/stringspeed.c (limited to 'lib/ccan/htable') diff --git a/lib/ccan/htable/tools/Makefile b/lib/ccan/htable/tools/Makefile index 001e160b78..289d92b335 100644 --- a/lib/ccan/htable/tools/Makefile +++ b/lib/ccan/htable/tools/Makefile @@ -1,5 +1,18 @@ CFLAGS=-Wall -Werror -O3 -I../../.. +#CFLAGS=-Wall -Werror -g -I../../.. -speed: speed.o ../../hash.o +all: speed stringspeed + +speed: speed.o hash.o speed.o: speed.c ../htable.h ../htable.c + +hash.o: ../../hash/hash.c + $(CC) $(CFLAGS) -c -o $@ $< + +stringspeed: stringspeed.o hash.o ../../talloc.o ../../str_talloc.o ../../grab_file.o ../../str.o ../../time.o ../../noerr.o + +stringspeed.o: speed.c ../htable.h ../htable.c + +clean: + rm -f stringspeed speed diff --git a/lib/ccan/htable/tools/stringspeed.c b/lib/ccan/htable/tools/stringspeed.c new file mode 100644 index 0000000000..00ad2790cd --- /dev/null +++ b/lib/ccan/htable/tools/stringspeed.c @@ -0,0 +1,246 @@ +/* Simple speed tests for a hash of strings. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static size_t hashcount; + +static const char *strkey(const char *str) +{ + return str; +} + +static size_t hash_str(const char *key) +{ + hashcount++; + return hash(key, strlen(key), 0); +} + +static bool cmp(const char *obj, const char *key) +{ + return strcmp(obj, key) == 0; +} + +HTABLE_DEFINE_TYPE(char, strkey, hash_str, cmp, str); + +/* Nanoseconds per operation */ +static size_t normalize(const struct timeval *start, + const struct timeval *stop, + unsigned int num) +{ + struct timeval diff; + + timersub(stop, start, &diff); + + /* Floating point is more accurate here. */ + return (double)(diff.tv_sec * 1000000 + diff.tv_usec) + / num * 1000; +} + +int main(int argc, char *argv[]) +{ + size_t i, j, num; + struct timeval start, stop; + struct htable_str *ht; + char **words, **misswords; + + words = strsplit(NULL, grab_file(NULL, + argv[1] ? argv[1] : "/usr/share/dict/words", + NULL), "\n"); + ht = htable_str_new(); + num = talloc_array_length(words) - 1; + printf("%zu words\n", num); + + /* Append and prepend last char for miss testing. */ + misswords = talloc_array(words, char *, num); + for (i = 0; i < num; i++) { + char lastc; + if (strlen(words[i])) + lastc = words[i][strlen(words[i])-1]; + else + lastc = 'z'; + misswords[i] = talloc_asprintf(misswords, "%c%s%c%c", + lastc, words[i], lastc, lastc); + } + + printf("#01: Initial insert: "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i++) + htable_str_add(ht, words[i]); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("Bytes allocated: %zu\n", + sizeof(((struct htable *)ht)->table[0]) + << ((struct htable *)ht)->bits); + + printf("#02: Initial lookup (match): "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i++) + if (htable_str_get(ht, words[i]) != words[i]) + abort(); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("#03: Initial lookup (miss): "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i++) { + if (htable_str_get(ht, misswords[i])) + abort(); + } + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + /* Lookups in order are very cache-friendly for judy; try random */ + printf("#04: Initial lookup (random): "); + fflush(stdout); + start = time_now(); + for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num) + if (htable_str_get(ht, words[j]) != words[j]) + abort(); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + hashcount = 0; + printf("#05: Initial delete all: "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i++) + if (!htable_str_del(ht, words[i])) + abort(); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("#06: Initial re-inserting: "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i++) + htable_str_add(ht, words[i]); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + hashcount = 0; + printf("#07: Deleting first half: "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i+=2) + if (!htable_str_del(ht, words[i])) + abort(); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("#08: Adding (a different) half: "); + fflush(stdout); + + start = time_now(); + for (i = 0; i < num; i+=2) + htable_str_add(ht, misswords[i]); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("#09: Lookup after half-change (match): "); + fflush(stdout); + start = time_now(); + for (i = 1; i < num; i+=2) + if (htable_str_get(ht, words[i]) != words[i]) + abort(); + for (i = 0; i < num; i+=2) { + if (htable_str_get(ht, misswords[i]) != misswords[i]) + abort(); + } + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("#10: Lookup after half-change (miss): "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i+=2) + if (htable_str_get(ht, words[i])) + abort(); + for (i = 1; i < num; i+=2) { + if (htable_str_get(ht, misswords[i])) + abort(); + } + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + /* Hashtables with delete markers can fill with markers over time. + * so do some changes to see how it operates in long-term. */ + printf("#11: Churn 1: "); + start = time_now(); + for (j = 0; j < num; j+=2) { + if (!htable_str_del(ht, misswords[j])) + abort(); + if (!htable_str_add(ht, words[j])) + abort(); + } + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("#12: Churn 2: "); + start = time_now(); + for (j = 1; j < num; j+=2) { + if (!htable_str_del(ht, words[j])) + abort(); + if (!htable_str_add(ht, misswords[j])) + abort(); + } + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("#13: Churn 3: "); + start = time_now(); + for (j = 1; j < num; j+=2) { + if (!htable_str_del(ht, misswords[j])) + abort(); + if (!htable_str_add(ht, words[j])) + abort(); + } + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + /* Now it's back to normal... */ + printf("#14: Post-Churn lookup (match): "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i++) + if (htable_str_get(ht, words[i]) != words[i]) + abort(); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("#15: Post-Churn lookup (miss): "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i++) { + if (htable_str_get(ht, misswords[i])) + abort(); + } + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + /* Lookups in order are very cache-friendly for judy; try random */ + printf("#16: Post-Churn lookup (random): "); + fflush(stdout); + start = time_now(); + for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num) + if (htable_str_get(ht, words[j]) != words[j]) + abort(); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + return 0; +} -- cgit