diff options
Diffstat (limited to 'lib/ntdb')
36 files changed, 1096 insertions, 1662 deletions
diff --git a/lib/ntdb/check.c b/lib/ntdb/check.c index be27003a51..2790c68eaf 100644 --- a/lib/ntdb/check.c +++ b/lib/ntdb/check.c @@ -38,8 +38,10 @@ static bool append(struct ntdb_context *ntdb, return true; } -static enum NTDB_ERROR check_header(struct ntdb_context *ntdb, ntdb_off_t *recovery, - uint64_t *features, size_t *num_capabilities) +static enum NTDB_ERROR check_header(struct ntdb_context *ntdb, + ntdb_off_t *recovery, + uint64_t *features, + size_t *num_capabilities) { uint64_t hash_test; struct ntdb_header hdr; @@ -112,374 +114,227 @@ static enum NTDB_ERROR check_header(struct ntdb_context *ntdb, ntdb_off_t *recov return NTDB_SUCCESS; } -static enum NTDB_ERROR check_hash_tree(struct ntdb_context *ntdb, - ntdb_off_t off, unsigned int group_bits, - uint64_t hprefix, - unsigned hprefix_bits, - ntdb_off_t used[], - size_t num_used, - size_t *num_found, - enum NTDB_ERROR (*check)(NTDB_DATA, - NTDB_DATA, void *), - void *data); +static int off_cmp(const ntdb_off_t *a, const ntdb_off_t *b) +{ + /* Can overflow an int. */ + return *a > *b ? 1 + : *a < *b ? -1 + : 0; +} -static enum NTDB_ERROR check_hash_chain(struct ntdb_context *ntdb, - ntdb_off_t off, - uint64_t hash, - ntdb_off_t used[], - size_t num_used, - size_t *num_found, - enum NTDB_ERROR (*check)(NTDB_DATA, - NTDB_DATA, - void *), - void *data) +static enum NTDB_ERROR check_entry(struct ntdb_context *ntdb, + ntdb_off_t off_and_hash, + ntdb_len_t bucket, + ntdb_off_t used[], + size_t num_used, + size_t *num_found, + enum NTDB_ERROR (*check)(NTDB_DATA, + NTDB_DATA, + void *), + void *data) { - struct ntdb_used_record rec; enum NTDB_ERROR ecode; - - ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec)); - if (ecode != NTDB_SUCCESS) { - return ecode; + const struct ntdb_used_record *r; + const unsigned char *kptr; + ntdb_len_t klen, dlen; + uint32_t hash; + ntdb_off_t off = off_and_hash & NTDB_OFF_MASK; + ntdb_off_t *p; + + /* Empty bucket is fine. */ + if (!off_and_hash) { + return NTDB_SUCCESS; } - if (rec_magic(&rec) != NTDB_CHAIN_MAGIC) { + /* This can't point to a chain, we handled those at toplevel. */ + if (off_and_hash & (1ULL << NTDB_OFF_CHAIN_BIT)) { return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Bad hash chain magic %llu", - (long long)rec_magic(&rec)); + "ntdb_check: Invalid chain bit in offset " + " %llu", (long long)off_and_hash); } - if (rec_data_length(&rec) != sizeof(struct ntdb_chain)) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check:" - " Bad hash chain length %llu vs %zu", - (long long)rec_data_length(&rec), - sizeof(struct ntdb_chain)); - } - if (rec_key_length(&rec) != 0) { + p = asearch(&off, used, num_used, off_cmp); + if (!p) { return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Bad hash chain key length %llu", - (long long)rec_key_length(&rec)); - } - if (rec_hash(&rec) != 0) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Bad hash chain hash value %llu", - (long long)rec_hash(&rec)); + "ntdb_check: Invalid offset" + " %llu in hash", (long long)off); } + /* Mark it invalid. */ + *p ^= 1; + (*num_found)++; - off += sizeof(rec); - ecode = check_hash_tree(ntdb, off, 0, hash, 64, - used, num_used, num_found, check, data); - if (ecode != NTDB_SUCCESS) { - return ecode; + r = ntdb_access_read(ntdb, off, sizeof(*r), true); + if (NTDB_PTR_IS_ERR(r)) { + return NTDB_PTR_ERR(r); + } + klen = rec_key_length(r); + dlen = rec_data_length(r); + ntdb_access_release(ntdb, r); + + kptr = ntdb_access_read(ntdb, off + sizeof(*r), klen + dlen, false); + if (NTDB_PTR_IS_ERR(kptr)) { + return NTDB_PTR_ERR(kptr); + } + + hash = ntdb_hash(ntdb, kptr, klen); + + /* Are we in the right chain? */ + if (bits_from(hash, 0, ntdb->hash_bits) != bucket) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "ntdb_check: Bad bucket %u vs %llu", + bits_from(hash, 0, ntdb->hash_bits), + (long long)bucket); + /* Next 8 bits should be the same as top bits of bucket. */ + } else if (bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL) + != bits_from(off_and_hash, 64-NTDB_OFF_UPPER_STEAL, + NTDB_OFF_UPPER_STEAL)) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "ntdb_check: Bad hash bits %llu vs %llu", + (long long)off_and_hash, + (long long)hash); + } else if (check) { + NTDB_DATA k, d; + + k = ntdb_mkdata(kptr, klen); + d = ntdb_mkdata(kptr + klen, dlen); + ecode = check(k, d, data); + } else { + ecode = NTDB_SUCCESS; } + ntdb_access_release(ntdb, kptr); - off = ntdb_read_off(ntdb, off + offsetof(struct ntdb_chain, next)); - if (NTDB_OFF_IS_ERR(off)) { - return NTDB_OFF_TO_ERR(off); - } - if (off == 0) - return NTDB_SUCCESS; - (*num_found)++; - return check_hash_chain(ntdb, off, hash, used, num_used, num_found, - check, data); + return ecode; } -static enum NTDB_ERROR check_hash_record(struct ntdb_context *ntdb, +static enum NTDB_ERROR check_hash_chain(struct ntdb_context *ntdb, ntdb_off_t off, - uint64_t hprefix, - unsigned hprefix_bits, + ntdb_len_t bucket, ntdb_off_t used[], size_t num_used, size_t *num_found, enum NTDB_ERROR (*check)(NTDB_DATA, - NTDB_DATA, - void *), + NTDB_DATA, + void *), void *data) { struct ntdb_used_record rec; enum NTDB_ERROR ecode; + const ntdb_off_t *entries; + ntdb_len_t i, num; - if (hprefix_bits >= 64) - return check_hash_chain(ntdb, off, hprefix, used, num_used, - num_found, check, data); + /* This is a used entry. */ + (*num_found)++; ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec)); if (ecode != NTDB_SUCCESS) { return ecode; } - if (rec_magic(&rec) != NTDB_HTABLE_MAGIC) { + if (rec_magic(&rec) != NTDB_CHAIN_MAGIC) { return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Bad hash table magic %llu", + "ntdb_check: Bad hash chain magic %llu", (long long)rec_magic(&rec)); } - if (rec_data_length(&rec) - != sizeof(ntdb_off_t) << NTDB_SUBLEVEL_HASH_BITS) { + + if (rec_data_length(&rec) % sizeof(ntdb_off_t)) { return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check:" - " Bad hash table length %llu vs %llu", - (long long)rec_data_length(&rec), - (long long)sizeof(ntdb_off_t) - << NTDB_SUBLEVEL_HASH_BITS); + "ntdb_check: Bad hash chain data length %llu", + (long long)rec_data_length(&rec)); } + if (rec_key_length(&rec) != 0) { return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Bad hash table key length %llu", + "ntdb_check: Bad hash chain key length %llu", (long long)rec_key_length(&rec)); } - if (rec_hash(&rec) != 0) { - return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Bad hash table hash value %llu", - (long long)rec_hash(&rec)); - } off += sizeof(rec); - return check_hash_tree(ntdb, off, - NTDB_SUBLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS, - hprefix, hprefix_bits, - used, num_used, num_found, check, data); -} - -static int off_cmp(const ntdb_off_t *a, const ntdb_off_t *b) -{ - /* Can overflow an int. */ - return *a > *b ? 1 - : *a < *b ? -1 - : 0; -} + num = rec_data_length(&rec) / sizeof(ntdb_off_t); + entries = ntdb_access_read(ntdb, off, rec_data_length(&rec), true); + if (NTDB_PTR_IS_ERR(entries)) { + return NTDB_PTR_ERR(entries); + } -static uint64_t get_bits(uint64_t h, unsigned num, unsigned *used) -{ - *used += num; + /* Check each non-deleted entry in chain. */ + for (i = 0; i < num; i++) { + ecode = check_entry(ntdb, entries[i], bucket, + used, num_used, num_found, check, data); + if (ecode) { + break; + } + } - return (h >> (64 - *used)) & ((1U << num) - 1); + ntdb_access_release(ntdb, entries); + return ecode; } -static enum NTDB_ERROR check_hash_tree(struct ntdb_context *ntdb, - ntdb_off_t off, unsigned int group_bits, - uint64_t hprefix, - unsigned hprefix_bits, - ntdb_off_t used[], - size_t num_used, - size_t *num_found, - enum NTDB_ERROR (*check)(NTDB_DATA, - NTDB_DATA, void *), - void *data) +static enum NTDB_ERROR check_hash(struct ntdb_context *ntdb, + ntdb_off_t used[], + size_t num_used, + size_t num_other_used, + enum NTDB_ERROR (*check)(NTDB_DATA, + NTDB_DATA, + void *), + void *data) { - unsigned int g, b; - const ntdb_off_t *hash; - struct ntdb_used_record rec; enum NTDB_ERROR ecode; + struct ntdb_used_record rec; + const ntdb_off_t *entries; + ntdb_len_t i; + /* Free tables and capabilities also show up as used, as do we. */ + size_t num_found = num_other_used + 1; - hash = ntdb_access_read(ntdb, off, - sizeof(ntdb_off_t) - << (group_bits + NTDB_HASH_GROUP_BITS), - true); - if (NTDB_PTR_IS_ERR(hash)) { - return NTDB_PTR_ERR(hash); - } - - for (g = 0; g < (1 << group_bits); g++) { - const ntdb_off_t *group = hash + (g << NTDB_HASH_GROUP_BITS); - for (b = 0; b < (1 << NTDB_HASH_GROUP_BITS); b++) { - unsigned int bucket, i, used_bits; - uint64_t h; - ntdb_off_t *p; - if (group[b] == 0) - continue; - - off = group[b] & NTDB_OFF_MASK; - p = asearch(&off, used, num_used, off_cmp); - if (!p) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "ntdb_check: Invalid offset" - " %llu in hash", - (long long)off); - goto fail; - } - /* Mark it invalid. */ - *p ^= 1; - (*num_found)++; - - if (hprefix_bits == 64) { - /* Chained entries are unordered. */ - if (is_subhash(group[b])) { - ecode = NTDB_ERR_CORRUPT; - ntdb_logerr(ntdb, ecode, - NTDB_LOG_ERROR, - "ntdb_check: Invalid chain" - " entry subhash"); - goto fail; - } - h = hash_record(ntdb, off); - if (h != hprefix) { - ecode = NTDB_ERR_CORRUPT; - ntdb_logerr(ntdb, ecode, - NTDB_LOG_ERROR, - "check: bad hash chain" - " placement" - " 0x%llx vs 0x%llx", - (long long)h, - (long long)hprefix); - goto fail; - } - ecode = ntdb_read_convert(ntdb, off, &rec, - sizeof(rec)); - if (ecode != NTDB_SUCCESS) { - goto fail; - } - goto check; - } - - if (is_subhash(group[b])) { - uint64_t subprefix; - subprefix = (hprefix - << (group_bits + NTDB_HASH_GROUP_BITS)) - + g * (1 << NTDB_HASH_GROUP_BITS) + b; - - ecode = check_hash_record(ntdb, - group[b] & NTDB_OFF_MASK, - subprefix, - hprefix_bits - + group_bits - + NTDB_HASH_GROUP_BITS, - used, num_used, num_found, - check, data); - if (ecode != NTDB_SUCCESS) { - goto fail; - } - continue; - } - /* A normal entry */ - - /* Does it belong here at all? */ - h = hash_record(ntdb, off); - used_bits = 0; - if (get_bits(h, hprefix_bits, &used_bits) != hprefix - && hprefix_bits) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "check: bad hash placement" - " 0x%llx vs 0x%llx", - (long long)h, - (long long)hprefix); - goto fail; - } - - /* Does it belong in this group? */ - if (get_bits(h, group_bits, &used_bits) != g) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "check: bad group %llu" - " vs %u", - (long long)h, g); - goto fail; - } - - /* Are bucket bits correct? */ - bucket = group[b] & NTDB_OFF_HASH_GROUP_MASK; - if (get_bits(h, NTDB_HASH_GROUP_BITS, &used_bits) - != bucket) { - used_bits -= NTDB_HASH_GROUP_BITS; - ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "check: bad bucket %u vs %u", - (unsigned)get_bits(h, - NTDB_HASH_GROUP_BITS, - &used_bits), - bucket); - goto fail; - } + ecode = ntdb_read_convert(ntdb, NTDB_HASH_OFFSET, &rec, sizeof(rec)); + if (ecode != NTDB_SUCCESS) { + return ecode; + } - /* There must not be any zero entries between - * the bucket it belongs in and this one! */ - for (i = bucket; - i != b; - i = (i + 1) % (1 << NTDB_HASH_GROUP_BITS)) { - if (group[i] == 0) { - ecode = NTDB_ERR_CORRUPT; - ntdb_logerr(ntdb, ecode, - NTDB_LOG_ERROR, - "check: bad group placement" - " %u vs %u", - b, bucket); - goto fail; - } - } + if (rec_magic(&rec) != NTDB_HTABLE_MAGIC) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: Bad hash table magic %llu", + (long long)rec_magic(&rec)); + } - ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec)); - if (ecode != NTDB_SUCCESS) { - goto fail; - } + if (rec_data_length(&rec) != (sizeof(ntdb_off_t) << ntdb->hash_bits)) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: Bad hash table data length %llu", + (long long)rec_data_length(&rec)); + } - /* Bottom bits must match header. */ - if ((h & ((1 << 11)-1)) != rec_hash(&rec)) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "ntdb_check: Bad hash magic" - " at offset %llu" - " (0x%llx vs 0x%llx)", - (long long)off, - (long long)h, - (long long)rec_hash(&rec)); - goto fail; - } + if (rec_key_length(&rec) != 0) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: Bad hash table key length %llu", + (long long)rec_key_length(&rec)); + } - check: - if (check) { - NTDB_DATA k, d; - const unsigned char *kptr; - - kptr = ntdb_access_read(ntdb, - off + sizeof(rec), - rec_key_length(&rec) - + rec_data_length(&rec), - false); - if (NTDB_PTR_IS_ERR(kptr)) { - ecode = NTDB_PTR_ERR(kptr); - goto fail; - } + entries = ntdb_access_read(ntdb, NTDB_HASH_OFFSET + sizeof(rec), + rec_data_length(&rec), true); + if (NTDB_PTR_IS_ERR(entries)) { + return NTDB_PTR_ERR(entries); + } - k = ntdb_mkdata(kptr, rec_key_length(&rec)); - d = ntdb_mkdata(kptr + k.dsize, - rec_data_length(&rec)); - ecode = check(k, d, data); - ntdb_access_release(ntdb, kptr); - if (ecode != NTDB_SUCCESS) { - goto fail; - } - } + for (i = 0; i < (1 << ntdb->hash_bits); i++) { + ntdb_off_t off = entries[i] & NTDB_OFF_MASK; + if (entries[i] & (1ULL << NTDB_OFF_CHAIN_BIT)) { + ecode = check_hash_chain(ntdb, off, i, + used, num_used, &num_found, + check, data); + } else { + ecode = check_entry(ntdb, entries[i], i, + used, num_used, &num_found, + check, data); + } + if (ecode) { + break; } } - ntdb_access_release(ntdb, hash); - return NTDB_SUCCESS; - -fail: - ntdb_access_release(ntdb, hash); - return ecode; -} + ntdb_access_release(ntdb, entries); -static enum NTDB_ERROR check_hash(struct ntdb_context *ntdb, - ntdb_off_t used[], - size_t num_used, size_t num_other_used, - enum NTDB_ERROR (*check)(NTDB_DATA, NTDB_DATA, void *), - void *data) -{ - /* Free tables and capabilities also show up as used. */ - size_t num_found = num_other_used; - enum NTDB_ERROR ecode; - - ecode = check_hash_tree(ntdb, offsetof(struct ntdb_header, hashtable), - NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS, - 0, 0, used, num_used, &num_found, - check, data); - if (ecode == NTDB_SUCCESS) { - if (num_found != num_used) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, - "ntdb_check: Not all entries" - " are in hash"); - } + if (ecode == NTDB_SUCCESS && num_found != num_used) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, + "ntdb_check: Not all entries are in hash"); } return ecode; } @@ -547,8 +402,7 @@ static enum NTDB_ERROR check_free_table(struct ntdb_context *ntdb, if (rec_magic(&ft.hdr) != NTDB_FTABLE_MAGIC || rec_key_length(&ft.hdr) != 0 - || rec_data_length(&ft.hdr) != sizeof(ft) - sizeof(ft.hdr) - || rec_hash(&ft.hdr) != 0) { + || rec_data_length(&ft.hdr) != sizeof(ft) - sizeof(ft.hdr)) { return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR, "ntdb_check: Invalid header on free table"); } diff --git a/lib/ntdb/free.c b/lib/ntdb/free.c index 3e31937382..971436f5a3 100644 --- a/lib/ntdb/free.c +++ b/lib/ntdb/free.c @@ -19,7 +19,6 @@ #include <ccan/likely/likely.h> #include <ccan/ilog/ilog.h> #include <time.h> -#include <assert.h> #include <limits.h> static unsigned fls64(uint64_t val) @@ -646,8 +645,7 @@ static ntdb_off_t lock_and_alloc(struct ntdb_context *ntdb, ntdb_off_t bucket, size_t keylen, size_t datalen, bool want_extra, - unsigned magic, - unsigned hashlow) + unsigned magic) { ntdb_off_t off, b_off,best_off; struct ntdb_free_record best = { 0 }; @@ -741,7 +739,7 @@ static ntdb_off_t lock_and_alloc(struct ntdb_context *ntdb, /* We need to mark non-free before we drop lock, otherwise * coalesce() could try to merge it! */ ecode = set_header(ntdb, &rec, magic, keylen, datalen, - frec_len(&best) - leftover, hashlow); + frec_len(&best) - leftover); if (ecode != NTDB_SUCCESS) { goto unlock_err; } @@ -788,7 +786,7 @@ unlock_err: /* Get a free block from current free list, or 0 if none, -ve on error. */ static ntdb_off_t get_free(struct ntdb_context *ntdb, size_t keylen, size_t datalen, bool want_extra, - unsigned magic, unsigned hashlow) + unsigned magic) { ntdb_off_t off, ftable_off; ntdb_off_t start_b, b, ftable; @@ -811,7 +809,7 @@ static ntdb_off_t get_free(struct ntdb_context *ntdb, /* Try getting one from list. */ off = lock_and_alloc(ntdb, ftable_off, b, keylen, datalen, want_extra, - magic, hashlow); + magic); if (NTDB_OFF_IS_ERR(off)) return off; if (off != 0) { @@ -854,13 +852,11 @@ static ntdb_off_t get_free(struct ntdb_context *ntdb, enum NTDB_ERROR set_header(struct ntdb_context *ntdb, struct ntdb_used_record *rec, unsigned magic, uint64_t keylen, uint64_t datalen, - uint64_t actuallen, unsigned hashlow) + uint64_t actuallen) { uint64_t keybits = (fls64(keylen) + 1) / 2; - /* Use bottom bits of hash, so it's independent of hash table size. */ - rec->magic_and_meta = (hashlow & ((1 << 11)-1)) - | ((actuallen - (keylen + datalen)) << 11) + rec->magic_and_meta = ((actuallen - (keylen + datalen)) << 11) | (keybits << 43) | ((uint64_t)magic << 48); rec->key_and_data_len = (keylen | (datalen << (keybits*2))); @@ -958,7 +954,7 @@ static enum NTDB_ERROR ntdb_expand(struct ntdb_context *ntdb, ntdb_len_t size) /* This won't fail: it will expand the database if it has to. */ ntdb_off_t alloc(struct ntdb_context *ntdb, size_t keylen, size_t datalen, - uint64_t hash, unsigned magic, bool growing) + unsigned magic, bool growing) { ntdb_off_t off; @@ -967,7 +963,7 @@ ntdb_off_t alloc(struct ntdb_context *ntdb, size_t keylen, size_t datalen, for (;;) { enum NTDB_ERROR ecode; - off = get_free(ntdb, keylen, datalen, growing, magic, hash); + off = get_free(ntdb, keylen, datalen, growing, magic); if (likely(off != 0)) break; diff --git a/lib/ntdb/hash.c b/lib/ntdb/hash.c index e87705b123..69192a119b 100644 --- a/lib/ntdb/hash.c +++ b/lib/ntdb/hash.c @@ -17,68 +17,23 @@ */ #include "private.h" #include <ccan/hash/hash.h> -#include <assert.h> /* Default hash function. */ -uint64_t ntdb_jenkins_hash(const void *key, size_t length, uint64_t seed, +uint32_t ntdb_jenkins_hash(const void *key, size_t length, uint32_t seed, void *unused) { - uint64_t ret; - /* hash64_stable assumes lower bits are more important; they are a - * slightly better hash. We use the upper bits first, so swap them. */ - ret = hash64_stable((const unsigned char *)key, length, seed); - return (ret >> 32) | (ret << 32); + return hash_stable((const unsigned char *)key, length, seed); } -uint64_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len) +uint32_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len) { return ntdb->hash_fn(ptr, len, ntdb->hash_seed, ntdb->hash_data); } -uint64_t hash_record(struct ntdb_context *ntdb, ntdb_off_t off) -{ - const struct ntdb_used_record *r; - const void *key; - uint64_t klen, hash; - - r = ntdb_access_read(ntdb, off, sizeof(*r), true); - if (NTDB_PTR_IS_ERR(r)) { - /* FIXME */ - return 0; - } - - klen = rec_key_length(r); - ntdb_access_release(ntdb, r); - - key = ntdb_access_read(ntdb, off + sizeof(*r), klen, false); - if (NTDB_PTR_IS_ERR(key)) { - return 0; - } - - hash = ntdb_hash(ntdb, key, klen); - ntdb_access_release(ntdb, key); - return hash; -} - -/* Get bits from a value. */ -static uint32_t bits_from(uint64_t val, unsigned start, unsigned num) -{ - assert(num <= 32); - return (val >> start) & ((1U << num) - 1); -} - -/* We take bits from the top: that way we can lock whole sections of the hash - * by using lock ranges. */ -static uint32_t use_bits(struct hash_info *h, unsigned num) -{ - h->hash_used += num; - return bits_from(h->h, 64 - h->hash_used, num); -} - static ntdb_bool_err key_matches(struct ntdb_context *ntdb, - const struct ntdb_used_record *rec, - ntdb_off_t off, - const NTDB_DATA *key) + const struct ntdb_used_record *rec, + ntdb_off_t off, + const NTDB_DATA *key) { ntdb_bool_err ret = false; const char *rkey; @@ -102,792 +57,577 @@ static ntdb_bool_err key_matches(struct ntdb_context *ntdb, /* Does entry match? */ static ntdb_bool_err match(struct ntdb_context *ntdb, - struct hash_info *h, - const NTDB_DATA *key, - ntdb_off_t val, - struct ntdb_used_record *rec) + uint32_t hash, + const NTDB_DATA *key, + ntdb_off_t val, + struct ntdb_used_record *rec, + const ntdb_off_t **mapped) { ntdb_off_t off; enum NTDB_ERROR ecode; ntdb->stats.compares++; - /* Desired bucket must match. */ - if (h->home_bucket != (val & NTDB_OFF_HASH_GROUP_MASK)) { - ntdb->stats.compare_wrong_bucket++; - return false; - } /* Top bits of offset == next bits of hash. */ - if (bits_from(val, NTDB_OFF_HASH_EXTRA_BIT, NTDB_OFF_UPPER_STEAL_EXTRA) - != bits_from(h->h, 64 - h->hash_used - NTDB_OFF_UPPER_STEAL_EXTRA, - NTDB_OFF_UPPER_STEAL_EXTRA)) { + if (bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL) + != bits_from(val, 64-NTDB_OFF_UPPER_STEAL, NTDB_OFF_UPPER_STEAL)) { ntdb->stats.compare_wrong_offsetbits++; return false; } + /* Unmap before we try to read actual record, which may cause expand */ + if (mapped) { + ntdb_access_release(ntdb, *mapped); + *mapped = NULL; + } + off = val & NTDB_OFF_MASK; ecode = ntdb_read_convert(ntdb, off, rec, sizeof(*rec)); if (ecode != NTDB_SUCCESS) { return (ntdb_bool_err)ecode; } - if ((h->h & ((1 << 11)-1)) != rec_hash(rec)) { - ntdb->stats.compare_wrong_rechash++; - return false; - } - return key_matches(ntdb, rec, off, key); } -static ntdb_off_t hbucket_off(ntdb_off_t group_start, unsigned bucket) -{ - return group_start - + (bucket % (1 << NTDB_HASH_GROUP_BITS)) * sizeof(ntdb_off_t); -} - -bool is_subhash(ntdb_off_t val) +static bool is_chain(ntdb_off_t val) { - return (val >> NTDB_OFF_UPPER_STEAL_SUBHASH_BIT) & 1; + return val & (1ULL << NTDB_OFF_CHAIN_BIT); } -/* FIXME: Guess the depth, don't over-lock! */ -static ntdb_off_t hlock_range(ntdb_off_t group, ntdb_off_t *size) +static ntdb_off_t hbucket_off(ntdb_off_t base, ntdb_len_t idx) { - *size = 1ULL << (64 - (NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS)); - return group << (64 - (NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS)); -} - -static ntdb_off_t COLD find_in_chain(struct ntdb_context *ntdb, - NTDB_DATA key, - ntdb_off_t chain, - struct hash_info *h, - struct ntdb_used_record *rec, - struct traverse_info *tinfo) -{ - ntdb_off_t off, next; - enum NTDB_ERROR ecode; - - /* In case nothing is free, we set these to zero. */ - h->home_bucket = h->found_bucket = 0; - - for (off = chain; off; off = next) { - unsigned int i; - - h->group_start = off; - ecode = ntdb_read_convert(ntdb, off, h->group, sizeof(h->group)); - if (ecode != NTDB_SUCCESS) { - return NTDB_ERR_TO_OFF(ecode); - } - - for (i = 0; i < (1 << NTDB_HASH_GROUP_BITS); i++) { - ntdb_off_t recoff; - if (!h->group[i]) { - /* Remember this empty bucket. */ - h->home_bucket = h->found_bucket = i; - continue; - } - - /* We can insert extra bits via add_to_hash - * empty bucket logic. */ - recoff = h->group[i] & NTDB_OFF_MASK; - ecode = ntdb_read_convert(ntdb, recoff, rec, - sizeof(*rec)); - if (ecode != NTDB_SUCCESS) { - return NTDB_ERR_TO_OFF(ecode); - } - - ecode = NTDB_OFF_TO_ERR(key_matches(ntdb, rec, recoff, - &key)); - if (ecode < 0) { - return NTDB_ERR_TO_OFF(ecode); - } - if (ecode == (enum NTDB_ERROR)1) { - h->home_bucket = h->found_bucket = i; - - if (tinfo) { - tinfo->levels[tinfo->num_levels] - .hashtable = off; - tinfo->levels[tinfo->num_levels] - .total_buckets - = 1 << NTDB_HASH_GROUP_BITS; - tinfo->levels[tinfo->num_levels].entry - = i; - tinfo->num_levels++; - } - return recoff; - } - } - next = ntdb_read_off(ntdb, off - + offsetof(struct ntdb_chain, next)); - if (NTDB_OFF_IS_ERR(next)) { - return next; - } - if (next) - next += sizeof(struct ntdb_used_record); - } - return 0; + return base + sizeof(struct ntdb_used_record) + + idx * sizeof(ntdb_off_t); } /* This is the core routine which searches the hashtable for an entry. * On error, no locks are held and -ve is returned. - * Otherwise, hinfo is filled in (and the optional tinfo). + * Otherwise, hinfo is filled in. * If not found, the return value is 0. * If found, the return value is the offset, and *rec is the record. */ ntdb_off_t find_and_lock(struct ntdb_context *ntdb, NTDB_DATA key, int ltype, struct hash_info *h, - struct ntdb_used_record *rec, - struct traverse_info *tinfo) + struct ntdb_used_record *rec) { - uint32_t i, group; - ntdb_off_t hashtable; + ntdb_off_t off, val; + const ntdb_off_t *arr = NULL; + ntdb_len_t i; + bool found_empty; enum NTDB_ERROR ecode; + struct ntdb_used_record chdr; + ntdb_bool_err berr; h->h = ntdb_hash(ntdb, key.dptr, key.dsize); - h->hash_used = 0; - group = use_bits(h, NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS); - h->home_bucket = use_bits(h, NTDB_HASH_GROUP_BITS); - h->hlock_start = hlock_range(group, &h->hlock_range); - ecode = ntdb_lock_hashes(ntdb, h->hlock_start, h->hlock_range, ltype, - NTDB_LOCK_WAIT); + h->table = NTDB_HASH_OFFSET; + h->table_size = 1 << ntdb->hash_bits; + h->bucket = bits_from(h->h, 0, ntdb->hash_bits); + h->old_val = 0; + + ecode = ntdb_lock_hash(ntdb, h->bucket, ltype); if (ecode != NTDB_SUCCESS) { return NTDB_ERR_TO_OFF(ecode); } - hashtable = offsetof(struct ntdb_header, hashtable); - if (tinfo) { - tinfo->toplevel_group = group; - tinfo->num_levels = 1; - tinfo->levels[0].entry = 0; - tinfo->levels[0].hashtable = hashtable - + (group << NTDB_HASH_GROUP_BITS) * sizeof(ntdb_off_t); - tinfo->levels[0].total_buckets = 1 << NTDB_HASH_GROUP_BITS; + off = hbucket_off(h->table, h->bucket); + val = ntdb_read_off(ntdb, off); + if (NTDB_OFF_IS_ERR(val)) { + ecode = NTDB_OFF_TO_ERR(val); + goto fail; } - while (h->hash_used <= 64) { - /* Read in the hash group. */ - h->group_start = hashtable - + group * (sizeof(ntdb_off_t) << NTDB_HASH_GROUP_BITS); - - ecode = ntdb_read_convert(ntdb, h->group_start, &h->group, - sizeof(h->group)); - if (ecode != NTDB_SUCCESS) { - goto fail; - } - - /* Pointer to another hash table? Go down... */ - if (is_subhash(h->group[h->home_bucket])) { - hashtable = (h->group[h->home_bucket] & NTDB_OFF_MASK) - + sizeof(struct ntdb_used_record); - if (tinfo) { - /* When we come back, use *next* bucket */ - tinfo->levels[tinfo->num_levels-1].entry - += h->home_bucket + 1; + /* Directly in hash table? */ + if (!is_chain(val)) { + if (val) { + berr = match(ntdb, h->h, &key, val, rec, NULL); + if (berr < 0) { + ecode = NTDB_OFF_TO_ERR(berr); + goto fail; } - group = use_bits(h, NTDB_SUBLEVEL_HASH_BITS - - NTDB_HASH_GROUP_BITS); - h->home_bucket = use_bits(h, NTDB_HASH_GROUP_BITS); - if (tinfo) { - tinfo->levels[tinfo->num_levels].hashtable - = hashtable; - tinfo->levels[tinfo->num_levels].total_buckets - = 1 << NTDB_SUBLEVEL_HASH_BITS; - tinfo->levels[tinfo->num_levels].entry - = group << NTDB_HASH_GROUP_BITS; - tinfo->num_levels++; + if (berr) { + return val & NTDB_OFF_MASK; } - continue; + /* If you want to insert here, make a chain. */ + h->old_val = val; } + return 0; + } - /* It's in this group: search (until 0 or all searched) */ - for (i = 0, h->found_bucket = h->home_bucket; - i < (1 << NTDB_HASH_GROUP_BITS); - i++, h->found_bucket = ((h->found_bucket+1) - % (1 << NTDB_HASH_GROUP_BITS))) { - ntdb_bool_err berr; - if (is_subhash(h->group[h->found_bucket])) - continue; + /* Nope? Iterate through chain. */ + h->table = val & NTDB_OFF_MASK; - if (!h->group[h->found_bucket]) - break; + ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr)); + if (ecode != NTDB_SUCCESS) { + goto fail; + } + + if (rec_magic(&chdr) != NTDB_CHAIN_MAGIC) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "find_and_lock:" + " corrupt record %#x at %llu", + rec_magic(&chdr), (long long)off); + goto fail; + } - berr = match(ntdb, h, &key, h->group[h->found_bucket], - rec); + h->table_size = rec_data_length(&chdr) / sizeof(ntdb_off_t); + + found_empty = false; + for (i = 0; i < h->table_size; i++) { + /* Careful! match has to unmap this if we access a + * record (may cause mmap of database to move. */ + if (!arr) { + arr = ntdb_access_read(ntdb, hbucket_off(h->table, 0), + rec_data_length(&chdr), true); + if (NTDB_PTR_IS_ERR(arr)) { + ecode = NTDB_PTR_ERR(arr); + goto fail; + } + } + + val = arr[i]; + if (val == 0) { + if (!found_empty) { + h->bucket = i; + found_empty = true; + } + } else { + berr = match(ntdb, h->h, &key, val, rec, &arr); if (berr < 0) { ecode = NTDB_OFF_TO_ERR(berr); + if (arr) { + ntdb_access_release(ntdb, arr); + } goto fail; } if (berr) { - if (tinfo) { - tinfo->levels[tinfo->num_levels-1].entry - += h->found_bucket; + /* We found it! */ + h->bucket = i; + off = val & NTDB_OFF_MASK; + if (arr) { + ntdb_access_release(ntdb, arr); } - return h->group[h->found_bucket] & NTDB_OFF_MASK; + return off; } } - /* Didn't find it: h indicates where it would go. */ - return 0; + } + if (!found_empty) { + /* Set to any non-zero value */ + h->old_val = 1; + h->bucket = i; } - return find_in_chain(ntdb, key, hashtable, h, rec, tinfo); + if (arr) { + ntdb_access_release(ntdb, arr); + } + return 0; fail: - ntdb_unlock_hashes(ntdb, h->hlock_start, h->hlock_range, ltype); + ntdb_unlock_hash(ntdb, h->bucket, ltype); return NTDB_ERR_TO_OFF(ecode); } -/* I wrote a simple test, expanding a hash to 2GB, for the following - * cases: - * 1) Expanding all the buckets at once, - * 2) Expanding the bucket we wanted to place the new entry into. - * 3) Expanding the most-populated bucket, - * - * I measured the worst/average/best density during this process. - * 1) 3%/16%/30% - * 2) 4%/20%/38% - * 3) 6%/22%/41% - * - * So we figure out the busiest bucket for the moment. - */ -static unsigned fullest_bucket(struct ntdb_context *ntdb, - const ntdb_off_t *group, - unsigned new_bucket) -{ - unsigned counts[1 << NTDB_HASH_GROUP_BITS] = { 0 }; - unsigned int i, best_bucket; - - /* Count the new entry. */ - counts[new_bucket]++; - best_bucket = new_bucket; - - for (i = 0; i < (1 << NTDB_HASH_GROUP_BITS); i++) { - unsigned this_bucket; - - if (is_subhash(group[i])) - continue; - this_bucket = group[i] & NTDB_OFF_HASH_GROUP_MASK; - if (++counts[this_bucket] > counts[best_bucket]) - best_bucket = this_bucket; - } - - return best_bucket; -} - -static bool put_into_group(ntdb_off_t *group, - unsigned bucket, ntdb_off_t encoded) +static ntdb_off_t encode_offset(const struct ntdb_context *ntdb, + ntdb_off_t new_off, uint32_t hash) { - unsigned int i; + ntdb_off_t extra; - for (i = 0; i < (1 << NTDB_HASH_GROUP_BITS); i++) { - unsigned b = (bucket + i) % (1 << NTDB_HASH_GROUP_BITS); + assert((new_off & (1ULL << NTDB_OFF_CHAIN_BIT)) == 0); + assert((new_off >> (64 - NTDB_OFF_UPPER_STEAL)) == 0); + /* We pack extra hash bits into the upper bits of the offset. */ + extra = bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL); + extra <<= (64 - NTDB_OFF_UPPER_STEAL); - if (group[b] == 0) { - group[b] = encoded; - return true; - } - } - return false; + return new_off | extra; } -static void force_into_group(ntdb_off_t *group, - unsigned bucket, ntdb_off_t encoded) +/* Simply overwrite the hash entry we found before. */ +enum NTDB_ERROR replace_in_hash(struct ntdb_context *ntdb, + const struct hash_info *h, + ntdb_off_t new_off) { - if (!put_into_group(group, bucket, encoded)) - abort(); + return ntdb_write_off(ntdb, hbucket_off(h->table, h->bucket), + encode_offset(ntdb, new_off, h->h)); } -static ntdb_off_t encode_offset(ntdb_off_t new_off, struct hash_info *h) +enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb, + const struct hash_info *h) { - return h->home_bucket - | new_off - | ((uint64_t)bits_from(h->h, - 64 - h->hash_used - NTDB_OFF_UPPER_STEAL_EXTRA, - NTDB_OFF_UPPER_STEAL_EXTRA) - << NTDB_OFF_HASH_EXTRA_BIT); + return ntdb_write_off(ntdb, hbucket_off(h->table, h->bucket), 0); } -/* Simply overwrite the hash entry we found before. */ -enum NTDB_ERROR replace_in_hash(struct ntdb_context *ntdb, - struct hash_info *h, - ntdb_off_t new_off) -{ - return ntdb_write_off(ntdb, hbucket_off(h->group_start, h->found_bucket), - encode_offset(new_off, h)); -} -/* We slot in anywhere that's empty in the chain. */ -static enum NTDB_ERROR COLD add_to_chain(struct ntdb_context *ntdb, - ntdb_off_t subhash, - ntdb_off_t new_off) +enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb, + const struct hash_info *h, + ntdb_off_t new_off) { - ntdb_off_t entry; enum NTDB_ERROR ecode; + ntdb_off_t chain; + struct ntdb_used_record chdr; + const ntdb_off_t *old; + ntdb_off_t *new; - entry = ntdb_find_zero_off(ntdb, subhash, 1<<NTDB_HASH_GROUP_BITS); - if (NTDB_OFF_IS_ERR(entry)) { - return NTDB_OFF_TO_ERR(entry); + /* We hit an empty bucket during search? That's where it goes. */ + if (!h->old_val) { + return replace_in_hash(ntdb, h, new_off); } - if (entry == 1 << NTDB_HASH_GROUP_BITS) { - ntdb_off_t next; + /* Full at top-level? Create a 2-element chain. */ + if (h->table == NTDB_HASH_OFFSET) { + ntdb_off_t pair[2]; - next = ntdb_read_off(ntdb, subhash - + offsetof(struct ntdb_chain, next)); - if (NTDB_OFF_IS_ERR(next)) { - return NTDB_OFF_TO_ERR(next); - } + /* One element is old value, the other is the new value. */ + pair[0] = h->old_val; + pair[1] = encode_offset(ntdb, new_off, h->h); - if (!next) { - next = alloc(ntdb, 0, sizeof(struct ntdb_chain), 0, - NTDB_CHAIN_MAGIC, false); - if (NTDB_OFF_IS_ERR(next)) - return NTDB_OFF_TO_ERR(next); - ecode = zero_out(ntdb, - next+sizeof(struct ntdb_used_record), - sizeof(struct ntdb_chain)); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - ecode = ntdb_write_off(ntdb, subhash - + offsetof(struct ntdb_chain, - next), - next); - if (ecode != NTDB_SUCCESS) { - return ecode; - } + chain = alloc(ntdb, 0, sizeof(pair), NTDB_CHAIN_MAGIC, true); + if (NTDB_OFF_IS_ERR(chain)) { + return NTDB_OFF_TO_ERR(chain); } - return add_to_chain(ntdb, next, new_off); + ecode = ntdb_write_convert(ntdb, + chain + + sizeof(struct ntdb_used_record), + pair, sizeof(pair)); + if (ecode == NTDB_SUCCESS) { + ecode = ntdb_write_off(ntdb, + hbucket_off(h->table, h->bucket), + chain + | (1ULL << NTDB_OFF_CHAIN_BIT)); + } + return ecode; } - return ntdb_write_off(ntdb, subhash + entry * sizeof(ntdb_off_t), - new_off); -} - -/* Add into a newly created subhash. */ -static enum NTDB_ERROR add_to_subhash(struct ntdb_context *ntdb, ntdb_off_t subhash, - unsigned hash_used, ntdb_off_t val) -{ - ntdb_off_t off = (val & NTDB_OFF_MASK), *group; - struct hash_info h; - unsigned int gnum; - - h.hash_used = hash_used; - - if (hash_used + NTDB_SUBLEVEL_HASH_BITS > 64) - return add_to_chain(ntdb, subhash, off); - - h.h = hash_record(ntdb, off); - gnum = use_bits(&h, NTDB_SUBLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS); - h.group_start = subhash - + gnum * (sizeof(ntdb_off_t) << NTDB_HASH_GROUP_BITS); - h.home_bucket = use_bits(&h, NTDB_HASH_GROUP_BITS); - - group = ntdb_access_write(ntdb, h.group_start, - sizeof(*group) << NTDB_HASH_GROUP_BITS, true); - if (NTDB_PTR_IS_ERR(group)) { - return NTDB_PTR_ERR(group); + /* Full bucket. Expand. */ + ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr)); + if (ecode != NTDB_SUCCESS) { + return ecode; } - force_into_group(group, h.home_bucket, encode_offset(off, &h)); - return ntdb_access_commit(ntdb, group); -} -static enum NTDB_ERROR expand_group(struct ntdb_context *ntdb, struct hash_info *h) -{ - unsigned bucket, num_vals, i, magic; - size_t subsize; - ntdb_off_t subhash; - ntdb_off_t vals[1 << NTDB_HASH_GROUP_BITS]; - enum NTDB_ERROR ecode; + if (rec_extra_padding(&chdr) >= sizeof(new_off)) { + /* Expand in place. */ + uint64_t dlen = rec_data_length(&chdr); - /* Attach new empty subhash under fullest bucket. */ - bucket = fullest_bucket(ntdb, h->group, h->home_bucket); + ecode = set_header(ntdb, &chdr, NTDB_CHAIN_MAGIC, 0, + dlen + sizeof(new_off), + dlen + rec_extra_padding(&chdr)); - if (h->hash_used == 64) { - ntdb->stats.alloc_chain++; - subsize = sizeof(struct ntdb_chain); - magic = NTDB_CHAIN_MAGIC; - } else { - ntdb->stats.alloc_subhash++; - subsize = (sizeof(ntdb_off_t) << NTDB_SUBLEVEL_HASH_BITS); - magic = NTDB_HTABLE_MAGIC; + if (ecode != NTDB_SUCCESS) { + return ecode; + } + /* find_and_lock set up h to point to last bucket. */ + ecode = replace_in_hash(ntdb, h, new_off); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + ecode = ntdb_write_convert(ntdb, h->table, &chdr, sizeof(chdr)); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + /* For futureproofing, we always make the first byte of padding + * a zero. */ + if (rec_extra_padding(&chdr)) { + ecode = ntdb->io->twrite(ntdb, h->table + sizeof(chdr) + + dlen + sizeof(new_off), + "", 1); + } + return ecode; } - subhash = alloc(ntdb, 0, subsize, 0, magic, false); - if (NTDB_OFF_IS_ERR(subhash)) { - return NTDB_OFF_TO_ERR(subhash); + /* We need to reallocate the chain. */ + chain = alloc(ntdb, 0, (h->table_size + 1) * sizeof(ntdb_off_t), + NTDB_CHAIN_MAGIC, true); + if (NTDB_OFF_IS_ERR(chain)) { + return NTDB_OFF_TO_ERR(chain); } - ecode = zero_out(ntdb, subhash + sizeof(struct ntdb_used_record), - subsize); - if (ecode != NTDB_SUCCESS) { - return ecode; + /* Map both and copy across old buckets. */ + old = ntdb_access_read(ntdb, hbucket_off(h->table, 0), + h->table_size*sizeof(ntdb_off_t), true); + if (NTDB_PTR_IS_ERR(old)) { + return NTDB_PTR_ERR(old); } - - /* Remove any which are destined for bucket or are in wrong place. */ - num_vals = 0; - for (i = 0; i < (1 << NTDB_HASH_GROUP_BITS); i++) { - unsigned home_bucket = h->group[i] & NTDB_OFF_HASH_GROUP_MASK; - if (!h->group[i] || is_subhash(h->group[i])) - continue; - if (home_bucket == bucket || home_bucket != i) { - vals[num_vals++] = h->group[i]; - h->group[i] = 0; - } + new = ntdb_access_write(ntdb, hbucket_off(chain, 0), + (h->table_size + 1)*sizeof(ntdb_off_t), true); + if (NTDB_PTR_IS_ERR(new)) { + ntdb_access_release(ntdb, old); + return NTDB_PTR_ERR(new); } - /* FIXME: This assert is valid, but we do this during unit test :( */ - /* assert(num_vals); */ - - /* Overwrite expanded bucket with subhash pointer. */ - h->group[bucket] = subhash | (1ULL << NTDB_OFF_UPPER_STEAL_SUBHASH_BIT); - /* Point to actual contents of record. */ - subhash += sizeof(struct ntdb_used_record); + memcpy(new, old, h->bucket * sizeof(ntdb_off_t)); + new[h->bucket] = encode_offset(ntdb, new_off, h->h); + ntdb_access_release(ntdb, old); - /* Put values back. */ - for (i = 0; i < num_vals; i++) { - unsigned this_bucket = vals[i] & NTDB_OFF_HASH_GROUP_MASK; - - if (this_bucket == bucket) { - ecode = add_to_subhash(ntdb, subhash, h->hash_used, - vals[i]); - if (ecode != NTDB_SUCCESS) - return ecode; - } else { - /* There should be room to put this back. */ - force_into_group(h->group, this_bucket, vals[i]); - } + ecode = ntdb_access_commit(ntdb, new); + if (ecode != NTDB_SUCCESS) { + return ecode; } - return NTDB_SUCCESS; + + /* Free the old chain. */ + ecode = add_free_record(ntdb, h->table, + sizeof(struct ntdb_used_record) + + rec_data_length(&chdr) + + rec_extra_padding(&chdr), + NTDB_LOCK_WAIT, true); + + /* Replace top-level to point to new chain */ + return ntdb_write_off(ntdb, + hbucket_off(NTDB_HASH_OFFSET, + bits_from(h->h, 0, ntdb->hash_bits)), + chain | (1ULL << NTDB_OFF_CHAIN_BIT)); } -enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb, struct hash_info *h) +/* Traverse support: returns offset of record, or 0 or -ve error. */ +static ntdb_off_t iterate_chain(struct ntdb_context *ntdb, + ntdb_off_t val, + struct hash_info *h) { - unsigned int i, num_movers = 0; - ntdb_off_t movers[1 << NTDB_HASH_GROUP_BITS]; + ntdb_off_t i; + enum NTDB_ERROR ecode; + struct ntdb_used_record chdr; - h->group[h->found_bucket] = 0; - for (i = 1; i < (1 << NTDB_HASH_GROUP_BITS); i++) { - unsigned this_bucket; + /* First load up chain header. */ + h->table = val & NTDB_OFF_MASK; + ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr)); + if (ecode != NTDB_SUCCESS) { + return ecode; + } - this_bucket = (h->found_bucket+i) % (1 << NTDB_HASH_GROUP_BITS); - /* Empty bucket? We're done. */ - if (!h->group[this_bucket]) - break; + if (rec_magic(&chdr) != NTDB_CHAIN_MAGIC) { + return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "get_table:" + " corrupt record %#x at %llu", + rec_magic(&chdr), + (long long)h->table); + } - /* Ignore subhashes. */ - if (is_subhash(h->group[this_bucket])) - continue; + /* Chain length is implied by data length. */ + h->table_size = rec_data_length(&chdr) / sizeof(ntdb_off_t); - /* If this one is not happy where it is, we'll move it. */ - if ((h->group[this_bucket] & NTDB_OFF_HASH_GROUP_MASK) - != this_bucket) { - movers[num_movers++] = h->group[this_bucket]; - h->group[this_bucket] = 0; - } + i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0), h->bucket, + h->table_size); + if (NTDB_OFF_IS_ERR(i)) { + return i; } - /* Put back the ones we erased. */ - for (i = 0; i < num_movers; i++) { - force_into_group(h->group, movers[i] & NTDB_OFF_HASH_GROUP_MASK, - movers[i]); + if (i != h->table_size) { + /* Return to next bucket. */ + h->bucket = i + 1; + val = ntdb_read_off(ntdb, hbucket_off(h->table, i)); + if (NTDB_OFF_IS_ERR(val)) { + return val; + } + return val & NTDB_OFF_MASK; } - /* Now we write back the hash group */ - return ntdb_write_convert(ntdb, h->group_start, - h->group, sizeof(h->group)); + /* Go back up to hash table. */ + h->table = NTDB_HASH_OFFSET; + h->table_size = 1 << ntdb->hash_bits; + h->bucket = bits_from(h->h, 0, ntdb->hash_bits) + 1; + return 0; } -enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb, struct hash_info *h, - ntdb_off_t new_off) +/* Keeps hash locked unless returns 0 or error. */ +static ntdb_off_t lock_and_iterate_hash(struct ntdb_context *ntdb, + struct hash_info *h) { + ntdb_off_t val, i; enum NTDB_ERROR ecode; - /* We hit an empty bucket during search? That's where it goes. */ - if (!h->group[h->found_bucket]) { - h->group[h->found_bucket] = encode_offset(new_off, h); - /* Write back the modified group. */ - return ntdb_write_convert(ntdb, h->group_start, - h->group, sizeof(h->group)); - } - - if (h->hash_used > 64) - return add_to_chain(ntdb, h->group_start, new_off); - - /* We're full. Expand. */ - ecode = expand_group(ntdb, h); - if (ecode != NTDB_SUCCESS) { - return ecode; - } + if (h->table != NTDB_HASH_OFFSET) { + /* We're in a chain. */ + i = bits_from(h->h, 0, ntdb->hash_bits); + ecode = ntdb_lock_hash(ntdb, i, F_RDLCK); + if (ecode != NTDB_SUCCESS) { + return NTDB_ERR_TO_OFF(ecode); + } - if (is_subhash(h->group[h->home_bucket])) { - /* We were expanded! */ - ntdb_off_t hashtable; - unsigned int gnum; + /* We dropped lock, bucket might have moved! */ + val = ntdb_read_off(ntdb, hbucket_off(NTDB_HASH_OFFSET, i)); + if (NTDB_OFF_IS_ERR(val)) { + goto unlock; + } - /* Write back the modified group. */ - ecode = ntdb_write_convert(ntdb, h->group_start, h->group, - sizeof(h->group)); - if (ecode != NTDB_SUCCESS) { - return ecode; + /* We don't remove chains: there should still be one there! */ + if (!val || !is_chain(val)) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "iterate_hash:" + " vanished hchain %llu at %llu", + (long long)val, + (long long)i); + val = NTDB_ERR_TO_OFF(ecode); + goto unlock; } - /* Move hashinfo down a level. */ - hashtable = (h->group[h->home_bucket] & NTDB_OFF_MASK) - + sizeof(struct ntdb_used_record); - gnum = use_bits(h,NTDB_SUBLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS); - h->home_bucket = use_bits(h, NTDB_HASH_GROUP_BITS); - h->group_start = hashtable - + gnum * (sizeof(ntdb_off_t) << NTDB_HASH_GROUP_BITS); - ecode = ntdb_read_convert(ntdb, h->group_start, &h->group, - sizeof(h->group)); - if (ecode != NTDB_SUCCESS) { - return ecode; + /* Find next bucket in the chain. */ + val = iterate_chain(ntdb, val, h); + if (NTDB_OFF_IS_ERR(val)) { + goto unlock; } - } + if (val != 0) { + return val; + } + ntdb_unlock_hash(ntdb, i, F_RDLCK); - /* Expanding the group must have made room if it didn't choose this - * bucket. */ - if (put_into_group(h->group, h->home_bucket, encode_offset(new_off,h))){ - return ntdb_write_convert(ntdb, h->group_start, - h->group, sizeof(h->group)); + /* OK, we've reset h back to top level. */ } - /* This can happen if all hashes in group (and us) dropped into same - * group in subhash. */ - return add_to_hash(ntdb, h, new_off); -} - -/* Traverse support: returns offset of record, or 0 or -ve error. */ -static ntdb_off_t iterate_hash(struct ntdb_context *ntdb, - struct traverse_info *tinfo) -{ - ntdb_off_t off, val, i; - struct traverse_level *tlevel; - - tlevel = &tinfo->levels[tinfo->num_levels-1]; - -again: - for (i = ntdb_find_nonzero_off(ntdb, tlevel->hashtable, - tlevel->entry, tlevel->total_buckets); - i != tlevel->total_buckets; - i = ntdb_find_nonzero_off(ntdb, tlevel->hashtable, - i+1, tlevel->total_buckets)) { - if (NTDB_OFF_IS_ERR(i)) { - return i; + /* We do this unlocked, then re-check. */ + for (i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0), + h->bucket, h->table_size); + i != h->table_size; + i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0), + i+1, h->table_size)) { + ecode = ntdb_lock_hash(ntdb, i, F_RDLCK); + if (ecode != NTDB_SUCCESS) { + return NTDB_ERR_TO_OFF(ecode); } - val = ntdb_read_off(ntdb, tlevel->hashtable+sizeof(ntdb_off_t)*i); + val = ntdb_read_off(ntdb, hbucket_off(h->table, i)); if (NTDB_OFF_IS_ERR(val)) { - return val; + goto unlock; } - off = val & NTDB_OFF_MASK; - - /* This makes the delete-all-in-traverse case work - * (and simplifies our logic a little). */ - if (off == tinfo->prev) + /* Lost race, and it's empty? */ + if (!val) { + ntdb->stats.traverse_val_vanished++; + ntdb_unlock_hash(ntdb, i, F_RDLCK); continue; + } - tlevel->entry = i; - - if (!is_subhash(val)) { - /* Found one. */ - tinfo->prev = off; - return off; + if (!is_chain(val)) { + /* So caller knows what lock to free. */ + h->h = i; + /* Return to next bucket. */ + h->bucket = i + 1; + val &= NTDB_OFF_MASK; + return val; } - /* When we come back, we want the next one */ - tlevel->entry++; - tinfo->num_levels++; - tlevel++; - tlevel->hashtable = off + sizeof(struct ntdb_used_record); - tlevel->entry = 0; - /* Next level is a chain? */ - if (unlikely(tinfo->num_levels == NTDB_MAX_LEVELS + 1)) - tlevel->total_buckets = (1 << NTDB_HASH_GROUP_BITS); - else - tlevel->total_buckets = (1 << NTDB_SUBLEVEL_HASH_BITS); - goto again; - } - - /* Nothing there? */ - if (tinfo->num_levels == 1) - return 0; + /* Start at beginning of chain */ + h->bucket = 0; + h->h = i; - /* Handle chained entries. */ - if (unlikely(tinfo->num_levels == NTDB_MAX_LEVELS + 1)) { - tlevel->hashtable = ntdb_read_off(ntdb, tlevel->hashtable - + offsetof(struct ntdb_chain, - next)); - if (NTDB_OFF_IS_ERR(tlevel->hashtable)) { - return tlevel->hashtable; + val = iterate_chain(ntdb, val, h); + if (NTDB_OFF_IS_ERR(val)) { + goto unlock; } - if (tlevel->hashtable) { - tlevel->hashtable += sizeof(struct ntdb_used_record); - tlevel->entry = 0; - goto again; + if (val != 0) { + return val; } + + /* Otherwise, bucket has been set to i+1 */ + ntdb_unlock_hash(ntdb, i, F_RDLCK); } + return 0; - /* Go back up and keep searching. */ - tinfo->num_levels--; - tlevel--; - goto again; +unlock: + ntdb_unlock_hash(ntdb, i, F_RDLCK); + return val; } /* Return success if we find something, NTDB_ERR_NOEXIST if none. */ enum NTDB_ERROR next_in_hash(struct ntdb_context *ntdb, - struct traverse_info *tinfo, - NTDB_DATA *kbuf, size_t *dlen) + struct hash_info *h, + NTDB_DATA *kbuf, size_t *dlen) { - const unsigned group_bits = NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS; - ntdb_off_t hl_start, hl_range, off; + ntdb_off_t off; + struct ntdb_used_record rec; enum NTDB_ERROR ecode; - while (tinfo->toplevel_group < (1 << group_bits)) { - hl_start = (ntdb_off_t)tinfo->toplevel_group - << (64 - group_bits); - hl_range = 1ULL << group_bits; - ecode = ntdb_lock_hashes(ntdb, hl_start, hl_range, F_RDLCK, - NTDB_LOCK_WAIT); - if (ecode != NTDB_SUCCESS) { - return ecode; - } - - off = iterate_hash(ntdb, tinfo); - if (off) { - struct ntdb_used_record rec; + off = lock_and_iterate_hash(ntdb, h); - if (NTDB_OFF_IS_ERR(off)) { - ecode = NTDB_OFF_TO_ERR(off); - goto fail; - } - - ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec)); - if (ecode != NTDB_SUCCESS) { - goto fail; - } - if (rec_magic(&rec) != NTDB_USED_MAGIC) { - ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, - NTDB_LOG_ERROR, - "next_in_hash:" - " corrupt record at %llu", - (long long)off); - goto fail; - } + if (NTDB_OFF_IS_ERR(off)) { + return NTDB_OFF_TO_ERR(off); + } else if (off == 0) { + return NTDB_ERR_NOEXIST; + } - kbuf->dsize = rec_key_length(&rec); - - /* They want data as well? */ - if (dlen) { - *dlen = rec_data_length(&rec); - kbuf->dptr = ntdb_alloc_read(ntdb, - off + sizeof(rec), - kbuf->dsize - + *dlen); - } else { - kbuf->dptr = ntdb_alloc_read(ntdb, - off + sizeof(rec), - kbuf->dsize); - } - ntdb_unlock_hashes(ntdb, hl_start, hl_range, F_RDLCK); - if (NTDB_PTR_IS_ERR(kbuf->dptr)) { - return NTDB_PTR_ERR(kbuf->dptr); - } - return NTDB_SUCCESS; - } + /* The hash for this key is still locked. */ + ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec)); + if (ecode != NTDB_SUCCESS) { + goto unlock; + } + if (rec_magic(&rec) != NTDB_USED_MAGIC) { + ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, + NTDB_LOG_ERROR, + "next_in_hash:" + " corrupt record at %llu", + (long long)off); + goto unlock; + } - ntdb_unlock_hashes(ntdb, hl_start, hl_range, F_RDLCK); + kbuf->dsize = rec_key_length(&rec); - tinfo->toplevel_group++; - tinfo->levels[0].hashtable - += (sizeof(ntdb_off_t) << NTDB_HASH_GROUP_BITS); - tinfo->levels[0].entry = 0; + /* They want data as well? */ + if (dlen) { + *dlen = rec_data_length(&rec); + kbuf->dptr = ntdb_alloc_read(ntdb, off + sizeof(rec), + kbuf->dsize + *dlen); + } else { + kbuf->dptr = ntdb_alloc_read(ntdb, off + sizeof(rec), + kbuf->dsize); } - return NTDB_ERR_NOEXIST; + if (NTDB_PTR_IS_ERR(kbuf->dptr)) { + ecode = NTDB_PTR_ERR(kbuf->dptr); + goto unlock; + } + ecode = NTDB_SUCCESS; -fail: - ntdb_unlock_hashes(ntdb, hl_start, hl_range, F_RDLCK); +unlock: + ntdb_unlock_hash(ntdb, bits_from(h->h, 0, ntdb->hash_bits), F_RDLCK); return ecode; } enum NTDB_ERROR first_in_hash(struct ntdb_context *ntdb, - struct traverse_info *tinfo, + struct hash_info *h, NTDB_DATA *kbuf, size_t *dlen) { - tinfo->prev = 0; - tinfo->toplevel_group = 0; - tinfo->num_levels = 1; - tinfo->levels[0].hashtable = offsetof(struct ntdb_header, hashtable); - tinfo->levels[0].entry = 0; - tinfo->levels[0].total_buckets = (1 << NTDB_HASH_GROUP_BITS); - - return next_in_hash(ntdb, tinfo, kbuf, dlen); + h->table = NTDB_HASH_OFFSET; + h->table_size = 1 << ntdb->hash_bits; + h->bucket = 0; + + return next_in_hash(ntdb, h, kbuf, dlen); } /* Even if the entry isn't in this hash bucket, you'd have to lock this * bucket to find it. */ -static enum NTDB_ERROR chainlock(struct ntdb_context *ntdb, const NTDB_DATA *key, - int ltype, enum ntdb_lock_flags waitflag, - const char *func) +static enum NTDB_ERROR chainlock(struct ntdb_context *ntdb, + const NTDB_DATA *key, int ltype) { - enum NTDB_ERROR ecode; - uint64_t h = ntdb_hash(ntdb, key->dptr, key->dsize); - ntdb_off_t lockstart, locksize; - unsigned int group, gbits; - - gbits = NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS; - group = bits_from(h, 64 - gbits, gbits); + uint32_t h = ntdb_hash(ntdb, key->dptr, key->dsize); - lockstart = hlock_range(group, &locksize); - - ecode = ntdb_lock_hashes(ntdb, lockstart, locksize, ltype, waitflag); - ntdb_trace_1rec(ntdb, func, *key); - return ecode; + return ntdb_lock_hash(ntdb, bits_from(h, 0, ntdb->hash_bits), ltype); } /* lock/unlock one hash chain. This is meant to be used to reduce contention - it cannot guarantee how many records will be locked */ _PUBLIC_ enum NTDB_ERROR ntdb_chainlock(struct ntdb_context *ntdb, NTDB_DATA key) { - return chainlock(ntdb, &key, F_WRLCK, NTDB_LOCK_WAIT, "ntdb_chainlock"); + return chainlock(ntdb, &key, F_WRLCK); } _PUBLIC_ void ntdb_chainunlock(struct ntdb_context *ntdb, NTDB_DATA key) { - uint64_t h = ntdb_hash(ntdb, key.dptr, key.dsize); - ntdb_off_t lockstart, locksize; - unsigned int group, gbits; - - gbits = NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS; - group = bits_from(h, 64 - gbits, gbits); + uint32_t h = ntdb_hash(ntdb, key.dptr, key.dsize); - lockstart = hlock_range(group, &locksize); - - ntdb_trace_1rec(ntdb, "ntdb_chainunlock", key); - ntdb_unlock_hashes(ntdb, lockstart, locksize, F_WRLCK); + ntdb_unlock_hash(ntdb, bits_from(h, 0, ntdb->hash_bits), F_WRLCK); } -_PUBLIC_ enum NTDB_ERROR ntdb_chainlock_read(struct ntdb_context *ntdb, NTDB_DATA key) +_PUBLIC_ enum NTDB_ERROR ntdb_chainlock_read(struct ntdb_context *ntdb, + NTDB_DATA key) { - return chainlock(ntdb, &key, F_RDLCK, NTDB_LOCK_WAIT, - "ntdb_chainlock_read"); + return chainlock(ntdb, &key, F_RDLCK); } _PUBLIC_ void ntdb_chainunlock_read(struct ntdb_context *ntdb, NTDB_DATA key) { - uint64_t h = ntdb_hash(ntdb, key.dptr, key.dsize); - ntdb_off_t lockstart, locksize; - unsigned int group, gbits; - - gbits = NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS; - group = bits_from(h, 64 - gbits, gbits); - - lockstart = hlock_range(group, &locksize); + uint32_t h = ntdb_hash(ntdb, key.dptr, key.dsize); - ntdb_trace_1rec(ntdb, "ntdb_chainunlock_read", key); - ntdb_unlock_hashes(ntdb, lockstart, locksize, F_RDLCK); + ntdb_unlock_hash(ntdb, bits_from(h, 0, ntdb->hash_bits), F_RDLCK); } diff --git a/lib/ntdb/io.c b/lib/ntdb/io.c index b0588132e0..11d5b1f367 100644 --- a/lib/ntdb/io.c +++ b/lib/ntdb/io.c @@ -26,7 +26,6 @@ License along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include "private.h" -#include <assert.h> #include <ccan/likely/likely.h> void ntdb_munmap(struct ntdb_file *file) diff --git a/lib/ntdb/lock.c b/lib/ntdb/lock.c index 95824db742..f6a811a3fa 100644 --- a/lib/ntdb/lock.c +++ b/lib/ntdb/lock.c @@ -26,7 +26,6 @@ */ #include "private.h" -#include <assert.h> #include <ccan/build_assert/build_assert.h> /* If we were threaded, we could wait for unlock, but we're not, so fail. */ @@ -346,12 +345,8 @@ static enum NTDB_ERROR ntdb_nest_lock(struct ntdb_context *ntdb, struct ntdb_lock *new_lck; enum NTDB_ERROR ecode; - if (offset > (NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE - + ntdb->file->map_size / 8)) { - return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR, - "ntdb_nest_lock: invalid offset %zu ltype=%d", - (size_t)offset, ltype); - } + assert(offset <= (NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits) + + ntdb->file->map_size / 8)); if (ntdb->flags & NTDB_NOLOCK) return NTDB_SUCCESS; @@ -581,17 +576,17 @@ enum NTDB_ERROR ntdb_allrecord_lock(struct ntdb_context *ntdb, int ltype, again: /* Lock hashes, gradually. */ ecode = ntdb_lock_gradual(ntdb, ltype, flags, NTDB_HASH_LOCK_START, - NTDB_HASH_LOCK_RANGE); + 1 << ntdb->hash_bits); if (ecode != NTDB_SUCCESS) return ecode; /* Lock free tables: there to end of file. */ ecode = ntdb_brlock(ntdb, ltype, - NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE, - 0, flags); + NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits), + 0, flags); if (ecode != NTDB_SUCCESS) { ntdb_brunlock(ntdb, ltype, NTDB_HASH_LOCK_START, - NTDB_HASH_LOCK_RANGE); + 1 << ntdb->hash_bits); return ecode; } @@ -700,7 +695,7 @@ bool ntdb_has_hash_locks(struct ntdb_context *ntdb) for (i=0; i<ntdb->file->num_lockrecs; i++) { if (ntdb->file->lockrecs[i].off >= NTDB_HASH_LOCK_START && ntdb->file->lockrecs[i].off < (NTDB_HASH_LOCK_START - + NTDB_HASH_LOCK_RANGE)) + + (1 << ntdb->hash_bits))) return true; } return false; @@ -715,20 +710,19 @@ static bool ntdb_has_free_lock(struct ntdb_context *ntdb) for (i=0; i<ntdb->file->num_lockrecs; i++) { if (ntdb->file->lockrecs[i].off - > NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE) + > NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits)) return true; } return false; } -enum NTDB_ERROR ntdb_lock_hashes(struct ntdb_context *ntdb, - ntdb_off_t hash_lock, - ntdb_len_t hash_range, - int ltype, enum ntdb_lock_flags waitflag) +enum NTDB_ERROR ntdb_lock_hash(struct ntdb_context *ntdb, + unsigned int h, + int ltype) { - /* FIXME: Do this properly, using hlock_range */ - unsigned l = NTDB_HASH_LOCK_START - + (hash_lock >> (64 - NTDB_HASH_LOCK_RANGE_BITS)); + unsigned l = NTDB_HASH_LOCK_START + h; + + assert(h < (1 << ntdb->hash_bits)); /* a allrecord lock allows us to avoid per chain locks */ if (ntdb->file->allrecord_lock.count) { @@ -760,15 +754,13 @@ enum NTDB_ERROR ntdb_lock_hashes(struct ntdb_context *ntdb, " already have expansion lock"); } - return ntdb_nest_lock(ntdb, l, ltype, waitflag); + return ntdb_nest_lock(ntdb, l, ltype, NTDB_LOCK_WAIT); } -enum NTDB_ERROR ntdb_unlock_hashes(struct ntdb_context *ntdb, - ntdb_off_t hash_lock, - ntdb_len_t hash_range, int ltype) +enum NTDB_ERROR ntdb_unlock_hash(struct ntdb_context *ntdb, + unsigned int h, int ltype) { - unsigned l = NTDB_HASH_LOCK_START - + (hash_lock >> (64 - NTDB_HASH_LOCK_RANGE_BITS)); + unsigned l = NTDB_HASH_LOCK_START + (h & ((1 << ntdb->hash_bits)-1)); if (ntdb->flags & NTDB_NOLOCK) return 0; @@ -791,14 +783,15 @@ enum NTDB_ERROR ntdb_unlock_hashes(struct ntdb_context *ntdb, return ntdb_nest_unlock(ntdb, l, ltype); } -/* Hash locks use NTDB_HASH_LOCK_START + the next 30 bits. +/* Hash locks use NTDB_HASH_LOCK_START + <number of hash entries>.. * Then we begin; bucket offsets are sizeof(ntdb_len_t) apart, so we divide. * The result is that on 32 bit systems we don't use lock values > 2^31 on * files that are less than 4GB. */ -static ntdb_off_t free_lock_off(ntdb_off_t b_off) +static ntdb_off_t free_lock_off(const struct ntdb_context *ntdb, + ntdb_off_t b_off) { - return NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE + return NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits) + b_off / sizeof(ntdb_off_t); } @@ -834,7 +827,8 @@ enum NTDB_ERROR ntdb_lock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_of } #endif - return ntdb_nest_lock(ntdb, free_lock_off(b_off), F_WRLCK, waitflag); + return ntdb_nest_lock(ntdb, free_lock_off(ntdb, b_off), F_WRLCK, + waitflag); } void ntdb_unlock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off) @@ -842,7 +836,7 @@ void ntdb_unlock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off) if (ntdb->file->allrecord_lock.count) return; - ntdb_nest_unlock(ntdb, free_lock_off(b_off), F_WRLCK); + ntdb_nest_unlock(ntdb, free_lock_off(ntdb, b_off), F_WRLCK); } _PUBLIC_ enum NTDB_ERROR ntdb_lockall(struct ntdb_context *ntdb) diff --git a/lib/ntdb/ntdb.c b/lib/ntdb/ntdb.c index a74e0f4b78..8d50ba60c1 100644 --- a/lib/ntdb/ntdb.c +++ b/lib/ntdb/ntdb.c @@ -25,14 +25,13 @@ static enum NTDB_ERROR update_rec_hdr(struct ntdb_context *ntdb, ntdb_off_t off, ntdb_len_t keylen, ntdb_len_t datalen, - struct ntdb_used_record *rec, - uint64_t h) + struct ntdb_used_record *rec) { uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec); enum NTDB_ERROR ecode; ecode = set_header(ntdb, rec, NTDB_USED_MAGIC, keylen, datalen, - keylen + dataroom, h); + keylen + dataroom); if (ecode == NTDB_SUCCESS) { ecode = ntdb_write_convert(ntdb, off, rec, sizeof(*rec)); } @@ -49,8 +48,7 @@ static enum NTDB_ERROR replace_data(struct ntdb_context *ntdb, enum NTDB_ERROR ecode; /* Allocate a new record. */ - new_off = alloc(ntdb, key.dsize, dbuf.dsize, h->h, NTDB_USED_MAGIC, - growing); + new_off = alloc(ntdb, key.dsize, dbuf.dsize, NTDB_USED_MAGIC, growing); if (NTDB_OFF_IS_ERR(new_off)) { return NTDB_OFF_TO_ERR(new_off); } @@ -116,7 +114,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb, struct ntdb_used_record rec; enum NTDB_ERROR ecode; - off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL); + off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec); if (NTDB_OFF_IS_ERR(off)) { return NTDB_OFF_TO_ERR(off); } @@ -135,7 +133,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb, /* Can modify in-place. Easy! */ ecode = update_rec_hdr(ntdb, off, key.dsize, dbuf.dsize, - &rec, h.h); + &rec); if (ecode != NTDB_SUCCESS) { goto out; } @@ -146,8 +144,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb, if (ecode != NTDB_SUCCESS) { goto out; } - ntdb_unlock_hashes(ntdb, h.hlock_start, - h.hlock_range, F_WRLCK); + ntdb_unlock_hash(ntdb, h.h, F_WRLCK); return NTDB_SUCCESS; } } else { @@ -164,7 +161,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb, /* If we didn't use the old record, this implies we're growing. */ ecode = replace_data(ntdb, &h, key, dbuf, off, old_room, off); out: - ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_WRLCK); + ntdb_unlock_hash(ntdb, h.h, F_WRLCK); return ecode; } @@ -179,7 +176,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb, NTDB_DATA new_dbuf; enum NTDB_ERROR ecode; - off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL); + off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec); if (NTDB_OFF_IS_ERR(off)) { return NTDB_OFF_TO_ERR(off); } @@ -191,8 +188,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb, /* Fast path: can append in place. */ if (rec_extra_padding(&rec) >= dbuf.dsize) { ecode = update_rec_hdr(ntdb, off, key.dsize, - old_dlen + dbuf.dsize, &rec, - h.h); + old_dlen + dbuf.dsize, &rec); if (ecode != NTDB_SUCCESS) { goto out; } @@ -233,7 +229,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb, out_free_newdata: ntdb->free_fn(newdata, ntdb->alloc_data); out: - ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_WRLCK); + ntdb_unlock_hash(ntdb, h.h, F_WRLCK); return ecode; } @@ -245,7 +241,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_fetch(struct ntdb_context *ntdb, NTDB_DATA key, struct hash_info h; enum NTDB_ERROR ecode; - off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL); + off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec); if (NTDB_OFF_IS_ERR(off)) { return NTDB_OFF_TO_ERR(off); } @@ -262,7 +258,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_fetch(struct ntdb_context *ntdb, NTDB_DATA key, ecode = NTDB_SUCCESS; } - ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK); + ntdb_unlock_hash(ntdb, h.h, F_RDLCK); return ecode; } @@ -272,11 +268,11 @@ _PUBLIC_ bool ntdb_exists(struct ntdb_context *ntdb, NTDB_DATA key) struct ntdb_used_record rec; struct hash_info h; - off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL); + off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec); if (NTDB_OFF_IS_ERR(off)) { return false; } - ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK); + ntdb_unlock_hash(ntdb, h.h, F_RDLCK); return off ? true : false; } @@ -288,7 +284,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_delete(struct ntdb_context *ntdb, NTDB_DATA key) struct hash_info h; enum NTDB_ERROR ecode; - off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL); + off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec); if (NTDB_OFF_IS_ERR(off)) { return NTDB_OFF_TO_ERR(off); } @@ -316,7 +312,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_delete(struct ntdb_context *ntdb, NTDB_DATA key) ntdb_inc_seqnum(ntdb); unlock: - ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_WRLCK); + ntdb_unlock_hash(ntdb, h.h, F_WRLCK); return ecode; } @@ -486,7 +482,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_parse_record_(struct ntdb_context *ntdb, struct hash_info h; enum NTDB_ERROR ecode; - off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL); + off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec); if (NTDB_OFF_IS_ERR(off)) { return NTDB_OFF_TO_ERR(off); } @@ -507,7 +503,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_parse_record_(struct ntdb_context *ntdb, } } - ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK); + ntdb_unlock_hash(ntdb, h.h, F_RDLCK); return ecode; } diff --git a/lib/ntdb/ntdb.h b/lib/ntdb/ntdb.h index 87f3f5bbfa..09622787d4 100644 --- a/lib/ntdb/ntdb.h +++ b/lib/ntdb/ntdb.h @@ -765,7 +765,7 @@ struct ntdb_attribute_log { */ struct ntdb_attribute_hash { struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_HASH */ - uint64_t (*fn)(const void *key, size_t len, uint64_t seed, + uint32_t (*fn)(const void *key, size_t len, uint32_t seed, void *data); void *data; }; @@ -809,7 +809,6 @@ struct ntdb_attribute_stats { uint64_t alloc_coalesce_succeeded; uint64_t alloc_coalesce_num_merged; uint64_t compares; - uint64_t compare_wrong_bucket; uint64_t compare_wrong_offsetbits; uint64_t compare_wrong_keylen; uint64_t compare_wrong_rechash; @@ -822,6 +821,8 @@ struct ntdb_attribute_stats { uint64_t transaction_read_direct_fail; uint64_t transaction_write_direct; uint64_t transaction_write_direct_fail; + uint64_t traverses; + uint64_t traverse_val_vanished; uint64_t expands; uint64_t frees; uint64_t locks; diff --git a/lib/ntdb/open.c b/lib/ntdb/open.c index 6aac46ab2f..4b8c45298b 100644 --- a/lib/ntdb/open.c +++ b/lib/ntdb/open.c @@ -17,7 +17,6 @@ */ #include "private.h" #include <ccan/build_assert/build_assert.h> -#include <assert.h> /* all tdbs, to detect double-opens (fcntl file don't nest!) */ static struct ntdb_context *tdbs = NULL; @@ -53,10 +52,10 @@ static bool read_all(int fd, void *buf, size_t len) return true; } -static uint64_t random_number(struct ntdb_context *ntdb) +static uint32_t random_number(struct ntdb_context *ntdb) { int fd; - uint64_t ret = 0; + uint32_t ret = 0; struct timeval now; fd = open("/dev/urandom", O_RDONLY); @@ -71,14 +70,14 @@ static uint64_t random_number(struct ntdb_context *ntdb) fd = open("/dev/egd-pool", O_RDWR); if (fd >= 0) { /* Command is 1, next byte is size we want to read. */ - char cmd[2] = { 1, sizeof(uint64_t) }; + char cmd[2] = { 1, sizeof(uint32_t) }; if (write(fd, cmd, sizeof(cmd)) == sizeof(cmd)) { - char reply[1 + sizeof(uint64_t)]; + char reply[1 + sizeof(uint32_t)]; int r = read(fd, reply, sizeof(reply)); if (r > 1) { /* Copy at least some bytes. */ memcpy(&ret, reply+1, r - 1); - if (reply[0] == sizeof(uint64_t) + if (reply[0] == sizeof(uint32_t) && r == sizeof(reply)) { close(fd); return ret; @@ -105,92 +104,119 @@ static void ntdb_context_init(struct ntdb_context *ntdb) ntdb->access = NULL; } -struct new_database { - struct ntdb_header hdr; - struct ntdb_freetable ftable; - struct ntdb_free_record remainder; -}; +/* initialise a new database: + * + * struct ntdb_header; + * struct { + * struct ntdb_used_record hash_header; + * ntdb_off_t hash_buckets[1 << ntdb->hash_bits]; + * } hash; + * struct ntdb_freetable ftable; + * struct { + * struct ntdb_free_record free_header; + * char forty_three[...]; + * } remainder; + */ +#define NEW_DATABASE_HDR_SIZE(hbits) \ + (sizeof(struct ntdb_header) \ + + sizeof(struct ntdb_used_record) + (sizeof(ntdb_off_t) << hbits) \ + + sizeof(struct ntdb_freetable) \ + + sizeof(struct ntdb_free_record)) -/* initialise a new database */ static enum NTDB_ERROR ntdb_new_database(struct ntdb_context *ntdb, - struct ntdb_attribute_seed *seed, - struct ntdb_header *hdr) + struct ntdb_attribute_seed *seed, + struct ntdb_header *rhdr) { /* We make it up in memory, then write it out if not internal */ - struct new_database *newdb; + struct ntdb_freetable *ftable; + struct ntdb_used_record *htable; + struct ntdb_header *hdr; + struct ntdb_free_record *remainder; + char *mem; unsigned int magic_len; ssize_t rlen; - size_t dbsize, remaindersize; + size_t dbsize, hashsize, hdrsize, remaindersize; enum NTDB_ERROR ecode; + hashsize = sizeof(ntdb_off_t) << ntdb->hash_bits; + /* Always make db a multiple of NTDB_PGSIZE */ - dbsize = (sizeof(*newdb) + NTDB_PGSIZE-1) & ~(NTDB_PGSIZE-1); - remaindersize = dbsize - sizeof(*newdb); - newdb = ntdb->alloc_fn(ntdb, dbsize, ntdb->alloc_data); - if (!newdb) { + hdrsize = NEW_DATABASE_HDR_SIZE(ntdb->hash_bits); + dbsize = (hdrsize + NTDB_PGSIZE-1) & ~(NTDB_PGSIZE-1); + + mem = ntdb->alloc_fn(ntdb, dbsize, ntdb->alloc_data); + if (!mem) { return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, "ntdb_new_database: failed to allocate"); } + hdr = (void *)mem; + htable = (void *)(mem + sizeof(*hdr)); + ftable = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize); + remainder = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize + + sizeof(*ftable)); + /* Fill in the header */ - newdb->hdr.version = NTDB_VERSION; + hdr->version = NTDB_VERSION; if (seed) - newdb->hdr.hash_seed = seed->seed; + hdr->hash_seed = seed->seed; else - newdb->hdr.hash_seed = random_number(ntdb); - newdb->hdr.hash_test = NTDB_HASH_MAGIC; - newdb->hdr.hash_test = ntdb->hash_fn(&newdb->hdr.hash_test, - sizeof(newdb->hdr.hash_test), - newdb->hdr.hash_seed, - ntdb->hash_data); - newdb->hdr.recovery = 0; - newdb->hdr.features_used = newdb->hdr.features_offered = NTDB_FEATURE_MASK; - newdb->hdr.seqnum = 0; - newdb->hdr.capabilities = 0; - memset(newdb->hdr.reserved, 0, sizeof(newdb->hdr.reserved)); - /* Initial hashes are empty. */ - memset(newdb->hdr.hashtable, 0, sizeof(newdb->hdr.hashtable)); + hdr->hash_seed = random_number(ntdb); + hdr->hash_test = NTDB_HASH_MAGIC; + hdr->hash_test = ntdb->hash_fn(&hdr->hash_test, + sizeof(hdr->hash_test), + hdr->hash_seed, + ntdb->hash_data); + hdr->hash_bits = ntdb->hash_bits; + hdr->recovery = 0; + hdr->features_used = hdr->features_offered = NTDB_FEATURE_MASK; + hdr->seqnum = 0; + hdr->capabilities = 0; + memset(hdr->reserved, 0, sizeof(hdr->reserved)); + + /* Hash is all zero after header. */ + set_header(NULL, htable, NTDB_HTABLE_MAGIC, 0, hashsize, hashsize); + memset(htable + 1, 0, hashsize); /* Free is empty. */ - newdb->hdr.free_table = offsetof(struct new_database, ftable); - memset(&newdb->ftable, 0, sizeof(newdb->ftable)); - ecode = set_header(NULL, &newdb->ftable.hdr, NTDB_FTABLE_MAGIC, 0, - sizeof(newdb->ftable) - sizeof(newdb->ftable.hdr), - sizeof(newdb->ftable) - sizeof(newdb->ftable.hdr), - 0); + hdr->free_table = (char *)ftable - (char *)hdr; + memset(ftable, 0, sizeof(*ftable)); + ecode = set_header(NULL, &ftable->hdr, NTDB_FTABLE_MAGIC, 0, + sizeof(*ftable) - sizeof(ftable->hdr), + sizeof(*ftable) - sizeof(ftable->hdr)); if (ecode != NTDB_SUCCESS) { goto out; } /* Rest of database is a free record, containing junk. */ - newdb->remainder.ftable_and_len - = (remaindersize + sizeof(newdb->remainder) + remaindersize = dbsize - hdrsize; + remainder->ftable_and_len + = (remaindersize + sizeof(*remainder) - sizeof(struct ntdb_used_record)); - newdb->remainder.next = 0; - newdb->remainder.magic_and_prev + remainder->next = 0; + remainder->magic_and_prev = (NTDB_FREE_MAGIC << (64-NTDB_OFF_UPPER_STEAL)) - | offsetof(struct new_database, remainder); - memset(&newdb->remainder + 1, 0x43, remaindersize); + | ((char *)remainder - (char *)hdr); + memset(remainder + 1, 0x43, remaindersize); /* Put in our single free entry. */ - newdb->ftable.buckets[size_to_bucket(remaindersize)] = - offsetof(struct new_database, remainder); + ftable->buckets[size_to_bucket(remaindersize)] = + (char *)remainder - (char *)hdr; /* Magic food */ - memset(newdb->hdr.magic_food, 0, sizeof(newdb->hdr.magic_food)); - strcpy(newdb->hdr.magic_food, NTDB_MAGIC_FOOD); + memset(hdr->magic_food, 0, sizeof(hdr->magic_food)); + strcpy(hdr->magic_food, NTDB_MAGIC_FOOD); /* This creates an endian-converted database, as if read from disk */ - magic_len = sizeof(newdb->hdr.magic_food); - ntdb_convert(ntdb, - (char *)&newdb->hdr + magic_len, - sizeof(*newdb) - magic_len); + magic_len = sizeof(hdr->magic_food); + ntdb_convert(ntdb, (char *)hdr + magic_len, hdrsize - magic_len); - *hdr = newdb->hdr; + /* Return copy of header. */ + *rhdr = *hdr; if (ntdb->flags & NTDB_INTERNAL) { ntdb->file->map_size = dbsize; - ntdb->file->map_ptr = newdb; + ntdb->file->map_ptr = hdr; return NTDB_SUCCESS; } if (lseek(ntdb->file->fd, 0, SEEK_SET) == -1) { @@ -207,7 +233,7 @@ static enum NTDB_ERROR ntdb_new_database(struct ntdb_context *ntdb, goto out; } - rlen = write(ntdb->file->fd, newdb, dbsize); + rlen = write(ntdb->file->fd, hdr, dbsize); if (rlen != dbsize) { if (rlen >= 0) errno = ENOSPC; @@ -218,7 +244,7 @@ static enum NTDB_ERROR ntdb_new_database(struct ntdb_context *ntdb, } out: - ntdb->free_fn(newdb, ntdb->alloc_data); + ntdb->free_fn(hdr, ntdb->alloc_data); return ecode; } @@ -487,6 +513,7 @@ _PUBLIC_ struct ntdb_context *ntdb_open(const char *name, int ntdb_flags, ntdb->alloc_fn = default_alloc; ntdb->expand_fn = default_expand; ntdb->free_fn = default_free; + ntdb->hash_bits = NTDB_DEFAULT_HBITS; /* 64k of hash by default. */ while (attr) { switch (attr->base.attr) { @@ -687,6 +714,7 @@ _PUBLIC_ struct ntdb_context *ntdb_open(const char *name, int ntdb_flags, ntdb_context_init(ntdb); ntdb_convert(ntdb, &hdr, sizeof(hdr)); + ntdb->hash_bits = hdr.hash_bits; ntdb->hash_seed = hdr.hash_seed; hash_test = NTDB_HASH_MAGIC; hash_test = ntdb_hash(ntdb, &hash_test, sizeof(hash_test)); diff --git a/lib/ntdb/private.h b/lib/ntdb/private.h index 488e99a0f9..957d85e494 100644 --- a/lib/ntdb/private.h +++ b/lib/ntdb/private.h @@ -48,6 +48,7 @@ #include <utime.h> #include <unistd.h> #endif +#include <assert.h> #ifndef TEST_IT #define TEST_IT(cond) @@ -114,30 +115,17 @@ typedef int ntdb_bool_err; /* Hash chain locks. */ #define NTDB_HASH_LOCK_START 64 -/* Range for hash locks. */ -#define NTDB_HASH_LOCK_RANGE_BITS 30 -#define NTDB_HASH_LOCK_RANGE (1 << NTDB_HASH_LOCK_RANGE_BITS) - -/* We have 1024 entries in the top level. */ -#define NTDB_TOPLEVEL_HASH_BITS 10 -/* And 64 entries in each sub-level: thus 64 bits exactly after 9 levels. */ -#define NTDB_SUBLEVEL_HASH_BITS 6 -/* And 8 entries in each group, ie 8 groups per sublevel. */ -#define NTDB_HASH_GROUP_BITS 3 -/* This is currently 10: beyond this we chain. */ -#define NTDB_MAX_LEVELS (1+(64-NTDB_TOPLEVEL_HASH_BITS) / NTDB_SUBLEVEL_HASH_BITS) - /* Extend file by least 100 times larger than needed. */ #define NTDB_EXTENSION_FACTOR 100 -/* We steal bits from the offsets to store hash info. */ -#define NTDB_OFF_HASH_GROUP_MASK ((1ULL << NTDB_HASH_GROUP_BITS) - 1) /* We steal this many upper bits, giving a maximum offset of 64 exabytes. */ #define NTDB_OFF_UPPER_STEAL 8 -#define NTDB_OFF_UPPER_STEAL_EXTRA 7 -/* The bit number where we store extra hash bits. */ -#define NTDB_OFF_HASH_EXTRA_BIT 57 -#define NTDB_OFF_UPPER_STEAL_SUBHASH_BIT 56 + +/* And we use the lower bit, too. */ +#define NTDB_OFF_CHAIN_BIT 0 + +/* Hash table sits just after the header. */ +#define NTDB_HASH_OFFSET (sizeof(struct ntdb_header)) /* Additional features we understand. Currently: none. */ #define NTDB_FEATURE_MASK ((uint64_t)0) @@ -145,7 +133,7 @@ typedef int ntdb_bool_err; /* The bit number where we store the extra hash bits. */ /* Convenience mask to get actual offset. */ #define NTDB_OFF_MASK \ - (((1ULL << (64 - NTDB_OFF_UPPER_STEAL)) - 1) - NTDB_OFF_HASH_GROUP_MASK) + (((1ULL << (64 - NTDB_OFF_UPPER_STEAL)) - 1) - (1<<NTDB_OFF_CHAIN_BIT)) /* How many buckets in a free list: see size_to_bucket(). */ #define NTDB_FREE_BUCKETS (64 - NTDB_OFF_UPPER_STEAL) @@ -157,12 +145,14 @@ typedef int ntdb_bool_err; /* Indicates this entry is not on an flist (can happen during coalescing) */ #define NTDB_FTABLE_NONE ((1ULL << NTDB_OFF_UPPER_STEAL) - 1) +/* By default, hash is 64k bytes */ +#define NTDB_DEFAULT_HBITS 13 + struct ntdb_used_record { /* For on-disk compatibility, we avoid bitfields: magic: 16, (highest) key_len_bits: 5, extra_padding: 32 - hash_bits: 11 */ uint64_t magic_and_meta; /* The bottom key_len_bits*2 are key length, rest is data length. */ @@ -189,11 +179,6 @@ static inline uint64_t rec_extra_padding(const struct ntdb_used_record *r) return (r->magic_and_meta >> 11) & 0xFFFFFFFF; } -static inline uint32_t rec_hash(const struct ntdb_used_record *r) -{ - return r->magic_and_meta & ((1 << 11) - 1); -} - static inline uint16_t rec_magic(const struct ntdb_used_record *r) { return (r->magic_and_meta >> 48); @@ -236,17 +221,12 @@ struct ntdb_recovery_record { uint64_t eof; }; -/* If we bottom out of the subhashes, we chain. */ -struct ntdb_chain { - ntdb_off_t rec[1 << NTDB_HASH_GROUP_BITS]; - ntdb_off_t next; -}; - /* this is stored at the front of every database */ struct ntdb_header { char magic_food[64]; /* for /etc/magic */ /* FIXME: Make me 32 bit? */ uint64_t version; /* version of the code */ + uint64_t hash_bits; /* bits for toplevel hash table. */ uint64_t hash_test; /* result of hashing HASH_MAGIC. */ uint64_t hash_seed; /* "random" seed written at creation time. */ ntdb_off_t free_table; /* (First) free table. */ @@ -260,8 +240,12 @@ struct ntdb_header { ntdb_off_t capabilities; /* Optional linked list of capabilities. */ ntdb_off_t reserved[22]; - /* Top level hash table. */ - ntdb_off_t hashtable[1ULL << NTDB_TOPLEVEL_HASH_BITS]; + /* + * Hash table is next: + * + * struct ntdb_used_record htable_hdr; + * ntdb_off_t htable[1 << hash_bits]; + */ }; struct ntdb_freetable { @@ -280,33 +264,15 @@ struct ntdb_capability { /* Information about a particular (locked) hash entry. */ struct hash_info { /* Full hash value of entry. */ - uint64_t h; - /* Start and length of lock acquired. */ - ntdb_off_t hlock_start; - ntdb_len_t hlock_range; - /* Start of hash group. */ - ntdb_off_t group_start; - /* Bucket we belong in. */ - unsigned int home_bucket; + uint32_t h; + /* Start of hash table / chain. */ + ntdb_off_t table; + /* Number of entries in this table/chain. */ + ntdb_off_t table_size; /* Bucket we (or an empty space) were found in. */ - unsigned int found_bucket; - /* How many bits of the hash are already used. */ - unsigned int hash_used; - /* Current working group. */ - ntdb_off_t group[1 << NTDB_HASH_GROUP_BITS]; -}; - -struct traverse_info { - struct traverse_level { - ntdb_off_t hashtable; - /* We ignore groups here, and treat it as a big array. */ - unsigned entry; - unsigned int total_buckets; - } levels[NTDB_MAX_LEVELS + 1]; - unsigned int num_levels; - unsigned int toplevel_group; - /* This makes delete-everything-inside-traverse work as expected. */ - ntdb_off_t prev; + ntdb_off_t bucket; + /* Old value that was in that entry (if not found) */ + ntdb_off_t old_val; }; enum ntdb_lock_flags { @@ -375,40 +341,46 @@ struct ntdb_methods { /* internal prototypes */ +/* Get bits from a value. */ +static inline uint32_t bits_from(uint64_t val, unsigned start, unsigned num) +{ + assert(num <= 32); + return (val >> start) & ((1U << num) - 1); +} + + /* hash.c: */ -uint64_t ntdb_jenkins_hash(const void *key, size_t length, uint64_t seed, +uint32_t ntdb_jenkins_hash(const void *key, size_t length, uint32_t seed, void *unused); enum NTDB_ERROR first_in_hash(struct ntdb_context *ntdb, - struct traverse_info *tinfo, + struct hash_info *h, NTDB_DATA *kbuf, size_t *dlen); enum NTDB_ERROR next_in_hash(struct ntdb_context *ntdb, - struct traverse_info *tinfo, + struct hash_info *h, NTDB_DATA *kbuf, size_t *dlen); /* Hash random memory. */ -uint64_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len); - -/* Hash on disk. */ -uint64_t hash_record(struct ntdb_context *ntdb, ntdb_off_t off); +uint32_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len); /* Find and lock a hash entry (or where it would be). */ ntdb_off_t find_and_lock(struct ntdb_context *ntdb, NTDB_DATA key, int ltype, struct hash_info *h, - struct ntdb_used_record *rec, - struct traverse_info *tinfo); + struct ntdb_used_record *rec); enum NTDB_ERROR replace_in_hash(struct ntdb_context *ntdb, - struct hash_info *h, + const struct hash_info *h, ntdb_off_t new_off); -enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb, struct hash_info *h, +enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb, + const struct hash_info *h, ntdb_off_t new_off); -enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb, struct hash_info *h); +enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb, + const struct hash_info *h); /* For ntdb_check */ bool is_subhash(ntdb_off_t val); @@ -424,7 +396,7 @@ ntdb_off_t next_ftable(struct ntdb_context *ntdb, ntdb_off_t ftable); /* This returns space or -ve error number. */ ntdb_off_t alloc(struct ntdb_context *ntdb, size_t keylen, size_t datalen, - uint64_t hash, unsigned magic, bool growing); + unsigned magic, bool growing); /* Put this record in a free list. */ enum NTDB_ERROR add_free_record(struct ntdb_context *ntdb, @@ -436,7 +408,7 @@ enum NTDB_ERROR add_free_record(struct ntdb_context *ntdb, enum NTDB_ERROR set_header(struct ntdb_context *ntdb, struct ntdb_used_record *rec, unsigned magic, uint64_t keylen, uint64_t datalen, - uint64_t actuallen, unsigned hashlow); + uint64_t actuallen); /* Used by ntdb_check to verify. */ unsigned int size_to_bucket(ntdb_len_t data_len); @@ -504,13 +476,12 @@ enum NTDB_ERROR owner_conflict(struct ntdb_context *ntdb, const char *call); /* If we fork, we no longer really own locks. */ bool check_lock_pid(struct ntdb_context *ntdb, const char *call, bool log); -/* Lock/unlock a range of hashes. */ -enum NTDB_ERROR ntdb_lock_hashes(struct ntdb_context *ntdb, - ntdb_off_t hash_lock, ntdb_len_t hash_range, - int ltype, enum ntdb_lock_flags waitflag); -enum NTDB_ERROR ntdb_unlock_hashes(struct ntdb_context *ntdb, - ntdb_off_t hash_lock, - ntdb_len_t hash_range, int ltype); +/* Lock/unlock a hash bucket. */ +enum NTDB_ERROR ntdb_lock_hash(struct ntdb_context *ntdb, + unsigned int hbucket, + int ltype); +enum NTDB_ERROR ntdb_unlock_hash(struct ntdb_context *ntdb, + unsigned int hash, int ltype); /* For closing the file. */ void ntdb_lock_cleanup(struct ntdb_context *ntdb); @@ -588,9 +559,11 @@ struct ntdb_context { struct ntdb_file *file; /* Hash function. */ - uint64_t (*hash_fn)(const void *key, size_t len, uint64_t seed, void *); + uint32_t (*hash_fn)(const void *key, size_t len, uint32_t seed, void *); void *hash_data; - uint64_t hash_seed; + uint32_t hash_seed; + /* Bits in toplevel hash table. */ + unsigned int hash_bits; /* Allocate and free functions. */ void *(*alloc_fn)(const void *owner, size_t len, void *priv_data); diff --git a/lib/ntdb/summary.c b/lib/ntdb/summary.c index f5313bec55..5a75dc5b11 100644 --- a/lib/ntdb/summary.c +++ b/lib/ntdb/summary.c @@ -16,7 +16,6 @@ License along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include "private.h" -#include <assert.h> #include <ccan/tally/tally.h> #define SUMMARY_FORMAT \ @@ -30,9 +29,8 @@ "Number of uncoalesced records: %zu\n" \ "Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n%s" \ "Toplevel hash used: %u of %u\n" \ - "Number of chains: %zu\n" \ - "Number of subhashes: %zu\n" \ - "Smallest/average/largest subhash entries: %zu/%zu/%zu\n%s" \ + "Number of hashes: %zu\n" \ + "Smallest/average/largest hash chains: %zu/%zu/%zu\n%s" \ "Percentage keys/data/padding/free/rechdrs/freehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n" #define BUCKET_SUMMARY_FORMAT_A \ @@ -48,17 +46,17 @@ #define HISTO_HEIGHT 20 static ntdb_off_t count_hash(struct ntdb_context *ntdb, - ntdb_off_t hash_off, unsigned bits) + ntdb_off_t hash_off, + ntdb_off_t num) { const ntdb_off_t *h; - ntdb_off_t count = 0; - unsigned int i; + ntdb_off_t i, count = 0; - h = ntdb_access_read(ntdb, hash_off, sizeof(*h) << bits, true); + h = ntdb_access_read(ntdb, hash_off, sizeof(*h) * num, true); if (NTDB_PTR_IS_ERR(h)) { return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(h)); } - for (i = 0; i < (1 << bits); i++) + for (i = 0; i < num; i++) count += (h[i] != 0); ntdb_access_release(ntdb, h); @@ -66,14 +64,13 @@ static ntdb_off_t count_hash(struct ntdb_context *ntdb, } static enum NTDB_ERROR summarize(struct ntdb_context *ntdb, - struct tally *hashes, struct tally *ftables, struct tally *fr, struct tally *keys, struct tally *data, struct tally *extra, struct tally *uncoal, - struct tally *chains, + struct tally *hashes, size_t *num_caps) { ntdb_off_t off; @@ -119,8 +116,8 @@ static enum NTDB_ERROR summarize(struct ntdb_context *ntdb, tally_add(extra, rec_extra_padding(&p->u)); } else if (rec_magic(&p->u) == NTDB_HTABLE_MAGIC) { ntdb_off_t count = count_hash(ntdb, - off + sizeof(p->u), - NTDB_SUBLEVEL_HASH_BITS); + off + sizeof(p->u), + 1 << ntdb->hash_bits); if (NTDB_OFF_IS_ERR(count)) { return NTDB_OFF_TO_ERR(count); } @@ -139,7 +136,8 @@ static enum NTDB_ERROR summarize(struct ntdb_context *ntdb, len = sizeof(p->u) + rec_data_length(&p->u) + rec_extra_padding(&p->u); - tally_add(chains, 1); + tally_add(hashes, + rec_data_length(&p->u)/sizeof(ntdb_off_t)); tally_add(extra, rec_extra_padding(&p->u)); } else if (rec_magic(&p->u) == NTDB_CAP_MAGIC) { len = sizeof(p->u) @@ -200,12 +198,11 @@ _PUBLIC_ enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb, { ntdb_len_t len; size_t num_caps = 0; - struct tally *ftables, *hashes, *freet, *keys, *data, *extra, *uncoal, - *chains; - char *hashesg, *freeg, *keysg, *datag, *extrag, *uncoalg; + struct tally *ftables, *freet, *keys, *data, *extra, *uncoal, *hashes; + char *freeg, *keysg, *datag, *extrag, *uncoalg, *hashesg; enum NTDB_ERROR ecode; - hashesg = freeg = keysg = datag = extrag = uncoalg = NULL; + freeg = keysg = datag = extrag = uncoalg = hashesg = NULL; ecode = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false); if (ecode != NTDB_SUCCESS) { @@ -220,44 +217,43 @@ _PUBLIC_ enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb, /* Start stats off empty. */ ftables = tally_new(HISTO_HEIGHT); - hashes = tally_new(HISTO_HEIGHT); freet = tally_new(HISTO_HEIGHT); keys = tally_new(HISTO_HEIGHT); data = tally_new(HISTO_HEIGHT); extra = tally_new(HISTO_HEIGHT); uncoal = tally_new(HISTO_HEIGHT); - chains = tally_new(HISTO_HEIGHT); - if (!ftables || !hashes || !freet || !keys || !data || !extra - || !uncoal || !chains) { + hashes = tally_new(HISTO_HEIGHT); + if (!ftables || !freet || !keys || !data || !extra + || !uncoal || !hashes) { ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, "ntdb_summary: failed to allocate" " tally structures"); goto unlock; } - ecode = summarize(ntdb, hashes, ftables, freet, keys, data, extra, - uncoal, chains, &num_caps); + ecode = summarize(ntdb, ftables, freet, keys, data, extra, + uncoal, hashes, &num_caps); if (ecode != NTDB_SUCCESS) { goto unlock; } if (flags & NTDB_SUMMARY_HISTOGRAMS) { - hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT); freeg = tally_histogram(freet, HISTO_WIDTH, HISTO_HEIGHT); keysg = tally_histogram(keys, HISTO_WIDTH, HISTO_HEIGHT); datag = tally_histogram(data, HISTO_WIDTH, HISTO_HEIGHT); extrag = tally_histogram(extra, HISTO_WIDTH, HISTO_HEIGHT); uncoalg = tally_histogram(uncoal, HISTO_WIDTH, HISTO_HEIGHT); + hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT); } /* 20 is max length of a %llu. */ len = strlen(SUMMARY_FORMAT) + 33*20 + 1 - + (hashesg ? strlen(hashesg) : 0) + (freeg ? strlen(freeg) : 0) + (keysg ? strlen(keysg) : 0) + (datag ? strlen(datag) : 0) + (extrag ? strlen(extrag) : 0) + (uncoalg ? strlen(uncoalg) : 0) + + (hashesg ? strlen(hashesg) : 0) + num_caps * (strlen(CAPABILITY_FORMAT) + 20 + strlen(" (uncheckable,read-only)")); @@ -284,11 +280,9 @@ _PUBLIC_ enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb, tally_total(uncoal, NULL), tally_min(uncoal), tally_mean(uncoal), tally_max(uncoal), uncoalg ? uncoalg : "", - (unsigned)count_hash(ntdb, offsetof(struct ntdb_header, - hashtable), - NTDB_TOPLEVEL_HASH_BITS), - 1 << NTDB_TOPLEVEL_HASH_BITS, - tally_num(chains), + (unsigned)count_hash(ntdb, sizeof(struct ntdb_header), + 1 << ntdb->hash_bits), + 1 << ntdb->hash_bits, tally_num(hashes), tally_min(hashes), tally_mean(hashes), tally_max(hashes), hashesg ? hashesg : "", @@ -300,29 +294,26 @@ _PUBLIC_ enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb, * sizeof(struct ntdb_used_record) * 100.0 / ntdb->file->map_size, tally_num(ftables) * sizeof(struct ntdb_freetable) * 100.0 / ntdb->file->map_size, - (tally_num(hashes) - * (sizeof(ntdb_off_t) << NTDB_SUBLEVEL_HASH_BITS) - + (sizeof(ntdb_off_t) << NTDB_TOPLEVEL_HASH_BITS) - + sizeof(struct ntdb_chain) * tally_num(chains)) + (tally_total(hashes, NULL) * sizeof(ntdb_off_t) + + (sizeof(ntdb_off_t) << ntdb->hash_bits)) * 100.0 / ntdb->file->map_size); add_capabilities(ntdb, *summary); unlock: - ntdb->free_fn(hashesg, ntdb->alloc_data); ntdb->free_fn(freeg, ntdb->alloc_data); ntdb->free_fn(keysg, ntdb->alloc_data); ntdb->free_fn(datag, ntdb->alloc_data); ntdb->free_fn(extrag, ntdb->alloc_data); ntdb->free_fn(uncoalg, ntdb->alloc_data); - ntdb->free_fn(hashes, ntdb->alloc_data); + ntdb->free_fn(hashesg, ntdb->alloc_data); ntdb->free_fn(freet, ntdb->alloc_data); ntdb->free_fn(keys, ntdb->alloc_data); ntdb->free_fn(data, ntdb->alloc_data); ntdb->free_fn(extra, ntdb->alloc_data); ntdb->free_fn(uncoal, ntdb->alloc_data); ntdb->free_fn(ftables, ntdb->alloc_data); - ntdb->free_fn(chains, ntdb->alloc_data); + ntdb->free_fn(hashes, ntdb->alloc_data); ntdb_allrecord_unlock(ntdb, F_RDLCK); ntdb_unlock_expand(ntdb, F_RDLCK); diff --git a/lib/ntdb/test/api-12-store.c b/lib/ntdb/test/api-12-store.c index 24d9498755..8f1f42352b 100644 --- a/lib/ntdb/test/api-12-store.c +++ b/lib/ntdb/test/api-12-store.c @@ -9,7 +9,7 @@ #include "logging.h" /* We use the same seed which we saw a failure on. */ -static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p) +static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p) { return hash64_stable((const unsigned char *)key, len, *(uint64_t *)p); diff --git a/lib/ntdb/test/api-13-delete.c b/lib/ntdb/test/api-13-delete.c index 182252b109..9bf4026d12 100644 --- a/lib/ntdb/test/api-13-delete.c +++ b/lib/ntdb/test/api-13-delete.c @@ -8,14 +8,13 @@ #include "logging.h" /* We rig the hash so adjacent-numbered records always clash. */ -static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv) +static uint32_t clash(const void *key, size_t len, uint32_t seed, void *priv) { - return ((uint64_t)*(const unsigned int *)key) - << (64 - NTDB_TOPLEVEL_HASH_BITS - 1); + return *((const unsigned int *)key) / 2; } /* We use the same seed which we saw a failure on. */ -static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p) +static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p) { return hash64_stable((const unsigned char *)key, len, *(uint64_t *)p); diff --git a/lib/ntdb/test/api-20-alloc-attr.c b/lib/ntdb/test/api-20-alloc-attr.c index d5c7e718bc..5b4fb131f0 100644 --- a/lib/ntdb/test/api-20-alloc-attr.c +++ b/lib/ntdb/test/api-20-alloc-attr.c @@ -77,7 +77,7 @@ int main(int argc, char *argv[]) alloc_attr.alloc.free = test_free; alloc_attr.alloc.priv_data = &owner_weird_count; - plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 500 * 3 + 4) + 1); + plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 700 * 3 + 4) + 1); for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { curr_ntdb = NULL; @@ -88,7 +88,7 @@ int main(int argc, char *argv[]) if (!ntdb) continue; - for (j = 0; j < 500; j++) { + for (j = 0; j < 700; j++) { NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */ ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); diff --git a/lib/ntdb/test/api-82-lockattr.c b/lib/ntdb/test/api-82-lockattr.c index 51bb939f59..30de7dfddf 100644 --- a/lib/ntdb/test/api-82-lockattr.c +++ b/lib/ntdb/test/api-82-lockattr.c @@ -58,7 +58,7 @@ int main(int argc, char *argv[]) lock_attr.flock.unlock = ntdb_fcntl_unlock; lock_attr.flock.data = &lock_err; - plan_tests(sizeof(flags) / sizeof(flags[0]) * 80); + plan_tests(sizeof(flags) / sizeof(flags[0]) * 81); for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { NTDB_DATA d; @@ -190,6 +190,9 @@ int main(int argc, char *argv[]) /* Nonblocking traverse; go nonblock partway through. */ lock_err = 0; ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0); + /* Need two entries to ensure two lock attempts! */ + ok1(ntdb_store(ntdb, ntdb_mkdata("key2", 4), data, + NTDB_REPLACE) == 0); trav_err = EAGAIN; ok1(ntdb_traverse(ntdb, trav, &lock_err) == NTDB_ERR_LOCK); ok1(tap_log_messages == 0); diff --git a/lib/ntdb/test/api-check-callback.c b/lib/ntdb/test/api-check-callback.c index f74f04b598..3050fcddd9 100644 --- a/lib/ntdb/test/api-check-callback.c +++ b/lib/ntdb/test/api-check-callback.c @@ -59,6 +59,7 @@ int main(int argc, char *argv[]) int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, NTDB_NOMMAP|NTDB_CONVERT }; + return 0; plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 1); for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { diff --git a/lib/ntdb/test/api-missing-entries.c b/lib/ntdb/test/api-missing-entries.c index 1c8064f945..b77f8ff31f 100644 --- a/lib/ntdb/test/api-missing-entries.c +++ b/lib/ntdb/test/api-missing-entries.c @@ -11,10 +11,10 @@ #define NUM_RECORDS 1189 /* We use the same seed which we saw this failure on. */ -static uint64_t failhash(const void *key, size_t len, uint64_t seed, void *p) +static uint32_t failhash(const void *key, size_t len, uint32_t seed, void *p) { - seed = 699537674708983027ULL; - return hash64_stable((const unsigned char *)key, len, seed); + return hash64_stable((const unsigned char *)key, len, + 699537674708983027ULL); } int main(int argc, char *argv[]) diff --git a/lib/ntdb/test/helprun-layout.c b/lib/ntdb/test/helprun-layout.c index 7f1f9f9d8e..fa6fa29fce 100644 --- a/lib/ntdb/test/helprun-layout.c +++ b/lib/ntdb/test/helprun-layout.c @@ -94,13 +94,6 @@ static ntdb_len_t data_record_len(struct tle_used *used) return len; } -static ntdb_len_t hashtable_len(struct tle_hashtable *htable) -{ - return sizeof(struct ntdb_used_record) - + (sizeof(ntdb_off_t) << NTDB_SUBLEVEL_HASH_BITS) - + htable->extra; -} - static ntdb_len_t capability_len(struct tle_capability *cap) { return sizeof(struct ntdb_capability) + cap->extra; @@ -128,25 +121,13 @@ static void set_data_record(void *mem, struct ntdb_context *ntdb, struct ntdb_used_record *u = mem; set_header(ntdb, u, NTDB_USED_MAGIC, used->key.dsize, used->data.dsize, - used->key.dsize + used->data.dsize + used->extra, - ntdb_hash(ntdb, used->key.dptr, used->key.dsize)); + used->key.dsize + used->data.dsize + used->extra); memcpy(u + 1, used->key.dptr, used->key.dsize); memcpy((char *)(u + 1) + used->key.dsize, used->data.dptr, used->data.dsize); add_zero_pad(u, used->key.dsize + used->data.dsize, used->extra); } -static void set_hashtable(void *mem, struct ntdb_context *ntdb, - struct tle_hashtable *htable) -{ - struct ntdb_used_record *u = mem; - ntdb_len_t len = sizeof(ntdb_off_t) << NTDB_SUBLEVEL_HASH_BITS; - - set_header(ntdb, u, NTDB_HTABLE_MAGIC, 0, len, len + htable->extra, 0); - memset(u + 1, 0, len); - add_zero_pad(u, len, htable->extra); -} - static void set_capability(void *mem, struct ntdb_context *ntdb, struct tle_capability *cap, struct ntdb_header *hdr, ntdb_off_t last_cap) @@ -156,7 +137,7 @@ static void set_capability(void *mem, struct ntdb_context *ntdb, c->type = cap->type; c->next = 0; - set_header(ntdb, &c->hdr, NTDB_CAP_MAGIC, 0, len, len, 0); + set_header(ntdb, &c->hdr, NTDB_CAP_MAGIC, 0, len, len); /* Append to capability list. */ if (!last_cap) { @@ -175,7 +156,7 @@ static void set_freetable(void *mem, struct ntdb_context *ntdb, memset(ftable, 0, sizeof(*ftable)); set_header(ntdb, &ftable->hdr, NTDB_FTABLE_MAGIC, 0, sizeof(*ftable) - sizeof(ftable->hdr), - sizeof(*ftable) - sizeof(ftable->hdr), 0); + sizeof(*ftable) - sizeof(ftable->hdr)); if (last_ftable) { ftable = (struct ntdb_freetable *)((char *)hdr + last_ftable); @@ -197,12 +178,6 @@ static void add_to_freetable(struct ntdb_context *ntdb, NTDB_LOCK_WAIT, false); } -static ntdb_off_t hbucket_off(ntdb_off_t group_start, unsigned ingroup) -{ - return group_start - + (ingroup % (1 << NTDB_HASH_GROUP_BITS)) * sizeof(ntdb_off_t); -} - /* Get bits from a value. */ static uint32_t bits(uint64_t val, unsigned start, unsigned num) { @@ -210,22 +185,24 @@ static uint32_t bits(uint64_t val, unsigned start, unsigned num) return (val >> start) & ((1U << num) - 1); } -/* We take bits from the top: that way we can lock whole sections of the hash - * by using lock ranges. */ -static uint32_t use_bits(uint64_t h, unsigned num, unsigned *used) +static ntdb_off_t encode_offset(const struct ntdb_context *ntdb, + ntdb_off_t new_off, uint32_t hash) { - *used += num; - return bits(h, 64 - *used, num); + ntdb_off_t extra; + + assert((new_off & (1ULL << NTDB_OFF_CHAIN_BIT)) == 0); + assert((new_off >> (64 - NTDB_OFF_UPPER_STEAL)) == 0); + /* We pack extra hash bits into the upper bits of the offset. */ + extra = bits(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL); + extra <<= (64 - NTDB_OFF_UPPER_STEAL); + + return new_off | extra; } -static ntdb_off_t encode_offset(ntdb_off_t new_off, unsigned bucket, - uint64_t h) +static ntdb_off_t hbucket_off(ntdb_len_t idx) { - return bucket - | new_off - | ((uint64_t)bits(h, 64 - NTDB_OFF_UPPER_STEAL_EXTRA, - NTDB_OFF_UPPER_STEAL_EXTRA) - << NTDB_OFF_HASH_EXTRA_BIT); + return sizeof(struct ntdb_header) + sizeof(struct ntdb_used_record) + + idx * sizeof(ntdb_off_t); } /* FIXME: Our hash table handling here is primitive: we don't expand! */ @@ -233,28 +210,14 @@ static void add_to_hashtable(struct ntdb_context *ntdb, ntdb_off_t eoff, NTDB_DATA key) { - uint64_t h = ntdb_hash(ntdb, key.dptr, key.dsize); - ntdb_off_t b_off, group_start; - unsigned i, group, in_group; - unsigned used = 0; + ntdb_off_t b_off; + uint32_t h = ntdb_hash(ntdb, key.dptr, key.dsize); - group = use_bits(h, NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS, &used); - in_group = use_bits(h, NTDB_HASH_GROUP_BITS, &used); + b_off = hbucket_off(h & ((1 << ntdb->hash_bits)-1)); + if (ntdb_read_off(ntdb, b_off) != 0) + abort(); - group_start = offsetof(struct ntdb_header, hashtable) - + group * (sizeof(ntdb_off_t) << NTDB_HASH_GROUP_BITS); - - for (i = 0; i < (1 << NTDB_HASH_GROUP_BITS); i++) { - unsigned bucket = (in_group + i) % (1 << NTDB_HASH_GROUP_BITS); - - b_off = hbucket_off(group_start, bucket); - if (ntdb_read_off(ntdb, b_off) == 0) { - ntdb_write_off(ntdb, b_off, - encode_offset(eoff, in_group, h)); - return; - } - } - abort(); + ntdb_write_off(ntdb, b_off, encode_offset(ntdb, eoff, h)); } static struct tle_freetable *find_ftable(struct ntdb_layout *layout, unsigned num) @@ -277,11 +240,16 @@ struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout, union ntdb_attribute *attr) { unsigned int i; - ntdb_off_t off, len, last_ftable, last_cap; + ntdb_off_t off, hdrlen, len, last_ftable, last_cap; char *mem; struct ntdb_context *ntdb; - off = sizeof(struct ntdb_header); + /* Now populate our header, cribbing from a real NTDB header. */ + ntdb = ntdb_open("layout", NTDB_INTERNAL, O_RDWR, 0, attr); + + off = sizeof(struct ntdb_header) + sizeof(struct ntdb_used_record) + + (sizeof(ntdb_off_t) << ntdb->hash_bits); + hdrlen = off; /* First pass of layout: calc lengths */ for (i = 0; i < layout->num_elems; i++) { @@ -297,9 +265,6 @@ struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout, case DATA: len = data_record_len(&e->used); break; - case HASHTABLE: - len = hashtable_len(&e->hashtable); - break; case CAPABILITY: len = capability_len(&e->capability); break; @@ -312,9 +277,7 @@ struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout, mem = malloc(off); /* Fill with some weird pattern. */ memset(mem, 0x99, off); - /* Now populate our header, cribbing from a real NTDB header. */ - ntdb = ntdb_open("layout", NTDB_INTERNAL, O_RDWR, 0, attr); - memcpy(mem, ntdb->file->map_ptr, sizeof(struct ntdb_header)); + memcpy(mem, ntdb->file->map_ptr, hdrlen); /* Mug the ntdb we have to make it use this. */ freefn(ntdb->file->map_ptr); @@ -337,9 +300,6 @@ struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout, case DATA: set_data_record(mem + e->base.off, ntdb, &e->used); break; - case HASHTABLE: - set_hashtable(mem + e->base.off, ntdb, &e->hashtable); - break; case CAPABILITY: set_capability(mem + e->base.off, ntdb, &e->capability, (struct ntdb_header *)mem, last_cap); diff --git a/lib/ntdb/test/layout.h b/lib/ntdb/test/layout.h index bcd20b8965..b4f6a960eb 100644 --- a/lib/ntdb/test/layout.h +++ b/lib/ntdb/test/layout.h @@ -32,7 +32,7 @@ void ntdb_layout_write(struct ntdb_layout *layout, void (*freefn)(void *), void ntdb_layout_free(struct ntdb_layout *layout); enum layout_type { - FREETABLE, FREE, DATA, HASHTABLE, CAPABILITY + FREETABLE, FREE, DATA, CAPABILITY }; /* Shared by all union members. */ @@ -58,13 +58,6 @@ struct tle_used { ntdb_len_t extra; }; -struct tle_hashtable { - struct tle_base base; - int parent; - unsigned int bucket; - ntdb_len_t extra; -}; - struct tle_capability { struct tle_base base; uint64_t type; @@ -76,7 +69,6 @@ union ntdb_layout_elem { struct tle_freetable ftable; struct tle_free free; struct tle_used used; - struct tle_hashtable hashtable; struct tle_capability capability; }; diff --git a/lib/ntdb/test/run-001-encode.c b/lib/ntdb/test/run-001-encode.c index 12965676a2..b8a61bee8c 100644 --- a/lib/ntdb/test/run-001-encode.c +++ b/lib/ntdb/test/run-001-encode.c @@ -8,32 +8,30 @@ int main(int argc, char *argv[]) struct ntdb_used_record rec; struct ntdb_context ntdb = { .log_fn = tap_log_fn }; - plan_tests(64 + 32 + 48*6 + 1); + plan_tests(64 + 32 + 48*5 + 1); /* We should be able to encode any data value. */ for (i = 0; i < 64; i++) ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, 0, 1ULL << i, - 1ULL << i, 0) == 0); + 1ULL << i) == 0); /* And any key and data with < 64 bits between them. */ for (i = 0; i < 32; i++) { ntdb_len_t dlen = 1ULL >> (63 - i), klen = 1ULL << i; ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, klen, dlen, - klen + dlen, 0) == 0); + klen + dlen) == 0); } /* We should neatly encode all values. */ for (i = 0; i < 48; i++) { - uint64_t h = 1ULL << (i < 5 ? i : 4); uint64_t klen = 1ULL << (i < 16 ? i : 15); uint64_t dlen = 1ULL << i; uint64_t xlen = 1ULL << (i < 32 ? i : 31); ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, klen, dlen, - klen+dlen+xlen, h) == 0); + klen+dlen+xlen) == 0); ok1(rec_key_length(&rec) == klen); ok1(rec_data_length(&rec) == dlen); ok1(rec_extra_padding(&rec) == xlen); - ok1((uint64_t)rec_hash(&rec) == h); ok1(rec_magic(&rec) == NTDB_USED_MAGIC); } ok1(tap_log_messages == 0); diff --git a/lib/ntdb/test/run-02-expand.c b/lib/ntdb/test/run-02-expand.c index abf1569388..2c8b1a291b 100644 --- a/lib/ntdb/test/run-02-expand.c +++ b/lib/ntdb/test/run-02-expand.c @@ -29,7 +29,7 @@ int main(int argc, char *argv[]) val = ntdb->file->map_size; /* Need some hash lock for expand. */ - ok1(ntdb_lock_hashes(ntdb, 0, 1, F_WRLCK, NTDB_LOCK_WAIT) == 0); + ok1(ntdb_lock_hash(ntdb, 0, F_WRLCK) == 0); failtest_suppress = false; if (!ok1(ntdb_expand(ntdb, 1) == 0)) { failtest_suppress = true; @@ -39,11 +39,11 @@ int main(int argc, char *argv[]) failtest_suppress = true; ok1(ntdb->file->map_size >= val + 1 * NTDB_EXTENSION_FACTOR); - ok1(ntdb_unlock_hashes(ntdb, 0, 1, F_WRLCK) == 0); + ok1(ntdb_unlock_hash(ntdb, 0, F_WRLCK) == 0); ok1(ntdb_check(ntdb, NULL, NULL) == 0); val = ntdb->file->map_size; - ok1(ntdb_lock_hashes(ntdb, 0, 1, F_WRLCK, NTDB_LOCK_WAIT) == 0); + ok1(ntdb_lock_hash(ntdb, 0, F_WRLCK) == 0); failtest_suppress = false; if (!ok1(ntdb_expand(ntdb, 1024) == 0)) { failtest_suppress = true; @@ -51,7 +51,7 @@ int main(int argc, char *argv[]) break; } failtest_suppress = true; - ok1(ntdb_unlock_hashes(ntdb, 0, 1, F_WRLCK) == 0); + ok1(ntdb_unlock_hash(ntdb, 0, F_WRLCK) == 0); ok1(ntdb->file->map_size >= val + 1024 * NTDB_EXTENSION_FACTOR); ok1(ntdb_check(ntdb, NULL, NULL) == 0); ntdb_close(ntdb); diff --git a/lib/ntdb/test/run-03-coalesce.c b/lib/ntdb/test/run-03-coalesce.c index 22a6817881..363c078fc6 100644 --- a/lib/ntdb/test/run-03-coalesce.c +++ b/lib/ntdb/test/run-03-coalesce.c @@ -34,7 +34,7 @@ int main(int argc, char *argv[]) /* No coalescing can be done due to EOF */ layout = new_ntdb_layout(); ntdb_layout_add_freetable(layout); - len = 56544; + len = 15560; ntdb_layout_add_free(layout, len, 0); ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb"); /* NOMMAP is for lockcheck. */ @@ -60,24 +60,24 @@ int main(int argc, char *argv[]) /* No coalescing can be done due to used record */ layout = new_ntdb_layout(); ntdb_layout_add_freetable(layout); - ntdb_layout_add_free(layout, 56512, 0); + ntdb_layout_add_free(layout, 15528, 0); ntdb_layout_add_used(layout, key, data, 6); ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb"); /* NOMMAP is for lockcheck. */ ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP, O_RDWR, 0, &tap_log_attr); - ok1(free_record_length(ntdb, layout->elem[1].base.off) == 56512); + ok1(free_record_length(ntdb, layout->elem[1].base.off) == 15528); ok1(ntdb_check(ntdb, NULL, NULL) == 0); /* Figure out which bucket free entry is. */ - b_off = bucket_off(ntdb->ftable_off, size_to_bucket(56512)); + b_off = bucket_off(ntdb->ftable_off, size_to_bucket(15528)); /* Lock and fail to coalesce. */ ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0); test = layout->elem[1].base.off; - ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 56512, &test) + ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 15528, &test) == 0); ntdb_unlock_free_bucket(ntdb, b_off); - ok1(free_record_length(ntdb, layout->elem[1].base.off) == 56512); + ok1(free_record_length(ntdb, layout->elem[1].base.off) == 15528); ok1(test == layout->elem[1].base.off); ok1(ntdb_check(ntdb, NULL, NULL) == 0); ntdb_close(ntdb); @@ -87,13 +87,13 @@ int main(int argc, char *argv[]) layout = new_ntdb_layout(); ntdb_layout_add_freetable(layout); ntdb_layout_add_free(layout, 1024, 0); - ntdb_layout_add_free(layout, 55504, 0); + ntdb_layout_add_free(layout, 14520, 0); ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb"); /* NOMMAP is for lockcheck. */ ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP, O_RDWR, 0, &tap_log_attr); ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024); - ok1(free_record_length(ntdb, layout->elem[2].base.off) == 55504); + ok1(free_record_length(ntdb, layout->elem[2].base.off) == 14520); ok1(ntdb_check(ntdb, NULL, NULL) == 0); /* Figure out which bucket (first) free entry is. */ @@ -102,12 +102,12 @@ int main(int argc, char *argv[]) ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0); test = layout->elem[2].base.off; ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test) - == 1024 + sizeof(struct ntdb_used_record) + 55504); + == 1024 + sizeof(struct ntdb_used_record) + 14520); /* Should tell us it's erased this one... */ ok1(test == NTDB_ERR_NOEXIST); ok1(ntdb->file->allrecord_lock.count == 0 && ntdb->file->num_lockrecs == 0); ok1(free_record_length(ntdb, layout->elem[1].base.off) - == 1024 + sizeof(struct ntdb_used_record) + 55504); + == 1024 + sizeof(struct ntdb_used_record) + 14520); ok1(ntdb_check(ntdb, NULL, NULL) == 0); ntdb_close(ntdb); ntdb_layout_free(layout); @@ -116,14 +116,14 @@ int main(int argc, char *argv[]) layout = new_ntdb_layout(); ntdb_layout_add_freetable(layout); ntdb_layout_add_free(layout, 1024, 0); - ntdb_layout_add_free(layout, 55472, 0); + ntdb_layout_add_free(layout, 14488, 0); ntdb_layout_add_used(layout, key, data, 6); ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb"); /* NOMMAP is for lockcheck. */ ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP, O_RDWR, 0, &tap_log_attr); ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024); - ok1(free_record_length(ntdb, layout->elem[2].base.off) == 55472); + ok1(free_record_length(ntdb, layout->elem[2].base.off) == 14488); ok1(ntdb_check(ntdb, NULL, NULL) == 0); /* Figure out which bucket free entry is. */ @@ -132,10 +132,10 @@ int main(int argc, char *argv[]) ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0); test = layout->elem[2].base.off; ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test) - == 1024 + sizeof(struct ntdb_used_record) + 55472); + == 1024 + sizeof(struct ntdb_used_record) + 14488); ok1(ntdb->file->allrecord_lock.count == 0 && ntdb->file->num_lockrecs == 0); ok1(free_record_length(ntdb, layout->elem[1].base.off) - == 1024 + sizeof(struct ntdb_used_record) + 55472); + == 1024 + sizeof(struct ntdb_used_record) + 14488); ok1(test == NTDB_ERR_NOEXIST); ok1(ntdb_check(ntdb, NULL, NULL) == 0); ntdb_close(ntdb); @@ -146,14 +146,14 @@ int main(int argc, char *argv[]) ntdb_layout_add_freetable(layout); ntdb_layout_add_free(layout, 1024, 0); ntdb_layout_add_free(layout, 512, 0); - ntdb_layout_add_free(layout, 54976, 0); + ntdb_layout_add_free(layout, 13992, 0); ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb"); /* NOMMAP is for lockcheck. */ ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP, O_RDWR, 0, &tap_log_attr); ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024); ok1(free_record_length(ntdb, layout->elem[2].base.off) == 512); - ok1(free_record_length(ntdb, layout->elem[3].base.off) == 54976); + ok1(free_record_length(ntdb, layout->elem[3].base.off) == 13992); ok1(ntdb_check(ntdb, NULL, NULL) == 0); /* Figure out which bucket free entry is. */ @@ -163,12 +163,12 @@ int main(int argc, char *argv[]) test = layout->elem[2].base.off; ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test) == 1024 + sizeof(struct ntdb_used_record) + 512 - + sizeof(struct ntdb_used_record) + 54976); + + sizeof(struct ntdb_used_record) + 13992); ok1(ntdb->file->allrecord_lock.count == 0 && ntdb->file->num_lockrecs == 0); ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024 + sizeof(struct ntdb_used_record) + 512 - + sizeof(struct ntdb_used_record) + 54976); + + sizeof(struct ntdb_used_record) + 13992); ok1(ntdb_check(ntdb, NULL, NULL) == 0); ntdb_close(ntdb); ntdb_layout_free(layout); diff --git a/lib/ntdb/test/run-04-basichash.c b/lib/ntdb/test/run-04-basichash.c index 6e3bdc012d..41b49239cb 100644 --- a/lib/ntdb/test/run-04-basichash.c +++ b/lib/ntdb/test/run-04-basichash.c @@ -2,16 +2,15 @@ #include "tap-interface.h" #include "logging.h" -/* We rig the hash so adjacent-numbered records always clash. */ -static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv) +/* We rig the hash so all records clash. */ +static uint32_t clash(const void *key, size_t len, uint32_t seed, void *priv) { - return ((uint64_t)*(const unsigned int *)key) - << (64 - NTDB_TOPLEVEL_HASH_BITS - 1); + return *((const unsigned int *)key) << 20; } int main(int argc, char *argv[]) { - unsigned int i, j; + unsigned int i; struct ntdb_context *ntdb; unsigned int v; struct ntdb_used_record rec; @@ -26,11 +25,10 @@ int main(int argc, char *argv[]) hattr.base.next = &tap_log_attr; - plan_tests(sizeof(flags) / sizeof(flags[0]) - * (91 + (2 * ((1 << NTDB_HASH_GROUP_BITS) - 1))) + 1); + plan_tests(sizeof(flags) / sizeof(flags[0]) * 137 + 1); for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { struct hash_info h; - ntdb_off_t new_off, off, subhash; + ntdb_off_t new_off, new_off2, off; ntdb = ntdb_open("run-04-basichash.ntdb", flags[i], O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr); @@ -40,26 +38,24 @@ int main(int argc, char *argv[]) v = 0; /* Should not find it. */ - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0); + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == 0); /* Should have created correct hash. */ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have located space in group 0, bucket 0. */ - ok1(h.group_start == offsetof(struct ntdb_header, hashtable)); - ok1(h.home_bucket == 0); - ok1(h.found_bucket == 0); - ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS); + /* Should have located space in top table, bucket 0. */ + ok1(h.table == NTDB_HASH_OFFSET); + ok1(h.table_size == (1 << ntdb->hash_bits)); + ok1(h.bucket == 0); + ok1(h.old_val == 0); /* Should have lock on bucket 0 */ - ok1(h.hlock_start == 0); - ok1(h.hlock_range == - 1ULL << (64-(NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS))); + ok1(h.h == 0); ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); /* FIXME: Check lock length */ /* Allocate a new record. */ - new_off = alloc(ntdb, key.dsize, dbuf.dsize, h.h, + new_off = alloc(ntdb, key.dsize, dbuf.dsize, NTDB_USED_MAGIC, false); ok1(!NTDB_OFF_IS_ERR(new_off)); @@ -73,91 +69,93 @@ int main(int argc, char *argv[]) ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize)); /* We should be able to unlock that OK. */ - ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, - F_WRLCK) == 0); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); /* Database should be consistent. */ ok1(ntdb_check(ntdb, NULL, NULL) == 0); /* Now, this should give a successful lookup. */ - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) - == new_off); + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == new_off); /* Should have created correct hash. */ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have located space in group 0, bucket 0. */ - ok1(h.group_start == offsetof(struct ntdb_header, hashtable)); - ok1(h.home_bucket == 0); - ok1(h.found_bucket == 0); - ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS); + /* Should have located it in top table, bucket 0. */ + ok1(h.table == NTDB_HASH_OFFSET); + ok1(h.table_size == (1 << ntdb->hash_bits)); + ok1(h.bucket == 0); /* Should have lock on bucket 0 */ - ok1(h.hlock_start == 0); - ok1(h.hlock_range == - 1ULL << (64-(NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS))); + ok1(h.h == 0); ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); /* FIXME: Check lock length */ - ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, - F_WRLCK) == 0); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); /* Database should be consistent. */ ok1(ntdb_check(ntdb, NULL, NULL) == 0); /* Test expansion. */ v = 1; - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0); + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == 0); /* Should have created correct hash. */ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have located space in group 0, bucket 1. */ - ok1(h.group_start == offsetof(struct ntdb_header, hashtable)); - ok1(h.home_bucket == 0); - ok1(h.found_bucket == 1); - ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS); + /* Should have located clash in toplevel bucket 0. */ + ok1(h.table == NTDB_HASH_OFFSET); + ok1(h.table_size == (1 << ntdb->hash_bits)); + ok1(h.bucket == 0); + ok1((h.old_val & NTDB_OFF_MASK) == new_off); /* Should have lock on bucket 0 */ - ok1(h.hlock_start == 0); - ok1(h.hlock_range == - 1ULL << (64-(NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS))); + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); /* FIXME: Check lock length */ - /* Make it expand 0'th bucket. */ - ok1(expand_group(ntdb, &h) == 0); - /* First one should be subhash, next should be empty. */ - ok1(is_subhash(h.group[0])); - subhash = (h.group[0] & NTDB_OFF_MASK); - for (j = 1; j < (1 << NTDB_HASH_GROUP_BITS); j++) - ok1(h.group[j] == 0); + new_off2 = alloc(ntdb, key.dsize, dbuf.dsize, + NTDB_USED_MAGIC, false); + ok1(!NTDB_OFF_IS_ERR(new_off2)); - ok1(ntdb_write_convert(ntdb, h.group_start, - h.group, sizeof(h.group)) == 0); - ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, - F_WRLCK) == 0); + off = new_off2 + sizeof(struct ntdb_used_record); + ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize)); + off += key.dsize; + ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize)); + + /* We should be able to add it now. */ + ok1(add_to_hash(ntdb, &h, new_off2) == 0); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); /* Should be happy with expansion. */ ok1(ntdb_check(ntdb, NULL, NULL) == 0); - /* Should be able to find it. */ + /* Should be able to find both. */ + v = 1; + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == new_off2); + /* Should have created correct hash. */ + ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); + /* Should have located space in chain. */ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 2); + ok1(h.bucket == 1); + /* Should have lock on bucket 0 */ + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); + ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); + ok1((ntdb->flags & NTDB_NOLOCK) + || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); + v = 0; - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) - == new_off); + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == new_off); /* Should have created correct hash. */ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have located space in expanded group 0, bucket 0. */ - ok1(h.group_start == subhash + sizeof(struct ntdb_used_record)); - ok1(h.home_bucket == 0); - ok1(h.found_bucket == 0); - ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS - + NTDB_SUBLEVEL_HASH_BITS); + /* Should have located space in chain. */ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 2); + ok1(h.bucket == 0); /* Should have lock on bucket 0 */ - ok1(h.hlock_start == 0); - ok1(h.hlock_range == - 1ULL << (64-(NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS))); + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); @@ -171,86 +169,149 @@ int main(int argc, char *argv[]) + rec_data_length(&rec) + rec_extra_padding(&rec), NTDB_LOCK_NOWAIT, false) == 0); - ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, - F_WRLCK) == 0); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); ok1(ntdb_check(ntdb, NULL, NULL) == 0); - /* Test second-level expansion: should expand 0th bucket. */ + /* Should still be able to find other record. */ + v = 1; + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == new_off2); + /* Should have created correct hash. */ + ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); + /* Should have located space in chain. */ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 2); + ok1(h.bucket == 1); + /* Should have lock on bucket 0 */ + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); + ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); + ok1((ntdb->flags & NTDB_NOLOCK) + || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); + + /* Now should find empty space. */ v = 0; - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0); + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == 0); /* Should have created correct hash. */ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have located space in group 0, bucket 0. */ - ok1(h.group_start == subhash + sizeof(struct ntdb_used_record)); - ok1(h.home_bucket == 0); - ok1(h.found_bucket == 0); - ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS+NTDB_SUBLEVEL_HASH_BITS); + /* Should have located space in chain, bucket 0. */ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 2); + ok1(h.bucket == 0); + ok1(h.old_val == 0); + + /* Adding another record should work. */ + v = 2; + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == 0); + /* Should have created correct hash. */ + ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); + /* Should have located space in chain, bucket 0. */ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 2); + ok1(h.bucket == 0); + ok1(h.old_val == 0); /* Should have lock on bucket 0 */ - ok1(h.hlock_start == 0); - ok1(h.hlock_range == - 1ULL << (64-(NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS))); + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); - /* FIXME: Check lock length */ - ok1(expand_group(ntdb, &h) == 0); - /* First one should be subhash, next should be empty. */ - ok1(is_subhash(h.group[0])); - subhash = (h.group[0] & NTDB_OFF_MASK); - for (j = 1; j < (1 << NTDB_HASH_GROUP_BITS); j++) - ok1(h.group[j] == 0); - ok1(ntdb_write_convert(ntdb, h.group_start, - h.group, sizeof(h.group)) == 0); - ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, - F_WRLCK) == 0); + new_off = alloc(ntdb, key.dsize, dbuf.dsize, + NTDB_USED_MAGIC, false); + ok1(!NTDB_OFF_IS_ERR(new_off2)); + ok1(add_to_hash(ntdb, &h, new_off) == 0); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); - /* Should be happy with expansion. */ - ok1(ntdb_check(ntdb, NULL, NULL) == 0); + off = new_off + sizeof(struct ntdb_used_record); + ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize)); + off += key.dsize; + ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize)); - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0); + /* Adding another record should cause expansion. */ + v = 3; + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == 0); /* Should have created correct hash. */ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have located space in group 0, bucket 0. */ - ok1(h.group_start == subhash + sizeof(struct ntdb_used_record)); - ok1(h.home_bucket == 0); - ok1(h.found_bucket == 0); - ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS - + NTDB_SUBLEVEL_HASH_BITS * 2); + /* Should not have located space in chain. */ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 2); + ok1(h.bucket == 2); + ok1(h.old_val != 0); - /* We should be able to add it now. */ - /* Allocate a new record. */ - new_off = alloc(ntdb, key.dsize, dbuf.dsize, h.h, - NTDB_USED_MAGIC, false); - ok1(!NTDB_OFF_IS_ERR(new_off)); - ok1(add_to_hash(ntdb, &h, new_off) == 0); + /* Should have lock on bucket 0 */ + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); + ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); + ok1((ntdb->flags & NTDB_NOLOCK) + || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); - /* Make sure we fill it in for later finding. */ + new_off = alloc(ntdb, key.dsize, dbuf.dsize, + NTDB_USED_MAGIC, false); + ok1(!NTDB_OFF_IS_ERR(new_off2)); off = new_off + sizeof(struct ntdb_used_record); ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize)); off += key.dsize; ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize)); + ok1(add_to_hash(ntdb, &h, new_off) == 0); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); - /* We should be able to unlock that OK. */ - ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, - F_WRLCK) == 0); + /* Retrieve it and check. */ + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == new_off); + /* Should have created correct hash. */ + ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); + /* Should have appended to chain, bucket 2. */ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 3); + ok1(h.bucket == 2); - /* Database should be consistent. */ - ok1(ntdb_check(ntdb, NULL, NULL) == 0); + /* Should have lock on bucket 0 */ + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); + ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); + ok1((ntdb->flags & NTDB_NOLOCK) + || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); - /* Should be able to find it. */ - v = 0; - ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) - == new_off); + /* YA record: relocation. */ + v = 4; + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == 0); /* Should have created correct hash. */ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have located space in expanded group 0, bucket 0. */ - ok1(h.group_start == subhash + sizeof(struct ntdb_used_record)); - ok1(h.home_bucket == 0); - ok1(h.found_bucket == 0); - ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS - + NTDB_SUBLEVEL_HASH_BITS * 2); + /* Should not have located space in chain. */ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 3); + ok1(h.bucket == 3); + ok1(h.old_val != 0); + + /* Should have lock on bucket 0 */ + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); + ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); + ok1((ntdb->flags & NTDB_NOLOCK) + || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); + + new_off = alloc(ntdb, key.dsize, dbuf.dsize, + NTDB_USED_MAGIC, false); + ok1(!NTDB_OFF_IS_ERR(new_off2)); + off = new_off + sizeof(struct ntdb_used_record); + ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize)); + off += key.dsize; + ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize)); + ok1(add_to_hash(ntdb, &h, new_off) == 0); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); + + /* Retrieve it and check. */ + ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == new_off); + /* Should have created correct hash. */ + ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); + /* Should have appended to chain, bucket 2. */ + ok1(h.table > NTDB_HASH_OFFSET); + ok1(h.table_size == 4); + ok1(h.bucket == 3); + + /* Should have lock on bucket 0 */ + ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0); + ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1); + ok1((ntdb->flags & NTDB_NOLOCK) + || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START); + ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0); ntdb_close(ntdb); } diff --git a/lib/ntdb/test/run-15-append.c b/lib/ntdb/test/run-15-append.c index 3c208137f2..97fd53c241 100644 --- a/lib/ntdb/test/run-15-append.c +++ b/lib/ntdb/test/run-15-append.c @@ -12,10 +12,10 @@ static ntdb_off_t ntdb_offset(struct ntdb_context *ntdb, NTDB_DATA key) struct ntdb_used_record urec; struct hash_info h; - off = find_and_lock(ntdb, key, F_RDLCK, &h, &urec, NULL); + off = find_and_lock(ntdb, key, F_RDLCK, &h, &urec); if (NTDB_OFF_IS_ERR(off)) return 0; - ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK); + ntdb_unlock_hash(ntdb, h.h, F_RDLCK); return off; } diff --git a/lib/ntdb/test/run-20-growhash.c b/lib/ntdb/test/run-20-growhash.c deleted file mode 100644 index 5559370f2a..0000000000 --- a/lib/ntdb/test/run-20-growhash.c +++ /dev/null @@ -1,137 +0,0 @@ -#include "ntdb-source.h" -#include "tap-interface.h" -#include "logging.h" - -static uint64_t myhash(const void *key, size_t len, uint64_t seed, void *priv) -{ - return *(const uint64_t *)key; -} - -static void add_bits(uint64_t *val, unsigned new, unsigned new_bits, - unsigned *done) -{ - *done += new_bits; - *val |= ((uint64_t)new << (64 - *done)); -} - -static uint64_t make_key(unsigned topgroup, unsigned topbucket, - unsigned subgroup1, unsigned subbucket1, - unsigned subgroup2, unsigned subbucket2) -{ - uint64_t key = 0; - unsigned done = 0; - - add_bits(&key, topgroup, NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS, - &done); - add_bits(&key, topbucket, NTDB_HASH_GROUP_BITS, &done); - add_bits(&key, subgroup1, NTDB_SUBLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS, - &done); - add_bits(&key, subbucket1, NTDB_HASH_GROUP_BITS, &done); - add_bits(&key, subgroup2, NTDB_SUBLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS, - &done); - add_bits(&key, subbucket2, NTDB_HASH_GROUP_BITS, &done); - return key; -} - -int main(int argc, char *argv[]) -{ - unsigned int i, j; - struct ntdb_context *ntdb; - uint64_t kdata; - struct ntdb_used_record rec; - NTDB_DATA key = { (unsigned char *)&kdata, sizeof(kdata) }; - NTDB_DATA dbuf = { (unsigned char *)&kdata, sizeof(kdata) }; - union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH }, - .fn = myhash } }; - int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP, - NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT, - NTDB_NOMMAP|NTDB_CONVERT, - }; - - hattr.base.next = &tap_log_attr; - - plan_tests(sizeof(flags) / sizeof(flags[0]) - * (9 + (20 + 2 * ((1 << NTDB_HASH_GROUP_BITS) - 2)) - * (1 << NTDB_HASH_GROUP_BITS)) + 1); - for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - struct hash_info h; - - ntdb = ntdb_open("run-20-growhash.ntdb", flags[i], - O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr); - ok1(ntdb); - if (!ntdb) - continue; - - /* Fill a group. */ - for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++) { - kdata = make_key(0, j, 0, 0, 0, 0); - ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0); - } - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - /* Check first still exists. */ - kdata = make_key(0, 0, 0, 0, 0, 0); - ok1(find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL) != 0); - /* Should have created correct hash. */ - ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have located space in group 0, bucket 0. */ - ok1(h.group_start == offsetof(struct ntdb_header, hashtable)); - ok1(h.home_bucket == 0); - ok1(h.found_bucket == 0); - ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS); - /* Entire group should be full! */ - for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++) - ok1(h.group[j] != 0); - - ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, - F_RDLCK) == 0); - - /* Now, add one more to each should expand (that) bucket. */ - for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++) { - unsigned int k; - kdata = make_key(0, j, 0, 1, 0, 0); - ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - ok1(find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL)); - /* Should have created correct hash. */ - ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have moved to subhash */ - ok1(h.group_start >= sizeof(struct ntdb_header)); - ok1(h.home_bucket == 1); - ok1(h.found_bucket == 1); - ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS - + NTDB_SUBLEVEL_HASH_BITS); - ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, - F_RDLCK) == 0); - - /* Keep adding, make it expand again. */ - for (k = 2; k < (1 << NTDB_HASH_GROUP_BITS); k++) { - kdata = make_key(0, j, 0, k, 0, 0); - ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - } - - /* This should tip it over to sub-sub-hash. */ - kdata = make_key(0, j, 0, 0, 0, 1); - ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - - ok1(find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL)); - /* Should have created correct hash. */ - ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize)); - /* Should have moved to subhash */ - ok1(h.group_start >= sizeof(struct ntdb_header)); - ok1(h.home_bucket == 1); - ok1(h.found_bucket == 1); - ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS - + NTDB_SUBLEVEL_HASH_BITS + NTDB_SUBLEVEL_HASH_BITS); - ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, - F_RDLCK) == 0); - } - ntdb_close(ntdb); - } - - ok1(tap_log_messages == 0); - return exit_status(); -} diff --git a/lib/ntdb/test/run-25-hashoverload.c b/lib/ntdb/test/run-25-hashoverload.c index 611eb71bf6..1b69fe9a61 100644 --- a/lib/ntdb/test/run-25-hashoverload.c +++ b/lib/ntdb/test/run-25-hashoverload.c @@ -2,7 +2,9 @@ #include "tap-interface.h" #include "logging.h" -static uint64_t badhash(const void *key, size_t len, uint64_t seed, void *priv) +#define OVERLOAD 100 + +static uint32_t badhash(const void *key, size_t len, uint32_t seed, void *priv) { return 0; } @@ -29,7 +31,7 @@ int main(int argc, char *argv[]) hattr.base.next = &tap_log_attr; - plan_tests(6883); + plan_tests(sizeof(flags) / sizeof(flags[0]) * (7 * OVERLOAD + 11) + 1); for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */ @@ -39,70 +41,47 @@ int main(int argc, char *argv[]) if (!ntdb) continue; - /* Fill a group. */ - for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++) { + /* Overload a bucket. */ + for (j = 0; j < OVERLOAD; j++) { ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0); } ok1(ntdb_check(ntdb, NULL, NULL) == 0); - /* Now store one last value: should form chain. */ - ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0); - ok1(ntdb_check(ntdb, NULL, NULL) == 0); - /* Check we can find them all. */ - for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS) + 1; j++) { - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); - ok1(d.dsize == sizeof(j)); - ok1(d.dptr != NULL); - ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0); - free(d.dptr); - } - - /* Now add a *lot* more. */ - for (j = (1 << NTDB_HASH_GROUP_BITS) + 1; - j < (16 << NTDB_HASH_GROUP_BITS); - j++) { - ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0); + for (j = 0; j < OVERLOAD; j++) { ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); ok1(d.dsize == sizeof(j)); ok1(d.dptr != NULL); ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0); free(d.dptr); } - ok1(ntdb_check(ntdb, NULL, NULL) == 0); /* Traverse through them. */ - ok1(ntdb_traverse(ntdb, trav, NULL) == j); + ok1(ntdb_traverse(ntdb, trav, NULL) == OVERLOAD); - /* Empty the first chain-worth. */ - for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++) + /* Delete the first 99. */ + for (j = 0; j < OVERLOAD-1; j++) ok1(ntdb_delete(ntdb, key) == 0); ok1(ntdb_check(ntdb, NULL, NULL) == 0); - for (j = (1 << NTDB_HASH_GROUP_BITS); - j < (16 << NTDB_HASH_GROUP_BITS); - j++) { - ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); - ok1(d.dsize == sizeof(j)); - ok1(d.dptr != NULL); - ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0); - free(d.dptr); - } + ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS); + ok1(d.dsize == sizeof(j)); + ok1(d.dptr != NULL); + ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0); + free(d.dptr); /* Traverse through them. */ - ok1(ntdb_traverse(ntdb, trav, NULL) - == (15 << NTDB_HASH_GROUP_BITS)); + ok1(ntdb_traverse(ntdb, trav, NULL) == 1); /* Re-add */ - for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++) { + for (j = 0; j < OVERLOAD-1; j++) { ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0); } ok1(ntdb_check(ntdb, NULL, NULL) == 0); /* Now try deleting as we go. */ - ok1(ntdb_traverse(ntdb, trav, trav) - == (16 << NTDB_HASH_GROUP_BITS)); + ok1(ntdb_traverse(ntdb, trav, trav) == OVERLOAD); ok1(ntdb_check(ntdb, NULL, NULL) == 0); ok1(ntdb_traverse(ntdb, trav, NULL) == 0); ntdb_close(ntdb); diff --git a/lib/ntdb/test/run-30-exhaust-before-expand.c b/lib/ntdb/test/run-30-exhaust-before-expand.c index 24c48b005a..cc9ea3fa3d 100644 --- a/lib/ntdb/test/run-30-exhaust-before-expand.c +++ b/lib/ntdb/test/run-30-exhaust-before-expand.c @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) size = ntdb->file->map_size; /* Create one record to chew up most space. */ - d.dsize = size - sizeof(struct new_database) - 32; + d.dsize = size - NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 32; d.dptr = calloc(d.dsize, 1); j = 0; ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0); diff --git a/lib/ntdb/test/run-35-convert.c b/lib/ntdb/test/run-35-convert.c index 6a38d425cb..5b8099c448 100644 --- a/lib/ntdb/test/run-35-convert.c +++ b/lib/ntdb/test/run-35-convert.c @@ -24,6 +24,10 @@ int main(int argc, char *argv[]) failtest_exit(exit_status()); ntdb_close(ntdb); + /* We can fail in log message formatting or open. That's OK */ + if (failtest_has_failed()) { + failtest_exit(exit_status()); + } /* If we say NTDB_CONVERT, it must be converted */ ntdb = ntdb_open("run-35-convert.ntdb", flags[i]|NTDB_CONVERT, diff --git a/lib/ntdb/test/run-50-multiple-freelists.c b/lib/ntdb/test/run-50-multiple-freelists.c index 962462e5d4..5496e3e0dd 100644 --- a/lib/ntdb/test/run-50-multiple-freelists.c +++ b/lib/ntdb/test/run-50-multiple-freelists.c @@ -39,27 +39,27 @@ int main(int argc, char *argv[]) ok1(ntdb_check(ntdb, NULL, NULL) == 0); off = get_free(ntdb, 0, 80 - sizeof(struct ntdb_used_record), 0, - NTDB_USED_MAGIC, 0); + NTDB_USED_MAGIC); ok1(off == layout->elem[3].base.off); ok1(ntdb->ftable_off == layout->elem[0].base.off); off = get_free(ntdb, 0, 160 - sizeof(struct ntdb_used_record), 0, - NTDB_USED_MAGIC, 0); + NTDB_USED_MAGIC); ok1(off == layout->elem[5].base.off); ok1(ntdb->ftable_off == layout->elem[1].base.off); off = get_free(ntdb, 0, 320 - sizeof(struct ntdb_used_record), 0, - NTDB_USED_MAGIC, 0); + NTDB_USED_MAGIC); ok1(off == layout->elem[7].base.off); ok1(ntdb->ftable_off == layout->elem[2].base.off); off = get_free(ntdb, 0, 40 - sizeof(struct ntdb_used_record), 0, - NTDB_USED_MAGIC, 0); + NTDB_USED_MAGIC); ok1(off == layout->elem[9].base.off); ok1(ntdb->ftable_off == layout->elem[0].base.off); /* Now we fail. */ - off = get_free(ntdb, 0, 0, 1, NTDB_USED_MAGIC, 0); + off = get_free(ntdb, 0, 0, 1, NTDB_USED_MAGIC); ok1(off == 0); ntdb_close(ntdb); diff --git a/lib/ntdb/test/run-64-bit-tdb.c b/lib/ntdb/test/run-64-bit-tdb.c index a85a4af56c..5afdd8747c 100644 --- a/lib/ntdb/test/run-64-bit-tdb.c +++ b/lib/ntdb/test/run-64-bit-tdb.c @@ -39,7 +39,8 @@ int main(int argc, char *argv[]) /* Add a fake record to chew up the existing free space. */ k = ntdb_mkdata("fake", 4); - d.dsize = ntdb->file->map_size - sizeof(struct new_database)- 8; + d.dsize = ntdb->file->map_size + - NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 8; d.dptr = malloc(d.dsize); memset(d.dptr, 0, d.dsize); ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0); @@ -66,9 +67,9 @@ int main(int argc, char *argv[]) ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS); /* Make sure it put it at end as we expected. */ - off = find_and_lock(ntdb, k, F_RDLCK, &h, &rec, NULL); + off = find_and_lock(ntdb, k, F_RDLCK, &h, &rec); ok1(off >= ALMOST_4G); - ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK); + ntdb_unlock_hash(ntdb, h.h, F_RDLCK); ok1(ntdb_fetch(ntdb, k, &d) == 0); ok1(d.dsize == 5); diff --git a/lib/ntdb/test/run-90-get-set-attributes.c b/lib/ntdb/test/run-90-get-set-attributes.c index fc265b0729..4f8792569f 100644 --- a/lib/ntdb/test/run-90-get-set-attributes.c +++ b/lib/ntdb/test/run-90-get-set-attributes.c @@ -13,7 +13,7 @@ static int myunlock(int fd, int rw, off_t off, off_t len, void *unused) return 0; } -static uint64_t hash_fn(const void *key, size_t len, uint64_t seed, +static uint32_t hash_fn(const void *key, size_t len, uint32_t seed, void *priv) { return 0; diff --git a/lib/ntdb/test/run-capabilities.c b/lib/ntdb/test/run-capabilities.c index 52c4fac0e6..cb03746862 100644 --- a/lib/ntdb/test/run-capabilities.c +++ b/lib/ntdb/test/run-capabilities.c @@ -39,7 +39,7 @@ static void create_ntdb(const char *name, ntdb_layout_add_freetable(layout); ntdb_layout_add_used(layout, key, data, 6); clen = len_of(breaks_check, breaks_write, breaks_open); - ntdb_layout_add_free(layout, 56480 - clen, 0); + ntdb_layout_add_free(layout, 15496 - clen, 0); ntdb_layout_add_capability(layout, cap, breaks_write, breaks_check, breaks_open, clen); @@ -53,7 +53,7 @@ static void create_ntdb(const char *name, key.dsize--; ntdb_layout_add_used(layout, key, data, 11 - key.dsize); clen = len_of(breaks_check, breaks_write, breaks_open); - ntdb_layout_add_free(layout, 65456 - clen, 0); + ntdb_layout_add_free(layout, 16304 - clen, 0); ntdb_layout_add_capability(layout, cap, breaks_write, breaks_check, breaks_open, clen); diff --git a/lib/ntdb/test/run-expand-in-transaction.c b/lib/ntdb/test/run-expand-in-transaction.c index 54f9d81dec..71866a37c1 100644 --- a/lib/ntdb/test/run-expand-in-transaction.c +++ b/lib/ntdb/test/run-expand-in-transaction.c @@ -25,7 +25,8 @@ int main(int argc, char *argv[]) size = ntdb->file->map_size; /* Add a fake record to chew up the existing free space. */ k = ntdb_mkdata("fake", 4); - d.dsize = ntdb->file->map_size - sizeof(struct new_database)- 8; + d.dsize = ntdb->file->map_size + - NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 8; d.dptr = malloc(d.dsize); memset(d.dptr, 0, d.dsize); ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0); diff --git a/lib/ntdb/test/run-traverse.c b/lib/ntdb/test/run-traverse.c index 9dfc94d3b3..ed95f33604 100644 --- a/lib/ntdb/test/run-traverse.c +++ b/lib/ntdb/test/run-traverse.c @@ -5,7 +5,7 @@ #define NUM_RECORDS 1000 /* We use the same seed which we saw a failure on. */ -static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p) +static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p) { return hash64_stable((const unsigned char *)key, len, *(uint64_t *)p); diff --git a/lib/ntdb/tools/speed.c b/lib/ntdb/tools/speed.c index 868494b898..8928d8c67a 100644 --- a/lib/ntdb/tools/speed.c +++ b/lib/ntdb/tools/speed.c @@ -82,8 +82,6 @@ static void dump_and_clear_stats(struct ntdb_context **ntdb, (unsigned long long)stats.stats.alloc_coalesce_num_merged); printf("compares = %llu\n", (unsigned long long)stats.stats.compares); - printf(" compare_wrong_bucket = %llu\n", - (unsigned long long)stats.stats.compare_wrong_bucket); printf(" compare_wrong_offsetbits = %llu\n", (unsigned long long)stats.stats.compare_wrong_offsetbits); printf(" compare_wrong_keylen = %llu\n", diff --git a/lib/ntdb/traverse.c b/lib/ntdb/traverse.c index ee1e1006db..d0ce3517b0 100644 --- a/lib/ntdb/traverse.c +++ b/lib/ntdb/traverse.c @@ -24,14 +24,14 @@ _PUBLIC_ int64_t ntdb_traverse_(struct ntdb_context *ntdb, void *p) { enum NTDB_ERROR ecode; - struct traverse_info tinfo; + struct hash_info h; NTDB_DATA k, d; int64_t count = 0; k.dptr = NULL; - for (ecode = first_in_hash(ntdb, &tinfo, &k, &d.dsize); + for (ecode = first_in_hash(ntdb, &h, &k, &d.dsize); ecode == NTDB_SUCCESS; - ecode = next_in_hash(ntdb, &tinfo, &k, &d.dsize)) { + ecode = next_in_hash(ntdb, &h, &k, &d.dsize)) { d.dptr = k.dptr + k.dsize; count++; @@ -50,26 +50,29 @@ _PUBLIC_ int64_t ntdb_traverse_(struct ntdb_context *ntdb, _PUBLIC_ enum NTDB_ERROR ntdb_firstkey(struct ntdb_context *ntdb, NTDB_DATA *key) { - struct traverse_info tinfo; + struct hash_info h; - return first_in_hash(ntdb, &tinfo, key, NULL); + return first_in_hash(ntdb, &h, key, NULL); } -/* We lock twice, not very efficient. We could keep last key & tinfo cached. */ +/* We lock twice, not very efficient. We could keep last key & h cached. */ _PUBLIC_ enum NTDB_ERROR ntdb_nextkey(struct ntdb_context *ntdb, NTDB_DATA *key) { - struct traverse_info tinfo; struct hash_info h; struct ntdb_used_record rec; + ntdb_off_t off; - tinfo.prev = find_and_lock(ntdb, *key, F_RDLCK, &h, &rec, &tinfo); + off = find_and_lock(ntdb, *key, F_RDLCK, &h, &rec); ntdb->free_fn(key->dptr, ntdb->alloc_data); - if (NTDB_OFF_IS_ERR(tinfo.prev)) { - return NTDB_OFF_TO_ERR(tinfo.prev); + if (NTDB_OFF_IS_ERR(off)) { + return NTDB_OFF_TO_ERR(off); } - ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK); + ntdb_unlock_hash(ntdb, h.h, F_RDLCK); - return next_in_hash(ntdb, &tinfo, key, NULL); + /* If we found something, skip to next. */ + if (off) + h.bucket++; + return next_in_hash(ntdb, &h, key, NULL); } static int wipe_one(struct ntdb_context *ntdb, diff --git a/lib/ntdb/wscript b/lib/ntdb/wscript index 0a90b46c80..ff8f24eae8 100644 --- a/lib/ntdb/wscript +++ b/lib/ntdb/wscript @@ -47,7 +47,6 @@ def configure(conf): 'test/run-11-simple-fetch.c', 'test/run-12-check.c', 'test/run-15-append.c', - 'test/run-20-growhash.c', 'test/run-25-hashoverload.c', 'test/run-30-exhaust-before-expand.c', 'test/run-35-convert.c', |