summaryrefslogtreecommitdiff
path: root/lib/ntdb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/ntdb')
-rw-r--r--lib/ntdb/check.c484
-rw-r--r--lib/ntdb/free.c20
-rw-r--r--lib/ntdb/hash.c1050
-rw-r--r--lib/ntdb/io.c1
-rw-r--r--lib/ntdb/lock.c56
-rw-r--r--lib/ntdb/ntdb.c40
-rw-r--r--lib/ntdb/ntdb.h5
-rw-r--r--lib/ntdb/open.c146
-rw-r--r--lib/ntdb/private.h139
-rw-r--r--lib/ntdb/summary.c67
-rw-r--r--lib/ntdb/test/api-12-store.c2
-rw-r--r--lib/ntdb/test/api-13-delete.c7
-rw-r--r--lib/ntdb/test/api-20-alloc-attr.c4
-rw-r--r--lib/ntdb/test/api-82-lockattr.c5
-rw-r--r--lib/ntdb/test/api-check-callback.c1
-rw-r--r--lib/ntdb/test/api-missing-entries.c6
-rw-r--r--lib/ntdb/test/helprun-layout.c102
-rw-r--r--lib/ntdb/test/layout.h10
-rw-r--r--lib/ntdb/test/run-001-encode.c10
-rw-r--r--lib/ntdb/test/run-02-expand.c8
-rw-r--r--lib/ntdb/test/run-03-coalesce.c36
-rw-r--r--lib/ntdb/test/run-04-basichash.c297
-rw-r--r--lib/ntdb/test/run-15-append.c4
-rw-r--r--lib/ntdb/test/run-20-growhash.c137
-rw-r--r--lib/ntdb/test/run-25-hashoverload.c57
-rw-r--r--lib/ntdb/test/run-30-exhaust-before-expand.c2
-rw-r--r--lib/ntdb/test/run-35-convert.c4
-rw-r--r--lib/ntdb/test/run-50-multiple-freelists.c10
-rw-r--r--lib/ntdb/test/run-64-bit-tdb.c7
-rw-r--r--lib/ntdb/test/run-90-get-set-attributes.c2
-rw-r--r--lib/ntdb/test/run-capabilities.c4
-rw-r--r--lib/ntdb/test/run-expand-in-transaction.c3
-rw-r--r--lib/ntdb/test/run-traverse.c2
-rw-r--r--lib/ntdb/tools/speed.c2
-rw-r--r--lib/ntdb/traverse.c27
-rw-r--r--lib/ntdb/wscript1
36 files changed, 1096 insertions, 1662 deletions
diff --git a/lib/ntdb/check.c b/lib/ntdb/check.c
index be27003a51..2790c68eaf 100644
--- a/lib/ntdb/check.c
+++ b/lib/ntdb/check.c
@@ -38,8 +38,10 @@ static bool append(struct ntdb_context *ntdb,
return true;
}
-static enum NTDB_ERROR check_header(struct ntdb_context *ntdb, ntdb_off_t *recovery,
- uint64_t *features, size_t *num_capabilities)
+static enum NTDB_ERROR check_header(struct ntdb_context *ntdb,
+ ntdb_off_t *recovery,
+ uint64_t *features,
+ size_t *num_capabilities)
{
uint64_t hash_test;
struct ntdb_header hdr;
@@ -112,374 +114,227 @@ static enum NTDB_ERROR check_header(struct ntdb_context *ntdb, ntdb_off_t *recov
return NTDB_SUCCESS;
}
-static enum NTDB_ERROR check_hash_tree(struct ntdb_context *ntdb,
- ntdb_off_t off, unsigned int group_bits,
- uint64_t hprefix,
- unsigned hprefix_bits,
- ntdb_off_t used[],
- size_t num_used,
- size_t *num_found,
- enum NTDB_ERROR (*check)(NTDB_DATA,
- NTDB_DATA, void *),
- void *data);
+static int off_cmp(const ntdb_off_t *a, const ntdb_off_t *b)
+{
+ /* Can overflow an int. */
+ return *a > *b ? 1
+ : *a < *b ? -1
+ : 0;
+}
-static enum NTDB_ERROR check_hash_chain(struct ntdb_context *ntdb,
- ntdb_off_t off,
- uint64_t hash,
- ntdb_off_t used[],
- size_t num_used,
- size_t *num_found,
- enum NTDB_ERROR (*check)(NTDB_DATA,
- NTDB_DATA,
- void *),
- void *data)
+static enum NTDB_ERROR check_entry(struct ntdb_context *ntdb,
+ ntdb_off_t off_and_hash,
+ ntdb_len_t bucket,
+ ntdb_off_t used[],
+ size_t num_used,
+ size_t *num_found,
+ enum NTDB_ERROR (*check)(NTDB_DATA,
+ NTDB_DATA,
+ void *),
+ void *data)
{
- struct ntdb_used_record rec;
enum NTDB_ERROR ecode;
-
- ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
+ const struct ntdb_used_record *r;
+ const unsigned char *kptr;
+ ntdb_len_t klen, dlen;
+ uint32_t hash;
+ ntdb_off_t off = off_and_hash & NTDB_OFF_MASK;
+ ntdb_off_t *p;
+
+ /* Empty bucket is fine. */
+ if (!off_and_hash) {
+ return NTDB_SUCCESS;
}
- if (rec_magic(&rec) != NTDB_CHAIN_MAGIC) {
+ /* This can't point to a chain, we handled those at toplevel. */
+ if (off_and_hash & (1ULL << NTDB_OFF_CHAIN_BIT)) {
return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_check: Bad hash chain magic %llu",
- (long long)rec_magic(&rec));
+ "ntdb_check: Invalid chain bit in offset "
+ " %llu", (long long)off_and_hash);
}
- if (rec_data_length(&rec) != sizeof(struct ntdb_chain)) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_check:"
- " Bad hash chain length %llu vs %zu",
- (long long)rec_data_length(&rec),
- sizeof(struct ntdb_chain));
- }
- if (rec_key_length(&rec) != 0) {
+ p = asearch(&off, used, num_used, off_cmp);
+ if (!p) {
return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_check: Bad hash chain key length %llu",
- (long long)rec_key_length(&rec));
- }
- if (rec_hash(&rec) != 0) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_check: Bad hash chain hash value %llu",
- (long long)rec_hash(&rec));
+ "ntdb_check: Invalid offset"
+ " %llu in hash", (long long)off);
}
+ /* Mark it invalid. */
+ *p ^= 1;
+ (*num_found)++;
- off += sizeof(rec);
- ecode = check_hash_tree(ntdb, off, 0, hash, 64,
- used, num_used, num_found, check, data);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
+ r = ntdb_access_read(ntdb, off, sizeof(*r), true);
+ if (NTDB_PTR_IS_ERR(r)) {
+ return NTDB_PTR_ERR(r);
+ }
+ klen = rec_key_length(r);
+ dlen = rec_data_length(r);
+ ntdb_access_release(ntdb, r);
+
+ kptr = ntdb_access_read(ntdb, off + sizeof(*r), klen + dlen, false);
+ if (NTDB_PTR_IS_ERR(kptr)) {
+ return NTDB_PTR_ERR(kptr);
+ }
+
+ hash = ntdb_hash(ntdb, kptr, klen);
+
+ /* Are we in the right chain? */
+ if (bits_from(hash, 0, ntdb->hash_bits) != bucket) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "ntdb_check: Bad bucket %u vs %llu",
+ bits_from(hash, 0, ntdb->hash_bits),
+ (long long)bucket);
+ /* Next 8 bits should be the same as top bits of bucket. */
+ } else if (bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL)
+ != bits_from(off_and_hash, 64-NTDB_OFF_UPPER_STEAL,
+ NTDB_OFF_UPPER_STEAL)) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "ntdb_check: Bad hash bits %llu vs %llu",
+ (long long)off_and_hash,
+ (long long)hash);
+ } else if (check) {
+ NTDB_DATA k, d;
+
+ k = ntdb_mkdata(kptr, klen);
+ d = ntdb_mkdata(kptr + klen, dlen);
+ ecode = check(k, d, data);
+ } else {
+ ecode = NTDB_SUCCESS;
}
+ ntdb_access_release(ntdb, kptr);
- off = ntdb_read_off(ntdb, off + offsetof(struct ntdb_chain, next));
- if (NTDB_OFF_IS_ERR(off)) {
- return NTDB_OFF_TO_ERR(off);
- }
- if (off == 0)
- return NTDB_SUCCESS;
- (*num_found)++;
- return check_hash_chain(ntdb, off, hash, used, num_used, num_found,
- check, data);
+ return ecode;
}
-static enum NTDB_ERROR check_hash_record(struct ntdb_context *ntdb,
+static enum NTDB_ERROR check_hash_chain(struct ntdb_context *ntdb,
ntdb_off_t off,
- uint64_t hprefix,
- unsigned hprefix_bits,
+ ntdb_len_t bucket,
ntdb_off_t used[],
size_t num_used,
size_t *num_found,
enum NTDB_ERROR (*check)(NTDB_DATA,
- NTDB_DATA,
- void *),
+ NTDB_DATA,
+ void *),
void *data)
{
struct ntdb_used_record rec;
enum NTDB_ERROR ecode;
+ const ntdb_off_t *entries;
+ ntdb_len_t i, num;
- if (hprefix_bits >= 64)
- return check_hash_chain(ntdb, off, hprefix, used, num_used,
- num_found, check, data);
+ /* This is a used entry. */
+ (*num_found)++;
ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
if (ecode != NTDB_SUCCESS) {
return ecode;
}
- if (rec_magic(&rec) != NTDB_HTABLE_MAGIC) {
+ if (rec_magic(&rec) != NTDB_CHAIN_MAGIC) {
return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_check: Bad hash table magic %llu",
+ "ntdb_check: Bad hash chain magic %llu",
(long long)rec_magic(&rec));
}
- if (rec_data_length(&rec)
- != sizeof(ntdb_off_t) << NTDB_SUBLEVEL_HASH_BITS) {
+
+ if (rec_data_length(&rec) % sizeof(ntdb_off_t)) {
return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_check:"
- " Bad hash table length %llu vs %llu",
- (long long)rec_data_length(&rec),
- (long long)sizeof(ntdb_off_t)
- << NTDB_SUBLEVEL_HASH_BITS);
+ "ntdb_check: Bad hash chain data length %llu",
+ (long long)rec_data_length(&rec));
}
+
if (rec_key_length(&rec) != 0) {
return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_check: Bad hash table key length %llu",
+ "ntdb_check: Bad hash chain key length %llu",
(long long)rec_key_length(&rec));
}
- if (rec_hash(&rec) != 0) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_check: Bad hash table hash value %llu",
- (long long)rec_hash(&rec));
- }
off += sizeof(rec);
- return check_hash_tree(ntdb, off,
- NTDB_SUBLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS,
- hprefix, hprefix_bits,
- used, num_used, num_found, check, data);
-}
-
-static int off_cmp(const ntdb_off_t *a, const ntdb_off_t *b)
-{
- /* Can overflow an int. */
- return *a > *b ? 1
- : *a < *b ? -1
- : 0;
-}
+ num = rec_data_length(&rec) / sizeof(ntdb_off_t);
+ entries = ntdb_access_read(ntdb, off, rec_data_length(&rec), true);
+ if (NTDB_PTR_IS_ERR(entries)) {
+ return NTDB_PTR_ERR(entries);
+ }
-static uint64_t get_bits(uint64_t h, unsigned num, unsigned *used)
-{
- *used += num;
+ /* Check each non-deleted entry in chain. */
+ for (i = 0; i < num; i++) {
+ ecode = check_entry(ntdb, entries[i], bucket,
+ used, num_used, num_found, check, data);
+ if (ecode) {
+ break;
+ }
+ }
- return (h >> (64 - *used)) & ((1U << num) - 1);
+ ntdb_access_release(ntdb, entries);
+ return ecode;
}
-static enum NTDB_ERROR check_hash_tree(struct ntdb_context *ntdb,
- ntdb_off_t off, unsigned int group_bits,
- uint64_t hprefix,
- unsigned hprefix_bits,
- ntdb_off_t used[],
- size_t num_used,
- size_t *num_found,
- enum NTDB_ERROR (*check)(NTDB_DATA,
- NTDB_DATA, void *),
- void *data)
+static enum NTDB_ERROR check_hash(struct ntdb_context *ntdb,
+ ntdb_off_t used[],
+ size_t num_used,
+ size_t num_other_used,
+ enum NTDB_ERROR (*check)(NTDB_DATA,
+ NTDB_DATA,
+ void *),
+ void *data)
{
- unsigned int g, b;
- const ntdb_off_t *hash;
- struct ntdb_used_record rec;
enum NTDB_ERROR ecode;
+ struct ntdb_used_record rec;
+ const ntdb_off_t *entries;
+ ntdb_len_t i;
+ /* Free tables and capabilities also show up as used, as do we. */
+ size_t num_found = num_other_used + 1;
- hash = ntdb_access_read(ntdb, off,
- sizeof(ntdb_off_t)
- << (group_bits + NTDB_HASH_GROUP_BITS),
- true);
- if (NTDB_PTR_IS_ERR(hash)) {
- return NTDB_PTR_ERR(hash);
- }
-
- for (g = 0; g < (1 << group_bits); g++) {
- const ntdb_off_t *group = hash + (g << NTDB_HASH_GROUP_BITS);
- for (b = 0; b < (1 << NTDB_HASH_GROUP_BITS); b++) {
- unsigned int bucket, i, used_bits;
- uint64_t h;
- ntdb_off_t *p;
- if (group[b] == 0)
- continue;
-
- off = group[b] & NTDB_OFF_MASK;
- p = asearch(&off, used, num_used, off_cmp);
- if (!p) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "ntdb_check: Invalid offset"
- " %llu in hash",
- (long long)off);
- goto fail;
- }
- /* Mark it invalid. */
- *p ^= 1;
- (*num_found)++;
-
- if (hprefix_bits == 64) {
- /* Chained entries are unordered. */
- if (is_subhash(group[b])) {
- ecode = NTDB_ERR_CORRUPT;
- ntdb_logerr(ntdb, ecode,
- NTDB_LOG_ERROR,
- "ntdb_check: Invalid chain"
- " entry subhash");
- goto fail;
- }
- h = hash_record(ntdb, off);
- if (h != hprefix) {
- ecode = NTDB_ERR_CORRUPT;
- ntdb_logerr(ntdb, ecode,
- NTDB_LOG_ERROR,
- "check: bad hash chain"
- " placement"
- " 0x%llx vs 0x%llx",
- (long long)h,
- (long long)hprefix);
- goto fail;
- }
- ecode = ntdb_read_convert(ntdb, off, &rec,
- sizeof(rec));
- if (ecode != NTDB_SUCCESS) {
- goto fail;
- }
- goto check;
- }
-
- if (is_subhash(group[b])) {
- uint64_t subprefix;
- subprefix = (hprefix
- << (group_bits + NTDB_HASH_GROUP_BITS))
- + g * (1 << NTDB_HASH_GROUP_BITS) + b;
-
- ecode = check_hash_record(ntdb,
- group[b] & NTDB_OFF_MASK,
- subprefix,
- hprefix_bits
- + group_bits
- + NTDB_HASH_GROUP_BITS,
- used, num_used, num_found,
- check, data);
- if (ecode != NTDB_SUCCESS) {
- goto fail;
- }
- continue;
- }
- /* A normal entry */
-
- /* Does it belong here at all? */
- h = hash_record(ntdb, off);
- used_bits = 0;
- if (get_bits(h, hprefix_bits, &used_bits) != hprefix
- && hprefix_bits) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "check: bad hash placement"
- " 0x%llx vs 0x%llx",
- (long long)h,
- (long long)hprefix);
- goto fail;
- }
-
- /* Does it belong in this group? */
- if (get_bits(h, group_bits, &used_bits) != g) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "check: bad group %llu"
- " vs %u",
- (long long)h, g);
- goto fail;
- }
-
- /* Are bucket bits correct? */
- bucket = group[b] & NTDB_OFF_HASH_GROUP_MASK;
- if (get_bits(h, NTDB_HASH_GROUP_BITS, &used_bits)
- != bucket) {
- used_bits -= NTDB_HASH_GROUP_BITS;
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "check: bad bucket %u vs %u",
- (unsigned)get_bits(h,
- NTDB_HASH_GROUP_BITS,
- &used_bits),
- bucket);
- goto fail;
- }
+ ecode = ntdb_read_convert(ntdb, NTDB_HASH_OFFSET, &rec, sizeof(rec));
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
- /* There must not be any zero entries between
- * the bucket it belongs in and this one! */
- for (i = bucket;
- i != b;
- i = (i + 1) % (1 << NTDB_HASH_GROUP_BITS)) {
- if (group[i] == 0) {
- ecode = NTDB_ERR_CORRUPT;
- ntdb_logerr(ntdb, ecode,
- NTDB_LOG_ERROR,
- "check: bad group placement"
- " %u vs %u",
- b, bucket);
- goto fail;
- }
- }
+ if (rec_magic(&rec) != NTDB_HTABLE_MAGIC) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "ntdb_check: Bad hash table magic %llu",
+ (long long)rec_magic(&rec));
+ }
- ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
- if (ecode != NTDB_SUCCESS) {
- goto fail;
- }
+ if (rec_data_length(&rec) != (sizeof(ntdb_off_t) << ntdb->hash_bits)) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "ntdb_check: Bad hash table data length %llu",
+ (long long)rec_data_length(&rec));
+ }
- /* Bottom bits must match header. */
- if ((h & ((1 << 11)-1)) != rec_hash(&rec)) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "ntdb_check: Bad hash magic"
- " at offset %llu"
- " (0x%llx vs 0x%llx)",
- (long long)off,
- (long long)h,
- (long long)rec_hash(&rec));
- goto fail;
- }
+ if (rec_key_length(&rec) != 0) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "ntdb_check: Bad hash table key length %llu",
+ (long long)rec_key_length(&rec));
+ }
- check:
- if (check) {
- NTDB_DATA k, d;
- const unsigned char *kptr;
-
- kptr = ntdb_access_read(ntdb,
- off + sizeof(rec),
- rec_key_length(&rec)
- + rec_data_length(&rec),
- false);
- if (NTDB_PTR_IS_ERR(kptr)) {
- ecode = NTDB_PTR_ERR(kptr);
- goto fail;
- }
+ entries = ntdb_access_read(ntdb, NTDB_HASH_OFFSET + sizeof(rec),
+ rec_data_length(&rec), true);
+ if (NTDB_PTR_IS_ERR(entries)) {
+ return NTDB_PTR_ERR(entries);
+ }
- k = ntdb_mkdata(kptr, rec_key_length(&rec));
- d = ntdb_mkdata(kptr + k.dsize,
- rec_data_length(&rec));
- ecode = check(k, d, data);
- ntdb_access_release(ntdb, kptr);
- if (ecode != NTDB_SUCCESS) {
- goto fail;
- }
- }
+ for (i = 0; i < (1 << ntdb->hash_bits); i++) {
+ ntdb_off_t off = entries[i] & NTDB_OFF_MASK;
+ if (entries[i] & (1ULL << NTDB_OFF_CHAIN_BIT)) {
+ ecode = check_hash_chain(ntdb, off, i,
+ used, num_used, &num_found,
+ check, data);
+ } else {
+ ecode = check_entry(ntdb, entries[i], i,
+ used, num_used, &num_found,
+ check, data);
+ }
+ if (ecode) {
+ break;
}
}
- ntdb_access_release(ntdb, hash);
- return NTDB_SUCCESS;
-
-fail:
- ntdb_access_release(ntdb, hash);
- return ecode;
-}
+ ntdb_access_release(ntdb, entries);
-static enum NTDB_ERROR check_hash(struct ntdb_context *ntdb,
- ntdb_off_t used[],
- size_t num_used, size_t num_other_used,
- enum NTDB_ERROR (*check)(NTDB_DATA, NTDB_DATA, void *),
- void *data)
-{
- /* Free tables and capabilities also show up as used. */
- size_t num_found = num_other_used;
- enum NTDB_ERROR ecode;
-
- ecode = check_hash_tree(ntdb, offsetof(struct ntdb_header, hashtable),
- NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS,
- 0, 0, used, num_used, &num_found,
- check, data);
- if (ecode == NTDB_SUCCESS) {
- if (num_found != num_used) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
- "ntdb_check: Not all entries"
- " are in hash");
- }
+ if (ecode == NTDB_SUCCESS && num_found != num_used) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
+ "ntdb_check: Not all entries are in hash");
}
return ecode;
}
@@ -547,8 +402,7 @@ static enum NTDB_ERROR check_free_table(struct ntdb_context *ntdb,
if (rec_magic(&ft.hdr) != NTDB_FTABLE_MAGIC
|| rec_key_length(&ft.hdr) != 0
- || rec_data_length(&ft.hdr) != sizeof(ft) - sizeof(ft.hdr)
- || rec_hash(&ft.hdr) != 0) {
+ || rec_data_length(&ft.hdr) != sizeof(ft) - sizeof(ft.hdr)) {
return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT, NTDB_LOG_ERROR,
"ntdb_check: Invalid header on free table");
}
diff --git a/lib/ntdb/free.c b/lib/ntdb/free.c
index 3e31937382..971436f5a3 100644
--- a/lib/ntdb/free.c
+++ b/lib/ntdb/free.c
@@ -19,7 +19,6 @@
#include <ccan/likely/likely.h>
#include <ccan/ilog/ilog.h>
#include <time.h>
-#include <assert.h>
#include <limits.h>
static unsigned fls64(uint64_t val)
@@ -646,8 +645,7 @@ static ntdb_off_t lock_and_alloc(struct ntdb_context *ntdb,
ntdb_off_t bucket,
size_t keylen, size_t datalen,
bool want_extra,
- unsigned magic,
- unsigned hashlow)
+ unsigned magic)
{
ntdb_off_t off, b_off,best_off;
struct ntdb_free_record best = { 0 };
@@ -741,7 +739,7 @@ static ntdb_off_t lock_and_alloc(struct ntdb_context *ntdb,
/* We need to mark non-free before we drop lock, otherwise
* coalesce() could try to merge it! */
ecode = set_header(ntdb, &rec, magic, keylen, datalen,
- frec_len(&best) - leftover, hashlow);
+ frec_len(&best) - leftover);
if (ecode != NTDB_SUCCESS) {
goto unlock_err;
}
@@ -788,7 +786,7 @@ unlock_err:
/* Get a free block from current free list, or 0 if none, -ve on error. */
static ntdb_off_t get_free(struct ntdb_context *ntdb,
size_t keylen, size_t datalen, bool want_extra,
- unsigned magic, unsigned hashlow)
+ unsigned magic)
{
ntdb_off_t off, ftable_off;
ntdb_off_t start_b, b, ftable;
@@ -811,7 +809,7 @@ static ntdb_off_t get_free(struct ntdb_context *ntdb,
/* Try getting one from list. */
off = lock_and_alloc(ntdb, ftable_off,
b, keylen, datalen, want_extra,
- magic, hashlow);
+ magic);
if (NTDB_OFF_IS_ERR(off))
return off;
if (off != 0) {
@@ -854,13 +852,11 @@ static ntdb_off_t get_free(struct ntdb_context *ntdb,
enum NTDB_ERROR set_header(struct ntdb_context *ntdb,
struct ntdb_used_record *rec,
unsigned magic, uint64_t keylen, uint64_t datalen,
- uint64_t actuallen, unsigned hashlow)
+ uint64_t actuallen)
{
uint64_t keybits = (fls64(keylen) + 1) / 2;
- /* Use bottom bits of hash, so it's independent of hash table size. */
- rec->magic_and_meta = (hashlow & ((1 << 11)-1))
- | ((actuallen - (keylen + datalen)) << 11)
+ rec->magic_and_meta = ((actuallen - (keylen + datalen)) << 11)
| (keybits << 43)
| ((uint64_t)magic << 48);
rec->key_and_data_len = (keylen | (datalen << (keybits*2)));
@@ -958,7 +954,7 @@ static enum NTDB_ERROR ntdb_expand(struct ntdb_context *ntdb, ntdb_len_t size)
/* This won't fail: it will expand the database if it has to. */
ntdb_off_t alloc(struct ntdb_context *ntdb, size_t keylen, size_t datalen,
- uint64_t hash, unsigned magic, bool growing)
+ unsigned magic, bool growing)
{
ntdb_off_t off;
@@ -967,7 +963,7 @@ ntdb_off_t alloc(struct ntdb_context *ntdb, size_t keylen, size_t datalen,
for (;;) {
enum NTDB_ERROR ecode;
- off = get_free(ntdb, keylen, datalen, growing, magic, hash);
+ off = get_free(ntdb, keylen, datalen, growing, magic);
if (likely(off != 0))
break;
diff --git a/lib/ntdb/hash.c b/lib/ntdb/hash.c
index e87705b123..69192a119b 100644
--- a/lib/ntdb/hash.c
+++ b/lib/ntdb/hash.c
@@ -17,68 +17,23 @@
*/
#include "private.h"
#include <ccan/hash/hash.h>
-#include <assert.h>
/* Default hash function. */
-uint64_t ntdb_jenkins_hash(const void *key, size_t length, uint64_t seed,
+uint32_t ntdb_jenkins_hash(const void *key, size_t length, uint32_t seed,
void *unused)
{
- uint64_t ret;
- /* hash64_stable assumes lower bits are more important; they are a
- * slightly better hash. We use the upper bits first, so swap them. */
- ret = hash64_stable((const unsigned char *)key, length, seed);
- return (ret >> 32) | (ret << 32);
+ return hash_stable((const unsigned char *)key, length, seed);
}
-uint64_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len)
+uint32_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len)
{
return ntdb->hash_fn(ptr, len, ntdb->hash_seed, ntdb->hash_data);
}
-uint64_t hash_record(struct ntdb_context *ntdb, ntdb_off_t off)
-{
- const struct ntdb_used_record *r;
- const void *key;
- uint64_t klen, hash;
-
- r = ntdb_access_read(ntdb, off, sizeof(*r), true);
- if (NTDB_PTR_IS_ERR(r)) {
- /* FIXME */
- return 0;
- }
-
- klen = rec_key_length(r);
- ntdb_access_release(ntdb, r);
-
- key = ntdb_access_read(ntdb, off + sizeof(*r), klen, false);
- if (NTDB_PTR_IS_ERR(key)) {
- return 0;
- }
-
- hash = ntdb_hash(ntdb, key, klen);
- ntdb_access_release(ntdb, key);
- return hash;
-}
-
-/* Get bits from a value. */
-static uint32_t bits_from(uint64_t val, unsigned start, unsigned num)
-{
- assert(num <= 32);
- return (val >> start) & ((1U << num) - 1);
-}
-
-/* We take bits from the top: that way we can lock whole sections of the hash
- * by using lock ranges. */
-static uint32_t use_bits(struct hash_info *h, unsigned num)
-{
- h->hash_used += num;
- return bits_from(h->h, 64 - h->hash_used, num);
-}
-
static ntdb_bool_err key_matches(struct ntdb_context *ntdb,
- const struct ntdb_used_record *rec,
- ntdb_off_t off,
- const NTDB_DATA *key)
+ const struct ntdb_used_record *rec,
+ ntdb_off_t off,
+ const NTDB_DATA *key)
{
ntdb_bool_err ret = false;
const char *rkey;
@@ -102,792 +57,577 @@ static ntdb_bool_err key_matches(struct ntdb_context *ntdb,
/* Does entry match? */
static ntdb_bool_err match(struct ntdb_context *ntdb,
- struct hash_info *h,
- const NTDB_DATA *key,
- ntdb_off_t val,
- struct ntdb_used_record *rec)
+ uint32_t hash,
+ const NTDB_DATA *key,
+ ntdb_off_t val,
+ struct ntdb_used_record *rec,
+ const ntdb_off_t **mapped)
{
ntdb_off_t off;
enum NTDB_ERROR ecode;
ntdb->stats.compares++;
- /* Desired bucket must match. */
- if (h->home_bucket != (val & NTDB_OFF_HASH_GROUP_MASK)) {
- ntdb->stats.compare_wrong_bucket++;
- return false;
- }
/* Top bits of offset == next bits of hash. */
- if (bits_from(val, NTDB_OFF_HASH_EXTRA_BIT, NTDB_OFF_UPPER_STEAL_EXTRA)
- != bits_from(h->h, 64 - h->hash_used - NTDB_OFF_UPPER_STEAL_EXTRA,
- NTDB_OFF_UPPER_STEAL_EXTRA)) {
+ if (bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL)
+ != bits_from(val, 64-NTDB_OFF_UPPER_STEAL, NTDB_OFF_UPPER_STEAL)) {
ntdb->stats.compare_wrong_offsetbits++;
return false;
}
+ /* Unmap before we try to read actual record, which may cause expand */
+ if (mapped) {
+ ntdb_access_release(ntdb, *mapped);
+ *mapped = NULL;
+ }
+
off = val & NTDB_OFF_MASK;
ecode = ntdb_read_convert(ntdb, off, rec, sizeof(*rec));
if (ecode != NTDB_SUCCESS) {
return (ntdb_bool_err)ecode;
}
- if ((h->h & ((1 << 11)-1)) != rec_hash(rec)) {
- ntdb->stats.compare_wrong_rechash++;
- return false;
- }
-
return key_matches(ntdb, rec, off, key);
}
-static ntdb_off_t hbucket_off(ntdb_off_t group_start, unsigned bucket)
-{
- return group_start
- + (bucket % (1 << NTDB_HASH_GROUP_BITS)) * sizeof(ntdb_off_t);
-}
-
-bool is_subhash(ntdb_off_t val)
+static bool is_chain(ntdb_off_t val)
{
- return (val >> NTDB_OFF_UPPER_STEAL_SUBHASH_BIT) & 1;
+ return val & (1ULL << NTDB_OFF_CHAIN_BIT);
}
-/* FIXME: Guess the depth, don't over-lock! */
-static ntdb_off_t hlock_range(ntdb_off_t group, ntdb_off_t *size)
+static ntdb_off_t hbucket_off(ntdb_off_t base, ntdb_len_t idx)
{
- *size = 1ULL << (64 - (NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS));
- return group << (64 - (NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS));
-}
-
-static ntdb_off_t COLD find_in_chain(struct ntdb_context *ntdb,
- NTDB_DATA key,
- ntdb_off_t chain,
- struct hash_info *h,
- struct ntdb_used_record *rec,
- struct traverse_info *tinfo)
-{
- ntdb_off_t off, next;
- enum NTDB_ERROR ecode;
-
- /* In case nothing is free, we set these to zero. */
- h->home_bucket = h->found_bucket = 0;
-
- for (off = chain; off; off = next) {
- unsigned int i;
-
- h->group_start = off;
- ecode = ntdb_read_convert(ntdb, off, h->group, sizeof(h->group));
- if (ecode != NTDB_SUCCESS) {
- return NTDB_ERR_TO_OFF(ecode);
- }
-
- for (i = 0; i < (1 << NTDB_HASH_GROUP_BITS); i++) {
- ntdb_off_t recoff;
- if (!h->group[i]) {
- /* Remember this empty bucket. */
- h->home_bucket = h->found_bucket = i;
- continue;
- }
-
- /* We can insert extra bits via add_to_hash
- * empty bucket logic. */
- recoff = h->group[i] & NTDB_OFF_MASK;
- ecode = ntdb_read_convert(ntdb, recoff, rec,
- sizeof(*rec));
- if (ecode != NTDB_SUCCESS) {
- return NTDB_ERR_TO_OFF(ecode);
- }
-
- ecode = NTDB_OFF_TO_ERR(key_matches(ntdb, rec, recoff,
- &key));
- if (ecode < 0) {
- return NTDB_ERR_TO_OFF(ecode);
- }
- if (ecode == (enum NTDB_ERROR)1) {
- h->home_bucket = h->found_bucket = i;
-
- if (tinfo) {
- tinfo->levels[tinfo->num_levels]
- .hashtable = off;
- tinfo->levels[tinfo->num_levels]
- .total_buckets
- = 1 << NTDB_HASH_GROUP_BITS;
- tinfo->levels[tinfo->num_levels].entry
- = i;
- tinfo->num_levels++;
- }
- return recoff;
- }
- }
- next = ntdb_read_off(ntdb, off
- + offsetof(struct ntdb_chain, next));
- if (NTDB_OFF_IS_ERR(next)) {
- return next;
- }
- if (next)
- next += sizeof(struct ntdb_used_record);
- }
- return 0;
+ return base + sizeof(struct ntdb_used_record)
+ + idx * sizeof(ntdb_off_t);
}
/* This is the core routine which searches the hashtable for an entry.
* On error, no locks are held and -ve is returned.
- * Otherwise, hinfo is filled in (and the optional tinfo).
+ * Otherwise, hinfo is filled in.
* If not found, the return value is 0.
* If found, the return value is the offset, and *rec is the record. */
ntdb_off_t find_and_lock(struct ntdb_context *ntdb,
NTDB_DATA key,
int ltype,
struct hash_info *h,
- struct ntdb_used_record *rec,
- struct traverse_info *tinfo)
+ struct ntdb_used_record *rec)
{
- uint32_t i, group;
- ntdb_off_t hashtable;
+ ntdb_off_t off, val;
+ const ntdb_off_t *arr = NULL;
+ ntdb_len_t i;
+ bool found_empty;
enum NTDB_ERROR ecode;
+ struct ntdb_used_record chdr;
+ ntdb_bool_err berr;
h->h = ntdb_hash(ntdb, key.dptr, key.dsize);
- h->hash_used = 0;
- group = use_bits(h, NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS);
- h->home_bucket = use_bits(h, NTDB_HASH_GROUP_BITS);
- h->hlock_start = hlock_range(group, &h->hlock_range);
- ecode = ntdb_lock_hashes(ntdb, h->hlock_start, h->hlock_range, ltype,
- NTDB_LOCK_WAIT);
+ h->table = NTDB_HASH_OFFSET;
+ h->table_size = 1 << ntdb->hash_bits;
+ h->bucket = bits_from(h->h, 0, ntdb->hash_bits);
+ h->old_val = 0;
+
+ ecode = ntdb_lock_hash(ntdb, h->bucket, ltype);
if (ecode != NTDB_SUCCESS) {
return NTDB_ERR_TO_OFF(ecode);
}
- hashtable = offsetof(struct ntdb_header, hashtable);
- if (tinfo) {
- tinfo->toplevel_group = group;
- tinfo->num_levels = 1;
- tinfo->levels[0].entry = 0;
- tinfo->levels[0].hashtable = hashtable
- + (group << NTDB_HASH_GROUP_BITS) * sizeof(ntdb_off_t);
- tinfo->levels[0].total_buckets = 1 << NTDB_HASH_GROUP_BITS;
+ off = hbucket_off(h->table, h->bucket);
+ val = ntdb_read_off(ntdb, off);
+ if (NTDB_OFF_IS_ERR(val)) {
+ ecode = NTDB_OFF_TO_ERR(val);
+ goto fail;
}
- while (h->hash_used <= 64) {
- /* Read in the hash group. */
- h->group_start = hashtable
- + group * (sizeof(ntdb_off_t) << NTDB_HASH_GROUP_BITS);
-
- ecode = ntdb_read_convert(ntdb, h->group_start, &h->group,
- sizeof(h->group));
- if (ecode != NTDB_SUCCESS) {
- goto fail;
- }
-
- /* Pointer to another hash table? Go down... */
- if (is_subhash(h->group[h->home_bucket])) {
- hashtable = (h->group[h->home_bucket] & NTDB_OFF_MASK)
- + sizeof(struct ntdb_used_record);
- if (tinfo) {
- /* When we come back, use *next* bucket */
- tinfo->levels[tinfo->num_levels-1].entry
- += h->home_bucket + 1;
+ /* Directly in hash table? */
+ if (!is_chain(val)) {
+ if (val) {
+ berr = match(ntdb, h->h, &key, val, rec, NULL);
+ if (berr < 0) {
+ ecode = NTDB_OFF_TO_ERR(berr);
+ goto fail;
}
- group = use_bits(h, NTDB_SUBLEVEL_HASH_BITS
- - NTDB_HASH_GROUP_BITS);
- h->home_bucket = use_bits(h, NTDB_HASH_GROUP_BITS);
- if (tinfo) {
- tinfo->levels[tinfo->num_levels].hashtable
- = hashtable;
- tinfo->levels[tinfo->num_levels].total_buckets
- = 1 << NTDB_SUBLEVEL_HASH_BITS;
- tinfo->levels[tinfo->num_levels].entry
- = group << NTDB_HASH_GROUP_BITS;
- tinfo->num_levels++;
+ if (berr) {
+ return val & NTDB_OFF_MASK;
}
- continue;
+ /* If you want to insert here, make a chain. */
+ h->old_val = val;
}
+ return 0;
+ }
- /* It's in this group: search (until 0 or all searched) */
- for (i = 0, h->found_bucket = h->home_bucket;
- i < (1 << NTDB_HASH_GROUP_BITS);
- i++, h->found_bucket = ((h->found_bucket+1)
- % (1 << NTDB_HASH_GROUP_BITS))) {
- ntdb_bool_err berr;
- if (is_subhash(h->group[h->found_bucket]))
- continue;
+ /* Nope? Iterate through chain. */
+ h->table = val & NTDB_OFF_MASK;
- if (!h->group[h->found_bucket])
- break;
+ ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr));
+ if (ecode != NTDB_SUCCESS) {
+ goto fail;
+ }
+
+ if (rec_magic(&chdr) != NTDB_CHAIN_MAGIC) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "find_and_lock:"
+ " corrupt record %#x at %llu",
+ rec_magic(&chdr), (long long)off);
+ goto fail;
+ }
- berr = match(ntdb, h, &key, h->group[h->found_bucket],
- rec);
+ h->table_size = rec_data_length(&chdr) / sizeof(ntdb_off_t);
+
+ found_empty = false;
+ for (i = 0; i < h->table_size; i++) {
+ /* Careful! match has to unmap this if we access a
+ * record (may cause mmap of database to move. */
+ if (!arr) {
+ arr = ntdb_access_read(ntdb, hbucket_off(h->table, 0),
+ rec_data_length(&chdr), true);
+ if (NTDB_PTR_IS_ERR(arr)) {
+ ecode = NTDB_PTR_ERR(arr);
+ goto fail;
+ }
+ }
+
+ val = arr[i];
+ if (val == 0) {
+ if (!found_empty) {
+ h->bucket = i;
+ found_empty = true;
+ }
+ } else {
+ berr = match(ntdb, h->h, &key, val, rec, &arr);
if (berr < 0) {
ecode = NTDB_OFF_TO_ERR(berr);
+ if (arr) {
+ ntdb_access_release(ntdb, arr);
+ }
goto fail;
}
if (berr) {
- if (tinfo) {
- tinfo->levels[tinfo->num_levels-1].entry
- += h->found_bucket;
+ /* We found it! */
+ h->bucket = i;
+ off = val & NTDB_OFF_MASK;
+ if (arr) {
+ ntdb_access_release(ntdb, arr);
}
- return h->group[h->found_bucket] & NTDB_OFF_MASK;
+ return off;
}
}
- /* Didn't find it: h indicates where it would go. */
- return 0;
+ }
+ if (!found_empty) {
+ /* Set to any non-zero value */
+ h->old_val = 1;
+ h->bucket = i;
}
- return find_in_chain(ntdb, key, hashtable, h, rec, tinfo);
+ if (arr) {
+ ntdb_access_release(ntdb, arr);
+ }
+ return 0;
fail:
- ntdb_unlock_hashes(ntdb, h->hlock_start, h->hlock_range, ltype);
+ ntdb_unlock_hash(ntdb, h->bucket, ltype);
return NTDB_ERR_TO_OFF(ecode);
}
-/* I wrote a simple test, expanding a hash to 2GB, for the following
- * cases:
- * 1) Expanding all the buckets at once,
- * 2) Expanding the bucket we wanted to place the new entry into.
- * 3) Expanding the most-populated bucket,
- *
- * I measured the worst/average/best density during this process.
- * 1) 3%/16%/30%
- * 2) 4%/20%/38%
- * 3) 6%/22%/41%
- *
- * So we figure out the busiest bucket for the moment.
- */
-static unsigned fullest_bucket(struct ntdb_context *ntdb,
- const ntdb_off_t *group,
- unsigned new_bucket)
-{
- unsigned counts[1 << NTDB_HASH_GROUP_BITS] = { 0 };
- unsigned int i, best_bucket;
-
- /* Count the new entry. */
- counts[new_bucket]++;
- best_bucket = new_bucket;
-
- for (i = 0; i < (1 << NTDB_HASH_GROUP_BITS); i++) {
- unsigned this_bucket;
-
- if (is_subhash(group[i]))
- continue;
- this_bucket = group[i] & NTDB_OFF_HASH_GROUP_MASK;
- if (++counts[this_bucket] > counts[best_bucket])
- best_bucket = this_bucket;
- }
-
- return best_bucket;
-}
-
-static bool put_into_group(ntdb_off_t *group,
- unsigned bucket, ntdb_off_t encoded)
+static ntdb_off_t encode_offset(const struct ntdb_context *ntdb,
+ ntdb_off_t new_off, uint32_t hash)
{
- unsigned int i;
+ ntdb_off_t extra;
- for (i = 0; i < (1 << NTDB_HASH_GROUP_BITS); i++) {
- unsigned b = (bucket + i) % (1 << NTDB_HASH_GROUP_BITS);
+ assert((new_off & (1ULL << NTDB_OFF_CHAIN_BIT)) == 0);
+ assert((new_off >> (64 - NTDB_OFF_UPPER_STEAL)) == 0);
+ /* We pack extra hash bits into the upper bits of the offset. */
+ extra = bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL);
+ extra <<= (64 - NTDB_OFF_UPPER_STEAL);
- if (group[b] == 0) {
- group[b] = encoded;
- return true;
- }
- }
- return false;
+ return new_off | extra;
}
-static void force_into_group(ntdb_off_t *group,
- unsigned bucket, ntdb_off_t encoded)
+/* Simply overwrite the hash entry we found before. */
+enum NTDB_ERROR replace_in_hash(struct ntdb_context *ntdb,
+ const struct hash_info *h,
+ ntdb_off_t new_off)
{
- if (!put_into_group(group, bucket, encoded))
- abort();
+ return ntdb_write_off(ntdb, hbucket_off(h->table, h->bucket),
+ encode_offset(ntdb, new_off, h->h));
}
-static ntdb_off_t encode_offset(ntdb_off_t new_off, struct hash_info *h)
+enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb,
+ const struct hash_info *h)
{
- return h->home_bucket
- | new_off
- | ((uint64_t)bits_from(h->h,
- 64 - h->hash_used - NTDB_OFF_UPPER_STEAL_EXTRA,
- NTDB_OFF_UPPER_STEAL_EXTRA)
- << NTDB_OFF_HASH_EXTRA_BIT);
+ return ntdb_write_off(ntdb, hbucket_off(h->table, h->bucket), 0);
}
-/* Simply overwrite the hash entry we found before. */
-enum NTDB_ERROR replace_in_hash(struct ntdb_context *ntdb,
- struct hash_info *h,
- ntdb_off_t new_off)
-{
- return ntdb_write_off(ntdb, hbucket_off(h->group_start, h->found_bucket),
- encode_offset(new_off, h));
-}
-/* We slot in anywhere that's empty in the chain. */
-static enum NTDB_ERROR COLD add_to_chain(struct ntdb_context *ntdb,
- ntdb_off_t subhash,
- ntdb_off_t new_off)
+enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb,
+ const struct hash_info *h,
+ ntdb_off_t new_off)
{
- ntdb_off_t entry;
enum NTDB_ERROR ecode;
+ ntdb_off_t chain;
+ struct ntdb_used_record chdr;
+ const ntdb_off_t *old;
+ ntdb_off_t *new;
- entry = ntdb_find_zero_off(ntdb, subhash, 1<<NTDB_HASH_GROUP_BITS);
- if (NTDB_OFF_IS_ERR(entry)) {
- return NTDB_OFF_TO_ERR(entry);
+ /* We hit an empty bucket during search? That's where it goes. */
+ if (!h->old_val) {
+ return replace_in_hash(ntdb, h, new_off);
}
- if (entry == 1 << NTDB_HASH_GROUP_BITS) {
- ntdb_off_t next;
+ /* Full at top-level? Create a 2-element chain. */
+ if (h->table == NTDB_HASH_OFFSET) {
+ ntdb_off_t pair[2];
- next = ntdb_read_off(ntdb, subhash
- + offsetof(struct ntdb_chain, next));
- if (NTDB_OFF_IS_ERR(next)) {
- return NTDB_OFF_TO_ERR(next);
- }
+ /* One element is old value, the other is the new value. */
+ pair[0] = h->old_val;
+ pair[1] = encode_offset(ntdb, new_off, h->h);
- if (!next) {
- next = alloc(ntdb, 0, sizeof(struct ntdb_chain), 0,
- NTDB_CHAIN_MAGIC, false);
- if (NTDB_OFF_IS_ERR(next))
- return NTDB_OFF_TO_ERR(next);
- ecode = zero_out(ntdb,
- next+sizeof(struct ntdb_used_record),
- sizeof(struct ntdb_chain));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- ecode = ntdb_write_off(ntdb, subhash
- + offsetof(struct ntdb_chain,
- next),
- next);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
+ chain = alloc(ntdb, 0, sizeof(pair), NTDB_CHAIN_MAGIC, true);
+ if (NTDB_OFF_IS_ERR(chain)) {
+ return NTDB_OFF_TO_ERR(chain);
}
- return add_to_chain(ntdb, next, new_off);
+ ecode = ntdb_write_convert(ntdb,
+ chain
+ + sizeof(struct ntdb_used_record),
+ pair, sizeof(pair));
+ if (ecode == NTDB_SUCCESS) {
+ ecode = ntdb_write_off(ntdb,
+ hbucket_off(h->table, h->bucket),
+ chain
+ | (1ULL << NTDB_OFF_CHAIN_BIT));
+ }
+ return ecode;
}
- return ntdb_write_off(ntdb, subhash + entry * sizeof(ntdb_off_t),
- new_off);
-}
-
-/* Add into a newly created subhash. */
-static enum NTDB_ERROR add_to_subhash(struct ntdb_context *ntdb, ntdb_off_t subhash,
- unsigned hash_used, ntdb_off_t val)
-{
- ntdb_off_t off = (val & NTDB_OFF_MASK), *group;
- struct hash_info h;
- unsigned int gnum;
-
- h.hash_used = hash_used;
-
- if (hash_used + NTDB_SUBLEVEL_HASH_BITS > 64)
- return add_to_chain(ntdb, subhash, off);
-
- h.h = hash_record(ntdb, off);
- gnum = use_bits(&h, NTDB_SUBLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS);
- h.group_start = subhash
- + gnum * (sizeof(ntdb_off_t) << NTDB_HASH_GROUP_BITS);
- h.home_bucket = use_bits(&h, NTDB_HASH_GROUP_BITS);
-
- group = ntdb_access_write(ntdb, h.group_start,
- sizeof(*group) << NTDB_HASH_GROUP_BITS, true);
- if (NTDB_PTR_IS_ERR(group)) {
- return NTDB_PTR_ERR(group);
+ /* Full bucket. Expand. */
+ ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr));
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
}
- force_into_group(group, h.home_bucket, encode_offset(off, &h));
- return ntdb_access_commit(ntdb, group);
-}
-static enum NTDB_ERROR expand_group(struct ntdb_context *ntdb, struct hash_info *h)
-{
- unsigned bucket, num_vals, i, magic;
- size_t subsize;
- ntdb_off_t subhash;
- ntdb_off_t vals[1 << NTDB_HASH_GROUP_BITS];
- enum NTDB_ERROR ecode;
+ if (rec_extra_padding(&chdr) >= sizeof(new_off)) {
+ /* Expand in place. */
+ uint64_t dlen = rec_data_length(&chdr);
- /* Attach new empty subhash under fullest bucket. */
- bucket = fullest_bucket(ntdb, h->group, h->home_bucket);
+ ecode = set_header(ntdb, &chdr, NTDB_CHAIN_MAGIC, 0,
+ dlen + sizeof(new_off),
+ dlen + rec_extra_padding(&chdr));
- if (h->hash_used == 64) {
- ntdb->stats.alloc_chain++;
- subsize = sizeof(struct ntdb_chain);
- magic = NTDB_CHAIN_MAGIC;
- } else {
- ntdb->stats.alloc_subhash++;
- subsize = (sizeof(ntdb_off_t) << NTDB_SUBLEVEL_HASH_BITS);
- magic = NTDB_HTABLE_MAGIC;
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+ /* find_and_lock set up h to point to last bucket. */
+ ecode = replace_in_hash(ntdb, h, new_off);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+ ecode = ntdb_write_convert(ntdb, h->table, &chdr, sizeof(chdr));
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
+ /* For futureproofing, we always make the first byte of padding
+ * a zero. */
+ if (rec_extra_padding(&chdr)) {
+ ecode = ntdb->io->twrite(ntdb, h->table + sizeof(chdr)
+ + dlen + sizeof(new_off),
+ "", 1);
+ }
+ return ecode;
}
- subhash = alloc(ntdb, 0, subsize, 0, magic, false);
- if (NTDB_OFF_IS_ERR(subhash)) {
- return NTDB_OFF_TO_ERR(subhash);
+ /* We need to reallocate the chain. */
+ chain = alloc(ntdb, 0, (h->table_size + 1) * sizeof(ntdb_off_t),
+ NTDB_CHAIN_MAGIC, true);
+ if (NTDB_OFF_IS_ERR(chain)) {
+ return NTDB_OFF_TO_ERR(chain);
}
- ecode = zero_out(ntdb, subhash + sizeof(struct ntdb_used_record),
- subsize);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
+ /* Map both and copy across old buckets. */
+ old = ntdb_access_read(ntdb, hbucket_off(h->table, 0),
+ h->table_size*sizeof(ntdb_off_t), true);
+ if (NTDB_PTR_IS_ERR(old)) {
+ return NTDB_PTR_ERR(old);
}
-
- /* Remove any which are destined for bucket or are in wrong place. */
- num_vals = 0;
- for (i = 0; i < (1 << NTDB_HASH_GROUP_BITS); i++) {
- unsigned home_bucket = h->group[i] & NTDB_OFF_HASH_GROUP_MASK;
- if (!h->group[i] || is_subhash(h->group[i]))
- continue;
- if (home_bucket == bucket || home_bucket != i) {
- vals[num_vals++] = h->group[i];
- h->group[i] = 0;
- }
+ new = ntdb_access_write(ntdb, hbucket_off(chain, 0),
+ (h->table_size + 1)*sizeof(ntdb_off_t), true);
+ if (NTDB_PTR_IS_ERR(new)) {
+ ntdb_access_release(ntdb, old);
+ return NTDB_PTR_ERR(new);
}
- /* FIXME: This assert is valid, but we do this during unit test :( */
- /* assert(num_vals); */
-
- /* Overwrite expanded bucket with subhash pointer. */
- h->group[bucket] = subhash | (1ULL << NTDB_OFF_UPPER_STEAL_SUBHASH_BIT);
- /* Point to actual contents of record. */
- subhash += sizeof(struct ntdb_used_record);
+ memcpy(new, old, h->bucket * sizeof(ntdb_off_t));
+ new[h->bucket] = encode_offset(ntdb, new_off, h->h);
+ ntdb_access_release(ntdb, old);
- /* Put values back. */
- for (i = 0; i < num_vals; i++) {
- unsigned this_bucket = vals[i] & NTDB_OFF_HASH_GROUP_MASK;
-
- if (this_bucket == bucket) {
- ecode = add_to_subhash(ntdb, subhash, h->hash_used,
- vals[i]);
- if (ecode != NTDB_SUCCESS)
- return ecode;
- } else {
- /* There should be room to put this back. */
- force_into_group(h->group, this_bucket, vals[i]);
- }
+ ecode = ntdb_access_commit(ntdb, new);
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
}
- return NTDB_SUCCESS;
+
+ /* Free the old chain. */
+ ecode = add_free_record(ntdb, h->table,
+ sizeof(struct ntdb_used_record)
+ + rec_data_length(&chdr)
+ + rec_extra_padding(&chdr),
+ NTDB_LOCK_WAIT, true);
+
+ /* Replace top-level to point to new chain */
+ return ntdb_write_off(ntdb,
+ hbucket_off(NTDB_HASH_OFFSET,
+ bits_from(h->h, 0, ntdb->hash_bits)),
+ chain | (1ULL << NTDB_OFF_CHAIN_BIT));
}
-enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb, struct hash_info *h)
+/* Traverse support: returns offset of record, or 0 or -ve error. */
+static ntdb_off_t iterate_chain(struct ntdb_context *ntdb,
+ ntdb_off_t val,
+ struct hash_info *h)
{
- unsigned int i, num_movers = 0;
- ntdb_off_t movers[1 << NTDB_HASH_GROUP_BITS];
+ ntdb_off_t i;
+ enum NTDB_ERROR ecode;
+ struct ntdb_used_record chdr;
- h->group[h->found_bucket] = 0;
- for (i = 1; i < (1 << NTDB_HASH_GROUP_BITS); i++) {
- unsigned this_bucket;
+ /* First load up chain header. */
+ h->table = val & NTDB_OFF_MASK;
+ ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr));
+ if (ecode != NTDB_SUCCESS) {
+ return ecode;
+ }
- this_bucket = (h->found_bucket+i) % (1 << NTDB_HASH_GROUP_BITS);
- /* Empty bucket? We're done. */
- if (!h->group[this_bucket])
- break;
+ if (rec_magic(&chdr) != NTDB_CHAIN_MAGIC) {
+ return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "get_table:"
+ " corrupt record %#x at %llu",
+ rec_magic(&chdr),
+ (long long)h->table);
+ }
- /* Ignore subhashes. */
- if (is_subhash(h->group[this_bucket]))
- continue;
+ /* Chain length is implied by data length. */
+ h->table_size = rec_data_length(&chdr) / sizeof(ntdb_off_t);
- /* If this one is not happy where it is, we'll move it. */
- if ((h->group[this_bucket] & NTDB_OFF_HASH_GROUP_MASK)
- != this_bucket) {
- movers[num_movers++] = h->group[this_bucket];
- h->group[this_bucket] = 0;
- }
+ i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0), h->bucket,
+ h->table_size);
+ if (NTDB_OFF_IS_ERR(i)) {
+ return i;
}
- /* Put back the ones we erased. */
- for (i = 0; i < num_movers; i++) {
- force_into_group(h->group, movers[i] & NTDB_OFF_HASH_GROUP_MASK,
- movers[i]);
+ if (i != h->table_size) {
+ /* Return to next bucket. */
+ h->bucket = i + 1;
+ val = ntdb_read_off(ntdb, hbucket_off(h->table, i));
+ if (NTDB_OFF_IS_ERR(val)) {
+ return val;
+ }
+ return val & NTDB_OFF_MASK;
}
- /* Now we write back the hash group */
- return ntdb_write_convert(ntdb, h->group_start,
- h->group, sizeof(h->group));
+ /* Go back up to hash table. */
+ h->table = NTDB_HASH_OFFSET;
+ h->table_size = 1 << ntdb->hash_bits;
+ h->bucket = bits_from(h->h, 0, ntdb->hash_bits) + 1;
+ return 0;
}
-enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb, struct hash_info *h,
- ntdb_off_t new_off)
+/* Keeps hash locked unless returns 0 or error. */
+static ntdb_off_t lock_and_iterate_hash(struct ntdb_context *ntdb,
+ struct hash_info *h)
{
+ ntdb_off_t val, i;
enum NTDB_ERROR ecode;
- /* We hit an empty bucket during search? That's where it goes. */
- if (!h->group[h->found_bucket]) {
- h->group[h->found_bucket] = encode_offset(new_off, h);
- /* Write back the modified group. */
- return ntdb_write_convert(ntdb, h->group_start,
- h->group, sizeof(h->group));
- }
-
- if (h->hash_used > 64)
- return add_to_chain(ntdb, h->group_start, new_off);
-
- /* We're full. Expand. */
- ecode = expand_group(ntdb, h);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
+ if (h->table != NTDB_HASH_OFFSET) {
+ /* We're in a chain. */
+ i = bits_from(h->h, 0, ntdb->hash_bits);
+ ecode = ntdb_lock_hash(ntdb, i, F_RDLCK);
+ if (ecode != NTDB_SUCCESS) {
+ return NTDB_ERR_TO_OFF(ecode);
+ }
- if (is_subhash(h->group[h->home_bucket])) {
- /* We were expanded! */
- ntdb_off_t hashtable;
- unsigned int gnum;
+ /* We dropped lock, bucket might have moved! */
+ val = ntdb_read_off(ntdb, hbucket_off(NTDB_HASH_OFFSET, i));
+ if (NTDB_OFF_IS_ERR(val)) {
+ goto unlock;
+ }
- /* Write back the modified group. */
- ecode = ntdb_write_convert(ntdb, h->group_start, h->group,
- sizeof(h->group));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
+ /* We don't remove chains: there should still be one there! */
+ if (!val || !is_chain(val)) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "iterate_hash:"
+ " vanished hchain %llu at %llu",
+ (long long)val,
+ (long long)i);
+ val = NTDB_ERR_TO_OFF(ecode);
+ goto unlock;
}
- /* Move hashinfo down a level. */
- hashtable = (h->group[h->home_bucket] & NTDB_OFF_MASK)
- + sizeof(struct ntdb_used_record);
- gnum = use_bits(h,NTDB_SUBLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS);
- h->home_bucket = use_bits(h, NTDB_HASH_GROUP_BITS);
- h->group_start = hashtable
- + gnum * (sizeof(ntdb_off_t) << NTDB_HASH_GROUP_BITS);
- ecode = ntdb_read_convert(ntdb, h->group_start, &h->group,
- sizeof(h->group));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
+ /* Find next bucket in the chain. */
+ val = iterate_chain(ntdb, val, h);
+ if (NTDB_OFF_IS_ERR(val)) {
+ goto unlock;
}
- }
+ if (val != 0) {
+ return val;
+ }
+ ntdb_unlock_hash(ntdb, i, F_RDLCK);
- /* Expanding the group must have made room if it didn't choose this
- * bucket. */
- if (put_into_group(h->group, h->home_bucket, encode_offset(new_off,h))){
- return ntdb_write_convert(ntdb, h->group_start,
- h->group, sizeof(h->group));
+ /* OK, we've reset h back to top level. */
}
- /* This can happen if all hashes in group (and us) dropped into same
- * group in subhash. */
- return add_to_hash(ntdb, h, new_off);
-}
-
-/* Traverse support: returns offset of record, or 0 or -ve error. */
-static ntdb_off_t iterate_hash(struct ntdb_context *ntdb,
- struct traverse_info *tinfo)
-{
- ntdb_off_t off, val, i;
- struct traverse_level *tlevel;
-
- tlevel = &tinfo->levels[tinfo->num_levels-1];
-
-again:
- for (i = ntdb_find_nonzero_off(ntdb, tlevel->hashtable,
- tlevel->entry, tlevel->total_buckets);
- i != tlevel->total_buckets;
- i = ntdb_find_nonzero_off(ntdb, tlevel->hashtable,
- i+1, tlevel->total_buckets)) {
- if (NTDB_OFF_IS_ERR(i)) {
- return i;
+ /* We do this unlocked, then re-check. */
+ for (i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0),
+ h->bucket, h->table_size);
+ i != h->table_size;
+ i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0),
+ i+1, h->table_size)) {
+ ecode = ntdb_lock_hash(ntdb, i, F_RDLCK);
+ if (ecode != NTDB_SUCCESS) {
+ return NTDB_ERR_TO_OFF(ecode);
}
- val = ntdb_read_off(ntdb, tlevel->hashtable+sizeof(ntdb_off_t)*i);
+ val = ntdb_read_off(ntdb, hbucket_off(h->table, i));
if (NTDB_OFF_IS_ERR(val)) {
- return val;
+ goto unlock;
}
- off = val & NTDB_OFF_MASK;
-
- /* This makes the delete-all-in-traverse case work
- * (and simplifies our logic a little). */
- if (off == tinfo->prev)
+ /* Lost race, and it's empty? */
+ if (!val) {
+ ntdb->stats.traverse_val_vanished++;
+ ntdb_unlock_hash(ntdb, i, F_RDLCK);
continue;
+ }
- tlevel->entry = i;
-
- if (!is_subhash(val)) {
- /* Found one. */
- tinfo->prev = off;
- return off;
+ if (!is_chain(val)) {
+ /* So caller knows what lock to free. */
+ h->h = i;
+ /* Return to next bucket. */
+ h->bucket = i + 1;
+ val &= NTDB_OFF_MASK;
+ return val;
}
- /* When we come back, we want the next one */
- tlevel->entry++;
- tinfo->num_levels++;
- tlevel++;
- tlevel->hashtable = off + sizeof(struct ntdb_used_record);
- tlevel->entry = 0;
- /* Next level is a chain? */
- if (unlikely(tinfo->num_levels == NTDB_MAX_LEVELS + 1))
- tlevel->total_buckets = (1 << NTDB_HASH_GROUP_BITS);
- else
- tlevel->total_buckets = (1 << NTDB_SUBLEVEL_HASH_BITS);
- goto again;
- }
-
- /* Nothing there? */
- if (tinfo->num_levels == 1)
- return 0;
+ /* Start at beginning of chain */
+ h->bucket = 0;
+ h->h = i;
- /* Handle chained entries. */
- if (unlikely(tinfo->num_levels == NTDB_MAX_LEVELS + 1)) {
- tlevel->hashtable = ntdb_read_off(ntdb, tlevel->hashtable
- + offsetof(struct ntdb_chain,
- next));
- if (NTDB_OFF_IS_ERR(tlevel->hashtable)) {
- return tlevel->hashtable;
+ val = iterate_chain(ntdb, val, h);
+ if (NTDB_OFF_IS_ERR(val)) {
+ goto unlock;
}
- if (tlevel->hashtable) {
- tlevel->hashtable += sizeof(struct ntdb_used_record);
- tlevel->entry = 0;
- goto again;
+ if (val != 0) {
+ return val;
}
+
+ /* Otherwise, bucket has been set to i+1 */
+ ntdb_unlock_hash(ntdb, i, F_RDLCK);
}
+ return 0;
- /* Go back up and keep searching. */
- tinfo->num_levels--;
- tlevel--;
- goto again;
+unlock:
+ ntdb_unlock_hash(ntdb, i, F_RDLCK);
+ return val;
}
/* Return success if we find something, NTDB_ERR_NOEXIST if none. */
enum NTDB_ERROR next_in_hash(struct ntdb_context *ntdb,
- struct traverse_info *tinfo,
- NTDB_DATA *kbuf, size_t *dlen)
+ struct hash_info *h,
+ NTDB_DATA *kbuf, size_t *dlen)
{
- const unsigned group_bits = NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS;
- ntdb_off_t hl_start, hl_range, off;
+ ntdb_off_t off;
+ struct ntdb_used_record rec;
enum NTDB_ERROR ecode;
- while (tinfo->toplevel_group < (1 << group_bits)) {
- hl_start = (ntdb_off_t)tinfo->toplevel_group
- << (64 - group_bits);
- hl_range = 1ULL << group_bits;
- ecode = ntdb_lock_hashes(ntdb, hl_start, hl_range, F_RDLCK,
- NTDB_LOCK_WAIT);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
-
- off = iterate_hash(ntdb, tinfo);
- if (off) {
- struct ntdb_used_record rec;
+ off = lock_and_iterate_hash(ntdb, h);
- if (NTDB_OFF_IS_ERR(off)) {
- ecode = NTDB_OFF_TO_ERR(off);
- goto fail;
- }
-
- ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
- if (ecode != NTDB_SUCCESS) {
- goto fail;
- }
- if (rec_magic(&rec) != NTDB_USED_MAGIC) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "next_in_hash:"
- " corrupt record at %llu",
- (long long)off);
- goto fail;
- }
+ if (NTDB_OFF_IS_ERR(off)) {
+ return NTDB_OFF_TO_ERR(off);
+ } else if (off == 0) {
+ return NTDB_ERR_NOEXIST;
+ }
- kbuf->dsize = rec_key_length(&rec);
-
- /* They want data as well? */
- if (dlen) {
- *dlen = rec_data_length(&rec);
- kbuf->dptr = ntdb_alloc_read(ntdb,
- off + sizeof(rec),
- kbuf->dsize
- + *dlen);
- } else {
- kbuf->dptr = ntdb_alloc_read(ntdb,
- off + sizeof(rec),
- kbuf->dsize);
- }
- ntdb_unlock_hashes(ntdb, hl_start, hl_range, F_RDLCK);
- if (NTDB_PTR_IS_ERR(kbuf->dptr)) {
- return NTDB_PTR_ERR(kbuf->dptr);
- }
- return NTDB_SUCCESS;
- }
+ /* The hash for this key is still locked. */
+ ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
+ if (ecode != NTDB_SUCCESS) {
+ goto unlock;
+ }
+ if (rec_magic(&rec) != NTDB_USED_MAGIC) {
+ ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
+ NTDB_LOG_ERROR,
+ "next_in_hash:"
+ " corrupt record at %llu",
+ (long long)off);
+ goto unlock;
+ }
- ntdb_unlock_hashes(ntdb, hl_start, hl_range, F_RDLCK);
+ kbuf->dsize = rec_key_length(&rec);
- tinfo->toplevel_group++;
- tinfo->levels[0].hashtable
- += (sizeof(ntdb_off_t) << NTDB_HASH_GROUP_BITS);
- tinfo->levels[0].entry = 0;
+ /* They want data as well? */
+ if (dlen) {
+ *dlen = rec_data_length(&rec);
+ kbuf->dptr = ntdb_alloc_read(ntdb, off + sizeof(rec),
+ kbuf->dsize + *dlen);
+ } else {
+ kbuf->dptr = ntdb_alloc_read(ntdb, off + sizeof(rec),
+ kbuf->dsize);
}
- return NTDB_ERR_NOEXIST;
+ if (NTDB_PTR_IS_ERR(kbuf->dptr)) {
+ ecode = NTDB_PTR_ERR(kbuf->dptr);
+ goto unlock;
+ }
+ ecode = NTDB_SUCCESS;
-fail:
- ntdb_unlock_hashes(ntdb, hl_start, hl_range, F_RDLCK);
+unlock:
+ ntdb_unlock_hash(ntdb, bits_from(h->h, 0, ntdb->hash_bits), F_RDLCK);
return ecode;
}
enum NTDB_ERROR first_in_hash(struct ntdb_context *ntdb,
- struct traverse_info *tinfo,
+ struct hash_info *h,
NTDB_DATA *kbuf, size_t *dlen)
{
- tinfo->prev = 0;
- tinfo->toplevel_group = 0;
- tinfo->num_levels = 1;
- tinfo->levels[0].hashtable = offsetof(struct ntdb_header, hashtable);
- tinfo->levels[0].entry = 0;
- tinfo->levels[0].total_buckets = (1 << NTDB_HASH_GROUP_BITS);
-
- return next_in_hash(ntdb, tinfo, kbuf, dlen);
+ h->table = NTDB_HASH_OFFSET;
+ h->table_size = 1 << ntdb->hash_bits;
+ h->bucket = 0;
+
+ return next_in_hash(ntdb, h, kbuf, dlen);
}
/* Even if the entry isn't in this hash bucket, you'd have to lock this
* bucket to find it. */
-static enum NTDB_ERROR chainlock(struct ntdb_context *ntdb, const NTDB_DATA *key,
- int ltype, enum ntdb_lock_flags waitflag,
- const char *func)
+static enum NTDB_ERROR chainlock(struct ntdb_context *ntdb,
+ const NTDB_DATA *key, int ltype)
{
- enum NTDB_ERROR ecode;
- uint64_t h = ntdb_hash(ntdb, key->dptr, key->dsize);
- ntdb_off_t lockstart, locksize;
- unsigned int group, gbits;
-
- gbits = NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS;
- group = bits_from(h, 64 - gbits, gbits);
+ uint32_t h = ntdb_hash(ntdb, key->dptr, key->dsize);
- lockstart = hlock_range(group, &locksize);
-
- ecode = ntdb_lock_hashes(ntdb, lockstart, locksize, ltype, waitflag);
- ntdb_trace_1rec(ntdb, func, *key);
- return ecode;
+ return ntdb_lock_hash(ntdb, bits_from(h, 0, ntdb->hash_bits), ltype);
}
/* lock/unlock one hash chain. This is meant to be used to reduce
contention - it cannot guarantee how many records will be locked */
_PUBLIC_ enum NTDB_ERROR ntdb_chainlock(struct ntdb_context *ntdb, NTDB_DATA key)
{
- return chainlock(ntdb, &key, F_WRLCK, NTDB_LOCK_WAIT, "ntdb_chainlock");
+ return chainlock(ntdb, &key, F_WRLCK);
}
_PUBLIC_ void ntdb_chainunlock(struct ntdb_context *ntdb, NTDB_DATA key)
{
- uint64_t h = ntdb_hash(ntdb, key.dptr, key.dsize);
- ntdb_off_t lockstart, locksize;
- unsigned int group, gbits;
-
- gbits = NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS;
- group = bits_from(h, 64 - gbits, gbits);
+ uint32_t h = ntdb_hash(ntdb, key.dptr, key.dsize);
- lockstart = hlock_range(group, &locksize);
-
- ntdb_trace_1rec(ntdb, "ntdb_chainunlock", key);
- ntdb_unlock_hashes(ntdb, lockstart, locksize, F_WRLCK);
+ ntdb_unlock_hash(ntdb, bits_from(h, 0, ntdb->hash_bits), F_WRLCK);
}
-_PUBLIC_ enum NTDB_ERROR ntdb_chainlock_read(struct ntdb_context *ntdb, NTDB_DATA key)
+_PUBLIC_ enum NTDB_ERROR ntdb_chainlock_read(struct ntdb_context *ntdb,
+ NTDB_DATA key)
{
- return chainlock(ntdb, &key, F_RDLCK, NTDB_LOCK_WAIT,
- "ntdb_chainlock_read");
+ return chainlock(ntdb, &key, F_RDLCK);
}
_PUBLIC_ void ntdb_chainunlock_read(struct ntdb_context *ntdb, NTDB_DATA key)
{
- uint64_t h = ntdb_hash(ntdb, key.dptr, key.dsize);
- ntdb_off_t lockstart, locksize;
- unsigned int group, gbits;
-
- gbits = NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS;
- group = bits_from(h, 64 - gbits, gbits);
-
- lockstart = hlock_range(group, &locksize);
+ uint32_t h = ntdb_hash(ntdb, key.dptr, key.dsize);
- ntdb_trace_1rec(ntdb, "ntdb_chainunlock_read", key);
- ntdb_unlock_hashes(ntdb, lockstart, locksize, F_RDLCK);
+ ntdb_unlock_hash(ntdb, bits_from(h, 0, ntdb->hash_bits), F_RDLCK);
}
diff --git a/lib/ntdb/io.c b/lib/ntdb/io.c
index b0588132e0..11d5b1f367 100644
--- a/lib/ntdb/io.c
+++ b/lib/ntdb/io.c
@@ -26,7 +26,6 @@
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "private.h"
-#include <assert.h>
#include <ccan/likely/likely.h>
void ntdb_munmap(struct ntdb_file *file)
diff --git a/lib/ntdb/lock.c b/lib/ntdb/lock.c
index 95824db742..f6a811a3fa 100644
--- a/lib/ntdb/lock.c
+++ b/lib/ntdb/lock.c
@@ -26,7 +26,6 @@
*/
#include "private.h"
-#include <assert.h>
#include <ccan/build_assert/build_assert.h>
/* If we were threaded, we could wait for unlock, but we're not, so fail. */
@@ -346,12 +345,8 @@ static enum NTDB_ERROR ntdb_nest_lock(struct ntdb_context *ntdb,
struct ntdb_lock *new_lck;
enum NTDB_ERROR ecode;
- if (offset > (NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE
- + ntdb->file->map_size / 8)) {
- return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
- "ntdb_nest_lock: invalid offset %zu ltype=%d",
- (size_t)offset, ltype);
- }
+ assert(offset <= (NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits)
+ + ntdb->file->map_size / 8));
if (ntdb->flags & NTDB_NOLOCK)
return NTDB_SUCCESS;
@@ -581,17 +576,17 @@ enum NTDB_ERROR ntdb_allrecord_lock(struct ntdb_context *ntdb, int ltype,
again:
/* Lock hashes, gradually. */
ecode = ntdb_lock_gradual(ntdb, ltype, flags, NTDB_HASH_LOCK_START,
- NTDB_HASH_LOCK_RANGE);
+ 1 << ntdb->hash_bits);
if (ecode != NTDB_SUCCESS)
return ecode;
/* Lock free tables: there to end of file. */
ecode = ntdb_brlock(ntdb, ltype,
- NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE,
- 0, flags);
+ NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits),
+ 0, flags);
if (ecode != NTDB_SUCCESS) {
ntdb_brunlock(ntdb, ltype, NTDB_HASH_LOCK_START,
- NTDB_HASH_LOCK_RANGE);
+ 1 << ntdb->hash_bits);
return ecode;
}
@@ -700,7 +695,7 @@ bool ntdb_has_hash_locks(struct ntdb_context *ntdb)
for (i=0; i<ntdb->file->num_lockrecs; i++) {
if (ntdb->file->lockrecs[i].off >= NTDB_HASH_LOCK_START
&& ntdb->file->lockrecs[i].off < (NTDB_HASH_LOCK_START
- + NTDB_HASH_LOCK_RANGE))
+ + (1 << ntdb->hash_bits)))
return true;
}
return false;
@@ -715,20 +710,19 @@ static bool ntdb_has_free_lock(struct ntdb_context *ntdb)
for (i=0; i<ntdb->file->num_lockrecs; i++) {
if (ntdb->file->lockrecs[i].off
- > NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE)
+ > NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits))
return true;
}
return false;
}
-enum NTDB_ERROR ntdb_lock_hashes(struct ntdb_context *ntdb,
- ntdb_off_t hash_lock,
- ntdb_len_t hash_range,
- int ltype, enum ntdb_lock_flags waitflag)
+enum NTDB_ERROR ntdb_lock_hash(struct ntdb_context *ntdb,
+ unsigned int h,
+ int ltype)
{
- /* FIXME: Do this properly, using hlock_range */
- unsigned l = NTDB_HASH_LOCK_START
- + (hash_lock >> (64 - NTDB_HASH_LOCK_RANGE_BITS));
+ unsigned l = NTDB_HASH_LOCK_START + h;
+
+ assert(h < (1 << ntdb->hash_bits));
/* a allrecord lock allows us to avoid per chain locks */
if (ntdb->file->allrecord_lock.count) {
@@ -760,15 +754,13 @@ enum NTDB_ERROR ntdb_lock_hashes(struct ntdb_context *ntdb,
" already have expansion lock");
}
- return ntdb_nest_lock(ntdb, l, ltype, waitflag);
+ return ntdb_nest_lock(ntdb, l, ltype, NTDB_LOCK_WAIT);
}
-enum NTDB_ERROR ntdb_unlock_hashes(struct ntdb_context *ntdb,
- ntdb_off_t hash_lock,
- ntdb_len_t hash_range, int ltype)
+enum NTDB_ERROR ntdb_unlock_hash(struct ntdb_context *ntdb,
+ unsigned int h, int ltype)
{
- unsigned l = NTDB_HASH_LOCK_START
- + (hash_lock >> (64 - NTDB_HASH_LOCK_RANGE_BITS));
+ unsigned l = NTDB_HASH_LOCK_START + (h & ((1 << ntdb->hash_bits)-1));
if (ntdb->flags & NTDB_NOLOCK)
return 0;
@@ -791,14 +783,15 @@ enum NTDB_ERROR ntdb_unlock_hashes(struct ntdb_context *ntdb,
return ntdb_nest_unlock(ntdb, l, ltype);
}
-/* Hash locks use NTDB_HASH_LOCK_START + the next 30 bits.
+/* Hash locks use NTDB_HASH_LOCK_START + <number of hash entries>..
* Then we begin; bucket offsets are sizeof(ntdb_len_t) apart, so we divide.
* The result is that on 32 bit systems we don't use lock values > 2^31 on
* files that are less than 4GB.
*/
-static ntdb_off_t free_lock_off(ntdb_off_t b_off)
+static ntdb_off_t free_lock_off(const struct ntdb_context *ntdb,
+ ntdb_off_t b_off)
{
- return NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE
+ return NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits)
+ b_off / sizeof(ntdb_off_t);
}
@@ -834,7 +827,8 @@ enum NTDB_ERROR ntdb_lock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_of
}
#endif
- return ntdb_nest_lock(ntdb, free_lock_off(b_off), F_WRLCK, waitflag);
+ return ntdb_nest_lock(ntdb, free_lock_off(ntdb, b_off), F_WRLCK,
+ waitflag);
}
void ntdb_unlock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off)
@@ -842,7 +836,7 @@ void ntdb_unlock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off)
if (ntdb->file->allrecord_lock.count)
return;
- ntdb_nest_unlock(ntdb, free_lock_off(b_off), F_WRLCK);
+ ntdb_nest_unlock(ntdb, free_lock_off(ntdb, b_off), F_WRLCK);
}
_PUBLIC_ enum NTDB_ERROR ntdb_lockall(struct ntdb_context *ntdb)
diff --git a/lib/ntdb/ntdb.c b/lib/ntdb/ntdb.c
index a74e0f4b78..8d50ba60c1 100644
--- a/lib/ntdb/ntdb.c
+++ b/lib/ntdb/ntdb.c
@@ -25,14 +25,13 @@ static enum NTDB_ERROR update_rec_hdr(struct ntdb_context *ntdb,
ntdb_off_t off,
ntdb_len_t keylen,
ntdb_len_t datalen,
- struct ntdb_used_record *rec,
- uint64_t h)
+ struct ntdb_used_record *rec)
{
uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec);
enum NTDB_ERROR ecode;
ecode = set_header(ntdb, rec, NTDB_USED_MAGIC, keylen, datalen,
- keylen + dataroom, h);
+ keylen + dataroom);
if (ecode == NTDB_SUCCESS) {
ecode = ntdb_write_convert(ntdb, off, rec, sizeof(*rec));
}
@@ -49,8 +48,7 @@ static enum NTDB_ERROR replace_data(struct ntdb_context *ntdb,
enum NTDB_ERROR ecode;
/* Allocate a new record. */
- new_off = alloc(ntdb, key.dsize, dbuf.dsize, h->h, NTDB_USED_MAGIC,
- growing);
+ new_off = alloc(ntdb, key.dsize, dbuf.dsize, NTDB_USED_MAGIC, growing);
if (NTDB_OFF_IS_ERR(new_off)) {
return NTDB_OFF_TO_ERR(new_off);
}
@@ -116,7 +114,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb,
struct ntdb_used_record rec;
enum NTDB_ERROR ecode;
- off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
+ off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec);
if (NTDB_OFF_IS_ERR(off)) {
return NTDB_OFF_TO_ERR(off);
}
@@ -135,7 +133,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb,
/* Can modify in-place. Easy! */
ecode = update_rec_hdr(ntdb, off,
key.dsize, dbuf.dsize,
- &rec, h.h);
+ &rec);
if (ecode != NTDB_SUCCESS) {
goto out;
}
@@ -146,8 +144,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb,
if (ecode != NTDB_SUCCESS) {
goto out;
}
- ntdb_unlock_hashes(ntdb, h.hlock_start,
- h.hlock_range, F_WRLCK);
+ ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
return NTDB_SUCCESS;
}
} else {
@@ -164,7 +161,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb,
/* If we didn't use the old record, this implies we're growing. */
ecode = replace_data(ntdb, &h, key, dbuf, off, old_room, off);
out:
- ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_WRLCK);
+ ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
return ecode;
}
@@ -179,7 +176,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb,
NTDB_DATA new_dbuf;
enum NTDB_ERROR ecode;
- off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
+ off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec);
if (NTDB_OFF_IS_ERR(off)) {
return NTDB_OFF_TO_ERR(off);
}
@@ -191,8 +188,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb,
/* Fast path: can append in place. */
if (rec_extra_padding(&rec) >= dbuf.dsize) {
ecode = update_rec_hdr(ntdb, off, key.dsize,
- old_dlen + dbuf.dsize, &rec,
- h.h);
+ old_dlen + dbuf.dsize, &rec);
if (ecode != NTDB_SUCCESS) {
goto out;
}
@@ -233,7 +229,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb,
out_free_newdata:
ntdb->free_fn(newdata, ntdb->alloc_data);
out:
- ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_WRLCK);
+ ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
return ecode;
}
@@ -245,7 +241,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_fetch(struct ntdb_context *ntdb, NTDB_DATA key,
struct hash_info h;
enum NTDB_ERROR ecode;
- off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL);
+ off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec);
if (NTDB_OFF_IS_ERR(off)) {
return NTDB_OFF_TO_ERR(off);
}
@@ -262,7 +258,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_fetch(struct ntdb_context *ntdb, NTDB_DATA key,
ecode = NTDB_SUCCESS;
}
- ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK);
+ ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
return ecode;
}
@@ -272,11 +268,11 @@ _PUBLIC_ bool ntdb_exists(struct ntdb_context *ntdb, NTDB_DATA key)
struct ntdb_used_record rec;
struct hash_info h;
- off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL);
+ off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec);
if (NTDB_OFF_IS_ERR(off)) {
return false;
}
- ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK);
+ ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
return off ? true : false;
}
@@ -288,7 +284,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_delete(struct ntdb_context *ntdb, NTDB_DATA key)
struct hash_info h;
enum NTDB_ERROR ecode;
- off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
+ off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec);
if (NTDB_OFF_IS_ERR(off)) {
return NTDB_OFF_TO_ERR(off);
}
@@ -316,7 +312,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_delete(struct ntdb_context *ntdb, NTDB_DATA key)
ntdb_inc_seqnum(ntdb);
unlock:
- ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_WRLCK);
+ ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
return ecode;
}
@@ -486,7 +482,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_parse_record_(struct ntdb_context *ntdb,
struct hash_info h;
enum NTDB_ERROR ecode;
- off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL);
+ off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec);
if (NTDB_OFF_IS_ERR(off)) {
return NTDB_OFF_TO_ERR(off);
}
@@ -507,7 +503,7 @@ _PUBLIC_ enum NTDB_ERROR ntdb_parse_record_(struct ntdb_context *ntdb,
}
}
- ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK);
+ ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
return ecode;
}
diff --git a/lib/ntdb/ntdb.h b/lib/ntdb/ntdb.h
index 87f3f5bbfa..09622787d4 100644
--- a/lib/ntdb/ntdb.h
+++ b/lib/ntdb/ntdb.h
@@ -765,7 +765,7 @@ struct ntdb_attribute_log {
*/
struct ntdb_attribute_hash {
struct ntdb_attribute_base base; /* .attr = NTDB_ATTRIBUTE_HASH */
- uint64_t (*fn)(const void *key, size_t len, uint64_t seed,
+ uint32_t (*fn)(const void *key, size_t len, uint32_t seed,
void *data);
void *data;
};
@@ -809,7 +809,6 @@ struct ntdb_attribute_stats {
uint64_t alloc_coalesce_succeeded;
uint64_t alloc_coalesce_num_merged;
uint64_t compares;
- uint64_t compare_wrong_bucket;
uint64_t compare_wrong_offsetbits;
uint64_t compare_wrong_keylen;
uint64_t compare_wrong_rechash;
@@ -822,6 +821,8 @@ struct ntdb_attribute_stats {
uint64_t transaction_read_direct_fail;
uint64_t transaction_write_direct;
uint64_t transaction_write_direct_fail;
+ uint64_t traverses;
+ uint64_t traverse_val_vanished;
uint64_t expands;
uint64_t frees;
uint64_t locks;
diff --git a/lib/ntdb/open.c b/lib/ntdb/open.c
index 6aac46ab2f..4b8c45298b 100644
--- a/lib/ntdb/open.c
+++ b/lib/ntdb/open.c
@@ -17,7 +17,6 @@
*/
#include "private.h"
#include <ccan/build_assert/build_assert.h>
-#include <assert.h>
/* all tdbs, to detect double-opens (fcntl file don't nest!) */
static struct ntdb_context *tdbs = NULL;
@@ -53,10 +52,10 @@ static bool read_all(int fd, void *buf, size_t len)
return true;
}
-static uint64_t random_number(struct ntdb_context *ntdb)
+static uint32_t random_number(struct ntdb_context *ntdb)
{
int fd;
- uint64_t ret = 0;
+ uint32_t ret = 0;
struct timeval now;
fd = open("/dev/urandom", O_RDONLY);
@@ -71,14 +70,14 @@ static uint64_t random_number(struct ntdb_context *ntdb)
fd = open("/dev/egd-pool", O_RDWR);
if (fd >= 0) {
/* Command is 1, next byte is size we want to read. */
- char cmd[2] = { 1, sizeof(uint64_t) };
+ char cmd[2] = { 1, sizeof(uint32_t) };
if (write(fd, cmd, sizeof(cmd)) == sizeof(cmd)) {
- char reply[1 + sizeof(uint64_t)];
+ char reply[1 + sizeof(uint32_t)];
int r = read(fd, reply, sizeof(reply));
if (r > 1) {
/* Copy at least some bytes. */
memcpy(&ret, reply+1, r - 1);
- if (reply[0] == sizeof(uint64_t)
+ if (reply[0] == sizeof(uint32_t)
&& r == sizeof(reply)) {
close(fd);
return ret;
@@ -105,92 +104,119 @@ static void ntdb_context_init(struct ntdb_context *ntdb)
ntdb->access = NULL;
}
-struct new_database {
- struct ntdb_header hdr;
- struct ntdb_freetable ftable;
- struct ntdb_free_record remainder;
-};
+/* initialise a new database:
+ *
+ * struct ntdb_header;
+ * struct {
+ * struct ntdb_used_record hash_header;
+ * ntdb_off_t hash_buckets[1 << ntdb->hash_bits];
+ * } hash;
+ * struct ntdb_freetable ftable;
+ * struct {
+ * struct ntdb_free_record free_header;
+ * char forty_three[...];
+ * } remainder;
+ */
+#define NEW_DATABASE_HDR_SIZE(hbits) \
+ (sizeof(struct ntdb_header) \
+ + sizeof(struct ntdb_used_record) + (sizeof(ntdb_off_t) << hbits) \
+ + sizeof(struct ntdb_freetable) \
+ + sizeof(struct ntdb_free_record))
-/* initialise a new database */
static enum NTDB_ERROR ntdb_new_database(struct ntdb_context *ntdb,
- struct ntdb_attribute_seed *seed,
- struct ntdb_header *hdr)
+ struct ntdb_attribute_seed *seed,
+ struct ntdb_header *rhdr)
{
/* We make it up in memory, then write it out if not internal */
- struct new_database *newdb;
+ struct ntdb_freetable *ftable;
+ struct ntdb_used_record *htable;
+ struct ntdb_header *hdr;
+ struct ntdb_free_record *remainder;
+ char *mem;
unsigned int magic_len;
ssize_t rlen;
- size_t dbsize, remaindersize;
+ size_t dbsize, hashsize, hdrsize, remaindersize;
enum NTDB_ERROR ecode;
+ hashsize = sizeof(ntdb_off_t) << ntdb->hash_bits;
+
/* Always make db a multiple of NTDB_PGSIZE */
- dbsize = (sizeof(*newdb) + NTDB_PGSIZE-1) & ~(NTDB_PGSIZE-1);
- remaindersize = dbsize - sizeof(*newdb);
- newdb = ntdb->alloc_fn(ntdb, dbsize, ntdb->alloc_data);
- if (!newdb) {
+ hdrsize = NEW_DATABASE_HDR_SIZE(ntdb->hash_bits);
+ dbsize = (hdrsize + NTDB_PGSIZE-1) & ~(NTDB_PGSIZE-1);
+
+ mem = ntdb->alloc_fn(ntdb, dbsize, ntdb->alloc_data);
+ if (!mem) {
return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
"ntdb_new_database: failed to allocate");
}
+ hdr = (void *)mem;
+ htable = (void *)(mem + sizeof(*hdr));
+ ftable = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize);
+ remainder = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize
+ + sizeof(*ftable));
+
/* Fill in the header */
- newdb->hdr.version = NTDB_VERSION;
+ hdr->version = NTDB_VERSION;
if (seed)
- newdb->hdr.hash_seed = seed->seed;
+ hdr->hash_seed = seed->seed;
else
- newdb->hdr.hash_seed = random_number(ntdb);
- newdb->hdr.hash_test = NTDB_HASH_MAGIC;
- newdb->hdr.hash_test = ntdb->hash_fn(&newdb->hdr.hash_test,
- sizeof(newdb->hdr.hash_test),
- newdb->hdr.hash_seed,
- ntdb->hash_data);
- newdb->hdr.recovery = 0;
- newdb->hdr.features_used = newdb->hdr.features_offered = NTDB_FEATURE_MASK;
- newdb->hdr.seqnum = 0;
- newdb->hdr.capabilities = 0;
- memset(newdb->hdr.reserved, 0, sizeof(newdb->hdr.reserved));
- /* Initial hashes are empty. */
- memset(newdb->hdr.hashtable, 0, sizeof(newdb->hdr.hashtable));
+ hdr->hash_seed = random_number(ntdb);
+ hdr->hash_test = NTDB_HASH_MAGIC;
+ hdr->hash_test = ntdb->hash_fn(&hdr->hash_test,
+ sizeof(hdr->hash_test),
+ hdr->hash_seed,
+ ntdb->hash_data);
+ hdr->hash_bits = ntdb->hash_bits;
+ hdr->recovery = 0;
+ hdr->features_used = hdr->features_offered = NTDB_FEATURE_MASK;
+ hdr->seqnum = 0;
+ hdr->capabilities = 0;
+ memset(hdr->reserved, 0, sizeof(hdr->reserved));
+
+ /* Hash is all zero after header. */
+ set_header(NULL, htable, NTDB_HTABLE_MAGIC, 0, hashsize, hashsize);
+ memset(htable + 1, 0, hashsize);
/* Free is empty. */
- newdb->hdr.free_table = offsetof(struct new_database, ftable);
- memset(&newdb->ftable, 0, sizeof(newdb->ftable));
- ecode = set_header(NULL, &newdb->ftable.hdr, NTDB_FTABLE_MAGIC, 0,
- sizeof(newdb->ftable) - sizeof(newdb->ftable.hdr),
- sizeof(newdb->ftable) - sizeof(newdb->ftable.hdr),
- 0);
+ hdr->free_table = (char *)ftable - (char *)hdr;
+ memset(ftable, 0, sizeof(*ftable));
+ ecode = set_header(NULL, &ftable->hdr, NTDB_FTABLE_MAGIC, 0,
+ sizeof(*ftable) - sizeof(ftable->hdr),
+ sizeof(*ftable) - sizeof(ftable->hdr));
if (ecode != NTDB_SUCCESS) {
goto out;
}
/* Rest of database is a free record, containing junk. */
- newdb->remainder.ftable_and_len
- = (remaindersize + sizeof(newdb->remainder)
+ remaindersize = dbsize - hdrsize;
+ remainder->ftable_and_len
+ = (remaindersize + sizeof(*remainder)
- sizeof(struct ntdb_used_record));
- newdb->remainder.next = 0;
- newdb->remainder.magic_and_prev
+ remainder->next = 0;
+ remainder->magic_and_prev
= (NTDB_FREE_MAGIC << (64-NTDB_OFF_UPPER_STEAL))
- | offsetof(struct new_database, remainder);
- memset(&newdb->remainder + 1, 0x43, remaindersize);
+ | ((char *)remainder - (char *)hdr);
+ memset(remainder + 1, 0x43, remaindersize);
/* Put in our single free entry. */
- newdb->ftable.buckets[size_to_bucket(remaindersize)] =
- offsetof(struct new_database, remainder);
+ ftable->buckets[size_to_bucket(remaindersize)] =
+ (char *)remainder - (char *)hdr;
/* Magic food */
- memset(newdb->hdr.magic_food, 0, sizeof(newdb->hdr.magic_food));
- strcpy(newdb->hdr.magic_food, NTDB_MAGIC_FOOD);
+ memset(hdr->magic_food, 0, sizeof(hdr->magic_food));
+ strcpy(hdr->magic_food, NTDB_MAGIC_FOOD);
/* This creates an endian-converted database, as if read from disk */
- magic_len = sizeof(newdb->hdr.magic_food);
- ntdb_convert(ntdb,
- (char *)&newdb->hdr + magic_len,
- sizeof(*newdb) - magic_len);
+ magic_len = sizeof(hdr->magic_food);
+ ntdb_convert(ntdb, (char *)hdr + magic_len, hdrsize - magic_len);
- *hdr = newdb->hdr;
+ /* Return copy of header. */
+ *rhdr = *hdr;
if (ntdb->flags & NTDB_INTERNAL) {
ntdb->file->map_size = dbsize;
- ntdb->file->map_ptr = newdb;
+ ntdb->file->map_ptr = hdr;
return NTDB_SUCCESS;
}
if (lseek(ntdb->file->fd, 0, SEEK_SET) == -1) {
@@ -207,7 +233,7 @@ static enum NTDB_ERROR ntdb_new_database(struct ntdb_context *ntdb,
goto out;
}
- rlen = write(ntdb->file->fd, newdb, dbsize);
+ rlen = write(ntdb->file->fd, hdr, dbsize);
if (rlen != dbsize) {
if (rlen >= 0)
errno = ENOSPC;
@@ -218,7 +244,7 @@ static enum NTDB_ERROR ntdb_new_database(struct ntdb_context *ntdb,
}
out:
- ntdb->free_fn(newdb, ntdb->alloc_data);
+ ntdb->free_fn(hdr, ntdb->alloc_data);
return ecode;
}
@@ -487,6 +513,7 @@ _PUBLIC_ struct ntdb_context *ntdb_open(const char *name, int ntdb_flags,
ntdb->alloc_fn = default_alloc;
ntdb->expand_fn = default_expand;
ntdb->free_fn = default_free;
+ ntdb->hash_bits = NTDB_DEFAULT_HBITS; /* 64k of hash by default. */
while (attr) {
switch (attr->base.attr) {
@@ -687,6 +714,7 @@ _PUBLIC_ struct ntdb_context *ntdb_open(const char *name, int ntdb_flags,
ntdb_context_init(ntdb);
ntdb_convert(ntdb, &hdr, sizeof(hdr));
+ ntdb->hash_bits = hdr.hash_bits;
ntdb->hash_seed = hdr.hash_seed;
hash_test = NTDB_HASH_MAGIC;
hash_test = ntdb_hash(ntdb, &hash_test, sizeof(hash_test));
diff --git a/lib/ntdb/private.h b/lib/ntdb/private.h
index 488e99a0f9..957d85e494 100644
--- a/lib/ntdb/private.h
+++ b/lib/ntdb/private.h
@@ -48,6 +48,7 @@
#include <utime.h>
#include <unistd.h>
#endif
+#include <assert.h>
#ifndef TEST_IT
#define TEST_IT(cond)
@@ -114,30 +115,17 @@ typedef int ntdb_bool_err;
/* Hash chain locks. */
#define NTDB_HASH_LOCK_START 64
-/* Range for hash locks. */
-#define NTDB_HASH_LOCK_RANGE_BITS 30
-#define NTDB_HASH_LOCK_RANGE (1 << NTDB_HASH_LOCK_RANGE_BITS)
-
-/* We have 1024 entries in the top level. */
-#define NTDB_TOPLEVEL_HASH_BITS 10
-/* And 64 entries in each sub-level: thus 64 bits exactly after 9 levels. */
-#define NTDB_SUBLEVEL_HASH_BITS 6
-/* And 8 entries in each group, ie 8 groups per sublevel. */
-#define NTDB_HASH_GROUP_BITS 3
-/* This is currently 10: beyond this we chain. */
-#define NTDB_MAX_LEVELS (1+(64-NTDB_TOPLEVEL_HASH_BITS) / NTDB_SUBLEVEL_HASH_BITS)
-
/* Extend file by least 100 times larger than needed. */
#define NTDB_EXTENSION_FACTOR 100
-/* We steal bits from the offsets to store hash info. */
-#define NTDB_OFF_HASH_GROUP_MASK ((1ULL << NTDB_HASH_GROUP_BITS) - 1)
/* We steal this many upper bits, giving a maximum offset of 64 exabytes. */
#define NTDB_OFF_UPPER_STEAL 8
-#define NTDB_OFF_UPPER_STEAL_EXTRA 7
-/* The bit number where we store extra hash bits. */
-#define NTDB_OFF_HASH_EXTRA_BIT 57
-#define NTDB_OFF_UPPER_STEAL_SUBHASH_BIT 56
+
+/* And we use the lower bit, too. */
+#define NTDB_OFF_CHAIN_BIT 0
+
+/* Hash table sits just after the header. */
+#define NTDB_HASH_OFFSET (sizeof(struct ntdb_header))
/* Additional features we understand. Currently: none. */
#define NTDB_FEATURE_MASK ((uint64_t)0)
@@ -145,7 +133,7 @@ typedef int ntdb_bool_err;
/* The bit number where we store the extra hash bits. */
/* Convenience mask to get actual offset. */
#define NTDB_OFF_MASK \
- (((1ULL << (64 - NTDB_OFF_UPPER_STEAL)) - 1) - NTDB_OFF_HASH_GROUP_MASK)
+ (((1ULL << (64 - NTDB_OFF_UPPER_STEAL)) - 1) - (1<<NTDB_OFF_CHAIN_BIT))
/* How many buckets in a free list: see size_to_bucket(). */
#define NTDB_FREE_BUCKETS (64 - NTDB_OFF_UPPER_STEAL)
@@ -157,12 +145,14 @@ typedef int ntdb_bool_err;
/* Indicates this entry is not on an flist (can happen during coalescing) */
#define NTDB_FTABLE_NONE ((1ULL << NTDB_OFF_UPPER_STEAL) - 1)
+/* By default, hash is 64k bytes */
+#define NTDB_DEFAULT_HBITS 13
+
struct ntdb_used_record {
/* For on-disk compatibility, we avoid bitfields:
magic: 16, (highest)
key_len_bits: 5,
extra_padding: 32
- hash_bits: 11
*/
uint64_t magic_and_meta;
/* The bottom key_len_bits*2 are key length, rest is data length. */
@@ -189,11 +179,6 @@ static inline uint64_t rec_extra_padding(const struct ntdb_used_record *r)
return (r->magic_and_meta >> 11) & 0xFFFFFFFF;
}
-static inline uint32_t rec_hash(const struct ntdb_used_record *r)
-{
- return r->magic_and_meta & ((1 << 11) - 1);
-}
-
static inline uint16_t rec_magic(const struct ntdb_used_record *r)
{
return (r->magic_and_meta >> 48);
@@ -236,17 +221,12 @@ struct ntdb_recovery_record {
uint64_t eof;
};
-/* If we bottom out of the subhashes, we chain. */
-struct ntdb_chain {
- ntdb_off_t rec[1 << NTDB_HASH_GROUP_BITS];
- ntdb_off_t next;
-};
-
/* this is stored at the front of every database */
struct ntdb_header {
char magic_food[64]; /* for /etc/magic */
/* FIXME: Make me 32 bit? */
uint64_t version; /* version of the code */
+ uint64_t hash_bits; /* bits for toplevel hash table. */
uint64_t hash_test; /* result of hashing HASH_MAGIC. */
uint64_t hash_seed; /* "random" seed written at creation time. */
ntdb_off_t free_table; /* (First) free table. */
@@ -260,8 +240,12 @@ struct ntdb_header {
ntdb_off_t capabilities; /* Optional linked list of capabilities. */
ntdb_off_t reserved[22];
- /* Top level hash table. */
- ntdb_off_t hashtable[1ULL << NTDB_TOPLEVEL_HASH_BITS];
+ /*
+ * Hash table is next:
+ *
+ * struct ntdb_used_record htable_hdr;
+ * ntdb_off_t htable[1 << hash_bits];
+ */
};
struct ntdb_freetable {
@@ -280,33 +264,15 @@ struct ntdb_capability {
/* Information about a particular (locked) hash entry. */
struct hash_info {
/* Full hash value of entry. */
- uint64_t h;
- /* Start and length of lock acquired. */
- ntdb_off_t hlock_start;
- ntdb_len_t hlock_range;
- /* Start of hash group. */
- ntdb_off_t group_start;
- /* Bucket we belong in. */
- unsigned int home_bucket;
+ uint32_t h;
+ /* Start of hash table / chain. */
+ ntdb_off_t table;
+ /* Number of entries in this table/chain. */
+ ntdb_off_t table_size;
/* Bucket we (or an empty space) were found in. */
- unsigned int found_bucket;
- /* How many bits of the hash are already used. */
- unsigned int hash_used;
- /* Current working group. */
- ntdb_off_t group[1 << NTDB_HASH_GROUP_BITS];
-};
-
-struct traverse_info {
- struct traverse_level {
- ntdb_off_t hashtable;
- /* We ignore groups here, and treat it as a big array. */
- unsigned entry;
- unsigned int total_buckets;
- } levels[NTDB_MAX_LEVELS + 1];
- unsigned int num_levels;
- unsigned int toplevel_group;
- /* This makes delete-everything-inside-traverse work as expected. */
- ntdb_off_t prev;
+ ntdb_off_t bucket;
+ /* Old value that was in that entry (if not found) */
+ ntdb_off_t old_val;
};
enum ntdb_lock_flags {
@@ -375,40 +341,46 @@ struct ntdb_methods {
/*
internal prototypes
*/
+/* Get bits from a value. */
+static inline uint32_t bits_from(uint64_t val, unsigned start, unsigned num)
+{
+ assert(num <= 32);
+ return (val >> start) & ((1U << num) - 1);
+}
+
+
/* hash.c: */
-uint64_t ntdb_jenkins_hash(const void *key, size_t length, uint64_t seed,
+uint32_t ntdb_jenkins_hash(const void *key, size_t length, uint32_t seed,
void *unused);
enum NTDB_ERROR first_in_hash(struct ntdb_context *ntdb,
- struct traverse_info *tinfo,
+ struct hash_info *h,
NTDB_DATA *kbuf, size_t *dlen);
enum NTDB_ERROR next_in_hash(struct ntdb_context *ntdb,
- struct traverse_info *tinfo,
+ struct hash_info *h,
NTDB_DATA *kbuf, size_t *dlen);
/* Hash random memory. */
-uint64_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len);
-
-/* Hash on disk. */
-uint64_t hash_record(struct ntdb_context *ntdb, ntdb_off_t off);
+uint32_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len);
/* Find and lock a hash entry (or where it would be). */
ntdb_off_t find_and_lock(struct ntdb_context *ntdb,
NTDB_DATA key,
int ltype,
struct hash_info *h,
- struct ntdb_used_record *rec,
- struct traverse_info *tinfo);
+ struct ntdb_used_record *rec);
enum NTDB_ERROR replace_in_hash(struct ntdb_context *ntdb,
- struct hash_info *h,
+ const struct hash_info *h,
ntdb_off_t new_off);
-enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb, struct hash_info *h,
+enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb,
+ const struct hash_info *h,
ntdb_off_t new_off);
-enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb, struct hash_info *h);
+enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb,
+ const struct hash_info *h);
/* For ntdb_check */
bool is_subhash(ntdb_off_t val);
@@ -424,7 +396,7 @@ ntdb_off_t next_ftable(struct ntdb_context *ntdb, ntdb_off_t ftable);
/* This returns space or -ve error number. */
ntdb_off_t alloc(struct ntdb_context *ntdb, size_t keylen, size_t datalen,
- uint64_t hash, unsigned magic, bool growing);
+ unsigned magic, bool growing);
/* Put this record in a free list. */
enum NTDB_ERROR add_free_record(struct ntdb_context *ntdb,
@@ -436,7 +408,7 @@ enum NTDB_ERROR add_free_record(struct ntdb_context *ntdb,
enum NTDB_ERROR set_header(struct ntdb_context *ntdb,
struct ntdb_used_record *rec,
unsigned magic, uint64_t keylen, uint64_t datalen,
- uint64_t actuallen, unsigned hashlow);
+ uint64_t actuallen);
/* Used by ntdb_check to verify. */
unsigned int size_to_bucket(ntdb_len_t data_len);
@@ -504,13 +476,12 @@ enum NTDB_ERROR owner_conflict(struct ntdb_context *ntdb, const char *call);
/* If we fork, we no longer really own locks. */
bool check_lock_pid(struct ntdb_context *ntdb, const char *call, bool log);
-/* Lock/unlock a range of hashes. */
-enum NTDB_ERROR ntdb_lock_hashes(struct ntdb_context *ntdb,
- ntdb_off_t hash_lock, ntdb_len_t hash_range,
- int ltype, enum ntdb_lock_flags waitflag);
-enum NTDB_ERROR ntdb_unlock_hashes(struct ntdb_context *ntdb,
- ntdb_off_t hash_lock,
- ntdb_len_t hash_range, int ltype);
+/* Lock/unlock a hash bucket. */
+enum NTDB_ERROR ntdb_lock_hash(struct ntdb_context *ntdb,
+ unsigned int hbucket,
+ int ltype);
+enum NTDB_ERROR ntdb_unlock_hash(struct ntdb_context *ntdb,
+ unsigned int hash, int ltype);
/* For closing the file. */
void ntdb_lock_cleanup(struct ntdb_context *ntdb);
@@ -588,9 +559,11 @@ struct ntdb_context {
struct ntdb_file *file;
/* Hash function. */
- uint64_t (*hash_fn)(const void *key, size_t len, uint64_t seed, void *);
+ uint32_t (*hash_fn)(const void *key, size_t len, uint32_t seed, void *);
void *hash_data;
- uint64_t hash_seed;
+ uint32_t hash_seed;
+ /* Bits in toplevel hash table. */
+ unsigned int hash_bits;
/* Allocate and free functions. */
void *(*alloc_fn)(const void *owner, size_t len, void *priv_data);
diff --git a/lib/ntdb/summary.c b/lib/ntdb/summary.c
index f5313bec55..5a75dc5b11 100644
--- a/lib/ntdb/summary.c
+++ b/lib/ntdb/summary.c
@@ -16,7 +16,6 @@
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "private.h"
-#include <assert.h>
#include <ccan/tally/tally.h>
#define SUMMARY_FORMAT \
@@ -30,9 +29,8 @@
"Number of uncoalesced records: %zu\n" \
"Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n%s" \
"Toplevel hash used: %u of %u\n" \
- "Number of chains: %zu\n" \
- "Number of subhashes: %zu\n" \
- "Smallest/average/largest subhash entries: %zu/%zu/%zu\n%s" \
+ "Number of hashes: %zu\n" \
+ "Smallest/average/largest hash chains: %zu/%zu/%zu\n%s" \
"Percentage keys/data/padding/free/rechdrs/freehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n"
#define BUCKET_SUMMARY_FORMAT_A \
@@ -48,17 +46,17 @@
#define HISTO_HEIGHT 20
static ntdb_off_t count_hash(struct ntdb_context *ntdb,
- ntdb_off_t hash_off, unsigned bits)
+ ntdb_off_t hash_off,
+ ntdb_off_t num)
{
const ntdb_off_t *h;
- ntdb_off_t count = 0;
- unsigned int i;
+ ntdb_off_t i, count = 0;
- h = ntdb_access_read(ntdb, hash_off, sizeof(*h) << bits, true);
+ h = ntdb_access_read(ntdb, hash_off, sizeof(*h) * num, true);
if (NTDB_PTR_IS_ERR(h)) {
return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(h));
}
- for (i = 0; i < (1 << bits); i++)
+ for (i = 0; i < num; i++)
count += (h[i] != 0);
ntdb_access_release(ntdb, h);
@@ -66,14 +64,13 @@ static ntdb_off_t count_hash(struct ntdb_context *ntdb,
}
static enum NTDB_ERROR summarize(struct ntdb_context *ntdb,
- struct tally *hashes,
struct tally *ftables,
struct tally *fr,
struct tally *keys,
struct tally *data,
struct tally *extra,
struct tally *uncoal,
- struct tally *chains,
+ struct tally *hashes,
size_t *num_caps)
{
ntdb_off_t off;
@@ -119,8 +116,8 @@ static enum NTDB_ERROR summarize(struct ntdb_context *ntdb,
tally_add(extra, rec_extra_padding(&p->u));
} else if (rec_magic(&p->u) == NTDB_HTABLE_MAGIC) {
ntdb_off_t count = count_hash(ntdb,
- off + sizeof(p->u),
- NTDB_SUBLEVEL_HASH_BITS);
+ off + sizeof(p->u),
+ 1 << ntdb->hash_bits);
if (NTDB_OFF_IS_ERR(count)) {
return NTDB_OFF_TO_ERR(count);
}
@@ -139,7 +136,8 @@ static enum NTDB_ERROR summarize(struct ntdb_context *ntdb,
len = sizeof(p->u)
+ rec_data_length(&p->u)
+ rec_extra_padding(&p->u);
- tally_add(chains, 1);
+ tally_add(hashes,
+ rec_data_length(&p->u)/sizeof(ntdb_off_t));
tally_add(extra, rec_extra_padding(&p->u));
} else if (rec_magic(&p->u) == NTDB_CAP_MAGIC) {
len = sizeof(p->u)
@@ -200,12 +198,11 @@ _PUBLIC_ enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb,
{
ntdb_len_t len;
size_t num_caps = 0;
- struct tally *ftables, *hashes, *freet, *keys, *data, *extra, *uncoal,
- *chains;
- char *hashesg, *freeg, *keysg, *datag, *extrag, *uncoalg;
+ struct tally *ftables, *freet, *keys, *data, *extra, *uncoal, *hashes;
+ char *freeg, *keysg, *datag, *extrag, *uncoalg, *hashesg;
enum NTDB_ERROR ecode;
- hashesg = freeg = keysg = datag = extrag = uncoalg = NULL;
+ freeg = keysg = datag = extrag = uncoalg = hashesg = NULL;
ecode = ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false);
if (ecode != NTDB_SUCCESS) {
@@ -220,44 +217,43 @@ _PUBLIC_ enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb,
/* Start stats off empty. */
ftables = tally_new(HISTO_HEIGHT);
- hashes = tally_new(HISTO_HEIGHT);
freet = tally_new(HISTO_HEIGHT);
keys = tally_new(HISTO_HEIGHT);
data = tally_new(HISTO_HEIGHT);
extra = tally_new(HISTO_HEIGHT);
uncoal = tally_new(HISTO_HEIGHT);
- chains = tally_new(HISTO_HEIGHT);
- if (!ftables || !hashes || !freet || !keys || !data || !extra
- || !uncoal || !chains) {
+ hashes = tally_new(HISTO_HEIGHT);
+ if (!ftables || !freet || !keys || !data || !extra
+ || !uncoal || !hashes) {
ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
"ntdb_summary: failed to allocate"
" tally structures");
goto unlock;
}
- ecode = summarize(ntdb, hashes, ftables, freet, keys, data, extra,
- uncoal, chains, &num_caps);
+ ecode = summarize(ntdb, ftables, freet, keys, data, extra,
+ uncoal, hashes, &num_caps);
if (ecode != NTDB_SUCCESS) {
goto unlock;
}
if (flags & NTDB_SUMMARY_HISTOGRAMS) {
- hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT);
freeg = tally_histogram(freet, HISTO_WIDTH, HISTO_HEIGHT);
keysg = tally_histogram(keys, HISTO_WIDTH, HISTO_HEIGHT);
datag = tally_histogram(data, HISTO_WIDTH, HISTO_HEIGHT);
extrag = tally_histogram(extra, HISTO_WIDTH, HISTO_HEIGHT);
uncoalg = tally_histogram(uncoal, HISTO_WIDTH, HISTO_HEIGHT);
+ hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT);
}
/* 20 is max length of a %llu. */
len = strlen(SUMMARY_FORMAT) + 33*20 + 1
- + (hashesg ? strlen(hashesg) : 0)
+ (freeg ? strlen(freeg) : 0)
+ (keysg ? strlen(keysg) : 0)
+ (datag ? strlen(datag) : 0)
+ (extrag ? strlen(extrag) : 0)
+ (uncoalg ? strlen(uncoalg) : 0)
+ + (hashesg ? strlen(hashesg) : 0)
+ num_caps * (strlen(CAPABILITY_FORMAT) + 20
+ strlen(" (uncheckable,read-only)"));
@@ -284,11 +280,9 @@ _PUBLIC_ enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb,
tally_total(uncoal, NULL),
tally_min(uncoal), tally_mean(uncoal), tally_max(uncoal),
uncoalg ? uncoalg : "",
- (unsigned)count_hash(ntdb, offsetof(struct ntdb_header,
- hashtable),
- NTDB_TOPLEVEL_HASH_BITS),
- 1 << NTDB_TOPLEVEL_HASH_BITS,
- tally_num(chains),
+ (unsigned)count_hash(ntdb, sizeof(struct ntdb_header),
+ 1 << ntdb->hash_bits),
+ 1 << ntdb->hash_bits,
tally_num(hashes),
tally_min(hashes), tally_mean(hashes), tally_max(hashes),
hashesg ? hashesg : "",
@@ -300,29 +294,26 @@ _PUBLIC_ enum NTDB_ERROR ntdb_summary(struct ntdb_context *ntdb,
* sizeof(struct ntdb_used_record) * 100.0 / ntdb->file->map_size,
tally_num(ftables) * sizeof(struct ntdb_freetable)
* 100.0 / ntdb->file->map_size,
- (tally_num(hashes)
- * (sizeof(ntdb_off_t) << NTDB_SUBLEVEL_HASH_BITS)
- + (sizeof(ntdb_off_t) << NTDB_TOPLEVEL_HASH_BITS)
- + sizeof(struct ntdb_chain) * tally_num(chains))
+ (tally_total(hashes, NULL) * sizeof(ntdb_off_t)
+ + (sizeof(ntdb_off_t) << ntdb->hash_bits))
* 100.0 / ntdb->file->map_size);
add_capabilities(ntdb, *summary);
unlock:
- ntdb->free_fn(hashesg, ntdb->alloc_data);
ntdb->free_fn(freeg, ntdb->alloc_data);
ntdb->free_fn(keysg, ntdb->alloc_data);
ntdb->free_fn(datag, ntdb->alloc_data);
ntdb->free_fn(extrag, ntdb->alloc_data);
ntdb->free_fn(uncoalg, ntdb->alloc_data);
- ntdb->free_fn(hashes, ntdb->alloc_data);
+ ntdb->free_fn(hashesg, ntdb->alloc_data);
ntdb->free_fn(freet, ntdb->alloc_data);
ntdb->free_fn(keys, ntdb->alloc_data);
ntdb->free_fn(data, ntdb->alloc_data);
ntdb->free_fn(extra, ntdb->alloc_data);
ntdb->free_fn(uncoal, ntdb->alloc_data);
ntdb->free_fn(ftables, ntdb->alloc_data);
- ntdb->free_fn(chains, ntdb->alloc_data);
+ ntdb->free_fn(hashes, ntdb->alloc_data);
ntdb_allrecord_unlock(ntdb, F_RDLCK);
ntdb_unlock_expand(ntdb, F_RDLCK);
diff --git a/lib/ntdb/test/api-12-store.c b/lib/ntdb/test/api-12-store.c
index 24d9498755..8f1f42352b 100644
--- a/lib/ntdb/test/api-12-store.c
+++ b/lib/ntdb/test/api-12-store.c
@@ -9,7 +9,7 @@
#include "logging.h"
/* We use the same seed which we saw a failure on. */
-static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p)
+static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p)
{
return hash64_stable((const unsigned char *)key, len,
*(uint64_t *)p);
diff --git a/lib/ntdb/test/api-13-delete.c b/lib/ntdb/test/api-13-delete.c
index 182252b109..9bf4026d12 100644
--- a/lib/ntdb/test/api-13-delete.c
+++ b/lib/ntdb/test/api-13-delete.c
@@ -8,14 +8,13 @@
#include "logging.h"
/* We rig the hash so adjacent-numbered records always clash. */
-static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv)
+static uint32_t clash(const void *key, size_t len, uint32_t seed, void *priv)
{
- return ((uint64_t)*(const unsigned int *)key)
- << (64 - NTDB_TOPLEVEL_HASH_BITS - 1);
+ return *((const unsigned int *)key) / 2;
}
/* We use the same seed which we saw a failure on. */
-static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p)
+static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p)
{
return hash64_stable((const unsigned char *)key, len,
*(uint64_t *)p);
diff --git a/lib/ntdb/test/api-20-alloc-attr.c b/lib/ntdb/test/api-20-alloc-attr.c
index d5c7e718bc..5b4fb131f0 100644
--- a/lib/ntdb/test/api-20-alloc-attr.c
+++ b/lib/ntdb/test/api-20-alloc-attr.c
@@ -77,7 +77,7 @@ int main(int argc, char *argv[])
alloc_attr.alloc.free = test_free;
alloc_attr.alloc.priv_data = &owner_weird_count;
- plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 500 * 3 + 4) + 1);
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 700 * 3 + 4) + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
curr_ntdb = NULL;
@@ -88,7 +88,7 @@ int main(int argc, char *argv[])
if (!ntdb)
continue;
- for (j = 0; j < 500; j++) {
+ for (j = 0; j < 700; j++) {
NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
diff --git a/lib/ntdb/test/api-82-lockattr.c b/lib/ntdb/test/api-82-lockattr.c
index 51bb939f59..30de7dfddf 100644
--- a/lib/ntdb/test/api-82-lockattr.c
+++ b/lib/ntdb/test/api-82-lockattr.c
@@ -58,7 +58,7 @@ int main(int argc, char *argv[])
lock_attr.flock.unlock = ntdb_fcntl_unlock;
lock_attr.flock.data = &lock_err;
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 80);
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 81);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
NTDB_DATA d;
@@ -190,6 +190,9 @@ int main(int argc, char *argv[])
/* Nonblocking traverse; go nonblock partway through. */
lock_err = 0;
ok1(ntdb_store(ntdb, key, data, NTDB_REPLACE) == 0);
+ /* Need two entries to ensure two lock attempts! */
+ ok1(ntdb_store(ntdb, ntdb_mkdata("key2", 4), data,
+ NTDB_REPLACE) == 0);
trav_err = EAGAIN;
ok1(ntdb_traverse(ntdb, trav, &lock_err) == NTDB_ERR_LOCK);
ok1(tap_log_messages == 0);
diff --git a/lib/ntdb/test/api-check-callback.c b/lib/ntdb/test/api-check-callback.c
index f74f04b598..3050fcddd9 100644
--- a/lib/ntdb/test/api-check-callback.c
+++ b/lib/ntdb/test/api-check-callback.c
@@ -59,6 +59,7 @@ int main(int argc, char *argv[])
int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
NTDB_NOMMAP|NTDB_CONVERT };
+ return 0;
plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
diff --git a/lib/ntdb/test/api-missing-entries.c b/lib/ntdb/test/api-missing-entries.c
index 1c8064f945..b77f8ff31f 100644
--- a/lib/ntdb/test/api-missing-entries.c
+++ b/lib/ntdb/test/api-missing-entries.c
@@ -11,10 +11,10 @@
#define NUM_RECORDS 1189
/* We use the same seed which we saw this failure on. */
-static uint64_t failhash(const void *key, size_t len, uint64_t seed, void *p)
+static uint32_t failhash(const void *key, size_t len, uint32_t seed, void *p)
{
- seed = 699537674708983027ULL;
- return hash64_stable((const unsigned char *)key, len, seed);
+ return hash64_stable((const unsigned char *)key, len,
+ 699537674708983027ULL);
}
int main(int argc, char *argv[])
diff --git a/lib/ntdb/test/helprun-layout.c b/lib/ntdb/test/helprun-layout.c
index 7f1f9f9d8e..fa6fa29fce 100644
--- a/lib/ntdb/test/helprun-layout.c
+++ b/lib/ntdb/test/helprun-layout.c
@@ -94,13 +94,6 @@ static ntdb_len_t data_record_len(struct tle_used *used)
return len;
}
-static ntdb_len_t hashtable_len(struct tle_hashtable *htable)
-{
- return sizeof(struct ntdb_used_record)
- + (sizeof(ntdb_off_t) << NTDB_SUBLEVEL_HASH_BITS)
- + htable->extra;
-}
-
static ntdb_len_t capability_len(struct tle_capability *cap)
{
return sizeof(struct ntdb_capability) + cap->extra;
@@ -128,25 +121,13 @@ static void set_data_record(void *mem, struct ntdb_context *ntdb,
struct ntdb_used_record *u = mem;
set_header(ntdb, u, NTDB_USED_MAGIC, used->key.dsize, used->data.dsize,
- used->key.dsize + used->data.dsize + used->extra,
- ntdb_hash(ntdb, used->key.dptr, used->key.dsize));
+ used->key.dsize + used->data.dsize + used->extra);
memcpy(u + 1, used->key.dptr, used->key.dsize);
memcpy((char *)(u + 1) + used->key.dsize,
used->data.dptr, used->data.dsize);
add_zero_pad(u, used->key.dsize + used->data.dsize, used->extra);
}
-static void set_hashtable(void *mem, struct ntdb_context *ntdb,
- struct tle_hashtable *htable)
-{
- struct ntdb_used_record *u = mem;
- ntdb_len_t len = sizeof(ntdb_off_t) << NTDB_SUBLEVEL_HASH_BITS;
-
- set_header(ntdb, u, NTDB_HTABLE_MAGIC, 0, len, len + htable->extra, 0);
- memset(u + 1, 0, len);
- add_zero_pad(u, len, htable->extra);
-}
-
static void set_capability(void *mem, struct ntdb_context *ntdb,
struct tle_capability *cap, struct ntdb_header *hdr,
ntdb_off_t last_cap)
@@ -156,7 +137,7 @@ static void set_capability(void *mem, struct ntdb_context *ntdb,
c->type = cap->type;
c->next = 0;
- set_header(ntdb, &c->hdr, NTDB_CAP_MAGIC, 0, len, len, 0);
+ set_header(ntdb, &c->hdr, NTDB_CAP_MAGIC, 0, len, len);
/* Append to capability list. */
if (!last_cap) {
@@ -175,7 +156,7 @@ static void set_freetable(void *mem, struct ntdb_context *ntdb,
memset(ftable, 0, sizeof(*ftable));
set_header(ntdb, &ftable->hdr, NTDB_FTABLE_MAGIC, 0,
sizeof(*ftable) - sizeof(ftable->hdr),
- sizeof(*ftable) - sizeof(ftable->hdr), 0);
+ sizeof(*ftable) - sizeof(ftable->hdr));
if (last_ftable) {
ftable = (struct ntdb_freetable *)((char *)hdr + last_ftable);
@@ -197,12 +178,6 @@ static void add_to_freetable(struct ntdb_context *ntdb,
NTDB_LOCK_WAIT, false);
}
-static ntdb_off_t hbucket_off(ntdb_off_t group_start, unsigned ingroup)
-{
- return group_start
- + (ingroup % (1 << NTDB_HASH_GROUP_BITS)) * sizeof(ntdb_off_t);
-}
-
/* Get bits from a value. */
static uint32_t bits(uint64_t val, unsigned start, unsigned num)
{
@@ -210,22 +185,24 @@ static uint32_t bits(uint64_t val, unsigned start, unsigned num)
return (val >> start) & ((1U << num) - 1);
}
-/* We take bits from the top: that way we can lock whole sections of the hash
- * by using lock ranges. */
-static uint32_t use_bits(uint64_t h, unsigned num, unsigned *used)
+static ntdb_off_t encode_offset(const struct ntdb_context *ntdb,
+ ntdb_off_t new_off, uint32_t hash)
{
- *used += num;
- return bits(h, 64 - *used, num);
+ ntdb_off_t extra;
+
+ assert((new_off & (1ULL << NTDB_OFF_CHAIN_BIT)) == 0);
+ assert((new_off >> (64 - NTDB_OFF_UPPER_STEAL)) == 0);
+ /* We pack extra hash bits into the upper bits of the offset. */
+ extra = bits(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL);
+ extra <<= (64 - NTDB_OFF_UPPER_STEAL);
+
+ return new_off | extra;
}
-static ntdb_off_t encode_offset(ntdb_off_t new_off, unsigned bucket,
- uint64_t h)
+static ntdb_off_t hbucket_off(ntdb_len_t idx)
{
- return bucket
- | new_off
- | ((uint64_t)bits(h, 64 - NTDB_OFF_UPPER_STEAL_EXTRA,
- NTDB_OFF_UPPER_STEAL_EXTRA)
- << NTDB_OFF_HASH_EXTRA_BIT);
+ return sizeof(struct ntdb_header) + sizeof(struct ntdb_used_record)
+ + idx * sizeof(ntdb_off_t);
}
/* FIXME: Our hash table handling here is primitive: we don't expand! */
@@ -233,28 +210,14 @@ static void add_to_hashtable(struct ntdb_context *ntdb,
ntdb_off_t eoff,
NTDB_DATA key)
{
- uint64_t h = ntdb_hash(ntdb, key.dptr, key.dsize);
- ntdb_off_t b_off, group_start;
- unsigned i, group, in_group;
- unsigned used = 0;
+ ntdb_off_t b_off;
+ uint32_t h = ntdb_hash(ntdb, key.dptr, key.dsize);
- group = use_bits(h, NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS, &used);
- in_group = use_bits(h, NTDB_HASH_GROUP_BITS, &used);
+ b_off = hbucket_off(h & ((1 << ntdb->hash_bits)-1));
+ if (ntdb_read_off(ntdb, b_off) != 0)
+ abort();
- group_start = offsetof(struct ntdb_header, hashtable)
- + group * (sizeof(ntdb_off_t) << NTDB_HASH_GROUP_BITS);
-
- for (i = 0; i < (1 << NTDB_HASH_GROUP_BITS); i++) {
- unsigned bucket = (in_group + i) % (1 << NTDB_HASH_GROUP_BITS);
-
- b_off = hbucket_off(group_start, bucket);
- if (ntdb_read_off(ntdb, b_off) == 0) {
- ntdb_write_off(ntdb, b_off,
- encode_offset(eoff, in_group, h));
- return;
- }
- }
- abort();
+ ntdb_write_off(ntdb, b_off, encode_offset(ntdb, eoff, h));
}
static struct tle_freetable *find_ftable(struct ntdb_layout *layout, unsigned num)
@@ -277,11 +240,16 @@ struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout,
union ntdb_attribute *attr)
{
unsigned int i;
- ntdb_off_t off, len, last_ftable, last_cap;
+ ntdb_off_t off, hdrlen, len, last_ftable, last_cap;
char *mem;
struct ntdb_context *ntdb;
- off = sizeof(struct ntdb_header);
+ /* Now populate our header, cribbing from a real NTDB header. */
+ ntdb = ntdb_open("layout", NTDB_INTERNAL, O_RDWR, 0, attr);
+
+ off = sizeof(struct ntdb_header) + sizeof(struct ntdb_used_record)
+ + (sizeof(ntdb_off_t) << ntdb->hash_bits);
+ hdrlen = off;
/* First pass of layout: calc lengths */
for (i = 0; i < layout->num_elems; i++) {
@@ -297,9 +265,6 @@ struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout,
case DATA:
len = data_record_len(&e->used);
break;
- case HASHTABLE:
- len = hashtable_len(&e->hashtable);
- break;
case CAPABILITY:
len = capability_len(&e->capability);
break;
@@ -312,9 +277,7 @@ struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout,
mem = malloc(off);
/* Fill with some weird pattern. */
memset(mem, 0x99, off);
- /* Now populate our header, cribbing from a real NTDB header. */
- ntdb = ntdb_open("layout", NTDB_INTERNAL, O_RDWR, 0, attr);
- memcpy(mem, ntdb->file->map_ptr, sizeof(struct ntdb_header));
+ memcpy(mem, ntdb->file->map_ptr, hdrlen);
/* Mug the ntdb we have to make it use this. */
freefn(ntdb->file->map_ptr);
@@ -337,9 +300,6 @@ struct ntdb_context *ntdb_layout_get(struct ntdb_layout *layout,
case DATA:
set_data_record(mem + e->base.off, ntdb, &e->used);
break;
- case HASHTABLE:
- set_hashtable(mem + e->base.off, ntdb, &e->hashtable);
- break;
case CAPABILITY:
set_capability(mem + e->base.off, ntdb, &e->capability,
(struct ntdb_header *)mem, last_cap);
diff --git a/lib/ntdb/test/layout.h b/lib/ntdb/test/layout.h
index bcd20b8965..b4f6a960eb 100644
--- a/lib/ntdb/test/layout.h
+++ b/lib/ntdb/test/layout.h
@@ -32,7 +32,7 @@ void ntdb_layout_write(struct ntdb_layout *layout, void (*freefn)(void *),
void ntdb_layout_free(struct ntdb_layout *layout);
enum layout_type {
- FREETABLE, FREE, DATA, HASHTABLE, CAPABILITY
+ FREETABLE, FREE, DATA, CAPABILITY
};
/* Shared by all union members. */
@@ -58,13 +58,6 @@ struct tle_used {
ntdb_len_t extra;
};
-struct tle_hashtable {
- struct tle_base base;
- int parent;
- unsigned int bucket;
- ntdb_len_t extra;
-};
-
struct tle_capability {
struct tle_base base;
uint64_t type;
@@ -76,7 +69,6 @@ union ntdb_layout_elem {
struct tle_freetable ftable;
struct tle_free free;
struct tle_used used;
- struct tle_hashtable hashtable;
struct tle_capability capability;
};
diff --git a/lib/ntdb/test/run-001-encode.c b/lib/ntdb/test/run-001-encode.c
index 12965676a2..b8a61bee8c 100644
--- a/lib/ntdb/test/run-001-encode.c
+++ b/lib/ntdb/test/run-001-encode.c
@@ -8,32 +8,30 @@ int main(int argc, char *argv[])
struct ntdb_used_record rec;
struct ntdb_context ntdb = { .log_fn = tap_log_fn };
- plan_tests(64 + 32 + 48*6 + 1);
+ plan_tests(64 + 32 + 48*5 + 1);
/* We should be able to encode any data value. */
for (i = 0; i < 64; i++)
ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, 0, 1ULL << i,
- 1ULL << i, 0) == 0);
+ 1ULL << i) == 0);
/* And any key and data with < 64 bits between them. */
for (i = 0; i < 32; i++) {
ntdb_len_t dlen = 1ULL >> (63 - i), klen = 1ULL << i;
ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, klen, dlen,
- klen + dlen, 0) == 0);
+ klen + dlen) == 0);
}
/* We should neatly encode all values. */
for (i = 0; i < 48; i++) {
- uint64_t h = 1ULL << (i < 5 ? i : 4);
uint64_t klen = 1ULL << (i < 16 ? i : 15);
uint64_t dlen = 1ULL << i;
uint64_t xlen = 1ULL << (i < 32 ? i : 31);
ok1(set_header(&ntdb, &rec, NTDB_USED_MAGIC, klen, dlen,
- klen+dlen+xlen, h) == 0);
+ klen+dlen+xlen) == 0);
ok1(rec_key_length(&rec) == klen);
ok1(rec_data_length(&rec) == dlen);
ok1(rec_extra_padding(&rec) == xlen);
- ok1((uint64_t)rec_hash(&rec) == h);
ok1(rec_magic(&rec) == NTDB_USED_MAGIC);
}
ok1(tap_log_messages == 0);
diff --git a/lib/ntdb/test/run-02-expand.c b/lib/ntdb/test/run-02-expand.c
index abf1569388..2c8b1a291b 100644
--- a/lib/ntdb/test/run-02-expand.c
+++ b/lib/ntdb/test/run-02-expand.c
@@ -29,7 +29,7 @@ int main(int argc, char *argv[])
val = ntdb->file->map_size;
/* Need some hash lock for expand. */
- ok1(ntdb_lock_hashes(ntdb, 0, 1, F_WRLCK, NTDB_LOCK_WAIT) == 0);
+ ok1(ntdb_lock_hash(ntdb, 0, F_WRLCK) == 0);
failtest_suppress = false;
if (!ok1(ntdb_expand(ntdb, 1) == 0)) {
failtest_suppress = true;
@@ -39,11 +39,11 @@ int main(int argc, char *argv[])
failtest_suppress = true;
ok1(ntdb->file->map_size >= val + 1 * NTDB_EXTENSION_FACTOR);
- ok1(ntdb_unlock_hashes(ntdb, 0, 1, F_WRLCK) == 0);
+ ok1(ntdb_unlock_hash(ntdb, 0, F_WRLCK) == 0);
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
val = ntdb->file->map_size;
- ok1(ntdb_lock_hashes(ntdb, 0, 1, F_WRLCK, NTDB_LOCK_WAIT) == 0);
+ ok1(ntdb_lock_hash(ntdb, 0, F_WRLCK) == 0);
failtest_suppress = false;
if (!ok1(ntdb_expand(ntdb, 1024) == 0)) {
failtest_suppress = true;
@@ -51,7 +51,7 @@ int main(int argc, char *argv[])
break;
}
failtest_suppress = true;
- ok1(ntdb_unlock_hashes(ntdb, 0, 1, F_WRLCK) == 0);
+ ok1(ntdb_unlock_hash(ntdb, 0, F_WRLCK) == 0);
ok1(ntdb->file->map_size >= val + 1024 * NTDB_EXTENSION_FACTOR);
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
ntdb_close(ntdb);
diff --git a/lib/ntdb/test/run-03-coalesce.c b/lib/ntdb/test/run-03-coalesce.c
index 22a6817881..363c078fc6 100644
--- a/lib/ntdb/test/run-03-coalesce.c
+++ b/lib/ntdb/test/run-03-coalesce.c
@@ -34,7 +34,7 @@ int main(int argc, char *argv[])
/* No coalescing can be done due to EOF */
layout = new_ntdb_layout();
ntdb_layout_add_freetable(layout);
- len = 56544;
+ len = 15560;
ntdb_layout_add_free(layout, len, 0);
ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
/* NOMMAP is for lockcheck. */
@@ -60,24 +60,24 @@ int main(int argc, char *argv[])
/* No coalescing can be done due to used record */
layout = new_ntdb_layout();
ntdb_layout_add_freetable(layout);
- ntdb_layout_add_free(layout, 56512, 0);
+ ntdb_layout_add_free(layout, 15528, 0);
ntdb_layout_add_used(layout, key, data, 6);
ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
/* NOMMAP is for lockcheck. */
ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP, O_RDWR, 0,
&tap_log_attr);
- ok1(free_record_length(ntdb, layout->elem[1].base.off) == 56512);
+ ok1(free_record_length(ntdb, layout->elem[1].base.off) == 15528);
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
/* Figure out which bucket free entry is. */
- b_off = bucket_off(ntdb->ftable_off, size_to_bucket(56512));
+ b_off = bucket_off(ntdb->ftable_off, size_to_bucket(15528));
/* Lock and fail to coalesce. */
ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
test = layout->elem[1].base.off;
- ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 56512, &test)
+ ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 15528, &test)
== 0);
ntdb_unlock_free_bucket(ntdb, b_off);
- ok1(free_record_length(ntdb, layout->elem[1].base.off) == 56512);
+ ok1(free_record_length(ntdb, layout->elem[1].base.off) == 15528);
ok1(test == layout->elem[1].base.off);
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
ntdb_close(ntdb);
@@ -87,13 +87,13 @@ int main(int argc, char *argv[])
layout = new_ntdb_layout();
ntdb_layout_add_freetable(layout);
ntdb_layout_add_free(layout, 1024, 0);
- ntdb_layout_add_free(layout, 55504, 0);
+ ntdb_layout_add_free(layout, 14520, 0);
ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
/* NOMMAP is for lockcheck. */
ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP, O_RDWR, 0,
&tap_log_attr);
ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024);
- ok1(free_record_length(ntdb, layout->elem[2].base.off) == 55504);
+ ok1(free_record_length(ntdb, layout->elem[2].base.off) == 14520);
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
/* Figure out which bucket (first) free entry is. */
@@ -102,12 +102,12 @@ int main(int argc, char *argv[])
ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
test = layout->elem[2].base.off;
ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test)
- == 1024 + sizeof(struct ntdb_used_record) + 55504);
+ == 1024 + sizeof(struct ntdb_used_record) + 14520);
/* Should tell us it's erased this one... */
ok1(test == NTDB_ERR_NOEXIST);
ok1(ntdb->file->allrecord_lock.count == 0 && ntdb->file->num_lockrecs == 0);
ok1(free_record_length(ntdb, layout->elem[1].base.off)
- == 1024 + sizeof(struct ntdb_used_record) + 55504);
+ == 1024 + sizeof(struct ntdb_used_record) + 14520);
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
ntdb_close(ntdb);
ntdb_layout_free(layout);
@@ -116,14 +116,14 @@ int main(int argc, char *argv[])
layout = new_ntdb_layout();
ntdb_layout_add_freetable(layout);
ntdb_layout_add_free(layout, 1024, 0);
- ntdb_layout_add_free(layout, 55472, 0);
+ ntdb_layout_add_free(layout, 14488, 0);
ntdb_layout_add_used(layout, key, data, 6);
ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
/* NOMMAP is for lockcheck. */
ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP, O_RDWR, 0,
&tap_log_attr);
ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024);
- ok1(free_record_length(ntdb, layout->elem[2].base.off) == 55472);
+ ok1(free_record_length(ntdb, layout->elem[2].base.off) == 14488);
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
/* Figure out which bucket free entry is. */
@@ -132,10 +132,10 @@ int main(int argc, char *argv[])
ok1(ntdb_lock_free_bucket(ntdb, b_off, NTDB_LOCK_WAIT) == 0);
test = layout->elem[2].base.off;
ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test)
- == 1024 + sizeof(struct ntdb_used_record) + 55472);
+ == 1024 + sizeof(struct ntdb_used_record) + 14488);
ok1(ntdb->file->allrecord_lock.count == 0 && ntdb->file->num_lockrecs == 0);
ok1(free_record_length(ntdb, layout->elem[1].base.off)
- == 1024 + sizeof(struct ntdb_used_record) + 55472);
+ == 1024 + sizeof(struct ntdb_used_record) + 14488);
ok1(test == NTDB_ERR_NOEXIST);
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
ntdb_close(ntdb);
@@ -146,14 +146,14 @@ int main(int argc, char *argv[])
ntdb_layout_add_freetable(layout);
ntdb_layout_add_free(layout, 1024, 0);
ntdb_layout_add_free(layout, 512, 0);
- ntdb_layout_add_free(layout, 54976, 0);
+ ntdb_layout_add_free(layout, 13992, 0);
ntdb_layout_write(layout, free, &tap_log_attr, "run-03-coalesce.ntdb");
/* NOMMAP is for lockcheck. */
ntdb = ntdb_open("run-03-coalesce.ntdb", NTDB_NOMMAP, O_RDWR, 0,
&tap_log_attr);
ok1(free_record_length(ntdb, layout->elem[1].base.off) == 1024);
ok1(free_record_length(ntdb, layout->elem[2].base.off) == 512);
- ok1(free_record_length(ntdb, layout->elem[3].base.off) == 54976);
+ ok1(free_record_length(ntdb, layout->elem[3].base.off) == 13992);
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
/* Figure out which bucket free entry is. */
@@ -163,12 +163,12 @@ int main(int argc, char *argv[])
test = layout->elem[2].base.off;
ok1(coalesce(ntdb, layout->elem[1].base.off, b_off, 1024, &test)
== 1024 + sizeof(struct ntdb_used_record) + 512
- + sizeof(struct ntdb_used_record) + 54976);
+ + sizeof(struct ntdb_used_record) + 13992);
ok1(ntdb->file->allrecord_lock.count == 0
&& ntdb->file->num_lockrecs == 0);
ok1(free_record_length(ntdb, layout->elem[1].base.off)
== 1024 + sizeof(struct ntdb_used_record) + 512
- + sizeof(struct ntdb_used_record) + 54976);
+ + sizeof(struct ntdb_used_record) + 13992);
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
ntdb_close(ntdb);
ntdb_layout_free(layout);
diff --git a/lib/ntdb/test/run-04-basichash.c b/lib/ntdb/test/run-04-basichash.c
index 6e3bdc012d..41b49239cb 100644
--- a/lib/ntdb/test/run-04-basichash.c
+++ b/lib/ntdb/test/run-04-basichash.c
@@ -2,16 +2,15 @@
#include "tap-interface.h"
#include "logging.h"
-/* We rig the hash so adjacent-numbered records always clash. */
-static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv)
+/* We rig the hash so all records clash. */
+static uint32_t clash(const void *key, size_t len, uint32_t seed, void *priv)
{
- return ((uint64_t)*(const unsigned int *)key)
- << (64 - NTDB_TOPLEVEL_HASH_BITS - 1);
+ return *((const unsigned int *)key) << 20;
}
int main(int argc, char *argv[])
{
- unsigned int i, j;
+ unsigned int i;
struct ntdb_context *ntdb;
unsigned int v;
struct ntdb_used_record rec;
@@ -26,11 +25,10 @@ int main(int argc, char *argv[])
hattr.base.next = &tap_log_attr;
- plan_tests(sizeof(flags) / sizeof(flags[0])
- * (91 + (2 * ((1 << NTDB_HASH_GROUP_BITS) - 1))) + 1);
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 137 + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
struct hash_info h;
- ntdb_off_t new_off, off, subhash;
+ ntdb_off_t new_off, new_off2, off;
ntdb = ntdb_open("run-04-basichash.ntdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
@@ -40,26 +38,24 @@ int main(int argc, char *argv[])
v = 0;
/* Should not find it. */
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == 0);
/* Should have created correct hash. */
ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have located space in group 0, bucket 0. */
- ok1(h.group_start == offsetof(struct ntdb_header, hashtable));
- ok1(h.home_bucket == 0);
- ok1(h.found_bucket == 0);
- ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS);
+ /* Should have located space in top table, bucket 0. */
+ ok1(h.table == NTDB_HASH_OFFSET);
+ ok1(h.table_size == (1 << ntdb->hash_bits));
+ ok1(h.bucket == 0);
+ ok1(h.old_val == 0);
/* Should have lock on bucket 0 */
- ok1(h.hlock_start == 0);
- ok1(h.hlock_range ==
- 1ULL << (64-(NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS)));
+ ok1(h.h == 0);
ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
ok1((ntdb->flags & NTDB_NOLOCK)
|| ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
/* FIXME: Check lock length */
/* Allocate a new record. */
- new_off = alloc(ntdb, key.dsize, dbuf.dsize, h.h,
+ new_off = alloc(ntdb, key.dsize, dbuf.dsize,
NTDB_USED_MAGIC, false);
ok1(!NTDB_OFF_IS_ERR(new_off));
@@ -73,91 +69,93 @@ int main(int argc, char *argv[])
ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize));
/* We should be able to unlock that OK. */
- ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
- F_WRLCK) == 0);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
/* Database should be consistent. */
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
/* Now, this should give a successful lookup. */
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL)
- == new_off);
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == new_off);
/* Should have created correct hash. */
ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have located space in group 0, bucket 0. */
- ok1(h.group_start == offsetof(struct ntdb_header, hashtable));
- ok1(h.home_bucket == 0);
- ok1(h.found_bucket == 0);
- ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS);
+ /* Should have located it in top table, bucket 0. */
+ ok1(h.table == NTDB_HASH_OFFSET);
+ ok1(h.table_size == (1 << ntdb->hash_bits));
+ ok1(h.bucket == 0);
/* Should have lock on bucket 0 */
- ok1(h.hlock_start == 0);
- ok1(h.hlock_range ==
- 1ULL << (64-(NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS)));
+ ok1(h.h == 0);
ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
ok1((ntdb->flags & NTDB_NOLOCK)
|| ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
/* FIXME: Check lock length */
- ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
- F_WRLCK) == 0);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
/* Database should be consistent. */
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
/* Test expansion. */
v = 1;
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == 0);
/* Should have created correct hash. */
ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have located space in group 0, bucket 1. */
- ok1(h.group_start == offsetof(struct ntdb_header, hashtable));
- ok1(h.home_bucket == 0);
- ok1(h.found_bucket == 1);
- ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS);
+ /* Should have located clash in toplevel bucket 0. */
+ ok1(h.table == NTDB_HASH_OFFSET);
+ ok1(h.table_size == (1 << ntdb->hash_bits));
+ ok1(h.bucket == 0);
+ ok1((h.old_val & NTDB_OFF_MASK) == new_off);
/* Should have lock on bucket 0 */
- ok1(h.hlock_start == 0);
- ok1(h.hlock_range ==
- 1ULL << (64-(NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS)));
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
ok1((ntdb->flags & NTDB_NOLOCK)
|| ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
/* FIXME: Check lock length */
- /* Make it expand 0'th bucket. */
- ok1(expand_group(ntdb, &h) == 0);
- /* First one should be subhash, next should be empty. */
- ok1(is_subhash(h.group[0]));
- subhash = (h.group[0] & NTDB_OFF_MASK);
- for (j = 1; j < (1 << NTDB_HASH_GROUP_BITS); j++)
- ok1(h.group[j] == 0);
+ new_off2 = alloc(ntdb, key.dsize, dbuf.dsize,
+ NTDB_USED_MAGIC, false);
+ ok1(!NTDB_OFF_IS_ERR(new_off2));
- ok1(ntdb_write_convert(ntdb, h.group_start,
- h.group, sizeof(h.group)) == 0);
- ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
- F_WRLCK) == 0);
+ off = new_off2 + sizeof(struct ntdb_used_record);
+ ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize));
+ off += key.dsize;
+ ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize));
+
+ /* We should be able to add it now. */
+ ok1(add_to_hash(ntdb, &h, new_off2) == 0);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
/* Should be happy with expansion. */
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- /* Should be able to find it. */
+ /* Should be able to find both. */
+ v = 1;
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == new_off2);
+ /* Should have created correct hash. */
+ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+ /* Should have located space in chain. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 2);
+ ok1(h.bucket == 1);
+ /* Should have lock on bucket 0 */
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
+ ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+ ok1((ntdb->flags & NTDB_NOLOCK)
+ || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
+
v = 0;
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL)
- == new_off);
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == new_off);
/* Should have created correct hash. */
ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have located space in expanded group 0, bucket 0. */
- ok1(h.group_start == subhash + sizeof(struct ntdb_used_record));
- ok1(h.home_bucket == 0);
- ok1(h.found_bucket == 0);
- ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS
- + NTDB_SUBLEVEL_HASH_BITS);
+ /* Should have located space in chain. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 2);
+ ok1(h.bucket == 0);
/* Should have lock on bucket 0 */
- ok1(h.hlock_start == 0);
- ok1(h.hlock_range ==
- 1ULL << (64-(NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS)));
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
ok1((ntdb->flags & NTDB_NOLOCK)
|| ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
@@ -171,86 +169,149 @@ int main(int argc, char *argv[])
+ rec_data_length(&rec)
+ rec_extra_padding(&rec),
NTDB_LOCK_NOWAIT, false) == 0);
- ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
- F_WRLCK) == 0);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- /* Test second-level expansion: should expand 0th bucket. */
+ /* Should still be able to find other record. */
+ v = 1;
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == new_off2);
+ /* Should have created correct hash. */
+ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+ /* Should have located space in chain. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 2);
+ ok1(h.bucket == 1);
+ /* Should have lock on bucket 0 */
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
+ ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+ ok1((ntdb->flags & NTDB_NOLOCK)
+ || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
+
+ /* Now should find empty space. */
v = 0;
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == 0);
/* Should have created correct hash. */
ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have located space in group 0, bucket 0. */
- ok1(h.group_start == subhash + sizeof(struct ntdb_used_record));
- ok1(h.home_bucket == 0);
- ok1(h.found_bucket == 0);
- ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS+NTDB_SUBLEVEL_HASH_BITS);
+ /* Should have located space in chain, bucket 0. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 2);
+ ok1(h.bucket == 0);
+ ok1(h.old_val == 0);
+
+ /* Adding another record should work. */
+ v = 2;
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == 0);
+ /* Should have created correct hash. */
+ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+ /* Should have located space in chain, bucket 0. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 2);
+ ok1(h.bucket == 0);
+ ok1(h.old_val == 0);
/* Should have lock on bucket 0 */
- ok1(h.hlock_start == 0);
- ok1(h.hlock_range ==
- 1ULL << (64-(NTDB_TOPLEVEL_HASH_BITS-NTDB_HASH_GROUP_BITS)));
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
ok1((ntdb->flags & NTDB_NOLOCK)
|| ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
- /* FIXME: Check lock length */
- ok1(expand_group(ntdb, &h) == 0);
- /* First one should be subhash, next should be empty. */
- ok1(is_subhash(h.group[0]));
- subhash = (h.group[0] & NTDB_OFF_MASK);
- for (j = 1; j < (1 << NTDB_HASH_GROUP_BITS); j++)
- ok1(h.group[j] == 0);
- ok1(ntdb_write_convert(ntdb, h.group_start,
- h.group, sizeof(h.group)) == 0);
- ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
- F_WRLCK) == 0);
+ new_off = alloc(ntdb, key.dsize, dbuf.dsize,
+ NTDB_USED_MAGIC, false);
+ ok1(!NTDB_OFF_IS_ERR(new_off2));
+ ok1(add_to_hash(ntdb, &h, new_off) == 0);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
- /* Should be happy with expansion. */
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ off = new_off + sizeof(struct ntdb_used_record);
+ ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize));
+ off += key.dsize;
+ ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize));
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL) == 0);
+ /* Adding another record should cause expansion. */
+ v = 3;
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == 0);
/* Should have created correct hash. */
ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have located space in group 0, bucket 0. */
- ok1(h.group_start == subhash + sizeof(struct ntdb_used_record));
- ok1(h.home_bucket == 0);
- ok1(h.found_bucket == 0);
- ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS
- + NTDB_SUBLEVEL_HASH_BITS * 2);
+ /* Should not have located space in chain. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 2);
+ ok1(h.bucket == 2);
+ ok1(h.old_val != 0);
- /* We should be able to add it now. */
- /* Allocate a new record. */
- new_off = alloc(ntdb, key.dsize, dbuf.dsize, h.h,
- NTDB_USED_MAGIC, false);
- ok1(!NTDB_OFF_IS_ERR(new_off));
- ok1(add_to_hash(ntdb, &h, new_off) == 0);
+ /* Should have lock on bucket 0 */
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
+ ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+ ok1((ntdb->flags & NTDB_NOLOCK)
+ || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
- /* Make sure we fill it in for later finding. */
+ new_off = alloc(ntdb, key.dsize, dbuf.dsize,
+ NTDB_USED_MAGIC, false);
+ ok1(!NTDB_OFF_IS_ERR(new_off2));
off = new_off + sizeof(struct ntdb_used_record);
ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize));
off += key.dsize;
ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize));
+ ok1(add_to_hash(ntdb, &h, new_off) == 0);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
- /* We should be able to unlock that OK. */
- ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
- F_WRLCK) == 0);
+ /* Retrieve it and check. */
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == new_off);
+ /* Should have created correct hash. */
+ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+ /* Should have appended to chain, bucket 2. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 3);
+ ok1(h.bucket == 2);
- /* Database should be consistent. */
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
+ /* Should have lock on bucket 0 */
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
+ ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+ ok1((ntdb->flags & NTDB_NOLOCK)
+ || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
- /* Should be able to find it. */
- v = 0;
- ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL)
- == new_off);
+ /* YA record: relocation. */
+ v = 4;
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == 0);
/* Should have created correct hash. */
ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have located space in expanded group 0, bucket 0. */
- ok1(h.group_start == subhash + sizeof(struct ntdb_used_record));
- ok1(h.home_bucket == 0);
- ok1(h.found_bucket == 0);
- ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS
- + NTDB_SUBLEVEL_HASH_BITS * 2);
+ /* Should not have located space in chain. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 3);
+ ok1(h.bucket == 3);
+ ok1(h.old_val != 0);
+
+ /* Should have lock on bucket 0 */
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
+ ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+ ok1((ntdb->flags & NTDB_NOLOCK)
+ || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+
+ new_off = alloc(ntdb, key.dsize, dbuf.dsize,
+ NTDB_USED_MAGIC, false);
+ ok1(!NTDB_OFF_IS_ERR(new_off2));
+ off = new_off + sizeof(struct ntdb_used_record);
+ ok1(!ntdb->io->twrite(ntdb, off, key.dptr, key.dsize));
+ off += key.dsize;
+ ok1(!ntdb->io->twrite(ntdb, off, dbuf.dptr, dbuf.dsize));
+ ok1(add_to_hash(ntdb, &h, new_off) == 0);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
+
+ /* Retrieve it and check. */
+ ok1(find_and_lock(ntdb, key, F_WRLCK, &h, &rec) == new_off);
+ /* Should have created correct hash. */
+ ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
+ /* Should have appended to chain, bucket 2. */
+ ok1(h.table > NTDB_HASH_OFFSET);
+ ok1(h.table_size == 4);
+ ok1(h.bucket == 3);
+
+ /* Should have lock on bucket 0 */
+ ok1((h.h & ((1 << ntdb->hash_bits)-1)) == 0);
+ ok1((ntdb->flags & NTDB_NOLOCK) || ntdb->file->num_lockrecs == 1);
+ ok1((ntdb->flags & NTDB_NOLOCK)
+ || ntdb->file->lockrecs[0].off == NTDB_HASH_LOCK_START);
+ ok1(ntdb_unlock_hash(ntdb, h.h, F_WRLCK) == 0);
ntdb_close(ntdb);
}
diff --git a/lib/ntdb/test/run-15-append.c b/lib/ntdb/test/run-15-append.c
index 3c208137f2..97fd53c241 100644
--- a/lib/ntdb/test/run-15-append.c
+++ b/lib/ntdb/test/run-15-append.c
@@ -12,10 +12,10 @@ static ntdb_off_t ntdb_offset(struct ntdb_context *ntdb, NTDB_DATA key)
struct ntdb_used_record urec;
struct hash_info h;
- off = find_and_lock(ntdb, key, F_RDLCK, &h, &urec, NULL);
+ off = find_and_lock(ntdb, key, F_RDLCK, &h, &urec);
if (NTDB_OFF_IS_ERR(off))
return 0;
- ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK);
+ ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
return off;
}
diff --git a/lib/ntdb/test/run-20-growhash.c b/lib/ntdb/test/run-20-growhash.c
deleted file mode 100644
index 5559370f2a..0000000000
--- a/lib/ntdb/test/run-20-growhash.c
+++ /dev/null
@@ -1,137 +0,0 @@
-#include "ntdb-source.h"
-#include "tap-interface.h"
-#include "logging.h"
-
-static uint64_t myhash(const void *key, size_t len, uint64_t seed, void *priv)
-{
- return *(const uint64_t *)key;
-}
-
-static void add_bits(uint64_t *val, unsigned new, unsigned new_bits,
- unsigned *done)
-{
- *done += new_bits;
- *val |= ((uint64_t)new << (64 - *done));
-}
-
-static uint64_t make_key(unsigned topgroup, unsigned topbucket,
- unsigned subgroup1, unsigned subbucket1,
- unsigned subgroup2, unsigned subbucket2)
-{
- uint64_t key = 0;
- unsigned done = 0;
-
- add_bits(&key, topgroup, NTDB_TOPLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS,
- &done);
- add_bits(&key, topbucket, NTDB_HASH_GROUP_BITS, &done);
- add_bits(&key, subgroup1, NTDB_SUBLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS,
- &done);
- add_bits(&key, subbucket1, NTDB_HASH_GROUP_BITS, &done);
- add_bits(&key, subgroup2, NTDB_SUBLEVEL_HASH_BITS - NTDB_HASH_GROUP_BITS,
- &done);
- add_bits(&key, subbucket2, NTDB_HASH_GROUP_BITS, &done);
- return key;
-}
-
-int main(int argc, char *argv[])
-{
- unsigned int i, j;
- struct ntdb_context *ntdb;
- uint64_t kdata;
- struct ntdb_used_record rec;
- NTDB_DATA key = { (unsigned char *)&kdata, sizeof(kdata) };
- NTDB_DATA dbuf = { (unsigned char *)&kdata, sizeof(kdata) };
- union ntdb_attribute hattr = { .hash = { .base = { NTDB_ATTRIBUTE_HASH },
- .fn = myhash } };
- int flags[] = { NTDB_INTERNAL, NTDB_DEFAULT, NTDB_NOMMAP,
- NTDB_INTERNAL|NTDB_CONVERT, NTDB_CONVERT,
- NTDB_NOMMAP|NTDB_CONVERT,
- };
-
- hattr.base.next = &tap_log_attr;
-
- plan_tests(sizeof(flags) / sizeof(flags[0])
- * (9 + (20 + 2 * ((1 << NTDB_HASH_GROUP_BITS) - 2))
- * (1 << NTDB_HASH_GROUP_BITS)) + 1);
- for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
- struct hash_info h;
-
- ntdb = ntdb_open("run-20-growhash.ntdb", flags[i],
- O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
- ok1(ntdb);
- if (!ntdb)
- continue;
-
- /* Fill a group. */
- for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++) {
- kdata = make_key(0, j, 0, 0, 0, 0);
- ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
- }
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- /* Check first still exists. */
- kdata = make_key(0, 0, 0, 0, 0, 0);
- ok1(find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL) != 0);
- /* Should have created correct hash. */
- ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have located space in group 0, bucket 0. */
- ok1(h.group_start == offsetof(struct ntdb_header, hashtable));
- ok1(h.home_bucket == 0);
- ok1(h.found_bucket == 0);
- ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS);
- /* Entire group should be full! */
- for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++)
- ok1(h.group[j] != 0);
-
- ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
- F_RDLCK) == 0);
-
- /* Now, add one more to each should expand (that) bucket. */
- for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++) {
- unsigned int k;
- kdata = make_key(0, j, 0, 1, 0, 0);
- ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- ok1(find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL));
- /* Should have created correct hash. */
- ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have moved to subhash */
- ok1(h.group_start >= sizeof(struct ntdb_header));
- ok1(h.home_bucket == 1);
- ok1(h.found_bucket == 1);
- ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS
- + NTDB_SUBLEVEL_HASH_BITS);
- ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
- F_RDLCK) == 0);
-
- /* Keep adding, make it expand again. */
- for (k = 2; k < (1 << NTDB_HASH_GROUP_BITS); k++) {
- kdata = make_key(0, j, 0, k, 0, 0);
- ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- }
-
- /* This should tip it over to sub-sub-hash. */
- kdata = make_key(0, j, 0, 0, 0, 1);
- ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
- ok1(find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL));
- /* Should have created correct hash. */
- ok1(h.h == ntdb_hash(ntdb, key.dptr, key.dsize));
- /* Should have moved to subhash */
- ok1(h.group_start >= sizeof(struct ntdb_header));
- ok1(h.home_bucket == 1);
- ok1(h.found_bucket == 1);
- ok1(h.hash_used == NTDB_TOPLEVEL_HASH_BITS
- + NTDB_SUBLEVEL_HASH_BITS + NTDB_SUBLEVEL_HASH_BITS);
- ok1(ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range,
- F_RDLCK) == 0);
- }
- ntdb_close(ntdb);
- }
-
- ok1(tap_log_messages == 0);
- return exit_status();
-}
diff --git a/lib/ntdb/test/run-25-hashoverload.c b/lib/ntdb/test/run-25-hashoverload.c
index 611eb71bf6..1b69fe9a61 100644
--- a/lib/ntdb/test/run-25-hashoverload.c
+++ b/lib/ntdb/test/run-25-hashoverload.c
@@ -2,7 +2,9 @@
#include "tap-interface.h"
#include "logging.h"
-static uint64_t badhash(const void *key, size_t len, uint64_t seed, void *priv)
+#define OVERLOAD 100
+
+static uint32_t badhash(const void *key, size_t len, uint32_t seed, void *priv)
{
return 0;
}
@@ -29,7 +31,7 @@ int main(int argc, char *argv[])
hattr.base.next = &tap_log_attr;
- plan_tests(6883);
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * (7 * OVERLOAD + 11) + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
NTDB_DATA d = { NULL, 0 }; /* Bogus GCC warning */
@@ -39,70 +41,47 @@ int main(int argc, char *argv[])
if (!ntdb)
continue;
- /* Fill a group. */
- for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++) {
+ /* Overload a bucket. */
+ for (j = 0; j < OVERLOAD; j++) {
ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
}
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- /* Now store one last value: should form chain. */
- ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
-
/* Check we can find them all. */
- for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS) + 1; j++) {
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
- ok1(d.dsize == sizeof(j));
- ok1(d.dptr != NULL);
- ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
- free(d.dptr);
- }
-
- /* Now add a *lot* more. */
- for (j = (1 << NTDB_HASH_GROUP_BITS) + 1;
- j < (16 << NTDB_HASH_GROUP_BITS);
- j++) {
- ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
+ for (j = 0; j < OVERLOAD; j++) {
ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
ok1(d.dsize == sizeof(j));
ok1(d.dptr != NULL);
ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
free(d.dptr);
}
- ok1(ntdb_check(ntdb, NULL, NULL) == 0);
/* Traverse through them. */
- ok1(ntdb_traverse(ntdb, trav, NULL) == j);
+ ok1(ntdb_traverse(ntdb, trav, NULL) == OVERLOAD);
- /* Empty the first chain-worth. */
- for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++)
+ /* Delete the first 99. */
+ for (j = 0; j < OVERLOAD-1; j++)
ok1(ntdb_delete(ntdb, key) == 0);
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
- for (j = (1 << NTDB_HASH_GROUP_BITS);
- j < (16 << NTDB_HASH_GROUP_BITS);
- j++) {
- ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
- ok1(d.dsize == sizeof(j));
- ok1(d.dptr != NULL);
- ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
- free(d.dptr);
- }
+ ok1(ntdb_fetch(ntdb, key, &d) == NTDB_SUCCESS);
+ ok1(d.dsize == sizeof(j));
+ ok1(d.dptr != NULL);
+ ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0);
+ free(d.dptr);
/* Traverse through them. */
- ok1(ntdb_traverse(ntdb, trav, NULL)
- == (15 << NTDB_HASH_GROUP_BITS));
+ ok1(ntdb_traverse(ntdb, trav, NULL) == 1);
/* Re-add */
- for (j = 0; j < (1 << NTDB_HASH_GROUP_BITS); j++) {
+ for (j = 0; j < OVERLOAD-1; j++) {
ok1(ntdb_store(ntdb, key, dbuf, NTDB_INSERT) == 0);
}
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
/* Now try deleting as we go. */
- ok1(ntdb_traverse(ntdb, trav, trav)
- == (16 << NTDB_HASH_GROUP_BITS));
+ ok1(ntdb_traverse(ntdb, trav, trav) == OVERLOAD);
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
ok1(ntdb_traverse(ntdb, trav, NULL) == 0);
ntdb_close(ntdb);
diff --git a/lib/ntdb/test/run-30-exhaust-before-expand.c b/lib/ntdb/test/run-30-exhaust-before-expand.c
index 24c48b005a..cc9ea3fa3d 100644
--- a/lib/ntdb/test/run-30-exhaust-before-expand.c
+++ b/lib/ntdb/test/run-30-exhaust-before-expand.c
@@ -50,7 +50,7 @@ int main(int argc, char *argv[])
size = ntdb->file->map_size;
/* Create one record to chew up most space. */
- d.dsize = size - sizeof(struct new_database) - 32;
+ d.dsize = size - NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 32;
d.dptr = calloc(d.dsize, 1);
j = 0;
ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0);
diff --git a/lib/ntdb/test/run-35-convert.c b/lib/ntdb/test/run-35-convert.c
index 6a38d425cb..5b8099c448 100644
--- a/lib/ntdb/test/run-35-convert.c
+++ b/lib/ntdb/test/run-35-convert.c
@@ -24,6 +24,10 @@ int main(int argc, char *argv[])
failtest_exit(exit_status());
ntdb_close(ntdb);
+ /* We can fail in log message formatting or open. That's OK */
+ if (failtest_has_failed()) {
+ failtest_exit(exit_status());
+ }
/* If we say NTDB_CONVERT, it must be converted */
ntdb = ntdb_open("run-35-convert.ntdb",
flags[i]|NTDB_CONVERT,
diff --git a/lib/ntdb/test/run-50-multiple-freelists.c b/lib/ntdb/test/run-50-multiple-freelists.c
index 962462e5d4..5496e3e0dd 100644
--- a/lib/ntdb/test/run-50-multiple-freelists.c
+++ b/lib/ntdb/test/run-50-multiple-freelists.c
@@ -39,27 +39,27 @@ int main(int argc, char *argv[])
ok1(ntdb_check(ntdb, NULL, NULL) == 0);
off = get_free(ntdb, 0, 80 - sizeof(struct ntdb_used_record), 0,
- NTDB_USED_MAGIC, 0);
+ NTDB_USED_MAGIC);
ok1(off == layout->elem[3].base.off);
ok1(ntdb->ftable_off == layout->elem[0].base.off);
off = get_free(ntdb, 0, 160 - sizeof(struct ntdb_used_record), 0,
- NTDB_USED_MAGIC, 0);
+ NTDB_USED_MAGIC);
ok1(off == layout->elem[5].base.off);
ok1(ntdb->ftable_off == layout->elem[1].base.off);
off = get_free(ntdb, 0, 320 - sizeof(struct ntdb_used_record), 0,
- NTDB_USED_MAGIC, 0);
+ NTDB_USED_MAGIC);
ok1(off == layout->elem[7].base.off);
ok1(ntdb->ftable_off == layout->elem[2].base.off);
off = get_free(ntdb, 0, 40 - sizeof(struct ntdb_used_record), 0,
- NTDB_USED_MAGIC, 0);
+ NTDB_USED_MAGIC);
ok1(off == layout->elem[9].base.off);
ok1(ntdb->ftable_off == layout->elem[0].base.off);
/* Now we fail. */
- off = get_free(ntdb, 0, 0, 1, NTDB_USED_MAGIC, 0);
+ off = get_free(ntdb, 0, 0, 1, NTDB_USED_MAGIC);
ok1(off == 0);
ntdb_close(ntdb);
diff --git a/lib/ntdb/test/run-64-bit-tdb.c b/lib/ntdb/test/run-64-bit-tdb.c
index a85a4af56c..5afdd8747c 100644
--- a/lib/ntdb/test/run-64-bit-tdb.c
+++ b/lib/ntdb/test/run-64-bit-tdb.c
@@ -39,7 +39,8 @@ int main(int argc, char *argv[])
/* Add a fake record to chew up the existing free space. */
k = ntdb_mkdata("fake", 4);
- d.dsize = ntdb->file->map_size - sizeof(struct new_database)- 8;
+ d.dsize = ntdb->file->map_size
+ - NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 8;
d.dptr = malloc(d.dsize);
memset(d.dptr, 0, d.dsize);
ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0);
@@ -66,9 +67,9 @@ int main(int argc, char *argv[])
ok1(ntdb_check(ntdb, NULL, NULL) == NTDB_SUCCESS);
/* Make sure it put it at end as we expected. */
- off = find_and_lock(ntdb, k, F_RDLCK, &h, &rec, NULL);
+ off = find_and_lock(ntdb, k, F_RDLCK, &h, &rec);
ok1(off >= ALMOST_4G);
- ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK);
+ ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
ok1(ntdb_fetch(ntdb, k, &d) == 0);
ok1(d.dsize == 5);
diff --git a/lib/ntdb/test/run-90-get-set-attributes.c b/lib/ntdb/test/run-90-get-set-attributes.c
index fc265b0729..4f8792569f 100644
--- a/lib/ntdb/test/run-90-get-set-attributes.c
+++ b/lib/ntdb/test/run-90-get-set-attributes.c
@@ -13,7 +13,7 @@ static int myunlock(int fd, int rw, off_t off, off_t len, void *unused)
return 0;
}
-static uint64_t hash_fn(const void *key, size_t len, uint64_t seed,
+static uint32_t hash_fn(const void *key, size_t len, uint32_t seed,
void *priv)
{
return 0;
diff --git a/lib/ntdb/test/run-capabilities.c b/lib/ntdb/test/run-capabilities.c
index 52c4fac0e6..cb03746862 100644
--- a/lib/ntdb/test/run-capabilities.c
+++ b/lib/ntdb/test/run-capabilities.c
@@ -39,7 +39,7 @@ static void create_ntdb(const char *name,
ntdb_layout_add_freetable(layout);
ntdb_layout_add_used(layout, key, data, 6);
clen = len_of(breaks_check, breaks_write, breaks_open);
- ntdb_layout_add_free(layout, 56480 - clen, 0);
+ ntdb_layout_add_free(layout, 15496 - clen, 0);
ntdb_layout_add_capability(layout, cap,
breaks_write, breaks_check, breaks_open,
clen);
@@ -53,7 +53,7 @@ static void create_ntdb(const char *name,
key.dsize--;
ntdb_layout_add_used(layout, key, data, 11 - key.dsize);
clen = len_of(breaks_check, breaks_write, breaks_open);
- ntdb_layout_add_free(layout, 65456 - clen, 0);
+ ntdb_layout_add_free(layout, 16304 - clen, 0);
ntdb_layout_add_capability(layout, cap,
breaks_write, breaks_check,
breaks_open, clen);
diff --git a/lib/ntdb/test/run-expand-in-transaction.c b/lib/ntdb/test/run-expand-in-transaction.c
index 54f9d81dec..71866a37c1 100644
--- a/lib/ntdb/test/run-expand-in-transaction.c
+++ b/lib/ntdb/test/run-expand-in-transaction.c
@@ -25,7 +25,8 @@ int main(int argc, char *argv[])
size = ntdb->file->map_size;
/* Add a fake record to chew up the existing free space. */
k = ntdb_mkdata("fake", 4);
- d.dsize = ntdb->file->map_size - sizeof(struct new_database)- 8;
+ d.dsize = ntdb->file->map_size
+ - NEW_DATABASE_HDR_SIZE(ntdb->hash_bits) - 8;
d.dptr = malloc(d.dsize);
memset(d.dptr, 0, d.dsize);
ok1(ntdb_store(ntdb, k, d, NTDB_INSERT) == 0);
diff --git a/lib/ntdb/test/run-traverse.c b/lib/ntdb/test/run-traverse.c
index 9dfc94d3b3..ed95f33604 100644
--- a/lib/ntdb/test/run-traverse.c
+++ b/lib/ntdb/test/run-traverse.c
@@ -5,7 +5,7 @@
#define NUM_RECORDS 1000
/* We use the same seed which we saw a failure on. */
-static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p)
+static uint32_t fixedhash(const void *key, size_t len, uint32_t seed, void *p)
{
return hash64_stable((const unsigned char *)key, len,
*(uint64_t *)p);
diff --git a/lib/ntdb/tools/speed.c b/lib/ntdb/tools/speed.c
index 868494b898..8928d8c67a 100644
--- a/lib/ntdb/tools/speed.c
+++ b/lib/ntdb/tools/speed.c
@@ -82,8 +82,6 @@ static void dump_and_clear_stats(struct ntdb_context **ntdb,
(unsigned long long)stats.stats.alloc_coalesce_num_merged);
printf("compares = %llu\n",
(unsigned long long)stats.stats.compares);
- printf(" compare_wrong_bucket = %llu\n",
- (unsigned long long)stats.stats.compare_wrong_bucket);
printf(" compare_wrong_offsetbits = %llu\n",
(unsigned long long)stats.stats.compare_wrong_offsetbits);
printf(" compare_wrong_keylen = %llu\n",
diff --git a/lib/ntdb/traverse.c b/lib/ntdb/traverse.c
index ee1e1006db..d0ce3517b0 100644
--- a/lib/ntdb/traverse.c
+++ b/lib/ntdb/traverse.c
@@ -24,14 +24,14 @@ _PUBLIC_ int64_t ntdb_traverse_(struct ntdb_context *ntdb,
void *p)
{
enum NTDB_ERROR ecode;
- struct traverse_info tinfo;
+ struct hash_info h;
NTDB_DATA k, d;
int64_t count = 0;
k.dptr = NULL;
- for (ecode = first_in_hash(ntdb, &tinfo, &k, &d.dsize);
+ for (ecode = first_in_hash(ntdb, &h, &k, &d.dsize);
ecode == NTDB_SUCCESS;
- ecode = next_in_hash(ntdb, &tinfo, &k, &d.dsize)) {
+ ecode = next_in_hash(ntdb, &h, &k, &d.dsize)) {
d.dptr = k.dptr + k.dsize;
count++;
@@ -50,26 +50,29 @@ _PUBLIC_ int64_t ntdb_traverse_(struct ntdb_context *ntdb,
_PUBLIC_ enum NTDB_ERROR ntdb_firstkey(struct ntdb_context *ntdb, NTDB_DATA *key)
{
- struct traverse_info tinfo;
+ struct hash_info h;
- return first_in_hash(ntdb, &tinfo, key, NULL);
+ return first_in_hash(ntdb, &h, key, NULL);
}
-/* We lock twice, not very efficient. We could keep last key & tinfo cached. */
+/* We lock twice, not very efficient. We could keep last key & h cached. */
_PUBLIC_ enum NTDB_ERROR ntdb_nextkey(struct ntdb_context *ntdb, NTDB_DATA *key)
{
- struct traverse_info tinfo;
struct hash_info h;
struct ntdb_used_record rec;
+ ntdb_off_t off;
- tinfo.prev = find_and_lock(ntdb, *key, F_RDLCK, &h, &rec, &tinfo);
+ off = find_and_lock(ntdb, *key, F_RDLCK, &h, &rec);
ntdb->free_fn(key->dptr, ntdb->alloc_data);
- if (NTDB_OFF_IS_ERR(tinfo.prev)) {
- return NTDB_OFF_TO_ERR(tinfo.prev);
+ if (NTDB_OFF_IS_ERR(off)) {
+ return NTDB_OFF_TO_ERR(off);
}
- ntdb_unlock_hashes(ntdb, h.hlock_start, h.hlock_range, F_RDLCK);
+ ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
- return next_in_hash(ntdb, &tinfo, key, NULL);
+ /* If we found something, skip to next. */
+ if (off)
+ h.bucket++;
+ return next_in_hash(ntdb, &h, key, NULL);
}
static int wipe_one(struct ntdb_context *ntdb,
diff --git a/lib/ntdb/wscript b/lib/ntdb/wscript
index 0a90b46c80..ff8f24eae8 100644
--- a/lib/ntdb/wscript
+++ b/lib/ntdb/wscript
@@ -47,7 +47,6 @@ def configure(conf):
'test/run-11-simple-fetch.c',
'test/run-12-check.c',
'test/run-15-append.c',
- 'test/run-20-growhash.c',
'test/run-25-hashoverload.c',
'test/run-30-exhaust-before-expand.c',
'test/run-35-convert.c',