From 9e0deff904877068d19b41e965732f145c2554b9 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Mon, 13 Sep 2010 19:55:26 +0930
Subject: tdb: make check more robust against recovery failures.

We can end up with dead areas when we die during transaction commit;
tdb_check() fails on such a (valid) database.

This is particularly noticeable now we no longer truncate on recovery; if
the recovery area was at the end of the file we used to remove it that way.

Signed-off-by: Rusty Russell
---
 lib/tdb/common/check.c | 41 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 36 insertions(+), 5 deletions(-)

(limited to 'lib/tdb/common/check.c')

diff --git a/lib/tdb/common/check.c b/lib/tdb/common/check.c
index 2c640434ee..3be8a0c48b 100644
--- a/lib/tdb/common/check.c
+++ b/lib/tdb/common/check.c
@@ -301,6 +301,21 @@ static bool tdb_check_free_record(struct tdb_context *tdb,
 	return true;
 }
 
+/* Count 0/0x42 fill bytes from off (0 if a read fails). Slow, but rare. */
+static size_t dead_space(struct tdb_context *tdb, tdb_off_t off)
+{
+	size_t len;
+
+	for (len = 0; off + len < tdb->map_size; len++) {
+		char c;
+		if (tdb->methods->tdb_read(tdb, off + len, &c, 1, 0))
+			return 0;
+		if (c != 0 && c != 0x42)
+			break;
+	}
+	return len;
+}
+
 int tdb_check(struct tdb_context *tdb,
 	      int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
 	      void *private_data)
@@ -310,6 +325,7 @@ int tdb_check(struct tdb_context *tdb,
 	tdb_off_t off, recovery_start;
 	struct tdb_record rec;
 	bool found_recovery = false;
+	tdb_len_t dead;
 
 	if (tdb_lockall_read(tdb) == -1)
 		return -1;
@@ -369,8 +385,23 @@ int tdb_check(struct tdb_context *tdb,
 			if (!tdb_check_free_record(tdb, off, &rec, hashes))
 				goto free;
 			break;
-		case TDB_RECOVERY_MAGIC:
+		/* If we crash after ftruncate, we can get zeroes or fill. */
 		case TDB_RECOVERY_INVALID_MAGIC:
+		case 0x42424242:
+			if (recovery_start == off) {
+				found_recovery = true;
+				break;
+			}
+			dead = dead_space(tdb, off);
+			if (dead < sizeof(rec))
+				goto corrupt;
+
+			TDB_LOG((tdb, TDB_DEBUG_ERROR,
+				 "Dead space at %d-%d (of %u)\n",
+				 off, off + dead, tdb->map_size));
+			rec.rec_len = dead - sizeof(rec);
+			break;
+		case TDB_RECOVERY_MAGIC:
 			if (recovery_start != off) {
 				TDB_LOG((tdb, TDB_DEBUG_ERROR,
 					 "Unexpected recovery record at offset %d\n",
@@ -379,7 +410,8 @@ int tdb_check(struct tdb_context *tdb,
 			}
 			found_recovery = true;
 			break;
-		default:
+		default: ;
+		corrupt:
 			tdb->ecode = TDB_ERR_CORRUPT;
 			TDB_LOG((tdb, TDB_DEBUG_ERROR,
 				 "Bad magic 0x%x at offset %d\n",
@@ -405,9 +437,8 @@ int tdb_check(struct tdb_context *tdb,
 	/* We must have found recovery area if there was one. */
 	if (recovery_start != 0 && !found_recovery) {
 		TDB_LOG((tdb, TDB_DEBUG_ERROR,
-			 "Expected %s recovery area, got %s\n",
-			 recovery_start ? "a" : "no",
-			 found_recovery ? "one" : "none"));
+			 "Expected a recovery area at %u\n",
+			 recovery_start));
 		goto free;
 	}
 
-- 
cgit