diff options
-rw-r--r-- | source4/lib/tdb/common/error.c | 3 | ||||
-rw-r--r-- | source4/lib/tdb/common/io.c | 11 | ||||
-rw-r--r-- | source4/lib/tdb/common/lock.c | 10 | ||||
-rw-r--r-- | source4/lib/tdb/common/tdb.c | 5 | ||||
-rw-r--r-- | source4/lib/tdb/common/tdb_private.h | 1 | ||||
-rw-r--r-- | source4/lib/tdb/common/transaction.c | 29 | ||||
-rw-r--r-- | source4/lib/tdb/common/traverse.c | 70 | ||||
-rw-r--r-- | source4/lib/tdb/docs/README | 20 | ||||
-rw-r--r-- | source4/lib/tdb/include/tdb.h | 3 | ||||
-rw-r--r-- | source4/lib/tdb/tools/tdbtorture.c | 10 |
10 files changed, 130 insertions, 32 deletions
diff --git a/source4/lib/tdb/common/error.c b/source4/lib/tdb/common/error.c index 270a441463..4cf33a29ab 100644 --- a/source4/lib/tdb/common/error.c +++ b/source4/lib/tdb/common/error.c @@ -43,7 +43,8 @@ static struct tdb_errname { {TDB_ERR_EXISTS, "Record exists"}, {TDB_ERR_NOLOCK, "Lock exists on other keys"}, {TDB_ERR_EINVAL, "Invalid parameter"}, - {TDB_ERR_NOEXIST, "Record does not exist"} }; + {TDB_ERR_NOEXIST, "Record does not exist"}, + {TDB_ERR_RDONLY, "write not permitted"} }; /* Error string for the last tdb error */ const char *tdb_errorstr(struct tdb_context *tdb) diff --git a/source4/lib/tdb/common/io.c b/source4/lib/tdb/common/io.c index f02cd1a0e1..c1f6f55bc4 100644 --- a/source4/lib/tdb/common/io.c +++ b/source4/lib/tdb/common/io.c @@ -97,6 +97,11 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, int probe) static int tdb_write(struct tdb_context *tdb, tdb_off_t off, const void *buf, tdb_len_t len) { + if (tdb->read_only) { + tdb->ecode = TDB_ERR_RDONLY; + return -1; + } + if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0) return -1; @@ -224,6 +229,12 @@ void tdb_mmap(struct tdb_context *tdb) static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition) { char buf[1024]; + + if (tdb->read_only) { + tdb->ecode = TDB_ERR_RDONLY; + return -1; + } + if (ftruncate(tdb->fd, size+addition) == -1) { char b = 0; if (pwrite(tdb->fd, &b, 1, (size+addition) - 1) != 1) { diff --git a/source4/lib/tdb/common/lock.c b/source4/lib/tdb/common/lock.c index 8061bab7b1..7a76bd3d9b 100644 --- a/source4/lib/tdb/common/lock.c +++ b/source4/lib/tdb/common/lock.c @@ -42,10 +42,12 @@ int tdb_brlock_len(struct tdb_context *tdb, tdb_off_t offset, struct flock fl; int ret; - if (tdb->flags & TDB_NOLOCK) + if (tdb->flags & TDB_NOLOCK) { return 0; + } + if ((rw_type == F_WRLCK) && (tdb->read_only)) { - errno = EACCES; + tdb->ecode = TDB_ERR_RDONLY; return -1; } @@ -64,8 +66,8 @@ int tdb_brlock_len(struct tdb_context *tdb, tdb_off_t offset, * EAGAIN is an expected return from non-blocking * locks. */ if (errno != EAGAIN) { - TDB_LOG((tdb, 5, "tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d: %s\n", - tdb->fd, offset, rw_type, lck_type, + TDB_LOG((tdb, 5, "tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d len=%d: %s\n", + tdb->fd, offset, rw_type, lck_type, len, strerror(errno))); } else if (!probe && lck_type != F_SETLK) { /* Ensure error code is set for log fun to examine. */ diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c index c37d37a4f2..2e229e88cc 100644 --- a/source4/lib/tdb/common/tdb.c +++ b/source4/lib/tdb/common/tdb.c @@ -227,6 +227,11 @@ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag) char *p = NULL; int ret = 0; + if (tdb->read_only) { + tdb->ecode = TDB_ERR_RDONLY; + return -1; + } + /* find which hash bucket it is in */ hash = tdb->hash_fn(&key); if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1) diff --git a/source4/lib/tdb/common/tdb_private.h b/source4/lib/tdb/common/tdb_private.h index eefcc52557..ba0a379e7f 100644 --- a/source4/lib/tdb/common/tdb_private.h +++ b/source4/lib/tdb/common/tdb_private.h @@ -162,6 +162,7 @@ struct tdb_traverse_lock { struct tdb_traverse_lock *next; u32 off; u32 hash; + int lock_rw; }; diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index b9d44a7283..75e91c56cc 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -372,6 +372,15 @@ int tdb_transaction_start(struct tdb_context *tdb) return -1; } + if (tdb->travlocks.next != NULL) { + /* you cannot use transactions inside a traverse (although you can use + traverse inside a transaction) as otherwise you can end up with + deadlock */ + TDB_LOG((tdb, 0, "tdb_transaction_start: cannot start a transaction within a traverse\n")); + tdb->ecode = TDB_ERR_LOCK; + return -1; + } + tdb->transaction = calloc(sizeof(struct tdb_transaction), 1); if (tdb->transaction == NULL) { tdb->ecode = TDB_ERR_OOM; @@ -388,15 +397,9 @@ int tdb_transaction_start(struct tdb_context *tdb) return -1; } - /* get a write lock from the freelist to the end of file. It - would be much better to make this a read lock as it would - increase parallelism, but it could lead to deadlocks on - commit when a write lock needs to be taken. - - TODO: look at alternative locking strategies to allow this - to be a read lock - */ - if (tdb_brlock_len(tdb, FREELIST_TOP, F_WRLCK, F_SETLKW, 0, 0) == -1) { + /* get a read lock from the freelist to the end of file. This + is upgraded to a write lock during the commit */ + if (tdb_brlock_len(tdb, FREELIST_TOP, F_RDLCK, F_SETLKW, 0, 0) == -1) { TDB_LOG((tdb, 0, "tdb_transaction_start: failed to get hash locks\n")); tdb->ecode = TDB_ERR_LOCK; goto fail; @@ -763,6 +766,14 @@ int tdb_transaction_commit(struct tdb_context *tdb) return -1; } + /* upgrade the main transaction lock region to a write lock */ + if (tdb_brlock_len(tdb, FREELIST_TOP, F_WRLCK, F_SETLKW, 0, 0) == -1) { + TDB_LOG((tdb, 0, "tdb_transaction_start: failed to upgrade hash locks\n")); + tdb->ecode = TDB_ERR_LOCK; + tdb_transaction_cancel(tdb); + return -1; + } + /* get the global lock - this prevents new users attaching to the database during the commit */ if (tdb_brlock_len(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) { diff --git a/source4/lib/tdb/common/traverse.c b/source4/lib/tdb/common/traverse.c index 7d1e99cbe8..335dce4152 100644 --- a/source4/lib/tdb/common/traverse.c +++ b/source4/lib/tdb/common/traverse.c @@ -71,7 +71,7 @@ static int tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tloc } } - if (tdb_lock(tdb, tlock->hash, F_WRLCK) == -1) + if (tdb_lock(tdb, tlock->hash, tlock->lock_rw) == -1) return -1; /* No previous record? Start at top of chain. */ @@ -118,7 +118,7 @@ static int tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tloc tdb_do_delete(tdb, current, rec) != 0) goto fail; } - tdb_unlock(tdb, tlock->hash, F_WRLCK); + tdb_unlock(tdb, tlock->hash, tlock->lock_rw); want_next = 0; } /* We finished iteration without finding anything */ @@ -126,7 +126,7 @@ static int tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tloc fail: tlock->off = 0; - if (tdb_unlock(tdb, tlock->hash, F_WRLCK) != 0) + if (tdb_unlock(tdb, tlock->hash, tlock->lock_rw) != 0) TDB_LOG((tdb, 0, "tdb_next_lock: On error unlock failed!\n")); return -1; } @@ -136,32 +136,33 @@ static int tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tloc if fn is NULL then it is not called a non-zero return value from fn() indicates that the traversal should stop */ -int tdb_traverse(struct tdb_context *tdb, tdb_traverse_func fn, void *private) +static int tdb_traverse_internal(struct tdb_context *tdb, + tdb_traverse_func fn, void *private, + struct tdb_traverse_lock *tl) { TDB_DATA key, dbuf; struct list_struct rec; - struct tdb_traverse_lock tl = { NULL, 0, 0 }; int ret, count = 0; /* This was in the initializaton, above, but the IRIX compiler * did not like it. crh */ - tl.next = tdb->travlocks.next; + tl->next = tdb->travlocks.next; /* fcntl locks don't stack: beware traverse inside traverse */ - tdb->travlocks.next = &tl; + tdb->travlocks.next = tl; /* tdb_next_lock places locks on the record returned, and its chain */ - while ((ret = tdb_next_lock(tdb, &tl, &rec)) > 0) { + while ((ret = tdb_next_lock(tdb, tl, &rec)) > 0) { count++; /* now read the full record */ - key.dptr = tdb_alloc_read(tdb, tl.off + sizeof(rec), + key.dptr = tdb_alloc_read(tdb, tl->off + sizeof(rec), rec.key_len + rec.data_len); if (!key.dptr) { ret = -1; - if (tdb_unlock(tdb, tl.hash, F_WRLCK) != 0) + if (tdb_unlock(tdb, tl->hash, tl->lock_rw) != 0) goto out; - if (tdb_unlock_record(tdb, tl.off) != 0) + if (tdb_unlock_record(tdb, tl->off) != 0) TDB_LOG((tdb, 0, "tdb_traverse: key.dptr == NULL and unlock_record failed!\n")); goto out; } @@ -170,31 +171,70 @@ int tdb_traverse(struct tdb_context *tdb, tdb_traverse_func fn, void *private) dbuf.dsize = rec.data_len; /* Drop chain lock, call out */ - if (tdb_unlock(tdb, tl.hash, F_WRLCK) != 0) { + if (tdb_unlock(tdb, tl->hash, tl->lock_rw) != 0) { ret = -1; goto out; } if (fn && fn(tdb, key, dbuf, private)) { /* They want us to terminate traversal */ ret = count; - if (tdb_unlock_record(tdb, tl.off) != 0) { + if (tdb_unlock_record(tdb, tl->off) != 0) { TDB_LOG((tdb, 0, "tdb_traverse: unlock_record failed!\n"));; ret = -1; } - tdb->travlocks.next = tl.next; + tdb->travlocks.next = tl->next; SAFE_FREE(key.dptr); return count; } SAFE_FREE(key.dptr); } out: - tdb->travlocks.next = tl.next; + tdb->travlocks.next = tl->next; if (ret < 0) return -1; else return count; } + +/* + a write style traverse - temporarily marks the db read only +*/ +int tdb_traverse_read(struct tdb_context *tdb, + tdb_traverse_func fn, void *private) +{ + struct tdb_traverse_lock tl = { NULL, 0, 0, F_RDLCK }; + int ret, read_only = tdb->read_only; + tdb->read_only = 1; + ret = tdb_traverse_internal(tdb, fn, private, &tl); + tdb->read_only = read_only; + return ret; +} + +/* + a write style traverse - needs to get the transaction lock to + prevent deadlocks +*/ +int tdb_traverse(struct tdb_context *tdb, + tdb_traverse_func fn, void *private) +{ + struct tdb_traverse_lock tl = { NULL, 0, 0, F_WRLCK }; + int ret; + + if (tdb->methods->tdb_brlock(tdb, TRANSACTION_LOCK, F_WRLCK, F_SETLKW, 0) == -1) { + TDB_LOG((tdb, 0, "tdb_traverse: failed to get transaction lock\n")); + tdb->ecode = TDB_ERR_LOCK; + return -1; + } + + ret = tdb_traverse_internal(tdb, fn, private, &tl); + + tdb->methods->tdb_brlock(tdb, TRANSACTION_LOCK, F_UNLCK, F_SETLKW, 0); + + return ret; +} + + /* find the first entry in the database and return its key */ TDB_DATA tdb_firstkey(struct tdb_context *tdb) { diff --git a/source4/lib/tdb/docs/README b/source4/lib/tdb/docs/README index 18b32de37f..b31ce36ab1 100644 --- a/source4/lib/tdb/docs/README +++ b/source4/lib/tdb/docs/README @@ -127,7 +127,25 @@ int tdb_traverse(TDB_CONTEXT *tdb, int (*fn)(TDB_CONTEXT *tdb, if fn is NULL then it is not called - a non-zero return value from fn() indicates that the traversal should stop + a non-zero return value from fn() indicates that the traversal + should stop. Traversal callbacks may not start transactions. + +---------------------------------------------------------------------- +int tdb_traverse_read(TDB_CONTEXT *tdb, int (*fn)(TDB_CONTEXT *tdb, + TDB_DATA key, TDB_DATA dbuf, void *state), void *state); + + traverse the entire database - calling fn(tdb, key, data, state) on + each element, but marking the database read only during the + traversal, so any write operations will fail. This allows tdb to + use read locks, which increases the parallelism possible during the + traversal. + + return -1 on error or the record count traversed + + if fn is NULL then it is not called + + a non-zero return value from fn() indicates that the traversal + should stop. Traversal callbacks may not start transactions. ---------------------------------------------------------------------- TDB_DATA tdb_firstkey(TDB_CONTEXT *tdb); diff --git a/source4/lib/tdb/include/tdb.h b/source4/lib/tdb/include/tdb.h index 3f123d814c..c116f29fc1 100644 --- a/source4/lib/tdb/include/tdb.h +++ b/source4/lib/tdb/include/tdb.h @@ -52,7 +52,7 @@ extern "C" { /* error codes */ enum TDB_ERROR {TDB_SUCCESS=0, TDB_ERR_CORRUPT, TDB_ERR_IO, TDB_ERR_LOCK, TDB_ERR_OOM, TDB_ERR_EXISTS, TDB_ERR_NOLOCK, TDB_ERR_LOCK_TIMEOUT, - TDB_ERR_NOEXIST, TDB_ERR_EINVAL}; + TDB_ERR_NOEXIST, TDB_ERR_EINVAL, TDB_ERR_RDONLY}; typedef struct TDB_DATA { unsigned char *dptr; @@ -98,6 +98,7 @@ int tdb_close(struct tdb_context *tdb); TDB_DATA tdb_firstkey(struct tdb_context *tdb); TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA key); int tdb_traverse(struct tdb_context *tdb, tdb_traverse_func fn, void *); +int tdb_traverse_read(struct tdb_context *tdb, tdb_traverse_func fn, void *); int tdb_exists(struct tdb_context *tdb, TDB_DATA key); int tdb_lockall(struct tdb_context *tdb); void tdb_unlockall(struct tdb_context *tdb); diff --git a/source4/lib/tdb/tools/tdbtorture.c b/source4/lib/tdb/tools/tdbtorture.c index b0a2e7484f..c0076a92d4 100644 --- a/source4/lib/tdb/tools/tdbtorture.c +++ b/source4/lib/tdb/tools/tdbtorture.c @@ -39,6 +39,7 @@ #define TRANSACTION_PROB 10 #define LOCKSTORE_PROB 5 #define TRAVERSE_PROB 20 +#define TRAVERSE_READ_PROB 20 #define CULL_PROB 100 #define KEYLEN 3 #define DATALEN 100 @@ -192,6 +193,13 @@ static void addrec_db(void) } #endif +#if TRAVERSE_READ_PROB + if (random() % TRAVERSE_READ_PROB == 0) { + tdb_traverse_read(db, NULL, NULL); + goto next; + } +#endif + data = tdb_fetch(db, key); if (data.dptr) free(data.dptr); @@ -273,7 +281,7 @@ static void usage(void) addrec_db(); } - tdb_traverse(db, NULL, NULL); + tdb_traverse_read(db, NULL, NULL); tdb_traverse(db, traverse_fn, NULL); tdb_traverse(db, traverse_fn, NULL); |