summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--source4/lib/tdb/common/error.c3
-rw-r--r--source4/lib/tdb/common/io.c11
-rw-r--r--source4/lib/tdb/common/lock.c10
-rw-r--r--source4/lib/tdb/common/tdb.c5
-rw-r--r--source4/lib/tdb/common/tdb_private.h1
-rw-r--r--source4/lib/tdb/common/transaction.c29
-rw-r--r--source4/lib/tdb/common/traverse.c70
-rw-r--r--source4/lib/tdb/docs/README20
-rw-r--r--source4/lib/tdb/include/tdb.h3
-rw-r--r--source4/lib/tdb/tools/tdbtorture.c10
10 files changed, 130 insertions, 32 deletions
diff --git a/source4/lib/tdb/common/error.c b/source4/lib/tdb/common/error.c
index 270a441463..4cf33a29ab 100644
--- a/source4/lib/tdb/common/error.c
+++ b/source4/lib/tdb/common/error.c
@@ -43,7 +43,8 @@ static struct tdb_errname {
{TDB_ERR_EXISTS, "Record exists"},
{TDB_ERR_NOLOCK, "Lock exists on other keys"},
{TDB_ERR_EINVAL, "Invalid parameter"},
- {TDB_ERR_NOEXIST, "Record does not exist"} };
+ {TDB_ERR_NOEXIST, "Record does not exist"},
+ {TDB_ERR_RDONLY, "write not permitted"} };
/* Error string for the last tdb error */
const char *tdb_errorstr(struct tdb_context *tdb)
diff --git a/source4/lib/tdb/common/io.c b/source4/lib/tdb/common/io.c
index f02cd1a0e1..c1f6f55bc4 100644
--- a/source4/lib/tdb/common/io.c
+++ b/source4/lib/tdb/common/io.c
@@ -97,6 +97,11 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, int probe)
static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
const void *buf, tdb_len_t len)
{
+ if (tdb->read_only) {
+ tdb->ecode = TDB_ERR_RDONLY;
+ return -1;
+ }
+
if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
return -1;
@@ -224,6 +229,12 @@ void tdb_mmap(struct tdb_context *tdb)
static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition)
{
char buf[1024];
+
+ if (tdb->read_only) {
+ tdb->ecode = TDB_ERR_RDONLY;
+ return -1;
+ }
+
if (ftruncate(tdb->fd, size+addition) == -1) {
char b = 0;
if (pwrite(tdb->fd, &b, 1, (size+addition) - 1) != 1) {
diff --git a/source4/lib/tdb/common/lock.c b/source4/lib/tdb/common/lock.c
index 8061bab7b1..7a76bd3d9b 100644
--- a/source4/lib/tdb/common/lock.c
+++ b/source4/lib/tdb/common/lock.c
@@ -42,10 +42,12 @@ int tdb_brlock_len(struct tdb_context *tdb, tdb_off_t offset,
struct flock fl;
int ret;
- if (tdb->flags & TDB_NOLOCK)
+ if (tdb->flags & TDB_NOLOCK) {
return 0;
+ }
+
if ((rw_type == F_WRLCK) && (tdb->read_only)) {
- errno = EACCES;
+ tdb->ecode = TDB_ERR_RDONLY;
return -1;
}
@@ -64,8 +66,8 @@ int tdb_brlock_len(struct tdb_context *tdb, tdb_off_t offset,
* EAGAIN is an expected return from non-blocking
* locks. */
if (errno != EAGAIN) {
- TDB_LOG((tdb, 5, "tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d: %s\n",
- tdb->fd, offset, rw_type, lck_type,
+ TDB_LOG((tdb, 5, "tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d len=%d: %s\n",
+ tdb->fd, offset, rw_type, lck_type, len,
strerror(errno)));
} else if (!probe && lck_type != F_SETLK) {
/* Ensure error code is set for log fun to examine. */
diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index c37d37a4f2..2e229e88cc 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -227,6 +227,11 @@ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
char *p = NULL;
int ret = 0;
+ if (tdb->read_only) {
+ tdb->ecode = TDB_ERR_RDONLY;
+ return -1;
+ }
+
/* find which hash bucket it is in */
hash = tdb->hash_fn(&key);
if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
diff --git a/source4/lib/tdb/common/tdb_private.h b/source4/lib/tdb/common/tdb_private.h
index eefcc52557..ba0a379e7f 100644
--- a/source4/lib/tdb/common/tdb_private.h
+++ b/source4/lib/tdb/common/tdb_private.h
@@ -162,6 +162,7 @@ struct tdb_traverse_lock {
struct tdb_traverse_lock *next;
u32 off;
u32 hash;
+ int lock_rw;
};
diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c
index b9d44a7283..75e91c56cc 100644
--- a/source4/lib/tdb/common/transaction.c
+++ b/source4/lib/tdb/common/transaction.c
@@ -372,6 +372,15 @@ int tdb_transaction_start(struct tdb_context *tdb)
return -1;
}
+ if (tdb->travlocks.next != NULL) {
+ /* you cannot use transactions inside a traverse (although you can use
+ traverse inside a transaction) as otherwise you can end up with
+ deadlock */
+ TDB_LOG((tdb, 0, "tdb_transaction_start: cannot start a transaction within a traverse\n"));
+ tdb->ecode = TDB_ERR_LOCK;
+ return -1;
+ }
+
tdb->transaction = calloc(sizeof(struct tdb_transaction), 1);
if (tdb->transaction == NULL) {
tdb->ecode = TDB_ERR_OOM;
@@ -388,15 +397,9 @@ int tdb_transaction_start(struct tdb_context *tdb)
return -1;
}
- /* get a write lock from the freelist to the end of file. It
- would be much better to make this a read lock as it would
- increase parallelism, but it could lead to deadlocks on
- commit when a write lock needs to be taken.
-
- TODO: look at alternative locking strategies to allow this
- to be a read lock
- */
- if (tdb_brlock_len(tdb, FREELIST_TOP, F_WRLCK, F_SETLKW, 0, 0) == -1) {
+ /* get a read lock from the freelist to the end of file. This
+ is upgraded to a write lock during the commit */
+ if (tdb_brlock_len(tdb, FREELIST_TOP, F_RDLCK, F_SETLKW, 0, 0) == -1) {
TDB_LOG((tdb, 0, "tdb_transaction_start: failed to get hash locks\n"));
tdb->ecode = TDB_ERR_LOCK;
goto fail;
@@ -763,6 +766,14 @@ int tdb_transaction_commit(struct tdb_context *tdb)
return -1;
}
+ /* upgrade the main transaction lock region to a write lock */
+ if (tdb_brlock_len(tdb, FREELIST_TOP, F_WRLCK, F_SETLKW, 0, 0) == -1) {
+ TDB_LOG((tdb, 0, "tdb_transaction_start: failed to upgrade hash locks\n"));
+ tdb->ecode = TDB_ERR_LOCK;
+ tdb_transaction_cancel(tdb);
+ return -1;
+ }
+
/* get the global lock - this prevents new users attaching to the database
during the commit */
if (tdb_brlock_len(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) {
diff --git a/source4/lib/tdb/common/traverse.c b/source4/lib/tdb/common/traverse.c
index 7d1e99cbe8..335dce4152 100644
--- a/source4/lib/tdb/common/traverse.c
+++ b/source4/lib/tdb/common/traverse.c
@@ -71,7 +71,7 @@ static int tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tloc
}
}
- if (tdb_lock(tdb, tlock->hash, F_WRLCK) == -1)
+ if (tdb_lock(tdb, tlock->hash, tlock->lock_rw) == -1)
return -1;
/* No previous record? Start at top of chain. */
@@ -118,7 +118,7 @@ static int tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tloc
tdb_do_delete(tdb, current, rec) != 0)
goto fail;
}
- tdb_unlock(tdb, tlock->hash, F_WRLCK);
+ tdb_unlock(tdb, tlock->hash, tlock->lock_rw);
want_next = 0;
}
/* We finished iteration without finding anything */
@@ -126,7 +126,7 @@ static int tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tloc
fail:
tlock->off = 0;
- if (tdb_unlock(tdb, tlock->hash, F_WRLCK) != 0)
+ if (tdb_unlock(tdb, tlock->hash, tlock->lock_rw) != 0)
TDB_LOG((tdb, 0, "tdb_next_lock: On error unlock failed!\n"));
return -1;
}
@@ -136,32 +136,33 @@ static int tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tloc
if fn is NULL then it is not called
a non-zero return value from fn() indicates that the traversal should stop
*/
-int tdb_traverse(struct tdb_context *tdb, tdb_traverse_func fn, void *private)
+static int tdb_traverse_internal(struct tdb_context *tdb,
+ tdb_traverse_func fn, void *private,
+ struct tdb_traverse_lock *tl)
{
TDB_DATA key, dbuf;
struct list_struct rec;
- struct tdb_traverse_lock tl = { NULL, 0, 0 };
int ret, count = 0;
/* This was in the initializaton, above, but the IRIX compiler
* did not like it. crh
*/
- tl.next = tdb->travlocks.next;
+ tl->next = tdb->travlocks.next;
/* fcntl locks don't stack: beware traverse inside traverse */
- tdb->travlocks.next = &tl;
+ tdb->travlocks.next = tl;
/* tdb_next_lock places locks on the record returned, and its chain */
- while ((ret = tdb_next_lock(tdb, &tl, &rec)) > 0) {
+ while ((ret = tdb_next_lock(tdb, tl, &rec)) > 0) {
count++;
/* now read the full record */
- key.dptr = tdb_alloc_read(tdb, tl.off + sizeof(rec),
+ key.dptr = tdb_alloc_read(tdb, tl->off + sizeof(rec),
rec.key_len + rec.data_len);
if (!key.dptr) {
ret = -1;
- if (tdb_unlock(tdb, tl.hash, F_WRLCK) != 0)
+ if (tdb_unlock(tdb, tl->hash, tl->lock_rw) != 0)
goto out;
- if (tdb_unlock_record(tdb, tl.off) != 0)
+ if (tdb_unlock_record(tdb, tl->off) != 0)
TDB_LOG((tdb, 0, "tdb_traverse: key.dptr == NULL and unlock_record failed!\n"));
goto out;
}
@@ -170,31 +171,70 @@ int tdb_traverse(struct tdb_context *tdb, tdb_traverse_func fn, void *private)
dbuf.dsize = rec.data_len;
/* Drop chain lock, call out */
- if (tdb_unlock(tdb, tl.hash, F_WRLCK) != 0) {
+ if (tdb_unlock(tdb, tl->hash, tl->lock_rw) != 0) {
ret = -1;
goto out;
}
if (fn && fn(tdb, key, dbuf, private)) {
/* They want us to terminate traversal */
ret = count;
- if (tdb_unlock_record(tdb, tl.off) != 0) {
+ if (tdb_unlock_record(tdb, tl->off) != 0) {
TDB_LOG((tdb, 0, "tdb_traverse: unlock_record failed!\n"));;
ret = -1;
}
- tdb->travlocks.next = tl.next;
+ tdb->travlocks.next = tl->next;
SAFE_FREE(key.dptr);
return count;
}
SAFE_FREE(key.dptr);
}
out:
- tdb->travlocks.next = tl.next;
+ tdb->travlocks.next = tl->next;
if (ret < 0)
return -1;
else
return count;
}
+
+/*
+ a write style traverse - temporarily marks the db read only
+*/
+int tdb_traverse_read(struct tdb_context *tdb,
+ tdb_traverse_func fn, void *private)
+{
+ struct tdb_traverse_lock tl = { NULL, 0, 0, F_RDLCK };
+ int ret, read_only = tdb->read_only;
+ tdb->read_only = 1;
+ ret = tdb_traverse_internal(tdb, fn, private, &tl);
+ tdb->read_only = read_only;
+ return ret;
+}
+
+/*
+ a write style traverse - needs to get the transaction lock to
+ prevent deadlocks
+*/
+int tdb_traverse(struct tdb_context *tdb,
+ tdb_traverse_func fn, void *private)
+{
+ struct tdb_traverse_lock tl = { NULL, 0, 0, F_WRLCK };
+ int ret;
+
+ if (tdb->methods->tdb_brlock(tdb, TRANSACTION_LOCK, F_WRLCK, F_SETLKW, 0) == -1) {
+ TDB_LOG((tdb, 0, "tdb_traverse: failed to get transaction lock\n"));
+ tdb->ecode = TDB_ERR_LOCK;
+ return -1;
+ }
+
+ ret = tdb_traverse_internal(tdb, fn, private, &tl);
+
+ tdb->methods->tdb_brlock(tdb, TRANSACTION_LOCK, F_UNLCK, F_SETLKW, 0);
+
+ return ret;
+}
+
+
/* find the first entry in the database and return its key */
TDB_DATA tdb_firstkey(struct tdb_context *tdb)
{
diff --git a/source4/lib/tdb/docs/README b/source4/lib/tdb/docs/README
index 18b32de37f..b31ce36ab1 100644
--- a/source4/lib/tdb/docs/README
+++ b/source4/lib/tdb/docs/README
@@ -127,7 +127,25 @@ int tdb_traverse(TDB_CONTEXT *tdb, int (*fn)(TDB_CONTEXT *tdb,
if fn is NULL then it is not called
- a non-zero return value from fn() indicates that the traversal should stop
+ a non-zero return value from fn() indicates that the traversal
+ should stop. Traversal callbacks may not start transactions.
+
+----------------------------------------------------------------------
+int tdb_traverse_read(TDB_CONTEXT *tdb, int (*fn)(TDB_CONTEXT *tdb,
+ TDB_DATA key, TDB_DATA dbuf, void *state), void *state);
+
+ traverse the entire database - calling fn(tdb, key, data, state) on
+ each element, but marking the database read only during the
+ traversal, so any write operations will fail. This allows tdb to
+ use read locks, which increases the parallelism possible during the
+ traversal.
+
+ return -1 on error or the record count traversed
+
+ if fn is NULL then it is not called
+
+ a non-zero return value from fn() indicates that the traversal
+ should stop. Traversal callbacks may not start transactions.
----------------------------------------------------------------------
TDB_DATA tdb_firstkey(TDB_CONTEXT *tdb);
diff --git a/source4/lib/tdb/include/tdb.h b/source4/lib/tdb/include/tdb.h
index 3f123d814c..c116f29fc1 100644
--- a/source4/lib/tdb/include/tdb.h
+++ b/source4/lib/tdb/include/tdb.h
@@ -52,7 +52,7 @@ extern "C" {
/* error codes */
enum TDB_ERROR {TDB_SUCCESS=0, TDB_ERR_CORRUPT, TDB_ERR_IO, TDB_ERR_LOCK,
TDB_ERR_OOM, TDB_ERR_EXISTS, TDB_ERR_NOLOCK, TDB_ERR_LOCK_TIMEOUT,
- TDB_ERR_NOEXIST, TDB_ERR_EINVAL};
+ TDB_ERR_NOEXIST, TDB_ERR_EINVAL, TDB_ERR_RDONLY};
typedef struct TDB_DATA {
unsigned char *dptr;
@@ -98,6 +98,7 @@ int tdb_close(struct tdb_context *tdb);
TDB_DATA tdb_firstkey(struct tdb_context *tdb);
TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA key);
int tdb_traverse(struct tdb_context *tdb, tdb_traverse_func fn, void *);
+int tdb_traverse_read(struct tdb_context *tdb, tdb_traverse_func fn, void *);
int tdb_exists(struct tdb_context *tdb, TDB_DATA key);
int tdb_lockall(struct tdb_context *tdb);
void tdb_unlockall(struct tdb_context *tdb);
diff --git a/source4/lib/tdb/tools/tdbtorture.c b/source4/lib/tdb/tools/tdbtorture.c
index b0a2e7484f..c0076a92d4 100644
--- a/source4/lib/tdb/tools/tdbtorture.c
+++ b/source4/lib/tdb/tools/tdbtorture.c
@@ -39,6 +39,7 @@
#define TRANSACTION_PROB 10
#define LOCKSTORE_PROB 5
#define TRAVERSE_PROB 20
+#define TRAVERSE_READ_PROB 20
#define CULL_PROB 100
#define KEYLEN 3
#define DATALEN 100
@@ -192,6 +193,13 @@ static void addrec_db(void)
}
#endif
+#if TRAVERSE_READ_PROB
+ if (random() % TRAVERSE_READ_PROB == 0) {
+ tdb_traverse_read(db, NULL, NULL);
+ goto next;
+ }
+#endif
+
data = tdb_fetch(db, key);
if (data.dptr) free(data.dptr);
@@ -273,7 +281,7 @@ static void usage(void)
addrec_db();
}
- tdb_traverse(db, NULL, NULL);
+ tdb_traverse_read(db, NULL, NULL);
tdb_traverse(db, traverse_fn, NULL);
tdb_traverse(db, traverse_fn, NULL);