From ede8415d61b6791114c65de1c283a4e8c11f1585 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Thu, 22 Sep 2005 03:56:41 +0000 Subject: r10405: added transactions into tdb, and hook them into ldb. See my samba-technical posting for more details on the transactions design. This also adds a number of command line arguments to tdbtorture, making it more flexible, and fixes some lock deadlock conditions in the tdbtorture code. (This used to be commit 06bd8abba942ec9f1e23f5c5d546cbb71ca3a701) --- source4/lib/tdb/common/transaction.c | 976 +++++++++++++++++++++++++++++++++++ 1 file changed, 976 insertions(+) create mode 100644 source4/lib/tdb/common/transaction.c (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c new file mode 100644 index 0000000000..b9d44a7283 --- /dev/null +++ b/source4/lib/tdb/common/transaction.c @@ -0,0 +1,976 @@ + /* + Unix SMB/CIFS implementation. + + trivial database library + + Copyright (C) Andrew Tridgell 2005 + + ** NOTE! The following LGPL license applies to the tdb + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "tdb_private.h" + +/* + transaction design: + + - only allow a single transaction at a time per database. This makes + using the transaction API simpler, as otherwise the caller would + have to cope with temporary failures in transactions that conflict + with other current transactions + + - keep the transaction recovery information in the same file as the + database, using a special 'transaction recovery' record pointed at + by the header. This removes the need for extra journal files as + used by some other databases + + - dymacially allocated the transaction recover record, re-using it + for subsequent transactions. If a larger record is needed then + tdb_free() the old record to place it on the normal tdb freelist + before allocating the new record + + - during transactions, keep a linked list of writes all that have + been performed by intercepting all tdb_write() calls. The hooked + transaction versions of tdb_read() and tdb_write() check this + linked list and try to use the elements of the list in preference + to the real database. + + - don't allow any locks to be held when a transaction starts, + otherwise we can end up with deadlock (plus lack of lock nesting + in posix locks would mean the lock is lost) + + - if the caller gains a lock during the transaction but doesn't + release it then fail the commit + + - allow for nested calls to tdb_transaction_start(), re-using the + existing transaction record. If the inner transaction is cancelled + then a subsequent commit will fail + + - keep a mirrored copy of the tdb hash chain heads to allow for the + fast hash heads scan on traverse, updating the mirrored copy in + the transaction version of tdb_write + + - allow callers to mix transaction and non-transaction use of tdb, + although once a transaction is started then an exclusive lock is + gained until the transaction is committed or cancelled + + - the commit stategy involves first saving away all modified data + into a linearised buffer in the transaction recovery area, then + marking the transaction recovery area with a magic value to + indicate a valid recovery record. In total 4 fsync/msync calls are + needed per commit to prevent race conditions. It might be possible + to reduce this to 3 or even 2 with some more work. + + - check for a valid recovery record on open of the tdb, while the + global lock is held. Automatically recover from the transaction + recovery area if needed, then continue with the open as + usual. This allows for smooth crash recovery with no administrator + intervention. + + - if TDB_NOSYNC is passed to flags in tdb_open then transactions are + still available, but no transaction recovery area is used and no + fsync/msync calls are made. + +*/ + + +/* + hold the context of any current transaction +*/ +struct tdb_transaction { + /* we keep a mirrored copy of the tdb hash heads here so + tdb_next_hash_chain() can operate efficiently */ + u32 *hash_heads; + + /* the original io methods - used to do IOs to the real db */ + const struct tdb_methods *io_methods; + + /* the list of transaction elements. We use a doubly linked + list with a last pointer to allow us to keep the list + ordered, with first element at the front of the list. It + needs to be doubly linked as the read/write traversals need + to be backwards, while the commit needs to be forwards */ + struct tdb_transaction_el { + struct tdb_transaction_el *next, *prev; + tdb_off_t offset; + tdb_len_t length; + unsigned char *data; + } *elements, *elements_last; + + /* non-zero when an internal transaction error has + occurred. All write operations will then fail until the + transaction is ended */ + int transaction_error; + + /* when inside a transaction we need to keep track of any + nested tdb_transaction_start() calls, as these are allowed, + but don't create a new transaction */ + int nesting; + + /* old file size before transaction */ + tdb_len_t old_map_size; +}; + + +/* + read while in a transaction. We need to check first if the data is in our list + of transaction elements, then if not do a real read +*/ +static int transaction_read(struct tdb_context *tdb, tdb_off_t off, void *buf, + tdb_len_t len, int cv) +{ + struct tdb_transaction_el *el; + + /* we need to walk the list backwards to get the most recent data */ + for (el=tdb->transaction->elements_last;el;el=el->prev) { + tdb_len_t partial; + + if (off+len <= el->offset) { + continue; + } + if (off >= el->offset + el->length) { + continue; + } + + /* an overlapping read - needs to be split into up to + 2 reads and a memcpy */ + if (off < el->offset) { + partial = el->offset - off; + if (transaction_read(tdb, off, buf, partial, cv) != 0) { + goto fail; + } + len -= partial; + off += partial; + buf = (void *)(partial + (char *)buf); + } + if (off + len <= el->offset + el->length) { + partial = len; + } else { + partial = el->offset + el->length - off; + } + memcpy(buf, el->data + (off - el->offset), partial); + if (cv) { + tdb_convert(buf, len); + } + len -= partial; + off += partial; + buf = (void *)(partial + (char *)buf); + + if (len != 0 && transaction_read(tdb, off, buf, len, cv) != 0) { + goto fail; + } + + return 0; + } + + /* its not in the transaction elements - do a real read */ + return tdb->transaction->io_methods->tdb_read(tdb, off, buf, len, cv); + +fail: + TDB_LOG((tdb, 0, "transaction_read: failed at off=%d len=%d\n", off, len)); + tdb->ecode = TDB_ERR_IO; + tdb->transaction->transaction_error = 1; + return -1; +} + + +/* + write while in a transaction +*/ +static int transaction_write(struct tdb_context *tdb, tdb_off_t off, + const void *buf, tdb_len_t len) +{ + struct tdb_transaction_el *el; + + /* if the write is to a hash head, then update the transaction + hash heads */ + if (len == sizeof(tdb_off_t) && off >= FREELIST_TOP && + off < FREELIST_TOP+TDB_HASHTABLE_SIZE(tdb)) { + u32 chain = (off-FREELIST_TOP) / sizeof(tdb_off_t); + memcpy(&tdb->transaction->hash_heads[chain], buf, len); + } + + /* first see if we can replace an existing entry */ + for (el=tdb->transaction->elements_last;el;el=el->prev) { + tdb_len_t partial; + + if (off+len <= el->offset) { + continue; + } + if (off >= el->offset + el->length) { + continue; + } + + /* an overlapping write - needs to be split into up to + 2 writes and a memcpy */ + if (off < el->offset) { + partial = el->offset - off; + if (transaction_write(tdb, off, buf, partial) != 0) { + goto fail; + } + len -= partial; + off += partial; + buf = (const void *)(partial + (const char *)buf); + } + if (off + len <= el->offset + el->length) { + partial = len; + } else { + partial = el->offset + el->length - off; + } + memcpy(el->data + (off - el->offset), buf, partial); + len -= partial; + off += partial; + buf = (const void *)(partial + (const char *)buf); + + if (len != 0 && transaction_write(tdb, off, buf, len) != 0) { + goto fail; + } + + return 0; + } + + /* add a new entry at the end of the list */ + el = malloc(sizeof(*el)); + if (el == NULL) { + tdb->ecode = TDB_ERR_OOM; + tdb->transaction->transaction_error = 1; + return -1; + } + el->next = NULL; + el->prev = tdb->transaction->elements_last; + el->offset = off; + el->length = len; + el->data = malloc(len); + if (el->data == NULL) { + free(el); + tdb->ecode = TDB_ERR_OOM; + tdb->transaction->transaction_error = 1; + return -1; + } + if (buf) { + memcpy(el->data, buf, len); + } else { + memset(el->data, TDB_PAD_BYTE, len); + } + if (el->prev) { + el->prev->next = el; + } else { + tdb->transaction->elements = el; + } + tdb->transaction->elements_last = el; + return 0; + +fail: + TDB_LOG((tdb, 0, "transaction_write: failed at off=%d len=%d\n", off, len)); + tdb->ecode = TDB_ERR_IO; + tdb->transaction->transaction_error = 1; + return -1; +} + +/* + accelerated hash chain head search, using the cached hash heads +*/ +static void transaction_next_hash_chain(struct tdb_context *tdb, u32 *chain) +{ + u32 h = *chain; + for (;h < tdb->header.hash_size;h++) { + /* the +1 takes account of the freelist */ + if (0 != tdb->transaction->hash_heads[h+1]) { + break; + } + } + (*chain) = h; +} + +/* + out of bounds check during a transaction +*/ +static int transaction_oob(struct tdb_context *tdb, tdb_off_t len, int probe) +{ + if (len <= tdb->map_size) { + return 0; + } + return TDB_ERRCODE(TDB_ERR_IO, -1); +} + +/* + transaction version of tdb_expand(). +*/ +static int transaction_expand_file(struct tdb_context *tdb, tdb_off_t size, + tdb_off_t addition) +{ + /* add a write to the transaction elements, so subsequent + reads see the zero data */ + if (transaction_write(tdb, size, NULL, addition) != 0) { + return -1; + } + + return 0; +} + +/* + brlock during a transaction - ignore them +*/ +int transaction_brlock(struct tdb_context *tdb, tdb_off_t offset, + int rw_type, int lck_type, int probe) +{ + return 0; +} + +static const struct tdb_methods transaction_methods = { + .tdb_read = transaction_read, + .tdb_write = transaction_write, + .next_hash_chain = transaction_next_hash_chain, + .tdb_oob = transaction_oob, + .tdb_expand_file = transaction_expand_file, + .tdb_brlock = transaction_brlock +}; + + +/* + start a tdb transaction. No token is returned, as only a single + transaction is allowed to be pending per tdb_context +*/ +int tdb_transaction_start(struct tdb_context *tdb) +{ + /* some sanity checks */ + if (tdb->read_only || (tdb->flags & TDB_INTERNAL)) { + TDB_LOG((tdb, 0, "tdb_transaction_start: cannot start a transaction on a read-only or internal db\n")); + tdb->ecode = TDB_ERR_EINVAL; + return -1; + } + + /* cope with nested tdb_transaction_start() calls */ + if (tdb->transaction != NULL) { + tdb->transaction->nesting++; + TDB_LOG((tdb, 0, "tdb_transaction_start: nesting %d\n", + tdb->transaction->nesting)); + return 0; + } + + if (tdb->num_locks != 0) { + /* the caller must not have any locks when starting a + transaction as otherwise we'll be screwed by lack + of nested locks in posix */ + TDB_LOG((tdb, 0, "tdb_transaction_start: cannot start a transaction with locks held\n")); + tdb->ecode = TDB_ERR_LOCK; + return -1; + } + + tdb->transaction = calloc(sizeof(struct tdb_transaction), 1); + if (tdb->transaction == NULL) { + tdb->ecode = TDB_ERR_OOM; + return -1; + } + + /* get the transaction write lock. This is a blocking lock. As + discussed with Volker, there are a number of ways we could + make this async, which we will probably do in the future */ + if (tdb_brlock_len(tdb, TRANSACTION_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) { + TDB_LOG((tdb, 0, "tdb_transaction_start: failed to get transaction lock\n")); + tdb->ecode = TDB_ERR_LOCK; + SAFE_FREE(tdb->transaction); + return -1; + } + + /* get a write lock from the freelist to the end of file. It + would be much better to make this a read lock as it would + increase parallelism, but it could lead to deadlocks on + commit when a write lock needs to be taken. + + TODO: look at alternative locking strategies to allow this + to be a read lock + */ + if (tdb_brlock_len(tdb, FREELIST_TOP, F_WRLCK, F_SETLKW, 0, 0) == -1) { + TDB_LOG((tdb, 0, "tdb_transaction_start: failed to get hash locks\n")); + tdb->ecode = TDB_ERR_LOCK; + goto fail; + } + + /* setup a copy of the hash table heads so the hash scan in + traverse can be fast */ + tdb->transaction->hash_heads = calloc(tdb->header.hash_size+1, sizeof(tdb_off_t)); + if (tdb->transaction->hash_heads == NULL) { + tdb->ecode = TDB_ERR_OOM; + goto fail; + } + if (tdb->methods->tdb_read(tdb, FREELIST_TOP, tdb->transaction->hash_heads, + TDB_HASHTABLE_SIZE(tdb), 0) != 0) { + TDB_LOG((tdb, 0, "tdb_transaction_start: failed to read hash heads\n")); + tdb->ecode = TDB_ERR_IO; + goto fail; + } + + /* make sure we know about any file expansions already done by + anyone else */ + tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1); + tdb->transaction->old_map_size = tdb->map_size; + + /* finally hook the io methods, replacing them with + transaction specific methods */ + tdb->transaction->io_methods = tdb->methods; + tdb->methods = &transaction_methods; + + return 0; + +fail: + tdb_brlock_len(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0); + tdb_brlock_len(tdb, TRANSACTION_LOCK, F_UNLCK, F_SETLKW, 0, 1); + SAFE_FREE(tdb->transaction->hash_heads); + SAFE_FREE(tdb->transaction); + return -1; +} + + +/* + cancel the current transaction +*/ +int tdb_transaction_cancel(struct tdb_context *tdb) +{ + if (tdb->transaction == NULL) { + TDB_LOG((tdb, 0, "tdb_transaction_cancel: no transaction\n")); + return -1; + } + + if (tdb->transaction->nesting != 0) { + tdb->transaction->transaction_error = 1; + tdb->transaction->nesting--; + return 0; + } + + tdb->map_size = tdb->transaction->old_map_size; + + /* free all the transaction elements */ + while (tdb->transaction->elements) { + struct tdb_transaction_el *el = tdb->transaction->elements; + tdb->transaction->elements = el->next; + free(el->data); + free(el); + } + + /* remove any locks created during the transaction */ + if (tdb->num_locks != 0) { + int h; + for (h=0;hheader.hash_size+1;h++) { + if (tdb->locked[h].count != 0) { + tdb_brlock_len(tdb,FREELIST_TOP+4*h,F_UNLCK,F_SETLKW, 0, 1); + tdb->locked[h].count = 0; + } + } + tdb->num_locks = 0; + } + + /* restore the normal io methods */ + tdb->methods = tdb->transaction->io_methods; + + tdb_brlock_len(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0); + tdb_brlock_len(tdb, TRANSACTION_LOCK, F_UNLCK, F_SETLKW, 0, 1); + SAFE_FREE(tdb->transaction->hash_heads); + SAFE_FREE(tdb->transaction); + + return 0; +} + +/* + sync to disk +*/ +static int transaction_sync(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t length) +{ + if (fsync(tdb->fd) != 0) { + tdb->ecode = TDB_ERR_IO; + TDB_LOG((tdb, 0, "tdb_transaction: fsync failed\n")); + return -1; + } +#ifdef MS_SYNC + if (tdb->map_ptr) { + tdb_off_t moffset = offset & ~(tdb->page_size-1); + if (msync(moffset + (char *)tdb->map_ptr, + length + (offset - moffset), MS_SYNC) != 0) { + tdb->ecode = TDB_ERR_IO; + TDB_LOG((tdb, 0, "tdb_transaction: msync failed\n")); + return -1; + } + } +#endif + return 0; +} + + +/* + work out how much space the linearised recovery data will consume +*/ +static tdb_len_t tdb_recovery_size(struct tdb_context *tdb) +{ + struct tdb_transaction_el *el; + tdb_len_t recovery_size = 0; + + recovery_size = sizeof(u32); + for (el=tdb->transaction->elements;el;el=el->next) { + if (el->offset >= tdb->transaction->old_map_size) { + continue; + } + recovery_size += 2*sizeof(tdb_off_t) + el->length; + } + + return recovery_size; +} + +/* + allocate the recovery area, or use an existing recovery area if it is + large enough +*/ +static int tdb_recovery_allocate(struct tdb_context *tdb, + tdb_len_t *recovery_size, + tdb_off_t *recovery_offset, + tdb_len_t *recovery_max_size) +{ + struct list_struct rec; + const struct tdb_methods *methods = tdb->transaction->io_methods; + tdb_off_t recovery_head; + + if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) { + TDB_LOG((tdb, 0, "tdb_recovery_allocate: failed to read recovery head\n")); + return -1; + } + + rec.rec_len = 0; + + if (recovery_head != 0 && + methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) { + TDB_LOG((tdb, 0, "tdb_recovery_allocate: failed to read recovery record\n")); + return -1; + } + + *recovery_size = tdb_recovery_size(tdb); + + if (recovery_head != 0 && *recovery_size <= rec.rec_len) { + /* it fits in the existing area */ + *recovery_max_size = rec.rec_len; + *recovery_offset = recovery_head; + return 0; + } + + /* we need to free up the old recovery area, then allocate a + new one at the end of the file. Note that we cannot use + tdb_allocate() to allocate the new one as that might return + us an area that is being currently used (as of the start of + the transaction) */ + if (recovery_head != 0) { + if (tdb_free(tdb, recovery_head, &rec) == -1) { + TDB_LOG((tdb, 0, "tdb_recovery_allocate: failed to free previous recovery area\n")); + return -1; + } + } + + /* the tdb_free() call might have increased the recovery size */ + *recovery_size = tdb_recovery_size(tdb); + + /* round up to a multiple of page size */ + *recovery_max_size = TDB_ALIGN(sizeof(rec) + *recovery_size, tdb->page_size) - sizeof(rec); + *recovery_offset = tdb->map_size; + recovery_head = *recovery_offset; + + if (methods->tdb_expand_file(tdb, tdb->transaction->old_map_size, + (tdb->map_size - tdb->transaction->old_map_size) + + sizeof(rec) + *recovery_max_size) == -1) { + TDB_LOG((tdb, 0, "tdb_recovery_allocate: failed to create recovery area\n")); + return -1; + } + + /* remap the file (if using mmap) */ + methods->tdb_oob(tdb, tdb->map_size + 1, 1); + + /* we have to reset the old map size so that we don't try to expand the file + again in the transaction commit, which would destroy the recovery area */ + tdb->transaction->old_map_size = tdb->map_size; + + /* write the recovery header offset and sync - we can sync without a race here + as the magic ptr in the recovery record has not been set */ + CONVERT(recovery_head); + if (methods->tdb_write(tdb, TDB_RECOVERY_HEAD, + &recovery_head, sizeof(tdb_off_t)) == -1) { + TDB_LOG((tdb, 0, "tdb_recovery_allocate: failed to write recovery head\n")); + return -1; + } + + return 0; +} + + +/* + setup the recovery data that will be used on a crash during commit +*/ +static int transaction_setup_recovery(struct tdb_context *tdb, + tdb_off_t *magic_offset) +{ + struct tdb_transaction_el *el; + tdb_len_t recovery_size; + unsigned char *data, *p; + const struct tdb_methods *methods = tdb->transaction->io_methods; + struct list_struct *rec; + tdb_off_t recovery_offset, recovery_max_size; + tdb_off_t old_map_size = tdb->transaction->old_map_size; + u32 magic; + + /* + check that the recovery area has enough space + */ + if (tdb_recovery_allocate(tdb, &recovery_size, + &recovery_offset, &recovery_max_size) == -1) { + return -1; + } + + data = malloc(recovery_size + sizeof(*rec)); + if (data == NULL) { + tdb->ecode = TDB_ERR_OOM; + return -1; + } + + rec = (struct list_struct *)data; + memset(rec, 0, sizeof(*rec)); + + rec->magic = 0; + rec->data_len = recovery_size; + rec->rec_len = recovery_max_size; + rec->key_len = old_map_size; + CONVERT(rec); + + /* build the recovery data into a single blob to allow us to do a single + large write, which should be more efficient */ + p = data + sizeof(*rec); + for (el=tdb->transaction->elements;el;el=el->next) { + if (el->offset >= old_map_size) { + continue; + } + if (el->offset + el->length > tdb->transaction->old_map_size) { + TDB_LOG((tdb, 0, "tdb_transaction_commit: transaction data over new region boundary\n")); + free(data); + tdb->ecode = TDB_ERR_CORRUPT; + return -1; + } + ((u32 *)p)[0] = el->offset; + ((u32 *)p)[1] = el->length; + if (DOCONV()) { + tdb_convert(p, 8); + } + /* the recovery area contains the old data, not the + new data, so we have to call the original tdb_read + method to get it */ + if (methods->tdb_read(tdb, el->offset, p + 8, el->length, 0) != 0) { + free(data); + tdb->ecode = TDB_ERR_IO; + return -1; + } + p += 8 + el->length; + } + + /* and the tailer */ + *(u32 *)p = sizeof(*rec) + recovery_max_size; + CONVERT(p); + + /* write the recovery data to the recovery area */ + if (methods->tdb_write(tdb, recovery_offset, data, sizeof(*rec) + recovery_size) == -1) { + TDB_LOG((tdb, 0, "tdb_transaction_commit: failed to write recovery data\n")); + free(data); + tdb->ecode = TDB_ERR_IO; + return -1; + } + + /* as we don't have ordered writes, we have to sync the recovery + data before we update the magic to indicate that the recovery + data is present */ + if (transaction_sync(tdb, recovery_offset, sizeof(*rec) + recovery_size) == -1) { + free(data); + return -1; + } + + free(data); + + magic = TDB_RECOVERY_MAGIC; + CONVERT(magic); + + *magic_offset = recovery_offset + offsetof(struct list_struct, magic); + + if (methods->tdb_write(tdb, *magic_offset, &magic, sizeof(magic)) == -1) { + TDB_LOG((tdb, 0, "tdb_transaction_commit: failed to write recovery magic\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + + /* ensure the recovery magic marker is on disk */ + if (transaction_sync(tdb, *magic_offset, sizeof(magic)) == -1) { + return -1; + } + + return 0; +} + +/* + commit the current transaction +*/ +int tdb_transaction_commit(struct tdb_context *tdb) +{ + const struct tdb_methods *methods; + tdb_off_t magic_offset; + u32 zero = 0; + + if (tdb->transaction == NULL) { + TDB_LOG((tdb, 0, "tdb_transaction_commit: no transaction\n")); + return -1; + } + + if (tdb->transaction->transaction_error) { + tdb->ecode = TDB_ERR_IO; + tdb_transaction_cancel(tdb); + TDB_LOG((tdb, 0, "tdb_transaction_commit: transaction error pending\n")); + return -1; + } + + if (tdb->transaction->nesting != 0) { + tdb->transaction->nesting--; + return 0; + } + + /* check for a null transaction */ + if (tdb->transaction->elements == NULL) { + tdb_transaction_cancel(tdb); + return 0; + } + + methods = tdb->transaction->io_methods; + + /* if there are any locks pending then the caller has not + nested their locks properly, so fail the transaction */ + if (tdb->num_locks) { + tdb->ecode = TDB_ERR_LOCK; + TDB_LOG((tdb, 0, "tdb_transaction_commit: locks pending on commit\n")); + tdb_transaction_cancel(tdb); + return -1; + } + + /* get the global lock - this prevents new users attaching to the database + during the commit */ + if (tdb_brlock_len(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) { + TDB_LOG((tdb, 0, "tdb_transaction_commit: failed to get global lock\n")); + tdb->ecode = TDB_ERR_LOCK; + tdb_transaction_cancel(tdb); + return -1; + } + + if (!(tdb->flags & TDB_NOSYNC)) { + /* write the recovery data to the end of the file */ + if (transaction_setup_recovery(tdb, &magic_offset) == -1) { + TDB_LOG((tdb, 0, "tdb_transaction_commit: failed to setup recovery data\n")); + tdb_brlock_len(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); + tdb_transaction_cancel(tdb); + return -1; + } + } + + /* expand the file to the new size if needed */ + if (tdb->map_size != tdb->transaction->old_map_size) { + if (methods->tdb_expand_file(tdb, tdb->transaction->old_map_size, + tdb->map_size - + tdb->transaction->old_map_size) == -1) { + tdb->ecode = TDB_ERR_IO; + TDB_LOG((tdb, 0, "tdb_transaction_commit: expansion failed\n")); + tdb_brlock_len(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); + tdb_transaction_cancel(tdb); + return -1; + } + tdb->map_size = tdb->transaction->old_map_size; + methods->tdb_oob(tdb, tdb->map_size + 1, 1); + } + + /* perform all the writes */ + while (tdb->transaction->elements) { + struct tdb_transaction_el *el = tdb->transaction->elements; + + if (methods->tdb_write(tdb, el->offset, el->data, el->length) == -1) { + TDB_LOG((tdb, 0, "tdb_transaction_commit: write failed during commit\n")); + + /* we've overwritten part of the data and + possibly expanded the file, so we need to + run the crash recovery code */ + tdb->methods = methods; + tdb_transaction_recover(tdb); + + tdb_transaction_cancel(tdb); + tdb_brlock_len(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); + + TDB_LOG((tdb, 0, "tdb_transaction_commit: write failed\n")); + return -1; + } + tdb->transaction->elements = el->next; + free(el->data); + free(el); + } + + if (!(tdb->flags & TDB_NOSYNC)) { + /* ensure the new data is on disk */ + if (transaction_sync(tdb, 0, tdb->map_size) == -1) { + return -1; + } + + /* remove the recovery marker */ + if (methods->tdb_write(tdb, magic_offset, &zero, 4) == -1) { + TDB_LOG((tdb, 0, "tdb_transaction_commit: failed to remove recovery magic\n")); + return -1; + } + + /* ensure the recovery marker has been removed on disk */ + if (transaction_sync(tdb, magic_offset, 4) == -1) { + return -1; + } + } + + tdb_brlock_len(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); + + /* use a transaction cancel to free memory and remove the + transaction locks */ + tdb_transaction_cancel(tdb); + return 0; +} + + +/* + recover from an aborted transaction. Must be called with exclusive + database write access already established (including the global + lock to prevent new processes attaching) +*/ +int tdb_transaction_recover(struct tdb_context *tdb) +{ + tdb_off_t recovery_head, recovery_eof; + unsigned char *data, *p; + u32 zero = 0; + struct list_struct rec; + + /* find the recovery area */ + if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) { + TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to read recovery head\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + + if (recovery_head == 0) { + /* we have never allocated a recovery record */ + return 0; + } + + /* read the recovery record */ + if (tdb->methods->tdb_read(tdb, recovery_head, &rec, + sizeof(rec), DOCONV()) == -1) { + TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to read recovery record\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + + if (rec.magic != TDB_RECOVERY_MAGIC) { + /* there is no valid recovery data */ + return 0; + } + + if (tdb->read_only) { + TDB_LOG((tdb, 0, "tdb_transaction_recover: attempt to recover read only database\n")); + tdb->ecode = TDB_ERR_CORRUPT; + return -1; + } + + recovery_eof = rec.key_len; + + data = malloc(rec.data_len); + if (data == NULL) { + TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to allocate recovery data\n")); + tdb->ecode = TDB_ERR_OOM; + return -1; + } + + /* read the full recovery data */ + if (tdb->methods->tdb_read(tdb, recovery_head + sizeof(rec), data, + rec.data_len, 0) == -1) { + TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to read recovery data\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + + /* recover the file data */ + p = data; + while (p+8 < data + rec.data_len) { + u32 ofs, len; + if (DOCONV()) { + tdb_convert(p, 8); + } + ofs = ((u32 *)p)[0]; + len = ((u32 *)p)[1]; + + if (tdb->methods->tdb_write(tdb, ofs, p+8, len) == -1) { + free(data); + TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to recover %d bytes at offset %d\n", len, ofs)); + tdb->ecode = TDB_ERR_IO; + return -1; + } + p += 8 + len; + } + + free(data); + + if (transaction_sync(tdb, 0, tdb->map_size) == -1) { + TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to sync recovery\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + + /* if the recovery area is after the recovered eof then remove it */ + if (recovery_eof <= recovery_head) { + if (tdb_ofs_write(tdb, TDB_RECOVERY_HEAD, &zero) == -1) { + TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to remove recovery head\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + } + + /* remove the recovery magic */ + if (tdb_ofs_write(tdb, recovery_head + offsetof(struct list_struct, magic), + &zero) == -1) { + TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to remove recovery magic\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + + /* reduce the file size to the old size */ + tdb_munmap(tdb); + if (ftruncate(tdb->fd, recovery_eof) != 0) { + TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to reduce to recovery size\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + tdb->map_size = recovery_eof; + tdb_mmap(tdb); + + if (transaction_sync(tdb, 0, recovery_eof) == -1) { + TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to sync2 recovery\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } + + TDB_LOG((tdb, 0, "tdb_transaction_recover: recovered %d byte database\n", + recovery_eof)); + + /* all done */ + return 0; +} -- cgit From bd310b792509f7305d7dc029eb4bec109322a4bf Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Thu, 22 Sep 2005 13:12:46 +0000 Subject: r10421: following on discussions with simo, I have worked out a way of allowing searches to proceed while another process is in a transaction, then only upgrading the transaction lock to a write lock on commit. The solution is: - split tdb_traverse() into two calls, called tdb_traverse() and tdb_traverse_read(). The _read() version only gets read locks, and will fail any write operations made in the callback from the traverse. - the normal tdb_traverse() call allows for read or write operations in the callback, but gets the transaction lock, preventing transastions from starting inside the traverse In addition we enforce the following rule that you may not start a transaction within a traverse callback, although you can start a traverse within a transaction With these rules in place I believe all the deadlock possibilities are removed, and we can now allow for searches to happen in parallel with transactions (This used to be commit 7dd31288a701d772e45b1960ac4ce4cc1be782ed) --- source4/lib/tdb/common/transaction.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index b9d44a7283..75e91c56cc 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -372,6 +372,15 @@ int tdb_transaction_start(struct tdb_context *tdb) return -1; } + if (tdb->travlocks.next != NULL) { + /* you cannot use transactions inside a traverse (although you can use + traverse inside a transaction) as otherwise you can end up with + deadlock */ + TDB_LOG((tdb, 0, "tdb_transaction_start: cannot start a transaction within a traverse\n")); + tdb->ecode = TDB_ERR_LOCK; + return -1; + } + tdb->transaction = calloc(sizeof(struct tdb_transaction), 1); if (tdb->transaction == NULL) { tdb->ecode = TDB_ERR_OOM; @@ -388,15 +397,9 @@ int tdb_transaction_start(struct tdb_context *tdb) return -1; } - /* get a write lock from the freelist to the end of file. It - would be much better to make this a read lock as it would - increase parallelism, but it could lead to deadlocks on - commit when a write lock needs to be taken. - - TODO: look at alternative locking strategies to allow this - to be a read lock - */ - if (tdb_brlock_len(tdb, FREELIST_TOP, F_WRLCK, F_SETLKW, 0, 0) == -1) { + /* get a read lock from the freelist to the end of file. This + is upgraded to a write lock during the commit */ + if (tdb_brlock_len(tdb, FREELIST_TOP, F_RDLCK, F_SETLKW, 0, 0) == -1) { TDB_LOG((tdb, 0, "tdb_transaction_start: failed to get hash locks\n")); tdb->ecode = TDB_ERR_LOCK; goto fail; @@ -763,6 +766,14 @@ int tdb_transaction_commit(struct tdb_context *tdb) return -1; } + /* upgrade the main transaction lock region to a write lock */ + if (tdb_brlock_len(tdb, FREELIST_TOP, F_WRLCK, F_SETLKW, 0, 0) == -1) { + TDB_LOG((tdb, 0, "tdb_transaction_start: failed to upgrade hash locks\n")); + tdb->ecode = TDB_ERR_LOCK; + tdb_transaction_cancel(tdb); + return -1; + } + /* get the global lock - this prevents new users attaching to the database during the commit */ if (tdb_brlock_len(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) { -- cgit From c0e6586405d2ceb8af9b4252ced83dea931ab8aa Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Sat, 24 Sep 2005 02:37:22 +0000 Subject: r10460: fixed portability of transaction code to systems with integer alignment constraints (like sparc) (This used to be commit bce35ad237a55376b6af98416eec92a7c4b422a6) --- source4/lib/tdb/common/transaction.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index 75e91c56cc..5f281ca4da 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -628,7 +628,7 @@ static int transaction_setup_recovery(struct tdb_context *tdb, struct list_struct *rec; tdb_off_t recovery_offset, recovery_max_size; tdb_off_t old_map_size = tdb->transaction->old_map_size; - u32 magic; + u32 magic, tailer; /* check that the recovery area has enough space @@ -666,8 +666,8 @@ static int transaction_setup_recovery(struct tdb_context *tdb, tdb->ecode = TDB_ERR_CORRUPT; return -1; } - ((u32 *)p)[0] = el->offset; - ((u32 *)p)[1] = el->length; + memcpy(p, &el->offset, 4); + memcpy(p+4, &el->length, 4); if (DOCONV()) { tdb_convert(p, 8); } @@ -683,7 +683,8 @@ static int transaction_setup_recovery(struct tdb_context *tdb, } /* and the tailer */ - *(u32 *)p = sizeof(*rec) + recovery_max_size; + tailer = sizeof(*rec) + recovery_max_size; + memcpy(p, &tailer, 4); CONVERT(p); /* write the recovery data to the recovery area */ @@ -926,8 +927,8 @@ int tdb_transaction_recover(struct tdb_context *tdb) if (DOCONV()) { tdb_convert(p, 8); } - ofs = ((u32 *)p)[0]; - len = ((u32 *)p)[1]; + memcpy(&ofs, p, 4); + memcpy(&len, p+4, 4); if (tdb->methods->tdb_write(tdb, ofs, p+8, len) == -1) { free(data); -- cgit From 5860aef9cd53da572bef1b86a62a3a5e86da84b0 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Sat, 24 Sep 2005 03:43:02 +0000 Subject: r10465: separate out a read_only db from a read-only traversal to ensure we don't end up doing a mmap read only (This used to be commit 294ccfd46a0c4e1af9365d028acdabec03c41ad3) --- source4/lib/tdb/common/transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index 5f281ca4da..a01f8307e9 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -349,7 +349,7 @@ static const struct tdb_methods transaction_methods = { int tdb_transaction_start(struct tdb_context *tdb) { /* some sanity checks */ - if (tdb->read_only || (tdb->flags & TDB_INTERNAL)) { + if (tdb->read_only || (tdb->flags & TDB_INTERNAL) || tdb->traverse_read) { TDB_LOG((tdb, 0, "tdb_transaction_start: cannot start a transaction on a read-only or internal db\n")); tdb->ecode = TDB_ERR_EINVAL; return -1; -- cgit From 3bcfc68e0d158b272e0ef87bf585f79a570136ba Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Sat, 24 Sep 2005 06:49:28 +0000 Subject: r10468: - terminate tdbtorture quickly when an error is detected - more workarounds for aix not handling malloc of size 0 (This used to be commit c2b1739c6389503854f55fa8407c55071781eff4) --- source4/lib/tdb/common/transaction.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index a01f8307e9..73c0587a40 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -196,6 +196,10 @@ static int transaction_write(struct tdb_context *tdb, tdb_off_t off, const void *buf, tdb_len_t len) { struct tdb_transaction_el *el; + + if (len == 0) { + return 0; + } /* if the write is to a hash head, then update the transaction hash heads */ -- cgit From ddf733fc58f29d7315045d6d886ebebe32fd4ee7 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Sat, 24 Sep 2005 06:53:24 +0000 Subject: r10469: use the older style of structure initialisation for tdb to make it more portable. tdb is used in more than just Samba4, and I think the portability/readability balance is a bit different (This used to be commit fc692dc61f06d61cb9126d2a8ccc240cecd11da6) --- source4/lib/tdb/common/transaction.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index 73c0587a40..fdcba110c2 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -337,12 +337,12 @@ int transaction_brlock(struct tdb_context *tdb, tdb_off_t offset, } static const struct tdb_methods transaction_methods = { - .tdb_read = transaction_read, - .tdb_write = transaction_write, - .next_hash_chain = transaction_next_hash_chain, - .tdb_oob = transaction_oob, - .tdb_expand_file = transaction_expand_file, - .tdb_brlock = transaction_brlock + transaction_read, + transaction_write, + transaction_next_hash_chain, + transaction_oob, + transaction_expand_file, + transaction_brlock }; -- cgit From 6e99b959dedcd8da444fca04832011ffa744b619 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Sat, 24 Sep 2005 23:52:30 +0000 Subject: r10483: fixed some uninitialised variables warnings (This used to be commit 315653cf1ecc11c435ee35b20e6cf9c73a67fa68) --- source4/lib/tdb/common/transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index fdcba110c2..cf0f378dce 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -734,7 +734,7 @@ static int transaction_setup_recovery(struct tdb_context *tdb, int tdb_transaction_commit(struct tdb_context *tdb) { const struct tdb_methods *methods; - tdb_off_t magic_offset; + tdb_off_t magic_offset = 0; u32 zero = 0; if (tdb->transaction == NULL) { -- cgit From be27946d84dbf9155d9505d00abe91089a9b8125 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Mon, 26 Sep 2005 01:12:12 +0000 Subject: r10492: work around a bug in solaris which cases lock upgrades to fail with EDEADLK even when progress can be made. This is not a good solution, but I can't find anything better. (This used to be commit 980dd17f7d0a622cd772afc9ba15e50007ad9c6e) --- source4/lib/tdb/common/transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index cf0f378dce..9d37046116 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -772,7 +772,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) } /* upgrade the main transaction lock region to a write lock */ - if (tdb_brlock_len(tdb, FREELIST_TOP, F_WRLCK, F_SETLKW, 0, 0) == -1) { + if (tdb_brlock_upgrade(tdb, FREELIST_TOP, 0) == -1) { TDB_LOG((tdb, 0, "tdb_transaction_start: failed to upgrade hash locks\n")); tdb->ecode = TDB_ERR_LOCK; tdb_transaction_cancel(tdb); -- cgit From 5d1bc4fe31192503621cce704d24c0c5e11d24fc Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Tue, 11 Oct 2005 05:01:52 +0000 Subject: r10891: I noticed that the secrets.db was not being backed up on my system due to msync/mmap not changing the mtime of the file. This patch ensures that for successfully completed transactions we update the mtime. I don't do this on all tdb writes as its too expensive, but doing it just on transactions is bearable, as those cost quite a lot anyway. (This used to be commit b2934732dd62f705f59c124f19460c5436a9a422) --- source4/lib/tdb/common/transaction.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index 9d37046116..becc6cd059 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -857,6 +857,15 @@ int tdb_transaction_commit(struct tdb_context *tdb) tdb_brlock_len(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); + /* on some systems (like Linux 2.6.x) changes via mmap/msync + don't change the mtime of the file, this means the file may + not be backed up (as tdb rounding to block sizes means that + file size changes are quite rare too). The following forces + mtime changes when a transaction completes */ +#ifdef HAVE_UTIME + utime(tdb->name, NULL); +#endif + /* use a transaction cancel to free memory and remove the transaction locks */ tdb_transaction_cancel(tdb); -- cgit From b9e07af8227a59932e0da1dda1749d490eba958c Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 1 Feb 2006 10:50:26 +0000 Subject: r13283: added two optimisations to the tdb transactions code. The first is to more agressively coalesce entries in the linked list of the undo log. The second is to ensure that writes during a transaction into the hash table don't cause the size of the undo log linked list to grow. These optimisations don't affect Samba much, but they make a huge difference to the use of ldb in kde (This used to be commit a37d9434d1fa181fd3d060ad032ee4ec5135fc52) --- source4/lib/tdb/common/transaction.c | 37 +++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index becc6cd059..ace4aa51a6 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -195,7 +195,7 @@ fail: static int transaction_write(struct tdb_context *tdb, tdb_off_t off, const void *buf, tdb_len_t len) { - struct tdb_transaction_el *el; + struct tdb_transaction_el *el, *best_el=NULL; if (len == 0) { return 0; @@ -213,6 +213,10 @@ static int transaction_write(struct tdb_context *tdb, tdb_off_t off, for (el=tdb->transaction->elements_last;el;el=el->prev) { tdb_len_t partial; + if (best_el == NULL && off == el->offset+el->length) { + best_el = el; + } + if (off+len <= el->offset) { continue; } @@ -248,6 +252,28 @@ static int transaction_write(struct tdb_context *tdb, tdb_off_t off, return 0; } + /* see if we can append the new entry to an existing entry */ + if (best_el && best_el->offset + best_el->length == off && + (off+len < tdb->transaction->old_map_size || + off > tdb->transaction->old_map_size)) { + unsigned char *data = best_el->data; + el = best_el; + el->data = realloc(el->data, el->length + len); + if (el->data == NULL) { + tdb->ecode = TDB_ERR_OOM; + tdb->transaction->transaction_error = 1; + el->data = data; + return -1; + } + if (buf) { + memcpy(el->data + el->length, buf, len); + } else { + memset(el->data + el->length, TDB_PAD_BYTE, len); + } + el->length += len; + return 0; + } + /* add a new entry at the end of the list */ el = malloc(sizeof(*el)); if (el == NULL) { @@ -433,6 +459,15 @@ int tdb_transaction_start(struct tdb_context *tdb) tdb->transaction->io_methods = tdb->methods; tdb->methods = &transaction_methods; + /* by calling this transaction write here, we ensure that we don't grow the + transaction linked list due to hash table updates */ + if (transaction_write(tdb, FREELIST_TOP, tdb->transaction->hash_heads, + TDB_HASHTABLE_SIZE(tdb)) != 0) { + TDB_LOG((tdb, 0, "tdb_transaction_start: failed to prime hash table\n")); + tdb->ecode = TDB_ERR_IO; + goto fail; + } + return 0; fail: -- cgit From 3387746c4517d6766146080980ac467b72b62316 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Thu, 30 Mar 2006 04:52:39 +0000 Subject: r14799: added a tdb_get_seqnum() call, and the TDB_SEQNUM flag. This allows for an extremely lightweight test to see if a tdb has possibly changed. (This used to be commit f325ba605ccceca63712c0f2c98961e35e437b3d) --- source4/lib/tdb/common/transaction.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index ace4aa51a6..6960a02ad8 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -892,6 +892,12 @@ int tdb_transaction_commit(struct tdb_context *tdb) tdb_brlock_len(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); + /* + TODO: maybe write to some dummy hdr field, or write to magic + offset without mmap, before the last sync, instead of the + utime() call + */ + /* on some systems (like Linux 2.6.x) changes via mmap/msync don't change the mtime of the file, this means the file may not be backed up (as tdb rounding to block sizes means that -- cgit From b9ccb39c48870b0a84e19e816932d08b3304c1f4 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 5 Apr 2006 03:51:43 +0000 Subject: r14916: print errno so I can work out why OpenBSD is failing the test for tdb in the build farm. msync() is failing. (This used to be commit 4c3f3c414135c2b8fa9ea11a156246b56285b84f) --- source4/lib/tdb/common/transaction.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index 6960a02ad8..aa54c4ae3c 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -544,7 +544,8 @@ static int transaction_sync(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t if (msync(moffset + (char *)tdb->map_ptr, length + (offset - moffset), MS_SYNC) != 0) { tdb->ecode = TDB_ERR_IO; - TDB_LOG((tdb, 0, "tdb_transaction: msync failed\n")); + TDB_LOG((tdb, 0, "tdb_transaction: msync failed - %s\n", + strerror(errno))); return -1; } } -- cgit From 68a06bf37ec4ef63767a866977855b8b8bf6a5c8 Mon Sep 17 00:00:00 2001 From: Simo Sorce Date: Mon, 5 Jun 2006 00:46:52 +0000 Subject: r16043: Fix error message (This used to be commit 2898df2cee22f2d478440e9576bec15140909e45) --- source4/lib/tdb/common/transaction.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index aa54c4ae3c..5ddeb69c47 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -701,7 +701,7 @@ static int transaction_setup_recovery(struct tdb_context *tdb, continue; } if (el->offset + el->length > tdb->transaction->old_map_size) { - TDB_LOG((tdb, 0, "tdb_transaction_commit: transaction data over new region boundary\n")); + TDB_LOG((tdb, 0, "tdb_transaction_setup_recovery: transaction data over new region boundary\n")); free(data); tdb->ecode = TDB_ERR_CORRUPT; return -1; @@ -729,7 +729,7 @@ static int transaction_setup_recovery(struct tdb_context *tdb, /* write the recovery data to the recovery area */ if (methods->tdb_write(tdb, recovery_offset, data, sizeof(*rec) + recovery_size) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_commit: failed to write recovery data\n")); + TDB_LOG((tdb, 0, "tdb_transaction_setup_recovery: failed to write recovery data\n")); free(data); tdb->ecode = TDB_ERR_IO; return -1; @@ -751,7 +751,7 @@ static int transaction_setup_recovery(struct tdb_context *tdb, *magic_offset = recovery_offset + offsetof(struct list_struct, magic); if (methods->tdb_write(tdb, *magic_offset, &magic, sizeof(magic)) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_commit: failed to write recovery magic\n")); + TDB_LOG((tdb, 0, "tdb_transaction_setup_recovery: failed to write recovery magic\n")); tdb->ecode = TDB_ERR_IO; return -1; } -- cgit From d3fee429aee87e9c05a4a606fbf0b60b16dac782 Mon Sep 17 00:00:00 2001 From: Andrew Bartlett Date: Mon, 3 Jul 2006 06:40:56 +0000 Subject: r16774: This patch modifies the tdb API to allow the logging function to be used as part of ldb. This allows tdb failures to be passed all the way up to Samba's DEBUG system, which allowed easier debugging. Unfortunately I had to extend the tdb API, as the logging function didn't have a context pointer. I've worked over the 'debug levels' in TDB. Most of them were 0, which didn't seem right, as some were trace-like messages. We didn't see any of these previously, except when accessing TDB directly. Andrew Bartlett (This used to be commit 58898092c1ce043f6d698db5065f372b79109e22) --- source4/lib/tdb/common/transaction.c | 86 ++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 43 deletions(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index 5ddeb69c47..a6fa8a7f66 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -182,7 +182,7 @@ static int transaction_read(struct tdb_context *tdb, tdb_off_t off, void *buf, return tdb->transaction->io_methods->tdb_read(tdb, off, buf, len, cv); fail: - TDB_LOG((tdb, 0, "transaction_read: failed at off=%d len=%d\n", off, len)); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_read: failed at off=%d len=%d\n", off, len)); tdb->ecode = TDB_ERR_IO; tdb->transaction->transaction_error = 1; return -1; @@ -306,7 +306,7 @@ static int transaction_write(struct tdb_context *tdb, tdb_off_t off, return 0; fail: - TDB_LOG((tdb, 0, "transaction_write: failed at off=%d len=%d\n", off, len)); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: failed at off=%d len=%d\n", off, len)); tdb->ecode = TDB_ERR_IO; tdb->transaction->transaction_error = 1; return -1; @@ -380,7 +380,7 @@ int tdb_transaction_start(struct tdb_context *tdb) { /* some sanity checks */ if (tdb->read_only || (tdb->flags & TDB_INTERNAL) || tdb->traverse_read) { - TDB_LOG((tdb, 0, "tdb_transaction_start: cannot start a transaction on a read-only or internal db\n")); + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: cannot start a transaction on a read-only or internal db\n")); tdb->ecode = TDB_ERR_EINVAL; return -1; } @@ -388,7 +388,7 @@ int tdb_transaction_start(struct tdb_context *tdb) /* cope with nested tdb_transaction_start() calls */ if (tdb->transaction != NULL) { tdb->transaction->nesting++; - TDB_LOG((tdb, 0, "tdb_transaction_start: nesting %d\n", + TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_start: nesting %d\n", tdb->transaction->nesting)); return 0; } @@ -397,7 +397,7 @@ int tdb_transaction_start(struct tdb_context *tdb) /* the caller must not have any locks when starting a transaction as otherwise we'll be screwed by lack of nested locks in posix */ - TDB_LOG((tdb, 0, "tdb_transaction_start: cannot start a transaction with locks held\n")); + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: cannot start a transaction with locks held\n")); tdb->ecode = TDB_ERR_LOCK; return -1; } @@ -406,7 +406,7 @@ int tdb_transaction_start(struct tdb_context *tdb) /* you cannot use transactions inside a traverse (although you can use traverse inside a transaction) as otherwise you can end up with deadlock */ - TDB_LOG((tdb, 0, "tdb_transaction_start: cannot start a transaction within a traverse\n")); + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: cannot start a transaction within a traverse\n")); tdb->ecode = TDB_ERR_LOCK; return -1; } @@ -421,7 +421,7 @@ int tdb_transaction_start(struct tdb_context *tdb) discussed with Volker, there are a number of ways we could make this async, which we will probably do in the future */ if (tdb_brlock_len(tdb, TRANSACTION_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_start: failed to get transaction lock\n")); + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: failed to get transaction lock\n")); tdb->ecode = TDB_ERR_LOCK; SAFE_FREE(tdb->transaction); return -1; @@ -430,7 +430,7 @@ int tdb_transaction_start(struct tdb_context *tdb) /* get a read lock from the freelist to the end of file. This is upgraded to a write lock during the commit */ if (tdb_brlock_len(tdb, FREELIST_TOP, F_RDLCK, F_SETLKW, 0, 0) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_start: failed to get hash locks\n")); + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: failed to get hash locks\n")); tdb->ecode = TDB_ERR_LOCK; goto fail; } @@ -444,7 +444,7 @@ int tdb_transaction_start(struct tdb_context *tdb) } if (tdb->methods->tdb_read(tdb, FREELIST_TOP, tdb->transaction->hash_heads, TDB_HASHTABLE_SIZE(tdb), 0) != 0) { - TDB_LOG((tdb, 0, "tdb_transaction_start: failed to read hash heads\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_start: failed to read hash heads\n")); tdb->ecode = TDB_ERR_IO; goto fail; } @@ -463,7 +463,7 @@ int tdb_transaction_start(struct tdb_context *tdb) transaction linked list due to hash table updates */ if (transaction_write(tdb, FREELIST_TOP, tdb->transaction->hash_heads, TDB_HASHTABLE_SIZE(tdb)) != 0) { - TDB_LOG((tdb, 0, "tdb_transaction_start: failed to prime hash table\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_start: failed to prime hash table\n")); tdb->ecode = TDB_ERR_IO; goto fail; } @@ -485,7 +485,7 @@ fail: int tdb_transaction_cancel(struct tdb_context *tdb) { if (tdb->transaction == NULL) { - TDB_LOG((tdb, 0, "tdb_transaction_cancel: no transaction\n")); + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_cancel: no transaction\n")); return -1; } @@ -535,7 +535,7 @@ static int transaction_sync(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t { if (fsync(tdb->fd) != 0) { tdb->ecode = TDB_ERR_IO; - TDB_LOG((tdb, 0, "tdb_transaction: fsync failed\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction: fsync failed\n")); return -1; } #ifdef MS_SYNC @@ -544,7 +544,7 @@ static int transaction_sync(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t if (msync(moffset + (char *)tdb->map_ptr, length + (offset - moffset), MS_SYNC) != 0) { tdb->ecode = TDB_ERR_IO; - TDB_LOG((tdb, 0, "tdb_transaction: msync failed - %s\n", + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction: msync failed - %s\n", strerror(errno))); return -1; } @@ -587,7 +587,7 @@ static int tdb_recovery_allocate(struct tdb_context *tdb, tdb_off_t recovery_head; if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) { - TDB_LOG((tdb, 0, "tdb_recovery_allocate: failed to read recovery head\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to read recovery head\n")); return -1; } @@ -595,7 +595,7 @@ static int tdb_recovery_allocate(struct tdb_context *tdb, if (recovery_head != 0 && methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) { - TDB_LOG((tdb, 0, "tdb_recovery_allocate: failed to read recovery record\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to read recovery record\n")); return -1; } @@ -615,7 +615,7 @@ static int tdb_recovery_allocate(struct tdb_context *tdb, the transaction) */ if (recovery_head != 0) { if (tdb_free(tdb, recovery_head, &rec) == -1) { - TDB_LOG((tdb, 0, "tdb_recovery_allocate: failed to free previous recovery area\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to free previous recovery area\n")); return -1; } } @@ -631,7 +631,7 @@ static int tdb_recovery_allocate(struct tdb_context *tdb, if (methods->tdb_expand_file(tdb, tdb->transaction->old_map_size, (tdb->map_size - tdb->transaction->old_map_size) + sizeof(rec) + *recovery_max_size) == -1) { - TDB_LOG((tdb, 0, "tdb_recovery_allocate: failed to create recovery area\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to create recovery area\n")); return -1; } @@ -647,7 +647,7 @@ static int tdb_recovery_allocate(struct tdb_context *tdb, CONVERT(recovery_head); if (methods->tdb_write(tdb, TDB_RECOVERY_HEAD, &recovery_head, sizeof(tdb_off_t)) == -1) { - TDB_LOG((tdb, 0, "tdb_recovery_allocate: failed to write recovery head\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to write recovery head\n")); return -1; } @@ -701,7 +701,7 @@ static int transaction_setup_recovery(struct tdb_context *tdb, continue; } if (el->offset + el->length > tdb->transaction->old_map_size) { - TDB_LOG((tdb, 0, "tdb_transaction_setup_recovery: transaction data over new region boundary\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_setup_recovery: transaction data over new region boundary\n")); free(data); tdb->ecode = TDB_ERR_CORRUPT; return -1; @@ -729,7 +729,7 @@ static int transaction_setup_recovery(struct tdb_context *tdb, /* write the recovery data to the recovery area */ if (methods->tdb_write(tdb, recovery_offset, data, sizeof(*rec) + recovery_size) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_setup_recovery: failed to write recovery data\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_setup_recovery: failed to write recovery data\n")); free(data); tdb->ecode = TDB_ERR_IO; return -1; @@ -751,7 +751,7 @@ static int transaction_setup_recovery(struct tdb_context *tdb, *magic_offset = recovery_offset + offsetof(struct list_struct, magic); if (methods->tdb_write(tdb, *magic_offset, &magic, sizeof(magic)) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_setup_recovery: failed to write recovery magic\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_setup_recovery: failed to write recovery magic\n")); tdb->ecode = TDB_ERR_IO; return -1; } @@ -774,14 +774,14 @@ int tdb_transaction_commit(struct tdb_context *tdb) u32 zero = 0; if (tdb->transaction == NULL) { - TDB_LOG((tdb, 0, "tdb_transaction_commit: no transaction\n")); + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: no transaction\n")); return -1; } if (tdb->transaction->transaction_error) { tdb->ecode = TDB_ERR_IO; tdb_transaction_cancel(tdb); - TDB_LOG((tdb, 0, "tdb_transaction_commit: transaction error pending\n")); + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: transaction error pending\n")); return -1; } @@ -802,14 +802,14 @@ int tdb_transaction_commit(struct tdb_context *tdb) nested their locks properly, so fail the transaction */ if (tdb->num_locks) { tdb->ecode = TDB_ERR_LOCK; - TDB_LOG((tdb, 0, "tdb_transaction_commit: locks pending on commit\n")); + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: locks pending on commit\n")); tdb_transaction_cancel(tdb); return -1; } /* upgrade the main transaction lock region to a write lock */ if (tdb_brlock_upgrade(tdb, FREELIST_TOP, 0) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_start: failed to upgrade hash locks\n")); + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: failed to upgrade hash locks\n")); tdb->ecode = TDB_ERR_LOCK; tdb_transaction_cancel(tdb); return -1; @@ -818,7 +818,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) /* get the global lock - this prevents new users attaching to the database during the commit */ if (tdb_brlock_len(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_commit: failed to get global lock\n")); + TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: failed to get global lock\n")); tdb->ecode = TDB_ERR_LOCK; tdb_transaction_cancel(tdb); return -1; @@ -827,7 +827,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) if (!(tdb->flags & TDB_NOSYNC)) { /* write the recovery data to the end of the file */ if (transaction_setup_recovery(tdb, &magic_offset) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_commit: failed to setup recovery data\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: failed to setup recovery data\n")); tdb_brlock_len(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); tdb_transaction_cancel(tdb); return -1; @@ -840,7 +840,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) tdb->map_size - tdb->transaction->old_map_size) == -1) { tdb->ecode = TDB_ERR_IO; - TDB_LOG((tdb, 0, "tdb_transaction_commit: expansion failed\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: expansion failed\n")); tdb_brlock_len(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); tdb_transaction_cancel(tdb); return -1; @@ -854,7 +854,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) struct tdb_transaction_el *el = tdb->transaction->elements; if (methods->tdb_write(tdb, el->offset, el->data, el->length) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_commit: write failed during commit\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed during commit\n")); /* we've overwritten part of the data and possibly expanded the file, so we need to @@ -865,7 +865,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) tdb_transaction_cancel(tdb); tdb_brlock_len(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); - TDB_LOG((tdb, 0, "tdb_transaction_commit: write failed\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed\n")); return -1; } tdb->transaction->elements = el->next; @@ -881,7 +881,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) /* remove the recovery marker */ if (methods->tdb_write(tdb, magic_offset, &zero, 4) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_commit: failed to remove recovery magic\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: failed to remove recovery magic\n")); return -1; } @@ -929,7 +929,7 @@ int tdb_transaction_recover(struct tdb_context *tdb) /* find the recovery area */ if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to read recovery head\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to read recovery head\n")); tdb->ecode = TDB_ERR_IO; return -1; } @@ -942,7 +942,7 @@ int tdb_transaction_recover(struct tdb_context *tdb) /* read the recovery record */ if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to read recovery record\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to read recovery record\n")); tdb->ecode = TDB_ERR_IO; return -1; } @@ -953,7 +953,7 @@ int tdb_transaction_recover(struct tdb_context *tdb) } if (tdb->read_only) { - TDB_LOG((tdb, 0, "tdb_transaction_recover: attempt to recover read only database\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: attempt to recover read only database\n")); tdb->ecode = TDB_ERR_CORRUPT; return -1; } @@ -962,7 +962,7 @@ int tdb_transaction_recover(struct tdb_context *tdb) data = malloc(rec.data_len); if (data == NULL) { - TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to allocate recovery data\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to allocate recovery data\n")); tdb->ecode = TDB_ERR_OOM; return -1; } @@ -970,7 +970,7 @@ int tdb_transaction_recover(struct tdb_context *tdb) /* read the full recovery data */ if (tdb->methods->tdb_read(tdb, recovery_head + sizeof(rec), data, rec.data_len, 0) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to read recovery data\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to read recovery data\n")); tdb->ecode = TDB_ERR_IO; return -1; } @@ -987,7 +987,7 @@ int tdb_transaction_recover(struct tdb_context *tdb) if (tdb->methods->tdb_write(tdb, ofs, p+8, len) == -1) { free(data); - TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to recover %d bytes at offset %d\n", len, ofs)); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to recover %d bytes at offset %d\n", len, ofs)); tdb->ecode = TDB_ERR_IO; return -1; } @@ -997,7 +997,7 @@ int tdb_transaction_recover(struct tdb_context *tdb) free(data); if (transaction_sync(tdb, 0, tdb->map_size) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to sync recovery\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to sync recovery\n")); tdb->ecode = TDB_ERR_IO; return -1; } @@ -1005,7 +1005,7 @@ int tdb_transaction_recover(struct tdb_context *tdb) /* if the recovery area is after the recovered eof then remove it */ if (recovery_eof <= recovery_head) { if (tdb_ofs_write(tdb, TDB_RECOVERY_HEAD, &zero) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to remove recovery head\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to remove recovery head\n")); tdb->ecode = TDB_ERR_IO; return -1; } @@ -1014,7 +1014,7 @@ int tdb_transaction_recover(struct tdb_context *tdb) /* remove the recovery magic */ if (tdb_ofs_write(tdb, recovery_head + offsetof(struct list_struct, magic), &zero) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to remove recovery magic\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to remove recovery magic\n")); tdb->ecode = TDB_ERR_IO; return -1; } @@ -1022,7 +1022,7 @@ int tdb_transaction_recover(struct tdb_context *tdb) /* reduce the file size to the old size */ tdb_munmap(tdb); if (ftruncate(tdb->fd, recovery_eof) != 0) { - TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to reduce to recovery size\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to reduce to recovery size\n")); tdb->ecode = TDB_ERR_IO; return -1; } @@ -1030,12 +1030,12 @@ int tdb_transaction_recover(struct tdb_context *tdb) tdb_mmap(tdb); if (transaction_sync(tdb, 0, recovery_eof) == -1) { - TDB_LOG((tdb, 0, "tdb_transaction_recover: failed to sync2 recovery\n")); + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to sync2 recovery\n")); tdb->ecode = TDB_ERR_IO; return -1; } - TDB_LOG((tdb, 0, "tdb_transaction_recover: recovered %d byte database\n", + TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_recover: recovered %d byte database\n", recovery_eof)); /* all done */ -- cgit From cba142f1ae71b03266210e254c251683846d7fd7 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 18 Oct 2006 21:41:59 +0000 Subject: r19401: make tdb_lockall() much more efficient, and add a tdb_lockall_read() call which does a read lock on all chains. These will be used to make ldb searches more efficient (This used to be commit de664ec1f8cf179f1d650563272c0de3f7636e2b) --- source4/lib/tdb/common/transaction.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index a6fa8a7f66..526aef7d03 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -357,7 +357,7 @@ static int transaction_expand_file(struct tdb_context *tdb, tdb_off_t size, brlock during a transaction - ignore them */ int transaction_brlock(struct tdb_context *tdb, tdb_off_t offset, - int rw_type, int lck_type, int probe) + int rw_type, int lck_type, int probe, size_t len) { return 0; } @@ -393,7 +393,7 @@ int tdb_transaction_start(struct tdb_context *tdb) return 0; } - if (tdb->num_locks != 0) { + if (tdb->num_locks != 0 || tdb->global_lock.count) { /* the caller must not have any locks when starting a transaction as otherwise we'll be screwed by lack of nested locks in posix */ @@ -420,7 +420,7 @@ int tdb_transaction_start(struct tdb_context *tdb) /* get the transaction write lock. This is a blocking lock. As discussed with Volker, there are a number of ways we could make this async, which we will probably do in the future */ - if (tdb_brlock_len(tdb, TRANSACTION_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) { + if (tdb_brlock(tdb, TRANSACTION_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: failed to get transaction lock\n")); tdb->ecode = TDB_ERR_LOCK; SAFE_FREE(tdb->transaction); @@ -429,7 +429,7 @@ int tdb_transaction_start(struct tdb_context *tdb) /* get a read lock from the freelist to the end of file. This is upgraded to a write lock during the commit */ - if (tdb_brlock_len(tdb, FREELIST_TOP, F_RDLCK, F_SETLKW, 0, 0) == -1) { + if (tdb_brlock(tdb, FREELIST_TOP, F_RDLCK, F_SETLKW, 0, 0) == -1) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: failed to get hash locks\n")); tdb->ecode = TDB_ERR_LOCK; goto fail; @@ -471,8 +471,8 @@ int tdb_transaction_start(struct tdb_context *tdb) return 0; fail: - tdb_brlock_len(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0); - tdb_brlock_len(tdb, TRANSACTION_LOCK, F_UNLCK, F_SETLKW, 0, 1); + tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0); + tdb_brlock(tdb, TRANSACTION_LOCK, F_UNLCK, F_SETLKW, 0, 1); SAFE_FREE(tdb->transaction->hash_heads); SAFE_FREE(tdb->transaction); return -1; @@ -505,12 +505,18 @@ int tdb_transaction_cancel(struct tdb_context *tdb) free(el); } + /* remove any global lock created during the transaction */ + if (tdb->global_lock.count != 0) { + tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 4*tdb->header.hash_size); + tdb->global_lock.count = 0; + } + /* remove any locks created during the transaction */ if (tdb->num_locks != 0) { int h; for (h=0;hheader.hash_size+1;h++) { if (tdb->locked[h].count != 0) { - tdb_brlock_len(tdb,FREELIST_TOP+4*h,F_UNLCK,F_SETLKW, 0, 1); + tdb_brlock(tdb,FREELIST_TOP+4*h,F_UNLCK,F_SETLKW, 0, 1); tdb->locked[h].count = 0; } } @@ -520,8 +526,8 @@ int tdb_transaction_cancel(struct tdb_context *tdb) /* restore the normal io methods */ tdb->methods = tdb->transaction->io_methods; - tdb_brlock_len(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0); - tdb_brlock_len(tdb, TRANSACTION_LOCK, F_UNLCK, F_SETLKW, 0, 1); + tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0); + tdb_brlock(tdb, TRANSACTION_LOCK, F_UNLCK, F_SETLKW, 0, 1); SAFE_FREE(tdb->transaction->hash_heads); SAFE_FREE(tdb->transaction); @@ -800,7 +806,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) /* if there are any locks pending then the caller has not nested their locks properly, so fail the transaction */ - if (tdb->num_locks) { + if (tdb->num_locks || tdb->global_lock.count) { tdb->ecode = TDB_ERR_LOCK; TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: locks pending on commit\n")); tdb_transaction_cancel(tdb); @@ -817,7 +823,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) /* get the global lock - this prevents new users attaching to the database during the commit */ - if (tdb_brlock_len(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) { + if (tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: failed to get global lock\n")); tdb->ecode = TDB_ERR_LOCK; tdb_transaction_cancel(tdb); @@ -828,7 +834,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) /* write the recovery data to the end of the file */ if (transaction_setup_recovery(tdb, &magic_offset) == -1) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: failed to setup recovery data\n")); - tdb_brlock_len(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); + tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); tdb_transaction_cancel(tdb); return -1; } @@ -841,7 +847,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) tdb->transaction->old_map_size) == -1) { tdb->ecode = TDB_ERR_IO; TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: expansion failed\n")); - tdb_brlock_len(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); + tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); tdb_transaction_cancel(tdb); return -1; } @@ -863,7 +869,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) tdb_transaction_recover(tdb); tdb_transaction_cancel(tdb); - tdb_brlock_len(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); + tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed\n")); return -1; @@ -891,7 +897,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) } } - tdb_brlock_len(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); + tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1); /* TODO: maybe write to some dummy hdr field, or write to magic -- cgit From 118c064a473562274bff8fb47f37437db904b8fb Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 20 Oct 2006 08:06:14 +0000 Subject: r19423: merge some tdb changes from SAMBA_3_0 to SAMBA_4_0 this is in preparation of a merge in the other direction (This used to be commit db3211079fd594aa03c3b9bb3eb6ad86bdd32837) --- source4/lib/tdb/common/transaction.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index 526aef7d03..7dff9a95e3 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -258,7 +258,8 @@ static int transaction_write(struct tdb_context *tdb, tdb_off_t off, off > tdb->transaction->old_map_size)) { unsigned char *data = best_el->data; el = best_el; - el->data = realloc(el->data, el->length + len); + el->data = (unsigned char *)realloc(el->data, + el->length + len); if (el->data == NULL) { tdb->ecode = TDB_ERR_OOM; tdb->transaction->transaction_error = 1; @@ -275,7 +276,7 @@ static int transaction_write(struct tdb_context *tdb, tdb_off_t off, } /* add a new entry at the end of the list */ - el = malloc(sizeof(*el)); + el = (struct tdb_transaction_el *)malloc(sizeof(*el)); if (el == NULL) { tdb->ecode = TDB_ERR_OOM; tdb->transaction->transaction_error = 1; @@ -285,7 +286,7 @@ static int transaction_write(struct tdb_context *tdb, tdb_off_t off, el->prev = tdb->transaction->elements_last; el->offset = off; el->length = len; - el->data = malloc(len); + el->data = (unsigned char *)malloc(len); if (el->data == NULL) { free(el); tdb->ecode = TDB_ERR_OOM; @@ -411,7 +412,8 @@ int tdb_transaction_start(struct tdb_context *tdb) return -1; } - tdb->transaction = calloc(sizeof(struct tdb_transaction), 1); + tdb->transaction = (struct tdb_transaction *) + calloc(sizeof(struct tdb_transaction), 1); if (tdb->transaction == NULL) { tdb->ecode = TDB_ERR_OOM; return -1; @@ -437,7 +439,8 @@ int tdb_transaction_start(struct tdb_context *tdb) /* setup a copy of the hash table heads so the hash scan in traverse can be fast */ - tdb->transaction->hash_heads = calloc(tdb->header.hash_size+1, sizeof(tdb_off_t)); + tdb->transaction->hash_heads = (u32 *) + calloc(tdb->header.hash_size+1, sizeof(u32)); if (tdb->transaction->hash_heads == NULL) { tdb->ecode = TDB_ERR_OOM; goto fail; @@ -684,7 +687,7 @@ static int transaction_setup_recovery(struct tdb_context *tdb, return -1; } - data = malloc(recovery_size + sizeof(*rec)); + data = (unsigned char *)malloc(recovery_size + sizeof(*rec)); if (data == NULL) { tdb->ecode = TDB_ERR_OOM; return -1; @@ -966,7 +969,7 @@ int tdb_transaction_recover(struct tdb_context *tdb) recovery_eof = rec.key_len; - data = malloc(rec.data_len); + data = (unsigned char *)malloc(rec.data_len); if (data == NULL) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to allocate recovery data\n")); tdb->ecode = TDB_ERR_OOM; -- cgit From 2acd8a9b3e1bd3885c5865e4b44ac3b4d92655d3 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Mon, 12 Feb 2007 23:16:02 +0000 Subject: r21303: As discussed on samba-technical: Change the static array for the in-memory mirrors of the hash chain locks to a dynamically allocated one. Jeremy, I count on you to revert it if the build farm freaks out, it's after midnight here :-) Volker (This used to be commit 7b5db2e472c7e27231fa432d3930789e708abd09) --- source4/lib/tdb/common/transaction.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index 7dff9a95e3..5c94eb0afe 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -516,12 +516,10 @@ int tdb_transaction_cancel(struct tdb_context *tdb) /* remove any locks created during the transaction */ if (tdb->num_locks != 0) { - int h; - for (h=0;hheader.hash_size+1;h++) { - if (tdb->locked[h].count != 0) { - tdb_brlock(tdb,FREELIST_TOP+4*h,F_UNLCK,F_SETLKW, 0, 1); - tdb->locked[h].count = 0; - } + int i; + for (i=0;inum_lockrecs;i++) { + tdb_brlock(tdb,FREELIST_TOP+4*tdb->lockrecs[i].list, + F_UNLCK,F_SETLKW, 0, 1); } tdb->num_locks = 0; } -- cgit From 769efdf048d80c4081487d555649de0f31738dd1 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 2 Apr 2007 18:56:25 +0000 Subject: r22041: merge trivial changes from samba3 metze (This used to be commit 902a76ca705f07c61f86a9ef1346583ba9d3157d) --- source4/lib/tdb/common/transaction.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index 5c94eb0afe..eb296206f9 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -39,7 +39,7 @@ by the header. This removes the need for extra journal files as used by some other databases - - dymacially allocated the transaction recover record, re-using it + - dynamically allocated the transaction recover record, re-using it for subsequent transactions. If a larger record is needed then tdb_free() the old record to place it on the normal tdb freelist before allocating the new record @@ -88,6 +88,12 @@ */ +struct tdb_transaction_el { + struct tdb_transaction_el *next, *prev; + tdb_off_t offset; + tdb_len_t length; + unsigned char *data; +}; /* hold the context of any current transaction @@ -105,12 +111,7 @@ struct tdb_transaction { ordered, with first element at the front of the list. It needs to be doubly linked as the read/write traversals need to be backwards, while the commit needs to be forwards */ - struct tdb_transaction_el { - struct tdb_transaction_el *next, *prev; - tdb_off_t offset; - tdb_len_t length; - unsigned char *data; - } *elements, *elements_last; + struct tdb_transaction_el *elements, *elements_last; /* non-zero when an internal transaction error has occurred. All write operations will then fail until the -- cgit From 7196090bfd73ab64fd0095ce3ed612bc7978a810 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Thu, 5 Apr 2007 03:22:08 +0000 Subject: r22081: transaction_brlock() should be static thanks to Ted T'so for pointing this out (This used to be commit 6921f2f7093bbb6c236f16947072026303e58439) --- source4/lib/tdb/common/transaction.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index eb296206f9..a592c66039 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -358,8 +358,8 @@ static int transaction_expand_file(struct tdb_context *tdb, tdb_off_t size, /* brlock during a transaction - ignore them */ -int transaction_brlock(struct tdb_context *tdb, tdb_off_t offset, - int rw_type, int lck_type, int probe, size_t len) +static int transaction_brlock(struct tdb_context *tdb, tdb_off_t offset, + int rw_type, int lck_type, int probe, size_t len) { return 0; } -- cgit From d9916ac98c6afc6e1ac60ba762c003c2315ad47e Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 11 Apr 2007 05:45:02 +0000 Subject: r22165: merge transaction fix from samba3 (This used to be commit ff78be0be4ef67ed25bfb837bd8a7bf4105367fb) --- source4/lib/tdb/common/transaction.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index a592c66039..0a609af521 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -523,6 +523,8 @@ int tdb_transaction_cancel(struct tdb_context *tdb) F_UNLCK,F_SETLKW, 0, 1); } tdb->num_locks = 0; + tdb->num_lockrecs = 0; + SAFE_FREE(tdb->lockrecs); } /* restore the normal io methods */ -- cgit From dd64de3d5beeeca870a94f4fb8bbc4d094bbe989 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 30 May 2007 08:15:49 +0000 Subject: r23238: merged transaction lock changes from ctdb this ensures that having the global lock also implies the transaction lock (This used to be commit 9dbb2633d7781fcc5d15b175ef36bfda5eb199bb) --- source4/lib/tdb/common/transaction.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index 0a609af521..5e2d4376ed 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -423,9 +423,7 @@ int tdb_transaction_start(struct tdb_context *tdb) /* get the transaction write lock. This is a blocking lock. As discussed with Volker, there are a number of ways we could make this async, which we will probably do in the future */ - if (tdb_brlock(tdb, TRANSACTION_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) { - TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: failed to get transaction lock\n")); - tdb->ecode = TDB_ERR_LOCK; + if (tdb_transaction_lock(tdb, F_WRLCK) == -1) { SAFE_FREE(tdb->transaction); return -1; } @@ -469,6 +467,7 @@ int tdb_transaction_start(struct tdb_context *tdb) TDB_HASHTABLE_SIZE(tdb)) != 0) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_start: failed to prime hash table\n")); tdb->ecode = TDB_ERR_IO; + tdb->methods = tdb->transaction->io_methods; goto fail; } @@ -476,7 +475,7 @@ int tdb_transaction_start(struct tdb_context *tdb) fail: tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0); - tdb_brlock(tdb, TRANSACTION_LOCK, F_UNLCK, F_SETLKW, 0, 1); + tdb_transaction_unlock(tdb); SAFE_FREE(tdb->transaction->hash_heads); SAFE_FREE(tdb->transaction); return -1; @@ -531,7 +530,7 @@ int tdb_transaction_cancel(struct tdb_context *tdb) tdb->methods = tdb->transaction->io_methods; tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0); - tdb_brlock(tdb, TRANSACTION_LOCK, F_UNLCK, F_SETLKW, 0, 1); + tdb_transaction_unlock(tdb); SAFE_FREE(tdb->transaction->hash_heads); SAFE_FREE(tdb->transaction); -- cgit From b8d69a7ea2505b706ff7c74d7c97bc89d82dfa07 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Tue, 10 Jul 2007 02:46:15 +0000 Subject: r23795: more v2->v3 conversion (This used to be commit 84b468b2f8f2dffda89593f816e8bc6a8b6d42ac) --- source4/lib/tdb/common/transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index 5e2d4376ed..f8a788f857 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -12,7 +12,7 @@ This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. + version 3 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of -- cgit From 6c973f4e8ccbcb6c9275f8a54e26abb19df7e15a Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Tue, 10 Jul 2007 03:42:26 +0000 Subject: r23798: updated old Temple Place FSF addresses to new URL (This used to be commit 40c0919aaa9c1b14bbaebb95ecce53eb0380fdbb) --- source4/lib/tdb/common/transaction.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index f8a788f857..64a5b3f3f2 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -20,8 +20,7 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + License along with this library; if not, see . */ #include "tdb_private.h" -- cgit From f3e13632813c543583fe1d04203825743aa99111 Mon Sep 17 00:00:00 2001 From: Jelmer Vernooij Date: Sat, 11 Aug 2007 21:19:24 +0000 Subject: r24336: Use standard data type uint32_t rather than tdb-specific u32. (This used to be commit f90a698387c53508862eb6359bd4d1fba1d2b4b0) --- source4/lib/tdb/common/transaction.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index 64a5b3f3f2..7eaacf7a16 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -100,7 +100,7 @@ struct tdb_transaction_el { struct tdb_transaction { /* we keep a mirrored copy of the tdb hash heads here so tdb_next_hash_chain() can operate efficiently */ - u32 *hash_heads; + uint32_t *hash_heads; /* the original io methods - used to do IOs to the real db */ const struct tdb_methods *io_methods; @@ -205,7 +205,7 @@ static int transaction_write(struct tdb_context *tdb, tdb_off_t off, hash heads */ if (len == sizeof(tdb_off_t) && off >= FREELIST_TOP && off < FREELIST_TOP+TDB_HASHTABLE_SIZE(tdb)) { - u32 chain = (off-FREELIST_TOP) / sizeof(tdb_off_t); + uint32_t chain = (off-FREELIST_TOP) / sizeof(tdb_off_t); memcpy(&tdb->transaction->hash_heads[chain], buf, len); } @@ -316,9 +316,9 @@ fail: /* accelerated hash chain head search, using the cached hash heads */ -static void transaction_next_hash_chain(struct tdb_context *tdb, u32 *chain) +static void transaction_next_hash_chain(struct tdb_context *tdb, uint32_t *chain) { - u32 h = *chain; + uint32_t h = *chain; for (;h < tdb->header.hash_size;h++) { /* the +1 takes account of the freelist */ if (0 != tdb->transaction->hash_heads[h+1]) { @@ -437,8 +437,8 @@ int tdb_transaction_start(struct tdb_context *tdb) /* setup a copy of the hash table heads so the hash scan in traverse can be fast */ - tdb->transaction->hash_heads = (u32 *) - calloc(tdb->header.hash_size+1, sizeof(u32)); + tdb->transaction->hash_heads = (uint32_t *) + calloc(tdb->header.hash_size+1, sizeof(uint32_t)); if (tdb->transaction->hash_heads == NULL) { tdb->ecode = TDB_ERR_OOM; goto fail; @@ -570,7 +570,7 @@ static tdb_len_t tdb_recovery_size(struct tdb_context *tdb) struct tdb_transaction_el *el; tdb_len_t recovery_size = 0; - recovery_size = sizeof(u32); + recovery_size = sizeof(uint32_t); for (el=tdb->transaction->elements;el;el=el->next) { if (el->offset >= tdb->transaction->old_map_size) { continue; @@ -676,7 +676,7 @@ static int transaction_setup_recovery(struct tdb_context *tdb, struct list_struct *rec; tdb_off_t recovery_offset, recovery_max_size; tdb_off_t old_map_size = tdb->transaction->old_map_size; - u32 magic, tailer; + uint32_t magic, tailer; /* check that the recovery area has enough space @@ -779,7 +779,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) { const struct tdb_methods *methods; tdb_off_t magic_offset = 0; - u32 zero = 0; + uint32_t zero = 0; if (tdb->transaction == NULL) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: no transaction\n")); @@ -932,7 +932,7 @@ int tdb_transaction_recover(struct tdb_context *tdb) { tdb_off_t recovery_head, recovery_eof; unsigned char *data, *p; - u32 zero = 0; + uint32_t zero = 0; struct list_struct rec; /* find the recovery area */ @@ -986,7 +986,7 @@ int tdb_transaction_recover(struct tdb_context *tdb) /* recover the file data */ p = data; while (p+8 < data + rec.data_len) { - u32 ofs, len; + uint32_t ofs, len; if (DOCONV()) { tdb_convert(p, 8); } -- cgit From 9170998427ebbb7abfd9b482fb6e0d051bca5205 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Tue, 15 Jan 2008 14:05:47 +1100 Subject: merged tdb from ctdb bzr tree (This used to be commit ed0c3a0f74c305b3b8554b05c3f97cf79db8296a) --- source4/lib/tdb/common/transaction.c | 408 ++++++++++++++++++++--------------- 1 file changed, 240 insertions(+), 168 deletions(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index 7eaacf7a16..0ecfb9b7ff 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -87,12 +87,6 @@ */ -struct tdb_transaction_el { - struct tdb_transaction_el *next, *prev; - tdb_off_t offset; - tdb_len_t length; - unsigned char *data; -}; /* hold the context of any current transaction @@ -105,12 +99,12 @@ struct tdb_transaction { /* the original io methods - used to do IOs to the real db */ const struct tdb_methods *io_methods; - /* the list of transaction elements. We use a doubly linked - list with a last pointer to allow us to keep the list - ordered, with first element at the front of the list. It - needs to be doubly linked as the read/write traversals need - to be backwards, while the commit needs to be forwards */ - struct tdb_transaction_el *elements, *elements_last; + /* the list of transaction blocks. When a block is first + written to, it gets created in this list */ + uint8_t **blocks; + uint32_t num_blocks; + uint32_t block_size; /* bytes in each block */ + uint32_t last_block_size; /* number of valid bytes in the last block */ /* non-zero when an internal transaction error has occurred. All write operations will then fail until the @@ -134,52 +128,48 @@ struct tdb_transaction { static int transaction_read(struct tdb_context *tdb, tdb_off_t off, void *buf, tdb_len_t len, int cv) { - struct tdb_transaction_el *el; - - /* we need to walk the list backwards to get the most recent data */ - for (el=tdb->transaction->elements_last;el;el=el->prev) { - tdb_len_t partial; + uint32_t blk; - if (off+len <= el->offset) { - continue; - } - if (off >= el->offset + el->length) { - continue; + /* break it down into block sized ops */ + while (len + (off % tdb->transaction->block_size) > tdb->transaction->block_size) { + tdb_len_t len2 = tdb->transaction->block_size - (off % tdb->transaction->block_size); + if (transaction_read(tdb, off, buf, len2, cv) != 0) { + return -1; } + len -= len2; + off += len2; + buf = (void *)(len2 + (char *)buf); + } - /* an overlapping read - needs to be split into up to - 2 reads and a memcpy */ - if (off < el->offset) { - partial = el->offset - off; - if (transaction_read(tdb, off, buf, partial, cv) != 0) { - goto fail; - } - len -= partial; - off += partial; - buf = (void *)(partial + (char *)buf); - } - if (off + len <= el->offset + el->length) { - partial = len; - } else { - partial = el->offset + el->length - off; - } - memcpy(buf, el->data + (off - el->offset), partial); - if (cv) { - tdb_convert(buf, len); - } - len -= partial; - off += partial; - buf = (void *)(partial + (char *)buf); - - if (len != 0 && transaction_read(tdb, off, buf, len, cv) != 0) { + if (len == 0) { + return 0; + } + + blk = off / tdb->transaction->block_size; + + /* see if we have it in the block list */ + if (tdb->transaction->num_blocks <= blk || + tdb->transaction->blocks[blk] == NULL) { + /* nope, do a real read */ + if (tdb->transaction->io_methods->tdb_read(tdb, off, buf, len, cv) != 0) { goto fail; } - return 0; } - /* its not in the transaction elements - do a real read */ - return tdb->transaction->io_methods->tdb_read(tdb, off, buf, len, cv); + /* it is in the block list. Now check for the last block */ + if (blk == tdb->transaction->num_blocks-1) { + if (len > tdb->transaction->last_block_size) { + goto fail; + } + } + + /* now copy it out of this block */ + memcpy(buf, tdb->transaction->blocks[blk] + (off % tdb->transaction->block_size), len); + if (cv) { + tdb_convert(buf, len); + } + return 0; fail: TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_read: failed at off=%d len=%d\n", off, len)); @@ -195,12 +185,8 @@ fail: static int transaction_write(struct tdb_context *tdb, tdb_off_t off, const void *buf, tdb_len_t len) { - struct tdb_transaction_el *el, *best_el=NULL; + uint32_t blk; - if (len == 0) { - return 0; - } - /* if the write is to a hash head, then update the transaction hash heads */ if (len == sizeof(tdb_off_t) && off >= FREELIST_TOP && @@ -209,110 +195,149 @@ static int transaction_write(struct tdb_context *tdb, tdb_off_t off, memcpy(&tdb->transaction->hash_heads[chain], buf, len); } - /* first see if we can replace an existing entry */ - for (el=tdb->transaction->elements_last;el;el=el->prev) { - tdb_len_t partial; - - if (best_el == NULL && off == el->offset+el->length) { - best_el = el; - } - - if (off+len <= el->offset) { - continue; + /* break it up into block sized chunks */ + while (len + (off % tdb->transaction->block_size) > tdb->transaction->block_size) { + tdb_len_t len2 = tdb->transaction->block_size - (off % tdb->transaction->block_size); + if (transaction_write(tdb, off, buf, len2) != 0) { + return -1; } - if (off >= el->offset + el->length) { - continue; + len -= len2; + off += len2; + if (buf != NULL) { + buf = (const void *)(len2 + (const char *)buf); } + } - /* an overlapping write - needs to be split into up to - 2 writes and a memcpy */ - if (off < el->offset) { - partial = el->offset - off; - if (transaction_write(tdb, off, buf, partial) != 0) { - goto fail; - } - len -= partial; - off += partial; - buf = (const void *)(partial + (const char *)buf); - } - if (off + len <= el->offset + el->length) { - partial = len; + if (len == 0) { + return 0; + } + + blk = off / tdb->transaction->block_size; + off = off % tdb->transaction->block_size; + + if (tdb->transaction->num_blocks <= blk) { + uint8_t **new_blocks; + /* expand the blocks array */ + if (tdb->transaction->blocks == NULL) { + new_blocks = malloc((blk+1)*sizeof(uint8_t *)); } else { - partial = el->offset + el->length - off; + new_blocks = realloc(tdb->transaction->blocks, (blk+1)*sizeof(uint8_t *)); } - memcpy(el->data + (off - el->offset), buf, partial); - len -= partial; - off += partial; - buf = (const void *)(partial + (const char *)buf); - - if (len != 0 && transaction_write(tdb, off, buf, len) != 0) { + if (new_blocks == NULL) { + tdb->ecode = TDB_ERR_OOM; goto fail; } - - return 0; + memset(&new_blocks[tdb->transaction->num_blocks], 0, + (1+(blk - tdb->transaction->num_blocks))*sizeof(uint8_t *)); + tdb->transaction->blocks = new_blocks; + tdb->transaction->num_blocks = blk+1; + tdb->transaction->last_block_size = 0; } - /* see if we can append the new entry to an existing entry */ - if (best_el && best_el->offset + best_el->length == off && - (off+len < tdb->transaction->old_map_size || - off > tdb->transaction->old_map_size)) { - unsigned char *data = best_el->data; - el = best_el; - el->data = (unsigned char *)realloc(el->data, - el->length + len); - if (el->data == NULL) { + /* allocate and fill a block? */ + if (tdb->transaction->blocks[blk] == NULL) { + tdb->transaction->blocks[blk] = (uint8_t *)calloc(tdb->transaction->block_size, 1); + if (tdb->transaction->blocks[blk] == NULL) { tdb->ecode = TDB_ERR_OOM; tdb->transaction->transaction_error = 1; - el->data = data; + return -1; + } + if (tdb->transaction->old_map_size > blk * tdb->transaction->block_size) { + tdb_len_t len2 = tdb->transaction->block_size; + if (len2 + (blk * tdb->transaction->block_size) > tdb->transaction->old_map_size) { + len2 = tdb->transaction->old_map_size - (blk * tdb->transaction->block_size); + } + if (tdb->transaction->io_methods->tdb_read(tdb, blk * tdb->transaction->block_size, + tdb->transaction->blocks[blk], + len2, 0) != 0) { + SAFE_FREE(tdb->transaction->blocks[blk]); + tdb->ecode = TDB_ERR_IO; + goto fail; + } + if (blk == tdb->transaction->num_blocks-1) { + tdb->transaction->last_block_size = len2; + } + } + } + + /* overwrite part of an existing block */ + if (buf == NULL) { + memset(tdb->transaction->blocks[blk] + off, 0, len); + } else { + memcpy(tdb->transaction->blocks[blk] + off, buf, len); + } + if (blk == tdb->transaction->num_blocks-1) { + if (len + off > tdb->transaction->last_block_size) { + tdb->transaction->last_block_size = len + off; + } + } + + return 0; + +fail: + TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: failed at off=%d len=%d\n", + (blk*tdb->transaction->block_size) + off, len)); + tdb->transaction->transaction_error = 1; + return -1; +} + + +/* + write while in a transaction - this varient never expands the transaction blocks, it only + updates existing blocks. This means it cannot change the recovery size +*/ +static int transaction_write_existing(struct tdb_context *tdb, tdb_off_t off, + const void *buf, tdb_len_t len) +{ + uint32_t blk; + + /* break it up into block sized chunks */ + while (len + (off % tdb->transaction->block_size) > tdb->transaction->block_size) { + tdb_len_t len2 = tdb->transaction->block_size - (off % tdb->transaction->block_size); + if (transaction_write_existing(tdb, off, buf, len2) != 0) { return -1; } - if (buf) { - memcpy(el->data + el->length, buf, len); - } else { - memset(el->data + el->length, TDB_PAD_BYTE, len); + len -= len2; + off += len2; + if (buf != NULL) { + buf = (const void *)(len2 + (const char *)buf); } - el->length += len; - return 0; } - /* add a new entry at the end of the list */ - el = (struct tdb_transaction_el *)malloc(sizeof(*el)); - if (el == NULL) { - tdb->ecode = TDB_ERR_OOM; - tdb->transaction->transaction_error = 1; - return -1; + if (len == 0) { + return 0; } - el->next = NULL; - el->prev = tdb->transaction->elements_last; - el->offset = off; - el->length = len; - el->data = (unsigned char *)malloc(len); - if (el->data == NULL) { - free(el); - tdb->ecode = TDB_ERR_OOM; - tdb->transaction->transaction_error = 1; - return -1; + + blk = off / tdb->transaction->block_size; + off = off % tdb->transaction->block_size; + + if (tdb->transaction->num_blocks <= blk || + tdb->transaction->blocks[blk] == NULL) { + return 0; } - if (buf) { - memcpy(el->data, buf, len); + + /* overwrite part of an existing block */ + if (buf == NULL) { + memset(tdb->transaction->blocks[blk] + off, 0, len); } else { - memset(el->data, TDB_PAD_BYTE, len); + memcpy(tdb->transaction->blocks[blk] + off, buf, len); } - if (el->prev) { - el->prev->next = el; - } else { - tdb->transaction->elements = el; + if (blk == tdb->transaction->num_blocks-1) { + if (len + off > tdb->transaction->last_block_size) { + tdb->transaction->last_block_size = len + off; + } } - tdb->transaction->elements_last = el; + return 0; fail: - TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: failed at off=%d len=%d\n", off, len)); - tdb->ecode = TDB_ERR_IO; + TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: failed at off=%d len=%d\n", + (blk*tdb->transaction->block_size) + off, len)); tdb->transaction->transaction_error = 1; return -1; } + /* accelerated hash chain head search, using the cached hash heads */ @@ -419,10 +444,14 @@ int tdb_transaction_start(struct tdb_context *tdb) return -1; } + /* a page at a time seems like a reasonable compromise between compactness and efficiency */ + tdb->transaction->block_size = tdb->page_size; + /* get the transaction write lock. This is a blocking lock. As discussed with Volker, there are a number of ways we could make this async, which we will probably do in the future */ if (tdb_transaction_lock(tdb, F_WRLCK) == -1) { + SAFE_FREE(tdb->transaction->blocks); SAFE_FREE(tdb->transaction); return -1; } @@ -460,21 +489,12 @@ int tdb_transaction_start(struct tdb_context *tdb) tdb->transaction->io_methods = tdb->methods; tdb->methods = &transaction_methods; - /* by calling this transaction write here, we ensure that we don't grow the - transaction linked list due to hash table updates */ - if (transaction_write(tdb, FREELIST_TOP, tdb->transaction->hash_heads, - TDB_HASHTABLE_SIZE(tdb)) != 0) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_start: failed to prime hash table\n")); - tdb->ecode = TDB_ERR_IO; - tdb->methods = tdb->transaction->io_methods; - goto fail; - } - return 0; fail: tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0); tdb_transaction_unlock(tdb); + SAFE_FREE(tdb->transaction->blocks); SAFE_FREE(tdb->transaction->hash_heads); SAFE_FREE(tdb->transaction); return -1; @@ -486,6 +506,8 @@ fail: */ int tdb_transaction_cancel(struct tdb_context *tdb) { + int i; + if (tdb->transaction == NULL) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_cancel: no transaction\n")); return -1; @@ -499,13 +521,13 @@ int tdb_transaction_cancel(struct tdb_context *tdb) tdb->map_size = tdb->transaction->old_map_size; - /* free all the transaction elements */ - while (tdb->transaction->elements) { - struct tdb_transaction_el *el = tdb->transaction->elements; - tdb->transaction->elements = el->next; - free(el->data); - free(el); + /* free all the transaction blocks */ + for (i=0;itransaction->num_blocks;i++) { + if (tdb->transaction->blocks[i] != NULL) { + free(tdb->transaction->blocks[i]); + } } + SAFE_FREE(tdb->transaction->blocks); /* remove any global lock created during the transaction */ if (tdb->global_lock.count != 0) { @@ -515,7 +537,6 @@ int tdb_transaction_cancel(struct tdb_context *tdb) /* remove any locks created during the transaction */ if (tdb->num_locks != 0) { - int i; for (i=0;inum_lockrecs;i++) { tdb_brlock(tdb,FREELIST_TOP+4*tdb->lockrecs[i].list, F_UNLCK,F_SETLKW, 0, 1); @@ -567,16 +588,24 @@ static int transaction_sync(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t */ static tdb_len_t tdb_recovery_size(struct tdb_context *tdb) { - struct tdb_transaction_el *el; tdb_len_t recovery_size = 0; + int i; recovery_size = sizeof(uint32_t); - for (el=tdb->transaction->elements;el;el=el->next) { - if (el->offset >= tdb->transaction->old_map_size) { + for (i=0;itransaction->num_blocks;i++) { + if (i * tdb->transaction->block_size >= tdb->transaction->old_map_size) { + break; + } + if (tdb->transaction->blocks[i] == NULL) { continue; } - recovery_size += 2*sizeof(tdb_off_t) + el->length; - } + recovery_size += 2*sizeof(tdb_off_t); + if (i == tdb->transaction->num_blocks-1) { + recovery_size += tdb->transaction->last_block_size; + } else { + recovery_size += tdb->transaction->block_size; + } + } return recovery_size; } @@ -658,6 +687,10 @@ static int tdb_recovery_allocate(struct tdb_context *tdb, TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to write recovery head\n")); return -1; } + if (transaction_write_existing(tdb, TDB_RECOVERY_HEAD, &recovery_head, sizeof(tdb_off_t)) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to write recovery head\n")); + return -1; + } return 0; } @@ -669,7 +702,6 @@ static int tdb_recovery_allocate(struct tdb_context *tdb, static int transaction_setup_recovery(struct tdb_context *tdb, tdb_off_t *magic_offset) { - struct tdb_transaction_el *el; tdb_len_t recovery_size; unsigned char *data, *p; const struct tdb_methods *methods = tdb->transaction->io_methods; @@ -677,6 +709,7 @@ static int transaction_setup_recovery(struct tdb_context *tdb, tdb_off_t recovery_offset, recovery_max_size; tdb_off_t old_map_size = tdb->transaction->old_map_size; uint32_t magic, tailer; + int i; /* check that the recovery area has enough space @@ -704,30 +737,43 @@ static int transaction_setup_recovery(struct tdb_context *tdb, /* build the recovery data into a single blob to allow us to do a single large write, which should be more efficient */ p = data + sizeof(*rec); - for (el=tdb->transaction->elements;el;el=el->next) { - if (el->offset >= old_map_size) { + for (i=0;itransaction->num_blocks;i++) { + tdb_off_t offset; + tdb_len_t length; + + if (tdb->transaction->blocks[i] == NULL) { + continue; + } + + offset = i * tdb->transaction->block_size; + length = tdb->transaction->block_size; + if (i == tdb->transaction->num_blocks-1) { + length = tdb->transaction->last_block_size; + } + + if (offset >= old_map_size) { continue; } - if (el->offset + el->length > tdb->transaction->old_map_size) { + if (offset + length > tdb->transaction->old_map_size) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_setup_recovery: transaction data over new region boundary\n")); free(data); tdb->ecode = TDB_ERR_CORRUPT; return -1; } - memcpy(p, &el->offset, 4); - memcpy(p+4, &el->length, 4); + memcpy(p, &offset, 4); + memcpy(p+4, &length, 4); if (DOCONV()) { tdb_convert(p, 8); } /* the recovery area contains the old data, not the new data, so we have to call the original tdb_read method to get it */ - if (methods->tdb_read(tdb, el->offset, p + 8, el->length, 0) != 0) { + if (methods->tdb_read(tdb, offset, p + 8, length, 0) != 0) { free(data); tdb->ecode = TDB_ERR_IO; return -1; } - p += 8 + el->length; + p += 8 + length; } /* and the tailer */ @@ -742,6 +788,12 @@ static int transaction_setup_recovery(struct tdb_context *tdb, tdb->ecode = TDB_ERR_IO; return -1; } + if (transaction_write_existing(tdb, recovery_offset, data, sizeof(*rec) + recovery_size) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_setup_recovery: failed to write secondary recovery data\n")); + free(data); + tdb->ecode = TDB_ERR_IO; + return -1; + } /* as we don't have ordered writes, we have to sync the recovery data before we update the magic to indicate that the recovery @@ -763,6 +815,11 @@ static int transaction_setup_recovery(struct tdb_context *tdb, tdb->ecode = TDB_ERR_IO; return -1; } + if (transaction_write_existing(tdb, *magic_offset, &magic, sizeof(magic)) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_setup_recovery: failed to write secondary recovery magic\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } /* ensure the recovery magic marker is on disk */ if (transaction_sync(tdb, *magic_offset, sizeof(magic)) == -1) { @@ -780,6 +837,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) const struct tdb_methods *methods; tdb_off_t magic_offset = 0; uint32_t zero = 0; + int i; if (tdb->transaction == NULL) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: no transaction\n")); @@ -793,13 +851,14 @@ int tdb_transaction_commit(struct tdb_context *tdb) return -1; } + if (tdb->transaction->nesting != 0) { tdb->transaction->nesting--; return 0; } /* check for a null transaction */ - if (tdb->transaction->elements == NULL) { + if (tdb->transaction->blocks == NULL) { tdb_transaction_cancel(tdb); return 0; } @@ -858,10 +917,21 @@ int tdb_transaction_commit(struct tdb_context *tdb) } /* perform all the writes */ - while (tdb->transaction->elements) { - struct tdb_transaction_el *el = tdb->transaction->elements; + for (i=0;itransaction->num_blocks;i++) { + tdb_off_t offset; + tdb_len_t length; - if (methods->tdb_write(tdb, el->offset, el->data, el->length) == -1) { + if (tdb->transaction->blocks[i] == NULL) { + continue; + } + + offset = i * tdb->transaction->block_size; + length = tdb->transaction->block_size; + if (i == tdb->transaction->num_blocks-1) { + length = tdb->transaction->last_block_size; + } + + if (methods->tdb_write(tdb, offset, tdb->transaction->blocks[i], length) == -1) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed during commit\n")); /* we've overwritten part of the data and @@ -876,11 +946,12 @@ int tdb_transaction_commit(struct tdb_context *tdb) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed\n")); return -1; } - tdb->transaction->elements = el->next; - free(el->data); - free(el); + SAFE_FREE(tdb->transaction->blocks[i]); } + SAFE_FREE(tdb->transaction->blocks); + tdb->transaction->num_blocks = 0; + if (!(tdb->flags & TDB_NOSYNC)) { /* ensure the new data is on disk */ if (transaction_sync(tdb, 0, tdb->map_size) == -1) { @@ -919,6 +990,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) /* use a transaction cancel to free memory and remove the transaction locks */ tdb_transaction_cancel(tdb); + return 0; } -- cgit From 524d280ad09c99bcbf63d789e909afdf7edb5860 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 18 Jan 2008 15:33:57 +1100 Subject: merged tdb changes from ctdb (This used to be commit c54c087a19e36e0522eb4546c9425ae446f0628b) --- source4/lib/tdb/common/transaction.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index 0ecfb9b7ff..ea0e3a93f3 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -316,25 +316,15 @@ static int transaction_write_existing(struct tdb_context *tdb, tdb_off_t off, return 0; } - /* overwrite part of an existing block */ - if (buf == NULL) { - memset(tdb->transaction->blocks[blk] + off, 0, len); - } else { - memcpy(tdb->transaction->blocks[blk] + off, buf, len); - } - if (blk == tdb->transaction->num_blocks-1) { - if (len + off > tdb->transaction->last_block_size) { - tdb->transaction->last_block_size = len + off; - } + if (blk == tdb->transaction->num_blocks-1 && + off + len > tdb->transaction->last_block_size) { + len = tdb->transaction->last_block_size - off; } - return 0; + /* overwrite part of an existing block */ + memcpy(tdb->transaction->blocks[blk] + off, buf, len); -fail: - TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: failed at off=%d len=%d\n", - (blk*tdb->transaction->block_size) + off, len)); - tdb->transaction->transaction_error = 1; - return -1; + return 0; } -- cgit From 61a015a786c52008f4471e62750ad93507bce518 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 18 Jan 2008 15:45:22 +1100 Subject: merged changes from v3-2-test (This used to be commit 7077df3e2e3f171532f6a5ac87d45201736c9c11) --- source4/lib/tdb/common/transaction.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index ea0e3a93f3..c3e7a4e2c0 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -219,9 +219,12 @@ static int transaction_write(struct tdb_context *tdb, tdb_off_t off, uint8_t **new_blocks; /* expand the blocks array */ if (tdb->transaction->blocks == NULL) { - new_blocks = malloc((blk+1)*sizeof(uint8_t *)); + new_blocks = (uint8_t **)malloc( + (blk+1)*sizeof(uint8_t *)); } else { - new_blocks = realloc(tdb->transaction->blocks, (blk+1)*sizeof(uint8_t *)); + new_blocks = (uint8_t **)realloc( + tdb->transaction->blocks, + (blk+1)*sizeof(uint8_t *)); } if (new_blocks == NULL) { tdb->ecode = TDB_ERR_OOM; -- cgit From 3a3ff8ebe7b7de24a7d3e2c9dd105d9fbeeb5d42 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Thu, 31 Jan 2008 09:48:46 +1100 Subject: merged tdb transaction fix (This used to be commit eb3af24926977208a8099c848a510704d2ae3524) --- source4/lib/tdb/common/transaction.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index c3e7a4e2c0..4e2127be64 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -321,6 +321,9 @@ static int transaction_write_existing(struct tdb_context *tdb, tdb_off_t off, if (blk == tdb->transaction->num_blocks-1 && off + len > tdb->transaction->last_block_size) { + if (off >= tdb->transaction->last_block_size) { + return 0; + } len = tdb->transaction->last_block_size - off; } -- cgit From b39e332bd7803dd4ce8267f750c497b5deb837ee Mon Sep 17 00:00:00 2001 From: Simo Sorce Date: Wed, 3 Sep 2008 10:44:09 -0400 Subject: The msync manpage reports that msync *must* be called before munmap. Failure to do so may result in lost data. Fix an ifdef check, I really think we meant to check HAVE_MMAP here. (cherry picked from commit 74c8575b3f3b90ea21ae6aa7ccd95947838af956) (This used to be commit 8fd54bb55f0c23bd025d1719abcbe75c6a2ea8ac) --- source4/lib/tdb/common/transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'source4/lib/tdb/common/transaction.c') diff --git a/source4/lib/tdb/common/transaction.c b/source4/lib/tdb/common/transaction.c index 4e2127be64..7acda640c8 100644 --- a/source4/lib/tdb/common/transaction.c +++ b/source4/lib/tdb/common/transaction.c @@ -563,7 +563,7 @@ static int transaction_sync(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction: fsync failed\n")); return -1; } -#ifdef MS_SYNC +#ifdef HAVE_MMAP if (tdb->map_ptr) { tdb_off_t moffset = offset & ~(tdb->page_size-1); if (msync(moffset + (char *)tdb->map_ptr, -- cgit