From 2ec41571a3efbea254cc3e132280a194c86a2f89 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Fri, 11 Jan 2008 15:08:37 -0800 Subject: Sync tdb with the tdb changes in ctdb. Spoke to tridge about this. Fixes insidious problem with order n^2 freelist merging. Jeremy. (This used to be commit c6609c042b128e7d63eb64cfdfb0f6b65cb59d76) --- source3/lib/tdb/common/freelist.c | 101 ++++++++--- source3/lib/tdb/common/io.c | 20 +- source3/lib/tdb/common/lock.c | 7 + source3/lib/tdb/common/open.c | 16 +- source3/lib/tdb/common/tdb.c | 72 +++++++- source3/lib/tdb/common/tdb_private.h | 8 +- source3/lib/tdb/common/transaction.c | 343 +++++++++++++++++------------------ source3/lib/tdb/common/traverse.c | 3 + source3/lib/tdb/configure.ac | 2 +- source3/lib/tdb/include/tdb.h | 4 + 10 files changed, 358 insertions(+), 218 deletions(-) (limited to 'source3/lib') diff --git a/source3/lib/tdb/common/freelist.c b/source3/lib/tdb/common/freelist.c index b109643f23..c086c151fa 100644 --- a/source3/lib/tdb/common/freelist.c +++ b/source3/lib/tdb/common/freelist.c @@ -27,6 +27,12 @@ #include "tdb_private.h" +/* 'right' merges can involve O(n^2) cost when combined with a + traverse, so they are disabled until we find a way to do them in + O(1) time +*/ +#define USE_RIGHT_MERGES 0 + /* read a freelist record and check for simple errors */ int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off, struct list_struct *rec) { @@ -56,7 +62,7 @@ int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off, struct list_struct } - +#if USE_RIGHT_MERGES /* Remove an element from the freelist. Must have alloc lock. */ static int remove_from_freelist(struct tdb_context *tdb, tdb_off_t off, tdb_off_t next) { @@ -75,6 +81,7 @@ static int remove_from_freelist(struct tdb_context *tdb, tdb_off_t off, tdb_off_ TDB_LOG((tdb, TDB_DEBUG_FATAL,"remove_from_freelist: not on list at off=%d\n", off)); return TDB_ERRCODE(TDB_ERR_CORRUPT, -1); } +#endif /* update a record tailer (must hold allocation lock) */ @@ -93,8 +100,6 @@ static int update_tailer(struct tdb_context *tdb, tdb_off_t offset, neccessary. */ int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec) { - tdb_off_t right, left; - /* Allocation and tailer lock */ if (tdb_lock(tdb, -1, F_WRLCK) != 0) return -1; @@ -105,9 +110,10 @@ int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec) goto fail; } +#if USE_RIGHT_MERGES /* Look right first (I'm an Australian, dammit) */ - right = offset + sizeof(*rec) + rec->rec_len; - if (right + sizeof(*rec) <= tdb->map_size) { + if (offset + sizeof(*rec) + rec->rec_len + sizeof(*rec) <= tdb->map_size) { + tdb_off_t right = offset + sizeof(*rec) + rec->rec_len; struct list_struct r; if (tdb->methods->tdb_read(tdb, right, &r, sizeof(r), DOCONV()) == -1) { @@ -122,13 +128,18 @@ int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec) goto left; } rec->rec_len += sizeof(r) + r.rec_len; + if (update_tailer(tdb, offset, rec) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed at %u\n", offset)); + goto fail; + } } } - left: +#endif + /* Look left */ - left = offset - sizeof(tdb_off_t); - if (left > TDB_DATA_START(tdb->header.hash_size)) { + if (offset - sizeof(tdb_off_t) > TDB_DATA_START(tdb->header.hash_size)) { + tdb_off_t left = offset - sizeof(tdb_off_t); struct list_struct l; tdb_off_t leftsize; @@ -145,7 +156,12 @@ left: left = offset - leftsize; - /* Now read in record */ + if (leftsize > offset || + left < TDB_DATA_START(tdb->header.hash_size)) { + goto update; + } + + /* Now read in the left record */ if (tdb->methods->tdb_read(tdb, left, &l, sizeof(l), DOCONV()) == -1) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left read failed at %u (%u)\n", left, leftsize)); goto update; @@ -153,21 +169,24 @@ left: /* If it's free, expand to include it. */ if (l.magic == TDB_FREE_MAGIC) { - if (remove_from_freelist(tdb, left, l.next) == -1) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left free failed at %u\n", left)); - goto update; - } else { - offset = left; - rec->rec_len += leftsize; + /* we now merge the new record into the left record, rather than the other + way around. This makes the operation O(1) instead of O(n). This change + prevents traverse from being O(n^2) after a lot of deletes */ + l.rec_len += sizeof(*rec) + rec->rec_len; + if (tdb_rec_write(tdb, left, &l) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_left failed at %u\n", left)); + goto fail; } + if (update_tailer(tdb, left, &l) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed at %u\n", offset)); + goto fail; + } + tdb_unlock(tdb, -1, F_WRLCK); + return 0; } } update: - if (update_tailer(tdb, offset, rec) == -1) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed at %u\n", offset)); - goto fail; - } /* Now, prepend to free list */ rec->magic = TDB_FREE_MAGIC; @@ -261,6 +280,7 @@ tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct list_st tdb_off_t rec_ptr, last_ptr; tdb_len_t rec_len; } bestfit; + float multiplier = 1.0; if (tdb_lock(tdb, -1, F_WRLCK) == -1) return 0; @@ -295,18 +315,27 @@ tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct list_st bestfit.rec_len = rec->rec_len; bestfit.rec_ptr = rec_ptr; bestfit.last_ptr = last_ptr; - /* consider a fit to be good enough if - we aren't wasting more than half - the space */ - if (bestfit.rec_len < 2*length) { - break; - } } } /* move to the next record */ last_ptr = rec_ptr; rec_ptr = rec->next; + + /* if we've found a record that is big enough, then + stop searching if its also not too big. The + definition of 'too big' changes as we scan + through */ + if (bestfit.rec_len > 0 && + bestfit.rec_len < length * multiplier) { + break; + } + + /* this multiplier means we only extremely rarely + search more than 50 or so records. At 50 records we + accept records up to 11 times larger than what we + want */ + multiplier *= 1.05; } if (bestfit.rec_ptr != 0) { @@ -328,3 +357,25 @@ tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct list_st return 0; } + + +/* + return the size of the freelist - used to decide if we should repack +*/ +int tdb_freelist_size(struct tdb_context *tdb) +{ + tdb_off_t ptr; + int count=0; + + if (tdb_lock(tdb, -1, F_RDLCK) == -1) { + return -1; + } + + ptr = FREELIST_TOP; + while (tdb_ofs_read(tdb, ptr, &ptr) == 0 && ptr != 0) { + count++; + } + + tdb_unlock(tdb, -1, F_RDLCK); + return count; +} diff --git a/source3/lib/tdb/common/io.c b/source3/lib/tdb/common/io.c index 8ab0768883..172ab69d8c 100644 --- a/source3/lib/tdb/common/io.c +++ b/source3/lib/tdb/common/io.c @@ -101,8 +101,8 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off, off+written); } if (written == -1) { - /* Ensure ecode is set for log fn. */ - tdb->ecode = TDB_ERR_IO; + /* Ensure ecode is set for log fn. */ + tdb->ecode = TDB_ERR_IO; TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d " "len=%d (%s)\n", off, len, strerror(errno))); return TDB_ERRCODE(TDB_ERR_IO, -1); @@ -111,8 +111,8 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off, "write %d bytes at %d in two attempts\n", len, off)); errno = ENOSPC; - return TDB_ERRCODE(TDB_ERR_IO, -1); - } + return TDB_ERRCODE(TDB_ERR_IO, -1); + } } return 0; } @@ -230,7 +230,7 @@ void tdb_mmap(struct tdb_context *tdb) says to use for mmap expansion */ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition) { - char buf[1024]; + char buf[8192]; if (tdb->read_only || tdb->traverse_read) { tdb->ecode = TDB_ERR_RDONLY; @@ -294,7 +294,7 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad int tdb_expand(struct tdb_context *tdb, tdb_off_t size) { struct list_struct rec; - tdb_off_t offset; + tdb_off_t offset, new_size; if (tdb_lock(tdb, -1, F_WRLCK) == -1) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "lock failed in tdb_expand\n")); @@ -304,9 +304,11 @@ int tdb_expand(struct tdb_context *tdb, tdb_off_t size) /* must know about any previous expansions by another process */ tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1); - /* always make room for at least 10 more records, and round - the database up to a multiple of the page size */ - size = TDB_ALIGN(tdb->map_size + size*10, tdb->page_size) - tdb->map_size; + /* always make room for at least 100 more records, and at + least 25% more space. Round the database up to a multiple + of the page size */ + new_size = MAX(tdb->map_size + size*100, tdb->map_size * 1.25); + size = TDB_ALIGN(new_size, tdb->page_size) - tdb->map_size; if (!(tdb->flags & TDB_INTERNAL)) tdb_munmap(tdb); diff --git a/source3/lib/tdb/common/lock.c b/source3/lib/tdb/common/lock.c index e3fe888c46..f156c0fa7b 100644 --- a/source3/lib/tdb/common/lock.c +++ b/source3/lib/tdb/common/lock.c @@ -505,6 +505,9 @@ int tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key) /* record lock stops delete underneath */ int tdb_lock_record(struct tdb_context *tdb, tdb_off_t off) { + if (tdb->global_lock.count) { + return 0; + } return off ? tdb->methods->tdb_brlock(tdb, off, F_RDLCK, F_SETLKW, 0, 1) : 0; } @@ -537,6 +540,10 @@ int tdb_unlock_record(struct tdb_context *tdb, tdb_off_t off) struct tdb_traverse_lock *i; uint32_t count = 0; + if (tdb->global_lock.count) { + return 0; + } + if (off == 0) return 0; for (i = &tdb->travlocks; i; i = i->next) diff --git a/source3/lib/tdb/common/open.c b/source3/lib/tdb/common/open.c index 6efa482ac2..6bd8fda2bf 100644 --- a/source3/lib/tdb/common/open.c +++ b/source3/lib/tdb/common/open.c @@ -35,7 +35,7 @@ static struct tdb_context *tdbs = NULL; static unsigned int default_tdb_hash(TDB_DATA *key) { uint32_t value; /* Used to compute the hash value. */ - uint32_t i; /* Used to cycle through random values. */ + uint32_t i; /* Used to cycle through random values. */ /* Set the initial value from the key size. */ for (value = 0x238F13AF * key->dsize, i=0; i < key->dsize; i++) @@ -90,7 +90,7 @@ static int tdb_new_database(struct tdb_context *tdb, int hash_size) size -= written; written = write(tdb->fd, newdb+written, size); if (written == size) { - ret = 0; + ret = 0; } else if (written >= 0) { /* a second incomplete write - we give up. * guessing the errno... */ @@ -152,6 +152,7 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, int rev = 0, locked = 0; unsigned char *vp; uint32_t vertest; + unsigned v; if (!(tdb = (struct tdb_context *)calloc(1, sizeof *tdb))) { /* Can't log this */ @@ -178,7 +179,9 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, tdb->page_size = 0x2000; } - tdb->max_dead_records = (open_flags & TDB_VOLATILE) ? 5 : 0; + if (open_flags & TDB_VOLATILE) { + tdb->max_dead_records = 5; + } if ((open_flags & O_ACCMODE) == O_WRONLY) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: can't open tdb %s write-only\n", @@ -213,6 +216,10 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, goto fail; /* errno set by open(2) */ } + /* on exec, don't inherit the fd */ + v = fcntl(tdb->fd, F_GETFD, 0); + fcntl(tdb->fd, F_SETFD, v | FD_CLOEXEC); + /* ensure there is only one process initialising at once */ if (tdb->methods->tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to get global lock on %s: %s\n", @@ -240,7 +247,7 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, /* its not a valid database - possibly initialise it */ if (!(open_flags & O_CREAT) || tdb_new_database(tdb, hash_size) == -1) { if (errno == 0) { - errno = EIO; /* ie bad format or something */ + errno = EIO; /* ie bad format or something */ } goto fail; } @@ -281,6 +288,7 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, tdb->map_size = st.st_size; tdb->device = st.st_dev; tdb->inode = st.st_ino; + tdb->max_dead_records = 0; tdb_mmap(tdb); if (locked) { if (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_UNLCK, F_SETLK, 0, 1) == -1) { diff --git a/source3/lib/tdb/common/tdb.c b/source3/lib/tdb/common/tdb.c index 0e9d1dbd74..bf3abb71ac 100644 --- a/source3/lib/tdb/common/tdb.c +++ b/source3/lib/tdb/common/tdb.c @@ -102,8 +102,7 @@ static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, } /* As tdb_find, but if you succeed, keep the lock */ -tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, - uint32_t hash, int locktype, +tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype, struct list_struct *rec) { uint32_t rec_ptr; @@ -237,14 +236,15 @@ int tdb_exists(struct tdb_context *tdb, TDB_DATA key) } /* actually delete an entry in the database given the offset */ -int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct*rec) +int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct *rec) { tdb_off_t last_ptr, i; struct list_struct lastrec; if (tdb->read_only || tdb->traverse_read) return -1; - if (tdb_write_lock_record(tdb, rec_ptr) == -1) { + if (tdb->traverse_write != 0 || + tdb_write_lock_record(tdb, rec_ptr) == -1) { /* Someone traversing here: mark it as dead */ rec->magic = TDB_DEAD_MAGIC; return tdb_rec_write(tdb, rec_ptr, rec); @@ -666,6 +666,16 @@ int tdb_get_flags(struct tdb_context *tdb) return tdb->flags; } +void tdb_add_flags(struct tdb_context *tdb, unsigned flags) +{ + tdb->flags |= flags; +} + +void tdb_remove_flags(struct tdb_context *tdb, unsigned flags) +{ + tdb->flags &= ~flags; +} + /* enable sequence number handling on an open tdb @@ -674,3 +684,57 @@ void tdb_enable_seqnum(struct tdb_context *tdb) { tdb->flags |= TDB_SEQNUM; } + + +/* + wipe the entire database, deleting all records. This can be done + very fast by using a global lock. The entire data portion of the + file becomes a single entry in the freelist. + */ +int tdb_wipe_all(struct tdb_context *tdb) +{ + int i; + tdb_off_t offset = 0; + ssize_t data_len; + + if (tdb_lockall(tdb) != 0) { + return -1; + } + + /* wipe the hashes */ + for (i=0;iheader.hash_size;i++) { + if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i)); + goto failed; + } + } + + /* wipe the freelist */ + if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n")); + goto failed; + } + + /* add all the rest of the file to the freelist */ + data_len = (tdb->map_size - TDB_DATA_START(tdb->header.hash_size)) - sizeof(struct list_struct); + if (data_len > 0) { + struct list_struct rec; + memset(&rec,'\0',sizeof(rec)); + rec.rec_len = data_len; + if (tdb_free(tdb, TDB_DATA_START(tdb->header.hash_size), &rec) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to add free record\n")); + goto failed; + } + } + + if (tdb_unlockall(tdb) != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n")); + goto failed; + } + + return 0; + +failed: + tdb_unlockall(tdb); + return -1; +} diff --git a/source3/lib/tdb/common/tdb_private.h b/source3/lib/tdb/common/tdb_private.h index 58c30c1706..63a6d04e72 100644 --- a/source3/lib/tdb/common/tdb_private.h +++ b/source3/lib/tdb/common/tdb_private.h @@ -38,6 +38,10 @@ typedef uint32_t tdb_len_t; typedef uint32_t tdb_off_t; +#ifndef offsetof +#define offsetof(t,f) ((unsigned int)&((t *)0)->f) +#endif + #define TDB_MAGIC_FOOD "TDB file\n" #define TDB_VERSION (0x26011967 + 6) #define TDB_MAGIC (0x26011999U) @@ -54,7 +58,7 @@ typedef uint32_t tdb_off_t; #define TDB_BAD_MAGIC(r) ((r)->magic != TDB_MAGIC && !TDB_DEAD(r)) #define TDB_HASH_TOP(hash) (FREELIST_TOP + (BUCKET(hash)+1)*sizeof(tdb_off_t)) #define TDB_HASHTABLE_SIZE(tdb) ((tdb->header.hash_size+1)*sizeof(tdb_off_t)) -#define TDB_DATA_START(hash_size) TDB_HASH_TOP(hash_size-1) +#define TDB_DATA_START(hash_size) (TDB_HASH_TOP(hash_size-1) + sizeof(tdb_off_t)) #define TDB_RECOVERY_HEAD offsetof(struct tdb_header, recovery_start) #define TDB_SEQNUM_OFS offsetof(struct tdb_header, sequence_number) #define TDB_PAD_BYTE 0x42 @@ -144,6 +148,7 @@ struct tdb_context { tdb_len_t map_size; /* how much space has been mapped */ int read_only; /* opened read-only */ int traverse_read; /* read-only traversal */ + int traverse_write; /* read-write traversal */ struct tdb_lock_type global_lock; int num_lockrecs; struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */ @@ -173,7 +178,6 @@ struct tdb_context { int tdb_munmap(struct tdb_context *tdb); void tdb_mmap(struct tdb_context *tdb); int tdb_lock(struct tdb_context *tdb, int list, int ltype); -int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype); int tdb_unlock(struct tdb_context *tdb, int list, int ltype); int tdb_brlock(struct tdb_context *tdb, tdb_off_t offset, int rw_type, int lck_type, int probe, size_t len); int tdb_transaction_lock(struct tdb_context *tdb, int ltype); diff --git a/source3/lib/tdb/common/transaction.c b/source3/lib/tdb/common/transaction.c index 7eaacf7a16..2da09face0 100644 --- a/source3/lib/tdb/common/transaction.c +++ b/source3/lib/tdb/common/transaction.c @@ -87,13 +87,6 @@ */ -struct tdb_transaction_el { - struct tdb_transaction_el *next, *prev; - tdb_off_t offset; - tdb_len_t length; - unsigned char *data; -}; - /* hold the context of any current transaction */ @@ -105,12 +98,12 @@ struct tdb_transaction { /* the original io methods - used to do IOs to the real db */ const struct tdb_methods *io_methods; - /* the list of transaction elements. We use a doubly linked - list with a last pointer to allow us to keep the list - ordered, with first element at the front of the list. It - needs to be doubly linked as the read/write traversals need - to be backwards, while the commit needs to be forwards */ - struct tdb_transaction_el *elements, *elements_last; + /* the list of transaction blocks. When a block is first + written to, it gets created in this list */ + uint8_t **blocks; + uint32_t num_blocks; + uint32_t block_size; /* bytes in each block */ + uint32_t last_block_size; /* number of valid bytes in the last block */ /* non-zero when an internal transaction error has occurred. All write operations will then fail until the @@ -134,52 +127,48 @@ struct tdb_transaction { static int transaction_read(struct tdb_context *tdb, tdb_off_t off, void *buf, tdb_len_t len, int cv) { - struct tdb_transaction_el *el; - - /* we need to walk the list backwards to get the most recent data */ - for (el=tdb->transaction->elements_last;el;el=el->prev) { - tdb_len_t partial; + uint32_t blk; - if (off+len <= el->offset) { - continue; - } - if (off >= el->offset + el->length) { - continue; + /* break it down into block sized ops */ + while (len + (off % tdb->transaction->block_size) > tdb->transaction->block_size) { + tdb_len_t len2 = tdb->transaction->block_size - (off % tdb->transaction->block_size); + if (transaction_read(tdb, off, buf, len2, cv) != 0) { + return -1; } + len -= len2; + off += len2; + buf = (void *)(len2 + (char *)buf); + } - /* an overlapping read - needs to be split into up to - 2 reads and a memcpy */ - if (off < el->offset) { - partial = el->offset - off; - if (transaction_read(tdb, off, buf, partial, cv) != 0) { - goto fail; - } - len -= partial; - off += partial; - buf = (void *)(partial + (char *)buf); - } - if (off + len <= el->offset + el->length) { - partial = len; - } else { - partial = el->offset + el->length - off; - } - memcpy(buf, el->data + (off - el->offset), partial); - if (cv) { - tdb_convert(buf, len); - } - len -= partial; - off += partial; - buf = (void *)(partial + (char *)buf); - - if (len != 0 && transaction_read(tdb, off, buf, len, cv) != 0) { + if (len == 0) { + return 0; + } + + blk = off / tdb->transaction->block_size; + + /* see if we have it in the block list */ + if (tdb->transaction->num_blocks <= blk || + tdb->transaction->blocks[blk] == NULL) { + /* nope, do a real read */ + if (tdb->transaction->io_methods->tdb_read(tdb, off, buf, len, cv) != 0) { goto fail; } - return 0; } - /* its not in the transaction elements - do a real read */ - return tdb->transaction->io_methods->tdb_read(tdb, off, buf, len, cv); + /* it is in the block list. Now check for the last block */ + if (blk == tdb->transaction->num_blocks-1) { + if (len > tdb->transaction->last_block_size) { + goto fail; + } + } + + /* now copy it out of this block */ + memcpy(buf, tdb->transaction->blocks[blk] + (off % tdb->transaction->block_size), len); + if (cv) { + tdb_convert(buf, len); + } + return 0; fail: TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_read: failed at off=%d len=%d\n", off, len)); @@ -195,12 +184,8 @@ fail: static int transaction_write(struct tdb_context *tdb, tdb_off_t off, const void *buf, tdb_len_t len) { - struct tdb_transaction_el *el, *best_el=NULL; + uint32_t blk; - if (len == 0) { - return 0; - } - /* if the write is to a hash head, then update the transaction hash heads */ if (len == sizeof(tdb_off_t) && off >= FREELIST_TOP && @@ -209,106 +194,88 @@ static int transaction_write(struct tdb_context *tdb, tdb_off_t off, memcpy(&tdb->transaction->hash_heads[chain], buf, len); } - /* first see if we can replace an existing entry */ - for (el=tdb->transaction->elements_last;el;el=el->prev) { - tdb_len_t partial; - - if (best_el == NULL && off == el->offset+el->length) { - best_el = el; + /* break it up into block sized chunks */ + while (len + (off % tdb->transaction->block_size) > tdb->transaction->block_size) { + tdb_len_t len2 = tdb->transaction->block_size - (off % tdb->transaction->block_size); + if (transaction_write(tdb, off, buf, len2) != 0) { + return -1; } - - if (off+len <= el->offset) { - continue; - } - if (off >= el->offset + el->length) { - continue; + len -= len2; + off += len2; + if (buf != NULL) { + buf = (const void *)(len2 + (const char *)buf); } + } - /* an overlapping write - needs to be split into up to - 2 writes and a memcpy */ - if (off < el->offset) { - partial = el->offset - off; - if (transaction_write(tdb, off, buf, partial) != 0) { - goto fail; - } - len -= partial; - off += partial; - buf = (const void *)(partial + (const char *)buf); - } - if (off + len <= el->offset + el->length) { - partial = len; + if (len == 0) { + return 0; + } + + blk = off / tdb->transaction->block_size; + off = off % tdb->transaction->block_size; + + if (tdb->transaction->num_blocks <= blk) { + uint8_t **new_blocks; + /* expand the blocks array */ + if (tdb->transaction->blocks == NULL) { + new_blocks = malloc((blk+1)*sizeof(uint8_t *)); } else { - partial = el->offset + el->length - off; + new_blocks = realloc(tdb->transaction->blocks, (blk+1)*sizeof(uint8_t *)); } - memcpy(el->data + (off - el->offset), buf, partial); - len -= partial; - off += partial; - buf = (const void *)(partial + (const char *)buf); - - if (len != 0 && transaction_write(tdb, off, buf, len) != 0) { + if (new_blocks == NULL) { + tdb->ecode = TDB_ERR_OOM; goto fail; } - - return 0; + memset(&new_blocks[tdb->transaction->num_blocks], 0, + (1+(blk - tdb->transaction->num_blocks))*sizeof(uint8_t *)); + tdb->transaction->blocks = new_blocks; + tdb->transaction->num_blocks = blk+1; + tdb->transaction->last_block_size = 0; } - /* see if we can append the new entry to an existing entry */ - if (best_el && best_el->offset + best_el->length == off && - (off+len < tdb->transaction->old_map_size || - off > tdb->transaction->old_map_size)) { - unsigned char *data = best_el->data; - el = best_el; - el->data = (unsigned char *)realloc(el->data, - el->length + len); - if (el->data == NULL) { + /* allocate and fill a block? */ + if (tdb->transaction->blocks[blk] == NULL) { + tdb->transaction->blocks[blk] = (uint8_t *)calloc(tdb->transaction->block_size, 1); + if (tdb->transaction->blocks[blk] == NULL) { tdb->ecode = TDB_ERR_OOM; tdb->transaction->transaction_error = 1; - el->data = data; - return -1; + return -1; } - if (buf) { - memcpy(el->data + el->length, buf, len); - } else { - memset(el->data + el->length, TDB_PAD_BYTE, len); + if (tdb->transaction->old_map_size > blk * tdb->transaction->block_size) { + tdb_len_t len2 = tdb->transaction->block_size; + if (len2 + (blk * tdb->transaction->block_size) > tdb->transaction->old_map_size) { + len2 = tdb->transaction->old_map_size - (blk * tdb->transaction->block_size); + } + if (tdb->transaction->io_methods->tdb_read(tdb, blk * tdb->transaction->block_size, + tdb->transaction->blocks[blk], + len2, 0) != 0) { + SAFE_FREE(tdb->transaction->blocks[blk]); + tdb->ecode = TDB_ERR_IO; + goto fail; + } + if (blk == tdb->transaction->num_blocks-1) { + tdb->transaction->last_block_size = len2; + } } - el->length += len; - return 0; } - - /* add a new entry at the end of the list */ - el = (struct tdb_transaction_el *)malloc(sizeof(*el)); - if (el == NULL) { - tdb->ecode = TDB_ERR_OOM; - tdb->transaction->transaction_error = 1; - return -1; - } - el->next = NULL; - el->prev = tdb->transaction->elements_last; - el->offset = off; - el->length = len; - el->data = (unsigned char *)malloc(len); - if (el->data == NULL) { - free(el); - tdb->ecode = TDB_ERR_OOM; - tdb->transaction->transaction_error = 1; - return -1; - } - if (buf) { - memcpy(el->data, buf, len); + + /* overwrite part of an existing block */ + if (buf == NULL) { + memset(tdb->transaction->blocks[blk] + off, 0, len); } else { - memset(el->data, TDB_PAD_BYTE, len); + memcpy(tdb->transaction->blocks[blk] + off, buf, len); } - if (el->prev) { - el->prev->next = el; - } else { - tdb->transaction->elements = el; + if (blk == tdb->transaction->num_blocks-1) { + if (len + off > tdb->transaction->last_block_size) { + tdb->transaction->last_block_size = len + off; + } } - tdb->transaction->elements_last = el; + return 0; fail: - TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: failed at off=%d len=%d\n", off, len)); - tdb->ecode = TDB_ERR_IO; + TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: failed at off=%d len=%d\n", + (blk*tdb->transaction->block_size) + off, len)); tdb->transaction->transaction_error = 1; return -1; } @@ -419,10 +386,14 @@ int tdb_transaction_start(struct tdb_context *tdb) return -1; } + /* a page at a time seems like a reasonable compromise between compactness and efficiency */ + tdb->transaction->block_size = tdb->page_size; + /* get the transaction write lock. This is a blocking lock. As discussed with Volker, there are a number of ways we could make this async, which we will probably do in the future */ if (tdb_transaction_lock(tdb, F_WRLCK) == -1) { + SAFE_FREE(tdb->transaction->blocks); SAFE_FREE(tdb->transaction); return -1; } @@ -460,21 +431,12 @@ int tdb_transaction_start(struct tdb_context *tdb) tdb->transaction->io_methods = tdb->methods; tdb->methods = &transaction_methods; - /* by calling this transaction write here, we ensure that we don't grow the - transaction linked list due to hash table updates */ - if (transaction_write(tdb, FREELIST_TOP, tdb->transaction->hash_heads, - TDB_HASHTABLE_SIZE(tdb)) != 0) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_start: failed to prime hash table\n")); - tdb->ecode = TDB_ERR_IO; - tdb->methods = tdb->transaction->io_methods; - goto fail; - } - return 0; fail: tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0); tdb_transaction_unlock(tdb); + SAFE_FREE(tdb->transaction->blocks); SAFE_FREE(tdb->transaction->hash_heads); SAFE_FREE(tdb->transaction); return -1; @@ -486,6 +448,8 @@ fail: */ int tdb_transaction_cancel(struct tdb_context *tdb) { + int i; + if (tdb->transaction == NULL) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_cancel: no transaction\n")); return -1; @@ -499,13 +463,13 @@ int tdb_transaction_cancel(struct tdb_context *tdb) tdb->map_size = tdb->transaction->old_map_size; - /* free all the transaction elements */ - while (tdb->transaction->elements) { - struct tdb_transaction_el *el = tdb->transaction->elements; - tdb->transaction->elements = el->next; - free(el->data); - free(el); + /* free all the transaction blocks */ + for (i=0;itransaction->num_blocks;i++) { + if (tdb->transaction->blocks[i] != NULL) { + free(tdb->transaction->blocks[i]); + } } + SAFE_FREE(tdb->transaction->blocks); /* remove any global lock created during the transaction */ if (tdb->global_lock.count != 0) { @@ -515,7 +479,6 @@ int tdb_transaction_cancel(struct tdb_context *tdb) /* remove any locks created during the transaction */ if (tdb->num_locks != 0) { - int i; for (i=0;inum_lockrecs;i++) { tdb_brlock(tdb,FREELIST_TOP+4*tdb->lockrecs[i].list, F_UNLCK,F_SETLKW, 0, 1); @@ -567,16 +530,24 @@ static int transaction_sync(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t */ static tdb_len_t tdb_recovery_size(struct tdb_context *tdb) { - struct tdb_transaction_el *el; tdb_len_t recovery_size = 0; + int i; recovery_size = sizeof(uint32_t); - for (el=tdb->transaction->elements;el;el=el->next) { - if (el->offset >= tdb->transaction->old_map_size) { + for (i=0;itransaction->num_blocks;i++) { + if (i * tdb->transaction->block_size >= tdb->transaction->old_map_size) { + break; + } + if (tdb->transaction->blocks[i] == NULL) { continue; } - recovery_size += 2*sizeof(tdb_off_t) + el->length; - } + recovery_size += 2*sizeof(tdb_off_t); + if (i == tdb->transaction->num_blocks-1) { + recovery_size += tdb->transaction->last_block_size; + } else { + recovery_size += tdb->transaction->block_size; + } + } return recovery_size; } @@ -669,7 +640,6 @@ static int tdb_recovery_allocate(struct tdb_context *tdb, static int transaction_setup_recovery(struct tdb_context *tdb, tdb_off_t *magic_offset) { - struct tdb_transaction_el *el; tdb_len_t recovery_size; unsigned char *data, *p; const struct tdb_methods *methods = tdb->transaction->io_methods; @@ -677,6 +647,7 @@ static int transaction_setup_recovery(struct tdb_context *tdb, tdb_off_t recovery_offset, recovery_max_size; tdb_off_t old_map_size = tdb->transaction->old_map_size; uint32_t magic, tailer; + int i; /* check that the recovery area has enough space @@ -704,30 +675,43 @@ static int transaction_setup_recovery(struct tdb_context *tdb, /* build the recovery data into a single blob to allow us to do a single large write, which should be more efficient */ p = data + sizeof(*rec); - for (el=tdb->transaction->elements;el;el=el->next) { - if (el->offset >= old_map_size) { + for (i=0;itransaction->num_blocks;i++) { + tdb_off_t offset; + tdb_len_t length; + + if (tdb->transaction->blocks[i] == NULL) { continue; } - if (el->offset + el->length > tdb->transaction->old_map_size) { + + offset = i * tdb->transaction->block_size; + length = tdb->transaction->block_size; + if (i == tdb->transaction->num_blocks-1) { + length = tdb->transaction->last_block_size; + } + + if (offset >= old_map_size) { + continue; + } + if (offset + length > tdb->transaction->old_map_size) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_setup_recovery: transaction data over new region boundary\n")); free(data); tdb->ecode = TDB_ERR_CORRUPT; return -1; } - memcpy(p, &el->offset, 4); - memcpy(p+4, &el->length, 4); + memcpy(p, &offset, 4); + memcpy(p+4, &length, 4); if (DOCONV()) { tdb_convert(p, 8); } /* the recovery area contains the old data, not the new data, so we have to call the original tdb_read method to get it */ - if (methods->tdb_read(tdb, el->offset, p + 8, el->length, 0) != 0) { + if (methods->tdb_read(tdb, offset, p + 8, length, 0) != 0) { free(data); tdb->ecode = TDB_ERR_IO; return -1; } - p += 8 + el->length; + p += 8 + length; } /* and the tailer */ @@ -780,6 +764,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) const struct tdb_methods *methods; tdb_off_t magic_offset = 0; uint32_t zero = 0; + int i; if (tdb->transaction == NULL) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: no transaction\n")); @@ -799,7 +784,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) } /* check for a null transaction */ - if (tdb->transaction->elements == NULL) { + if (tdb->transaction->blocks == NULL) { tdb_transaction_cancel(tdb); return 0; } @@ -858,10 +843,21 @@ int tdb_transaction_commit(struct tdb_context *tdb) } /* perform all the writes */ - while (tdb->transaction->elements) { - struct tdb_transaction_el *el = tdb->transaction->elements; + for (i=0;itransaction->num_blocks;i++) { + tdb_off_t offset; + tdb_len_t length; - if (methods->tdb_write(tdb, el->offset, el->data, el->length) == -1) { + if (tdb->transaction->blocks[i] == NULL) { + continue; + } + + offset = i * tdb->transaction->block_size; + length = tdb->transaction->block_size; + if (i == tdb->transaction->num_blocks-1) { + length = tdb->transaction->last_block_size; + } + + if (methods->tdb_write(tdb, offset, tdb->transaction->blocks[i], length) == -1) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed during commit\n")); /* we've overwritten part of the data and @@ -876,11 +872,12 @@ int tdb_transaction_commit(struct tdb_context *tdb) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed\n")); return -1; } - tdb->transaction->elements = el->next; - free(el->data); - free(el); + SAFE_FREE(tdb->transaction->blocks[i]); } + SAFE_FREE(tdb->transaction->blocks); + tdb->transaction->num_blocks = 0; + if (!(tdb->flags & TDB_NOSYNC)) { /* ensure the new data is on disk */ if (transaction_sync(tdb, 0, tdb->map_size) == -1) { diff --git a/source3/lib/tdb/common/traverse.c b/source3/lib/tdb/common/traverse.c index 27b2cfc54a..07b0c23858 100644 --- a/source3/lib/tdb/common/traverse.c +++ b/source3/lib/tdb/common/traverse.c @@ -241,7 +241,9 @@ int tdb_traverse(struct tdb_context *tdb, return -1; } + tdb->traverse_write++; ret = tdb_traverse_internal(tdb, fn, private_data, &tl); + tdb->traverse_write--; tdb_transaction_unlock(tdb); @@ -333,3 +335,4 @@ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n")); return key; } + diff --git a/source3/lib/tdb/configure.ac b/source3/lib/tdb/configure.ac index de8fc9c2b5..1085055bc9 100644 --- a/source3/lib/tdb/configure.ac +++ b/source3/lib/tdb/configure.ac @@ -2,7 +2,7 @@ AC_PREREQ(2.50) AC_DEFUN([SMB_MODULE_DEFAULT], [echo -n ""]) AC_DEFUN([SMB_LIBRARY_ENABLE], [echo -n ""]) AC_DEFUN([SMB_ENABLE], [echo -n ""]) -AC_INIT(tdb, 1.1.0) +AC_INIT(tdb, 1.1.1) AC_CONFIG_SRCDIR([common/tdb.c]) AC_CONFIG_HEADER(include/config.h) AC_LIBREPLACE_ALL_CHECKS diff --git a/source3/lib/tdb/include/tdb.h b/source3/lib/tdb/include/tdb.h index fce6628b84..0008085de5 100644 --- a/source3/lib/tdb/include/tdb.h +++ b/source3/lib/tdb/include/tdb.h @@ -135,6 +135,8 @@ int tdb_get_seqnum(struct tdb_context *tdb); int tdb_hash_size(struct tdb_context *tdb); size_t tdb_map_size(struct tdb_context *tdb); int tdb_get_flags(struct tdb_context *tdb); +void tdb_add_flags(struct tdb_context *tdb, unsigned flag); +void tdb_remove_flags(struct tdb_context *tdb, unsigned flag); void tdb_enable_seqnum(struct tdb_context *tdb); void tdb_increment_seqnum_nonblock(struct tdb_context *tdb); @@ -153,6 +155,8 @@ void tdb_setalarm_sigptr(struct tdb_context *tdb, volatile sig_atomic_t *sigptr) void tdb_dump_all(struct tdb_context *tdb); int tdb_printfreelist(struct tdb_context *tdb); int tdb_validate_freelist(struct tdb_context *tdb, int *pnum_entries); +int tdb_wipe_all(struct tdb_context *tdb); +int tdb_freelist_size(struct tdb_context *tdb); extern TDB_DATA tdb_null; -- cgit From 360673e80e8d46167afb6c393773895b369ff290 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Sat, 12 Jan 2008 00:15:46 -0800 Subject: CID 458. Don't leak dlopen handles on failing to load module. Jeremy. (This used to be commit 8809eaeb154ea12543455f589e31172dc905d83a) --- source3/lib/module.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'source3/lib') diff --git a/source3/lib/module.c b/source3/lib/module.c index fa06d14b49..285bd9c4c0 100644 --- a/source3/lib/module.c +++ b/source3/lib/module.c @@ -57,6 +57,7 @@ static NTSTATUS do_smb_load_module(const char *module_name, bool is_probe) if (error) { DEBUG(0, ("Error trying to resolve symbol 'init_module' in %s: %s\n", module_name, error)); + sys_dlclose(handle); return NT_STATUS_UNSUCCESSFUL; } @@ -66,6 +67,7 @@ static NTSTATUS do_smb_load_module(const char *module_name, bool is_probe) if (!NT_STATUS_IS_OK(status)) { DEBUG(0, ("Module '%s' initialization failed: %s\n", module_name, get_friendly_nt_error_msg(status))); + sys_dlclose(handle); } return status; -- cgit From 4af27ec877d67afc719bccd350dae33ef8dd62b5 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Sat, 12 Jan 2008 10:38:17 +0100 Subject: Restructure dbwrap_rbt In this low-level code, play tricks to reduce the number of allocations to the possible minimum. I would not recommend this for higher-level code, but here it pays off. (This used to be commit 71b1e6ff1595fbaa8dd49b996c45541531c7e98c) --- source3/lib/dbwrap_rbt.c | 186 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 127 insertions(+), 59 deletions(-) (limited to 'source3/lib') diff --git a/source3/lib/dbwrap_rbt.c b/source3/lib/dbwrap_rbt.c index 93d73f29d1..633b695b52 100644 --- a/source3/lib/dbwrap_rbt.c +++ b/source3/lib/dbwrap_rbt.c @@ -1,7 +1,7 @@ /* Unix SMB/CIFS implementation. Database interface wrapper around red-black trees - Copyright (C) Volker Lendecke 2007 + Copyright (C) Volker Lendecke 2007, 2008 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,6 +20,8 @@ #include "includes.h" #include "rbtree.h" +#define ALIGN(_size_) (((_size_)+15)&~15) + struct db_rbt_ctx { struct rb_root tree; }; @@ -43,6 +45,35 @@ struct db_rbt_node { char data[]; }; +/* + * Hide the ugly pointer calculations in a function + */ + +static struct db_rbt_node *db_rbt2node(struct rb_node *node) +{ + return (struct db_rbt_node *) + ((char *)node - offsetof(struct db_rbt_node, rb_node)); +} + +/* + * Compare two keys + */ + +static int db_rbt_compare(TDB_DATA a, TDB_DATA b) +{ + int res; + + res = memcmp(a.dptr, b.dptr, MIN(a.dsize, b.dsize)); + + if ((res < 0) || ((res == 0) && (a.dsize < b.dsize))) { + return -1; + } + if ((res > 0) || ((res == 0) && (a.dsize > b.dsize))) { + return 1; + } + return 0; +} + /* * dissect a db_rbt_node into its implicit key and value parts */ @@ -58,9 +89,7 @@ static void db_rbt_parse_node(struct db_rbt_node *node, static NTSTATUS db_rbt_store(struct db_record *rec, TDB_DATA data, int flag) { - struct db_rbt_rec *rec_priv = talloc_get_type_abort( - rec->private_data, struct db_rbt_rec); - + struct db_rbt_rec *rec_priv = (struct db_rbt_rec *)rec->private_data; struct db_rbt_node *node; struct rb_node ** p; @@ -133,22 +162,16 @@ static NTSTATUS db_rbt_store(struct db_record *rec, TDB_DATA data, int flag) parent = (*p); - r = (struct db_rbt_node *) - ((char *)(*p) - offsetof(struct db_rbt_node, rb_node)); + r = db_rbt2node(*p); db_rbt_parse_node(r, &search_key, &search_val); - res = memcmp(this_key.dptr, search_key.dptr, - MIN(this_key.dsize, search_key.dsize)); + res = db_rbt_compare(this_key, search_key); - if ((res < 0) - || ((res == 0) - && (this_key.dsize < search_key.dsize))) { + if (res == -1) { p = &(*p)->rb_left; } - else if ((res > 0) - || ((res == 0) - && (this_key.dsize > search_key.dsize))) { + else if (res == 1) { p = &(*p)->rb_right; } else { @@ -164,8 +187,7 @@ static NTSTATUS db_rbt_store(struct db_record *rec, TDB_DATA data, int flag) static NTSTATUS db_rbt_delete(struct db_record *rec) { - struct db_rbt_rec *rec_priv = talloc_get_type_abort( - rec->private_data, struct db_rbt_rec); + struct db_rbt_rec *rec_priv = (struct db_rbt_rec *)rec->private_data; if (rec_priv->node == NULL) { return NT_STATUS_OK; @@ -187,85 +209,128 @@ static struct db_record *db_rbt_fetch_locked(struct db_context *db_ctx, struct db_rbt_rec *rec_priv; struct db_record *result; struct rb_node *n; + size_t size; + bool found = false; + struct db_rbt_node *r = NULL; + TDB_DATA search_key = tdb_null, search_val = tdb_null; - result = talloc(mem_ctx, struct db_record); + n = ctx->tree.rb_node; - if (result == NULL) { - return NULL; + while (n != NULL) { + int res; + + r = db_rbt2node(n); + + db_rbt_parse_node(r, &search_key, &search_val); + + res = db_rbt_compare(key, search_key); + + if (res == -1) { + n = n->rb_left; + } + else if (res == 1) { + n = n->rb_right; + } + else { + found = true; + break; + } } - rec_priv = talloc(result, struct db_rbt_rec); + /* + * In this low-level routine, play tricks to reduce the number of + * tallocs to one. Not recommened for general use, but here it pays + * off. + */ - if (rec_priv == NULL) { - TALLOC_FREE(result); + size = ALIGN(sizeof(struct db_record)) + sizeof(struct db_rbt_rec); + + if (!found) { + /* + * We need to keep the key around for later store + */ + size += key.dsize; + } + + result = (struct db_record *)talloc_size(mem_ctx, size); + if (result == NULL) { return NULL; } + rec_priv = (struct db_rbt_rec *) + ((char *)result + ALIGN(sizeof(struct db_record))); rec_priv->db_ctx = ctx; result->store = db_rbt_store; result->delete_rec = db_rbt_delete; result->private_data = rec_priv; + if (found) { + rec_priv->node = r; + result->key = search_key; + result->value = search_val; + } + else { + rec_priv->node = NULL; + result->key.dptr = (uint8 *) + ((char *)rec_priv + sizeof(*rec_priv)); + result->key.dsize = key.dsize; + memcpy(result->key.dptr, key.dptr, key.dsize); + + result->value = tdb_null; + } + + return result; +} + +static int db_rbt_fetch(struct db_context *db, TALLOC_CTX *mem_ctx, + TDB_DATA key, TDB_DATA *data) +{ + struct db_rbt_ctx *ctx = talloc_get_type_abort( + db->private_data, struct db_rbt_ctx); + + struct rb_node *n; + bool found = false; + struct db_rbt_node *r = NULL; + TDB_DATA search_key, search_val; + uint8_t *result; + n = ctx->tree.rb_node; while (n != NULL) { - struct db_rbt_node *r; - TDB_DATA search_key, search_val; int res; - r = (struct db_rbt_node *) - ((char *)n - offsetof(struct db_rbt_node, rb_node)); + r = db_rbt2node(n); db_rbt_parse_node(r, &search_key, &search_val); - res = memcmp(key.dptr, search_key.dptr, - MIN(key.dsize, search_key.dsize)); + res = db_rbt_compare(key, search_key); - if ((res < 0) - || ((res == 0) && (key.dsize < search_key.dsize))) { + if (res == -1) { n = n->rb_left; } - else if ((res > 0) - || ((res == 0) && (key.dsize > search_key.dsize))) { + else if (res == 1) { n = n->rb_right; } else { - rec_priv->node = r; - result->key = search_key; - result->value = search_val; - return result; + found = true; + break; } } - result->key.dsize = key.dsize; - result->key.dptr = (uint8_t *)talloc_memdup( - result, key.dptr, key.dsize); - - if (result->key.dptr == NULL) { - TALLOC_FREE(result); - return NULL; + if (!found) { + *data = tdb_null; + return 0; } - rec_priv->node = NULL; - result->value.dsize = 0; - result->value.dptr = NULL; - return result; -} - -static int db_rbt_fetch(struct db_context *db, TALLOC_CTX *mem_ctx, - TDB_DATA key, TDB_DATA *data) -{ - struct db_record *rec; - - if (!(rec = db->fetch_locked(db, mem_ctx, key))) { + result = (uint8 *)talloc_memdup(mem_ctx, search_val.dptr, + search_val.dsize); + if (result == NULL) { return -1; } - data->dsize = rec->value.dsize; - data->dptr = (uint8 *)talloc_memdup(mem_ctx, rec->value.dptr, - rec->value.dsize); - TALLOC_FREE(rec); + data->dptr = result; + data->dsize = search_val.dsize; return 0; } @@ -275,6 +340,9 @@ static int db_rbt_traverse(struct db_context *db, void *private_data), void *private_data) { + /* + * Nobody uses this so far, and unused code is broken code :-) + */ return -1; } -- cgit From 1ee6d3e1ee56554d83437a8c79cb169a26732154 Mon Sep 17 00:00:00 2001 From: Michael Adam Date: Sun, 13 Jan 2008 01:40:05 +0100 Subject: Introduce a libnet_conf context created by libnet_conf_open(). The libnet_conf_ctx stores the information necessary to interoperate with the configuration. It is created by calling libnet_conf_open() and destroyed by calling libnet_conf_close(). The context is passed to all the libnet_conf functions. It currently stores the token to access the registry. Later, it could store more data, e.g. the server to connect to, credentials, and so on. For support of other backends than registry or support of remote configuration, only the open function will have to be changed. In net_conf, the calls to the actual net_conf functions is wrapped into a function that calls libnet_conf_open()/_close(). Thus an individual variant of net_conf_runfunction2() and functable2 is used to cope with functions being called by the wrapper with the additional libnet_conf_ctx argument. Michael (This used to be commit c2a9346faa26e79af5948197a1b322e545f0ed09) --- source3/lib/netapi/serverinfo.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'source3/lib') diff --git a/source3/lib/netapi/serverinfo.c b/source3/lib/netapi/serverinfo.c index 0e356e0ee7..67680ba55a 100644 --- a/source3/lib/netapi/serverinfo.c +++ b/source3/lib/netapi/serverinfo.c @@ -149,6 +149,10 @@ static WERROR NetServerSetInfoLocal_1005(struct libnetapi_ctx *ctx, uint8_t *buffer, uint32_t *parm_error) { + WERROR werr; + struct libnet_conf_ctx *conf_ctx; + TALLOC_CTX *mem_ctx; + struct srvsvc_NetSrvInfo1005 *info1005; if (!buffer) { @@ -167,8 +171,20 @@ static WERROR NetServerSetInfoLocal_1005(struct libnetapi_ctx *ctx, return WERR_NOT_SUPPORTED; } - return libnet_conf_set_global_parameter("server string", + mem_ctx = talloc_stackframe(); + werr = libnet_conf_open(mem_ctx, &conf_ctx); + if (!W_ERROR_IS_OK(werr)) { + goto done; + } + + werr = libnet_conf_set_global_parameter(conf_ctx, + "server string", info1005->comment); + +done: + libnet_conf_close(conf_ctx); + TALLOC_FREE(mem_ctx); + return werr; } static WERROR NetServerSetInfoLocal(struct libnetapi_ctx *ctx, -- cgit From 5b6520044b7026baf1cff554b400f7ff1f024d77 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Wed, 9 Jan 2008 23:00:48 +0100 Subject: Fix the max_dead_record calculations (This used to be commit 4aaf4e7e73a5c7fa97ef730fbff5c7cb12df2d6c) --- source3/lib/tdb/common/open.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'source3/lib') diff --git a/source3/lib/tdb/common/open.c b/source3/lib/tdb/common/open.c index 6bd8fda2bf..94140a4baa 100644 --- a/source3/lib/tdb/common/open.c +++ b/source3/lib/tdb/common/open.c @@ -179,9 +179,7 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, tdb->page_size = 0x2000; } - if (open_flags & TDB_VOLATILE) { - tdb->max_dead_records = 5; - } + tdb->max_dead_records = (open_flags & TDB_VOLATILE) ? 5 : 0; if ((open_flags & O_ACCMODE) == O_WRONLY) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: can't open tdb %s write-only\n", @@ -288,7 +286,6 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, tdb->map_size = st.st_size; tdb->device = st.st_dev; tdb->inode = st.st_ino; - tdb->max_dead_records = 0; tdb_mmap(tdb); if (locked) { if (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_UNLCK, F_SETLK, 0, 1) == -1) { -- cgit From a1902700c45935f0a1071100810e91142ceb273e Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Mon, 14 Jan 2008 15:38:43 +1100 Subject: merged tdb fix from ctdb tree (This used to be commit c91e9c785b5bf4b0c479edf8eb33da22bf615387) --- source3/lib/tdb/common/tdb.c | 47 ++++++++++++++++++++++ source3/lib/tdb/common/transaction.c | 75 ++++++++++++++++++++++++++++++++++++ source3/lib/tdb/include/tdb.h | 1 + 3 files changed, 123 insertions(+) (limited to 'source3/lib') diff --git a/source3/lib/tdb/common/tdb.c b/source3/lib/tdb/common/tdb.c index bf3abb71ac..fd4e1cc8af 100644 --- a/source3/lib/tdb/common/tdb.c +++ b/source3/lib/tdb/common/tdb.c @@ -715,6 +715,11 @@ int tdb_wipe_all(struct tdb_context *tdb) goto failed; } + if (tdb_ofs_write(tdb, TDB_RECOVERY_HEAD, &offset) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write recovery head\n")); + goto failed; + } + /* add all the rest of the file to the freelist */ data_len = (tdb->map_size - TDB_DATA_START(tdb->header.hash_size)) - sizeof(struct list_struct); if (data_len > 0) { @@ -738,3 +743,45 @@ failed: tdb_unlockall(tdb); return -1; } + + +/* + validate the integrity of all tdb hash chains. Useful when debugging + */ +int tdb_validate(struct tdb_context *tdb) +{ + int h; + for (h=-1;h<(int)tdb->header.hash_size;h++) { + tdb_off_t rec_ptr; + uint32_t count = 0; + if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &rec_ptr) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_validate: failed ofs_read at top of hash %d\n", h)); + return -1; + } + while (rec_ptr) { + struct list_struct r; + tdb_off_t size; + + if (tdb_rec_read(tdb, rec_ptr, &r) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_validate: failed rec_read h=%d rec_ptr=%u count=%u\n", + h, rec_ptr, count)); + return -1; + } + if (tdb_ofs_read(tdb, rec_ptr + sizeof(r) + r.rec_len - sizeof(tdb_off_t), &size) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_validate: failed ofs_read h=%d rec_ptr=%u count=%u\n", + h, rec_ptr, count)); + return -1; + } + if (size != r.rec_len + sizeof(r)) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_validate: failed size check size=%u h=%d rec_ptr=%u count=%u\n", + size, h, rec_ptr, count)); + return -1; + } + rec_ptr = r.next; + count++; + } + } + return 0; +} + + diff --git a/source3/lib/tdb/common/transaction.c b/source3/lib/tdb/common/transaction.c index 2da09face0..0ecfb9b7ff 100644 --- a/source3/lib/tdb/common/transaction.c +++ b/source3/lib/tdb/common/transaction.c @@ -87,6 +87,7 @@ */ + /* hold the context of any current transaction */ @@ -280,6 +281,63 @@ fail: return -1; } + +/* + write while in a transaction - this varient never expands the transaction blocks, it only + updates existing blocks. This means it cannot change the recovery size +*/ +static int transaction_write_existing(struct tdb_context *tdb, tdb_off_t off, + const void *buf, tdb_len_t len) +{ + uint32_t blk; + + /* break it up into block sized chunks */ + while (len + (off % tdb->transaction->block_size) > tdb->transaction->block_size) { + tdb_len_t len2 = tdb->transaction->block_size - (off % tdb->transaction->block_size); + if (transaction_write_existing(tdb, off, buf, len2) != 0) { + return -1; + } + len -= len2; + off += len2; + if (buf != NULL) { + buf = (const void *)(len2 + (const char *)buf); + } + } + + if (len == 0) { + return 0; + } + + blk = off / tdb->transaction->block_size; + off = off % tdb->transaction->block_size; + + if (tdb->transaction->num_blocks <= blk || + tdb->transaction->blocks[blk] == NULL) { + return 0; + } + + /* overwrite part of an existing block */ + if (buf == NULL) { + memset(tdb->transaction->blocks[blk] + off, 0, len); + } else { + memcpy(tdb->transaction->blocks[blk] + off, buf, len); + } + if (blk == tdb->transaction->num_blocks-1) { + if (len + off > tdb->transaction->last_block_size) { + tdb->transaction->last_block_size = len + off; + } + } + + return 0; + +fail: + TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: failed at off=%d len=%d\n", + (blk*tdb->transaction->block_size) + off, len)); + tdb->transaction->transaction_error = 1; + return -1; +} + + /* accelerated hash chain head search, using the cached hash heads */ @@ -629,6 +687,10 @@ static int tdb_recovery_allocate(struct tdb_context *tdb, TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to write recovery head\n")); return -1; } + if (transaction_write_existing(tdb, TDB_RECOVERY_HEAD, &recovery_head, sizeof(tdb_off_t)) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to write recovery head\n")); + return -1; + } return 0; } @@ -726,6 +788,12 @@ static int transaction_setup_recovery(struct tdb_context *tdb, tdb->ecode = TDB_ERR_IO; return -1; } + if (transaction_write_existing(tdb, recovery_offset, data, sizeof(*rec) + recovery_size) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_setup_recovery: failed to write secondary recovery data\n")); + free(data); + tdb->ecode = TDB_ERR_IO; + return -1; + } /* as we don't have ordered writes, we have to sync the recovery data before we update the magic to indicate that the recovery @@ -747,6 +815,11 @@ static int transaction_setup_recovery(struct tdb_context *tdb, tdb->ecode = TDB_ERR_IO; return -1; } + if (transaction_write_existing(tdb, *magic_offset, &magic, sizeof(magic)) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_setup_recovery: failed to write secondary recovery magic\n")); + tdb->ecode = TDB_ERR_IO; + return -1; + } /* ensure the recovery magic marker is on disk */ if (transaction_sync(tdb, *magic_offset, sizeof(magic)) == -1) { @@ -778,6 +851,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) return -1; } + if (tdb->transaction->nesting != 0) { tdb->transaction->nesting--; return 0; @@ -916,6 +990,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) /* use a transaction cancel to free memory and remove the transaction locks */ tdb_transaction_cancel(tdb); + return 0; } diff --git a/source3/lib/tdb/include/tdb.h b/source3/lib/tdb/include/tdb.h index 0008085de5..daa2d43135 100644 --- a/source3/lib/tdb/include/tdb.h +++ b/source3/lib/tdb/include/tdb.h @@ -157,6 +157,7 @@ int tdb_printfreelist(struct tdb_context *tdb); int tdb_validate_freelist(struct tdb_context *tdb, int *pnum_entries); int tdb_wipe_all(struct tdb_context *tdb); int tdb_freelist_size(struct tdb_context *tdb); +int tdb_validate(struct tdb_context *tdb); extern TDB_DATA tdb_null; -- cgit From 63b1c765de439b295c73430be14f8656873bfad7 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Mon, 14 Jan 2008 16:26:52 +1100 Subject: samba3 already has tdb_validate() (This used to be commit 9f002eeff9bcf33a00e5641cb14338e866a17afe) --- source3/lib/tdb/common/tdb.c | 42 ------------------------------------------ source3/lib/tdb/include/tdb.h | 1 - 2 files changed, 43 deletions(-) (limited to 'source3/lib') diff --git a/source3/lib/tdb/common/tdb.c b/source3/lib/tdb/common/tdb.c index fd4e1cc8af..ea5d9ccc60 100644 --- a/source3/lib/tdb/common/tdb.c +++ b/source3/lib/tdb/common/tdb.c @@ -743,45 +743,3 @@ failed: tdb_unlockall(tdb); return -1; } - - -/* - validate the integrity of all tdb hash chains. Useful when debugging - */ -int tdb_validate(struct tdb_context *tdb) -{ - int h; - for (h=-1;h<(int)tdb->header.hash_size;h++) { - tdb_off_t rec_ptr; - uint32_t count = 0; - if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &rec_ptr) == -1) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_validate: failed ofs_read at top of hash %d\n", h)); - return -1; - } - while (rec_ptr) { - struct list_struct r; - tdb_off_t size; - - if (tdb_rec_read(tdb, rec_ptr, &r) == -1) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_validate: failed rec_read h=%d rec_ptr=%u count=%u\n", - h, rec_ptr, count)); - return -1; - } - if (tdb_ofs_read(tdb, rec_ptr + sizeof(r) + r.rec_len - sizeof(tdb_off_t), &size) == -1) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_validate: failed ofs_read h=%d rec_ptr=%u count=%u\n", - h, rec_ptr, count)); - return -1; - } - if (size != r.rec_len + sizeof(r)) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_validate: failed size check size=%u h=%d rec_ptr=%u count=%u\n", - size, h, rec_ptr, count)); - return -1; - } - rec_ptr = r.next; - count++; - } - } - return 0; -} - - diff --git a/source3/lib/tdb/include/tdb.h b/source3/lib/tdb/include/tdb.h index daa2d43135..0008085de5 100644 --- a/source3/lib/tdb/include/tdb.h +++ b/source3/lib/tdb/include/tdb.h @@ -157,7 +157,6 @@ int tdb_printfreelist(struct tdb_context *tdb); int tdb_validate_freelist(struct tdb_context *tdb, int *pnum_entries); int tdb_wipe_all(struct tdb_context *tdb); int tdb_freelist_size(struct tdb_context *tdb); -int tdb_validate(struct tdb_context *tdb); extern TDB_DATA tdb_null; -- cgit From edd4cb0373a668c422b3aa2a460c1004682f3d1d Mon Sep 17 00:00:00 2001 From: Alexander Bokovoy Date: Mon, 14 Jan 2008 21:32:59 +0300 Subject: Fix crash in winbind clients: instead of talloc-based pointer we passed address of a local variable. (This used to be commit a861ff20917eeca303e2d36de71cd8614e937d5f) --- source3/lib/winbind_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'source3/lib') diff --git a/source3/lib/winbind_util.c b/source3/lib/winbind_util.c index 3cf068a6e0..14356b09cf 100644 --- a/source3/lib/winbind_util.c +++ b/source3/lib/winbind_util.c @@ -201,7 +201,7 @@ bool winbind_lookup_rids(TALLOC_CTX *mem_ctx, *types = TALLOC_ARRAY(mem_ctx, enum lsa_SidType, num_rids); for(i=0; i