diff options
Diffstat (limited to 'source3/lib/tdb/common')
-rw-r--r-- | source3/lib/tdb/common/freelist.c | 85 | ||||
-rw-r--r-- | source3/lib/tdb/common/open.c | 3 | ||||
-rw-r--r-- | source3/lib/tdb/common/tdb.c | 83 | ||||
-rw-r--r-- | source3/lib/tdb/common/tdb_private.h | 2 | ||||
-rw-r--r-- | source3/lib/tdb/common/transaction.c | 30 |
5 files changed, 129 insertions, 74 deletions
diff --git a/source3/lib/tdb/common/freelist.c b/source3/lib/tdb/common/freelist.c index c086c151fa..2f2a4c379b 100644 --- a/source3/lib/tdb/common/freelist.c +++ b/source3/lib/tdb/common/freelist.c @@ -208,62 +208,61 @@ update: } + /* the core of tdb_allocate - called when we have decided which free list entry to use + + Note that we try to allocate by grabbing data from the end of an existing record, + not the beginning. This is so the left merge in a free is more likely to be + able to free up the record without fragmentation */ -static tdb_off_t tdb_allocate_ofs(struct tdb_context *tdb, tdb_len_t length, tdb_off_t rec_ptr, - struct list_struct *rec, tdb_off_t last_ptr) +static tdb_off_t tdb_allocate_ofs(struct tdb_context *tdb, + tdb_len_t length, tdb_off_t rec_ptr, + struct list_struct *rec, tdb_off_t last_ptr) { - struct list_struct newrec; - tdb_off_t newrec_ptr; +#define MIN_REC_SIZE (sizeof(struct list_struct) + sizeof(tdb_off_t) + 8) - memset(&newrec, '\0', sizeof(newrec)); + if (rec->rec_len < length + MIN_REC_SIZE) { + /* we have to grab the whole record */ - /* found it - now possibly split it up */ - if (rec->rec_len > length + MIN_REC_SIZE) { - /* Length of left piece */ - length = TDB_ALIGN(length, TDB_ALIGNMENT); - - /* Right piece to go on free list */ - newrec.rec_len = rec->rec_len - (sizeof(*rec) + length); - newrec_ptr = rec_ptr + sizeof(*rec) + length; - - /* And left record is shortened */ - rec->rec_len = length; - } else { - newrec_ptr = 0; + /* unlink it from the previous record */ + if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1) { + return 0; + } + + /* mark it not free */ + rec->magic = TDB_MAGIC; + if (tdb_rec_write(tdb, rec_ptr, rec) == -1) { + return 0; + } + return rec_ptr; + } + + /* we're going to just shorten the existing record */ + rec->rec_len -= (length + sizeof(*rec)); + if (tdb_rec_write(tdb, rec_ptr, rec) == -1) { + return 0; } - - /* Remove allocated record from the free list */ - if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1) { + if (update_tailer(tdb, rec_ptr, rec) == -1) { return 0; } - - /* Update header: do this before we drop alloc - lock, otherwise tdb_free() might try to - merge with us, thinking we're free. - (Thanks Jeremy Allison). */ + + /* and setup the new record */ + rec_ptr += sizeof(*rec) + rec->rec_len; + + memset(rec, '\0', sizeof(*rec)); + rec->rec_len = length; rec->magic = TDB_MAGIC; + if (tdb_rec_write(tdb, rec_ptr, rec) == -1) { return 0; } - - /* Did we create new block? */ - if (newrec_ptr) { - /* Update allocated record tailer (we - shortened it). */ - if (update_tailer(tdb, rec_ptr, rec) == -1) { - return 0; - } - - /* Free new record */ - if (tdb_free(tdb, newrec_ptr, &newrec) == -1) { - return 0; - } + + if (update_tailer(tdb, rec_ptr, rec) == -1) { + return 0; } - - /* all done - return the new record offset */ + return rec_ptr; } @@ -287,6 +286,7 @@ tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct list_st /* Extra bytes required for tailer */ length += sizeof(tdb_off_t); + length = TDB_ALIGN(length, TDB_ALIGNMENT); again: last_ptr = FREELIST_TOP; @@ -343,7 +343,8 @@ tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct list_st goto fail; } - newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, rec, bestfit.last_ptr); + newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, + rec, bestfit.last_ptr); tdb_unlock(tdb, -1, F_WRLCK); return newrec_ptr; } diff --git a/source3/lib/tdb/common/open.c b/source3/lib/tdb/common/open.c index 94140a4baa..b19e4cea29 100644 --- a/source3/lib/tdb/common/open.c +++ b/source3/lib/tdb/common/open.c @@ -179,7 +179,7 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, tdb->page_size = 0x2000; } - tdb->max_dead_records = (open_flags & TDB_VOLATILE) ? 5 : 0; + tdb->max_dead_records = (tdb_flags & TDB_VOLATILE) ? 5 : 0; if ((open_flags & O_ACCMODE) == O_WRONLY) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: can't open tdb %s write-only\n", @@ -227,6 +227,7 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, /* we need to zero database if we are the only one with it open */ if ((tdb_flags & TDB_CLEAR_IF_FIRST) && + (!tdb->read_only) && (locked = (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_WRLCK, F_SETLK, 0, 1) == 0))) { open_flags |= O_CREAT; if (ftruncate(tdb->fd, 0) == -1) { diff --git a/source3/lib/tdb/common/tdb.c b/source3/lib/tdb/common/tdb.c index ea5d9ccc60..767452c9b3 100644 --- a/source3/lib/tdb/common/tdb.c +++ b/source3/lib/tdb/common/tdb.c @@ -687,20 +687,66 @@ void tdb_enable_seqnum(struct tdb_context *tdb) /* + add a region of the file to the freelist. Length is the size of the region in bytes, + which includes the free list header that needs to be added + */ +static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t length) +{ + struct list_struct rec; + if (length <= sizeof(rec)) { + /* the region is not worth adding */ + return 0; + } + if (length + offset > tdb->map_size) { + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n")); + return -1; + } + memset(&rec,'\0',sizeof(rec)); + rec.rec_len = length - sizeof(rec); + if (tdb_free(tdb, offset, &rec) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: failed to add free record\n")); + return -1; + } + return 0; +} + +/* wipe the entire database, deleting all records. This can be done very fast by using a global lock. The entire data portion of the file becomes a single entry in the freelist. + + This code carefully steps around the recovery area, leaving it alone */ int tdb_wipe_all(struct tdb_context *tdb) { int i; tdb_off_t offset = 0; ssize_t data_len; + tdb_off_t recovery_head; + tdb_len_t recovery_size = 0; if (tdb_lockall(tdb) != 0) { return -1; } + /* see if the tdb has a recovery area, and remember its size + if so. We don't want to lose this as otherwise each + tdb_wipe_all() in a transaction will increase the size of + the tdb by the size of the recovery area */ + if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n")); + goto failed; + } + + if (recovery_head != 0) { + struct list_struct rec; + if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n")); + return -1; + } + recovery_size = rec.rec_len + sizeof(rec); + } + /* wipe the hashes */ for (i=0;i<tdb->header.hash_size;i++) { if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) { @@ -715,19 +761,30 @@ int tdb_wipe_all(struct tdb_context *tdb) goto failed; } - if (tdb_ofs_write(tdb, TDB_RECOVERY_HEAD, &offset) == -1) { - TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write recovery head\n")); - goto failed; - } - - /* add all the rest of the file to the freelist */ - data_len = (tdb->map_size - TDB_DATA_START(tdb->header.hash_size)) - sizeof(struct list_struct); - if (data_len > 0) { - struct list_struct rec; - memset(&rec,'\0',sizeof(rec)); - rec.rec_len = data_len; - if (tdb_free(tdb, TDB_DATA_START(tdb->header.hash_size), &rec) == -1) { - TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to add free record\n")); + /* add all the rest of the file to the freelist, possibly leaving a gap + for the recovery area */ + if (recovery_size == 0) { + /* the simple case - the whole file can be used as a freelist */ + data_len = (tdb->map_size - TDB_DATA_START(tdb->header.hash_size)); + if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) { + goto failed; + } + } else { + /* we need to add two freelist entries - one on either + side of the recovery area + + Note that we cannot shift the recovery area during + this operation. Only the transaction.c code may + move the recovery area or we risk subtle data + corruption + */ + data_len = (recovery_head - TDB_DATA_START(tdb->header.hash_size)); + if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) { + goto failed; + } + /* and the 2nd free list entry after the recovery area - if any */ + data_len = tdb->map_size - (recovery_head+recovery_size); + if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) { goto failed; } } diff --git a/source3/lib/tdb/common/tdb_private.h b/source3/lib/tdb/common/tdb_private.h index 63a6d04e72..ffac89ff0e 100644 --- a/source3/lib/tdb/common/tdb_private.h +++ b/source3/lib/tdb/common/tdb_private.h @@ -49,7 +49,6 @@ typedef uint32_t tdb_off_t; #define TDB_DEAD_MAGIC (0xFEE1DEAD) #define TDB_RECOVERY_MAGIC (0xf53bc0e7U) #define TDB_ALIGNMENT 4 -#define MIN_REC_SIZE (2*sizeof(struct list_struct) + TDB_ALIGNMENT) #define DEFAULT_HASH_SIZE 131 #define FREELIST_TOP (sizeof(struct tdb_header)) #define TDB_ALIGN(x,a) (((x) + (a)-1) & ~((a)-1)) @@ -178,6 +177,7 @@ struct tdb_context { int tdb_munmap(struct tdb_context *tdb); void tdb_mmap(struct tdb_context *tdb); int tdb_lock(struct tdb_context *tdb, int list, int ltype); +int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype); int tdb_unlock(struct tdb_context *tdb, int list, int ltype); int tdb_brlock(struct tdb_context *tdb, tdb_off_t offset, int rw_type, int lck_type, int probe, size_t len); int tdb_transaction_lock(struct tdb_context *tdb, int ltype); diff --git a/source3/lib/tdb/common/transaction.c b/source3/lib/tdb/common/transaction.c index 0ecfb9b7ff..4e2127be64 100644 --- a/source3/lib/tdb/common/transaction.c +++ b/source3/lib/tdb/common/transaction.c @@ -219,9 +219,12 @@ static int transaction_write(struct tdb_context *tdb, tdb_off_t off, uint8_t **new_blocks; /* expand the blocks array */ if (tdb->transaction->blocks == NULL) { - new_blocks = malloc((blk+1)*sizeof(uint8_t *)); + new_blocks = (uint8_t **)malloc( + (blk+1)*sizeof(uint8_t *)); } else { - new_blocks = realloc(tdb->transaction->blocks, (blk+1)*sizeof(uint8_t *)); + new_blocks = (uint8_t **)realloc( + tdb->transaction->blocks, + (blk+1)*sizeof(uint8_t *)); } if (new_blocks == NULL) { tdb->ecode = TDB_ERR_OOM; @@ -316,25 +319,18 @@ static int transaction_write_existing(struct tdb_context *tdb, tdb_off_t off, return 0; } - /* overwrite part of an existing block */ - if (buf == NULL) { - memset(tdb->transaction->blocks[blk] + off, 0, len); - } else { - memcpy(tdb->transaction->blocks[blk] + off, buf, len); - } - if (blk == tdb->transaction->num_blocks-1) { - if (len + off > tdb->transaction->last_block_size) { - tdb->transaction->last_block_size = len + off; + if (blk == tdb->transaction->num_blocks-1 && + off + len > tdb->transaction->last_block_size) { + if (off >= tdb->transaction->last_block_size) { + return 0; } + len = tdb->transaction->last_block_size - off; } - return 0; + /* overwrite part of an existing block */ + memcpy(tdb->transaction->blocks[blk] + off, buf, len); -fail: - TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: failed at off=%d len=%d\n", - (blk*tdb->transaction->block_size) + off, len)); - tdb->transaction->transaction_error = 1; - return -1; + return 0; } |