diff options
-rw-r--r-- | source4/lib/tdb/tdb.c | 145 |
1 files changed, 97 insertions, 48 deletions
diff --git a/source4/lib/tdb/tdb.c b/source4/lib/tdb/tdb.c index 47ba2cb52c..c8ac7babad 100644 --- a/source4/lib/tdb/tdb.c +++ b/source4/lib/tdb/tdb.c @@ -530,7 +530,7 @@ static tdb_off tdb_dump_record(TDB_CONTEXT *tdb, tdb_off offset) return 0; } - printf(" rec: offset=%u next=%d rec_len=%d key_len=%d data_len=%d full_hash=0x%x magic=0x%x\n", + printf(" rec: offset=0x%08x next=0x%08x rec_len=%d key_len=%d data_len=%d full_hash=0x%x magic=0x%x\n", offset, rec.next, rec.rec_len, rec.key_len, rec.data_len, rec.full_hash, rec.magic); tailer_ofs = offset + sizeof(rec) + rec.rec_len - sizeof(tdb_off); @@ -609,7 +609,8 @@ int tdb_printfreelist(TDB_CONTEXT *tdb) return -1; } - printf("entry offset=[0x%08x], rec.rec_len = [0x%08x (%d)]\n", rec.next, rec.rec_len, rec.rec_len ); + printf("entry offset=[0x%08x], rec.rec_len = [0x%08x (%d)] (end = 0x%08x)\n", + rec_ptr, rec.rec_len, rec.rec_len, rec_ptr + rec.rec_len); total_free += rec.rec_len; /* move to the next record */ @@ -850,6 +851,66 @@ static int tdb_expand(TDB_CONTEXT *tdb, tdb_off size) return -1; } + +/* + the core of tdb_allocate - called when we have decided which + free list entry to use + */ +static tdb_off tdb_allocate_ofs(TDB_CONTEXT *tdb, tdb_len length, tdb_off rec_ptr, + struct list_struct *rec, tdb_off last_ptr) +{ + struct list_struct newrec; + tdb_off newrec_ptr; + + memset(&newrec, '\0', sizeof(newrec)); + + /* found it - now possibly split it up */ + if (rec->rec_len > length + MIN_REC_SIZE) { + /* Length of left piece */ + length = TDB_ALIGN(length, TDB_ALIGNMENT); + + /* Right piece to go on free list */ + newrec.rec_len = rec->rec_len - (sizeof(*rec) + length); + newrec_ptr = rec_ptr + sizeof(*rec) + length; + + /* And left record is shortened */ + rec->rec_len = length; + } else { + newrec_ptr = 0; + } + + /* Remove allocated record from the free list */ + if (ofs_write(tdb, last_ptr, &rec->next) == -1) { + return 0; + } + + /* Update header: do this before we drop alloc + lock, otherwise tdb_free() might try to + merge with us, thinking we're free. + (Thanks Jeremy Allison). */ + rec->magic = TDB_MAGIC; + if (rec_write(tdb, rec_ptr, rec) == -1) { + return 0; + } + + /* Did we create new block? */ + if (newrec_ptr) { + /* Update allocated record tailer (we + shortened it). */ + if (update_tailer(tdb, rec_ptr, rec) == -1) { + return 0; + } + + /* Free new record */ + if (tdb_free(tdb, newrec_ptr, &newrec) == -1) { + return 0; + } + } + + /* all done - return the new record offset */ + return rec_ptr; +} + /* allocate some space from the free list. The offset returned points to a unconnected list_struct within the database with room for at least length bytes of total data @@ -860,9 +921,10 @@ static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length, struct list_struct *rec) { tdb_off rec_ptr, last_ptr, newrec_ptr; - struct list_struct newrec; - - memset(&newrec, '\0', sizeof(newrec)); + struct { + tdb_off rec_ptr, last_ptr; + tdb_len rec_len; + } bestfit; if (tdb_lock(tdb, -1, F_WRLCK) == -1) return 0; @@ -877,59 +939,46 @@ static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length, if (ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1) goto fail; - /* keep looking until we find a freelist record big enough */ + bestfit.rec_ptr = 0; + + /* + this is a best fit allocation strategy. Originally we used + a first fit strategy, but it suffered from massive fragmentation + issues when faced with a slowly increasing record size. + */ while (rec_ptr) { - if (rec_free_read(tdb, rec_ptr, rec) == -1) + if (rec_free_read(tdb, rec_ptr, rec) == -1) { goto fail; + } if (rec->rec_len >= length) { - /* found it - now possibly split it up */ - if (rec->rec_len > length + MIN_REC_SIZE) { - /* Length of left piece */ - length = TDB_ALIGN(length, TDB_ALIGNMENT); - - /* Right piece to go on free list */ - newrec.rec_len = rec->rec_len - - (sizeof(*rec) + length); - newrec_ptr = rec_ptr + sizeof(*rec) + length; - - /* And left record is shortened */ - rec->rec_len = length; - } else - newrec_ptr = 0; - - /* Remove allocated record from the free list */ - if (ofs_write(tdb, last_ptr, &rec->next) == -1) - goto fail; - - /* Update header: do this before we drop alloc - lock, otherwise tdb_free() might try to - merge with us, thinking we're free. - (Thanks Jeremy Allison). */ - rec->magic = TDB_MAGIC; - if (rec_write(tdb, rec_ptr, rec) == -1) - goto fail; - - /* Did we create new block? */ - if (newrec_ptr) { - /* Update allocated record tailer (we - shortened it). */ - if (update_tailer(tdb, rec_ptr, rec) == -1) - goto fail; - - /* Free new record */ - if (tdb_free(tdb, newrec_ptr, &newrec) == -1) - goto fail; + if (bestfit.rec_ptr == 0 || + rec->rec_len < bestfit.rec_len) { + bestfit.rec_len = rec->rec_len; + bestfit.rec_ptr = rec_ptr; + bestfit.last_ptr = last_ptr; + /* consider a fit to be good enough if we aren't wasting more than half the space */ + if (bestfit.rec_len < 2*length) { + break; + } } - - /* all done - return the new record offset */ - tdb_unlock(tdb, -1, F_WRLCK); - return rec_ptr; } + /* move to the next record */ last_ptr = rec_ptr; rec_ptr = rec->next; } + + if (bestfit.rec_ptr != 0) { + if (rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) { + goto fail; + } + + newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, rec, bestfit.last_ptr); + tdb_unlock(tdb, -1, F_WRLCK); + return newrec_ptr; + } + /* we didn't find enough space. See if we can expand the database and if we can then try again */ if (tdb_expand(tdb, length + sizeof(*rec)) == 0) |