summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Tridgell <tridge@samba.org>2004-05-03 04:24:30 +0000
committerGerald (Jerry) Carter <jerry@samba.org>2007-10-10 12:51:43 -0500
commitcd16ca987675d70526010072dfe8552a1cbf87d9 (patch)
tree34fcd44e1d030383bacafe9b57a3150a9095c314
parentd8bb3d81a6396c4051be00de0808c60839b6945e (diff)
downloadsamba-cd16ca987675d70526010072dfe8552a1cbf87d9.tar.gz
samba-cd16ca987675d70526010072dfe8552a1cbf87d9.tar.bz2
samba-cd16ca987675d70526010072dfe8552a1cbf87d9.zip
r452: move from first-fit to best-fit in tdb record allocation. For a
situation where we are continually increasing the size of a record (such as ldb index records) this reduces the resulting tdb size by a factor of over 100x, due to reductions in fragmentation. It appears to have no noticable effect on the speed in other cases. (This used to be commit b61d7f8bbc0c01d648ce204ffb6ea657e0b04c03)
-rw-r--r--source4/lib/tdb/tdb.c145
1 files changed, 97 insertions, 48 deletions
diff --git a/source4/lib/tdb/tdb.c b/source4/lib/tdb/tdb.c
index 47ba2cb52c..c8ac7babad 100644
--- a/source4/lib/tdb/tdb.c
+++ b/source4/lib/tdb/tdb.c
@@ -530,7 +530,7 @@ static tdb_off tdb_dump_record(TDB_CONTEXT *tdb, tdb_off offset)
return 0;
}
- printf(" rec: offset=%u next=%d rec_len=%d key_len=%d data_len=%d full_hash=0x%x magic=0x%x\n",
+ printf(" rec: offset=0x%08x next=0x%08x rec_len=%d key_len=%d data_len=%d full_hash=0x%x magic=0x%x\n",
offset, rec.next, rec.rec_len, rec.key_len, rec.data_len, rec.full_hash, rec.magic);
tailer_ofs = offset + sizeof(rec) + rec.rec_len - sizeof(tdb_off);
@@ -609,7 +609,8 @@ int tdb_printfreelist(TDB_CONTEXT *tdb)
return -1;
}
- printf("entry offset=[0x%08x], rec.rec_len = [0x%08x (%d)]\n", rec.next, rec.rec_len, rec.rec_len );
+ printf("entry offset=[0x%08x], rec.rec_len = [0x%08x (%d)] (end = 0x%08x)\n",
+ rec_ptr, rec.rec_len, rec.rec_len, rec_ptr + rec.rec_len);
total_free += rec.rec_len;
/* move to the next record */
@@ -850,6 +851,66 @@ static int tdb_expand(TDB_CONTEXT *tdb, tdb_off size)
return -1;
}
+
+/*
+ the core of tdb_allocate - called when we have decided which
+ free list entry to use
+ */
+static tdb_off tdb_allocate_ofs(TDB_CONTEXT *tdb, tdb_len length, tdb_off rec_ptr,
+ struct list_struct *rec, tdb_off last_ptr)
+{
+ struct list_struct newrec;
+ tdb_off newrec_ptr;
+
+ memset(&newrec, '\0', sizeof(newrec));
+
+ /* found it - now possibly split it up */
+ if (rec->rec_len > length + MIN_REC_SIZE) {
+ /* Length of left piece */
+ length = TDB_ALIGN(length, TDB_ALIGNMENT);
+
+ /* Right piece to go on free list */
+ newrec.rec_len = rec->rec_len - (sizeof(*rec) + length);
+ newrec_ptr = rec_ptr + sizeof(*rec) + length;
+
+ /* And left record is shortened */
+ rec->rec_len = length;
+ } else {
+ newrec_ptr = 0;
+ }
+
+ /* Remove allocated record from the free list */
+ if (ofs_write(tdb, last_ptr, &rec->next) == -1) {
+ return 0;
+ }
+
+ /* Update header: do this before we drop alloc
+ lock, otherwise tdb_free() might try to
+ merge with us, thinking we're free.
+ (Thanks Jeremy Allison). */
+ rec->magic = TDB_MAGIC;
+ if (rec_write(tdb, rec_ptr, rec) == -1) {
+ return 0;
+ }
+
+ /* Did we create new block? */
+ if (newrec_ptr) {
+ /* Update allocated record tailer (we
+ shortened it). */
+ if (update_tailer(tdb, rec_ptr, rec) == -1) {
+ return 0;
+ }
+
+ /* Free new record */
+ if (tdb_free(tdb, newrec_ptr, &newrec) == -1) {
+ return 0;
+ }
+ }
+
+ /* all done - return the new record offset */
+ return rec_ptr;
+}
+
/* allocate some space from the free list. The offset returned points
to a unconnected list_struct within the database with room for at
least length bytes of total data
@@ -860,9 +921,10 @@ static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length,
struct list_struct *rec)
{
tdb_off rec_ptr, last_ptr, newrec_ptr;
- struct list_struct newrec;
-
- memset(&newrec, '\0', sizeof(newrec));
+ struct {
+ tdb_off rec_ptr, last_ptr;
+ tdb_len rec_len;
+ } bestfit;
if (tdb_lock(tdb, -1, F_WRLCK) == -1)
return 0;
@@ -877,59 +939,46 @@ static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length,
if (ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1)
goto fail;
- /* keep looking until we find a freelist record big enough */
+ bestfit.rec_ptr = 0;
+
+ /*
+ this is a best fit allocation strategy. Originally we used
+ a first fit strategy, but it suffered from massive fragmentation
+ issues when faced with a slowly increasing record size.
+ */
while (rec_ptr) {
- if (rec_free_read(tdb, rec_ptr, rec) == -1)
+ if (rec_free_read(tdb, rec_ptr, rec) == -1) {
goto fail;
+ }
if (rec->rec_len >= length) {
- /* found it - now possibly split it up */
- if (rec->rec_len > length + MIN_REC_SIZE) {
- /* Length of left piece */
- length = TDB_ALIGN(length, TDB_ALIGNMENT);
-
- /* Right piece to go on free list */
- newrec.rec_len = rec->rec_len
- - (sizeof(*rec) + length);
- newrec_ptr = rec_ptr + sizeof(*rec) + length;
-
- /* And left record is shortened */
- rec->rec_len = length;
- } else
- newrec_ptr = 0;
-
- /* Remove allocated record from the free list */
- if (ofs_write(tdb, last_ptr, &rec->next) == -1)
- goto fail;
-
- /* Update header: do this before we drop alloc
- lock, otherwise tdb_free() might try to
- merge with us, thinking we're free.
- (Thanks Jeremy Allison). */
- rec->magic = TDB_MAGIC;
- if (rec_write(tdb, rec_ptr, rec) == -1)
- goto fail;
-
- /* Did we create new block? */
- if (newrec_ptr) {
- /* Update allocated record tailer (we
- shortened it). */
- if (update_tailer(tdb, rec_ptr, rec) == -1)
- goto fail;
-
- /* Free new record */
- if (tdb_free(tdb, newrec_ptr, &newrec) == -1)
- goto fail;
+ if (bestfit.rec_ptr == 0 ||
+ rec->rec_len < bestfit.rec_len) {
+ bestfit.rec_len = rec->rec_len;
+ bestfit.rec_ptr = rec_ptr;
+ bestfit.last_ptr = last_ptr;
+ /* consider a fit to be good enough if we aren't wasting more than half the space */
+ if (bestfit.rec_len < 2*length) {
+ break;
+ }
}
-
- /* all done - return the new record offset */
- tdb_unlock(tdb, -1, F_WRLCK);
- return rec_ptr;
}
+
/* move to the next record */
last_ptr = rec_ptr;
rec_ptr = rec->next;
}
+
+ if (bestfit.rec_ptr != 0) {
+ if (rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) {
+ goto fail;
+ }
+
+ newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, rec, bestfit.last_ptr);
+ tdb_unlock(tdb, -1, F_WRLCK);
+ return newrec_ptr;
+ }
+
/* we didn't find enough space. See if we can expand the
database and if we can then try again */
if (tdb_expand(tdb, length + sizeof(*rec)) == 0)