From a3d61e0485c70ec5215c34b6caf40e2e6c6c5338 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze@samba.org>
Date: Mon, 24 May 2004 16:27:23 +0000
Subject: r848: convert lib/tdb into the same layout as lib/ldb

metze
(This used to be commit bacab322ce89979f0ad0811cd15b73d81eceb69d)
---
 source4/lib/tdb/common/tdb.c | 2139 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 2139 insertions(+)
 create mode 100644 source4/lib/tdb/common/tdb.c

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
new file mode 100644
index 0000000000..c8ac7babad
--- /dev/null
+++ b/source4/lib/tdb/common/tdb.c
@@ -0,0 +1,2139 @@
+ /* 
+   Unix SMB/CIFS implementation.
+
+   trivial database library
+
+   Copyright (C) Andrew Tridgell              1999-2004
+   Copyright (C) Paul `Rusty' Russell		   2000
+   Copyright (C) Jeremy Allison			   2000-2003
+   
+     ** NOTE! The following LGPL license applies to the tdb
+     ** library. This does NOT imply that all of Samba is released
+     ** under the LGPL
+   
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+
+/* NOTE: If you use tdbs under valgrind, and in particular if you run
+ * tdbtorture, you may get spurious "uninitialized value" warnings.  I
+ * think this is because valgrind doesn't understand that the mmap'd
+ * area may be written to by other processes.  Memory can, from the
+ * point of view of the grinded process, spontaneously become
+ * initialized.
+ *
+ * I can think of a few solutions.  [mbp 20030311]
+ *
+ * 1 - Write suppressions for Valgrind so that it doesn't complain
+ * about this.  Probably the most reasonable but people need to
+ * remember to use them.
+ *
+ * 2 - Use IO not mmap when running under valgrind.  Not so nice.
+ *
+ * 3 - Use the special valgrind macros to mark memory as valid at the
+ * right time.  Probably too hard -- the process just doesn't know.
+ */ 
+
+#ifdef STANDALONE
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <signal.h>
+#include "tdb.h"
+#include "spinlock.h"
+#else
+#include "includes.h"
+#endif
+
+#define TDB_MAGIC_FOOD "TDB file\n" /* identifying string copied into the header's magic_food field */
+#define TDB_VERSION (0x26011967 + 6) /* value stored in the header's version field */
+#define TDB_MAGIC (0x26011999U) /* magic of an allocated (in use) record */
+#define TDB_FREE_MAGIC (~TDB_MAGIC) /* magic of a record sitting on the free list */
+#define TDB_DEAD_MAGIC (0xFEE1DEAD) /* magic tested by TDB_DEAD() */
+#define TDB_ALIGNMENT 4 /* allocation lengths are rounded up to a multiple of this when a record is split */
+#define MIN_REC_SIZE (2*sizeof(struct list_struct) + TDB_ALIGNMENT) /* surplus a free record must exceed before it is split */
+#define DEFAULT_HASH_SIZE 131
+#define TDB_PAGE_SIZE 0x2000 /* tdb_expand() rounds the database size up to a multiple of this */
+#define FREELIST_TOP (sizeof(struct tdb_header)) /* offset of the free list head, directly after the header */
+#define TDB_ALIGN(x,a) (((x) + (a)-1) & ~((a)-1)) /* round x up to a multiple of a; only correct when a is a power of two */
+#define TDB_BYTEREV(x) (((((x)&0xff)<<24)|((x)&0xFF00)<<8)|(((x)>>8)&0xFF00)|((x)>>24)) /* swap the byte order of a 32 bit value; x is evaluated several times */
+#define TDB_DEAD(r) ((r)->magic == TDB_DEAD_MAGIC) /* true if record r carries the dead magic */
+#define TDB_BAD_MAGIC(r) ((r)->magic != TDB_MAGIC && !TDB_DEAD(r)) /* true if r is neither an allocated nor a dead record */
+#define TDB_HASH_TOP(hash) (FREELIST_TOP + (BUCKET(hash)+1)*sizeof(tdb_off)) /* offset of the chain head for the bucket that hash falls into; needs a local "tdb" (via BUCKET) */
+#define TDB_DATA_START(hash_size) (TDB_HASH_TOP(hash_size-1) + TDB_SPINLOCK_SIZE(hash_size)) /* offset of the last chain head plus the spinlock area size; lower bound used by tdb_free() when looking left */
+
+
+/* NB assumes there is a local variable called "tdb" that is the
+ * current context, also takes doubly-parenthesized print-style
+ * argument. */
+#define TDB_LOG(x) (tdb->log_fn?((tdb->log_fn x),0) : 0)
+
+/* lock offsets */
+#define GLOBAL_LOCK 0
+#define ACTIVE_LOCK 4
+
+#ifndef MAP_FILE
+#define MAP_FILE 0
+#endif
+
+#ifndef MAP_FAILED
+#define MAP_FAILED ((void *)-1)
+#endif
+
+/* free memory if the pointer is valid and zero the pointer */
+#ifndef SAFE_FREE
+#define SAFE_FREE(x) do { if ((x) != NULL) {free((x)); (x)=NULL;} } while(0)
+#endif
+
+#define BUCKET(hash) ((hash) % tdb->header.hash_size)
+TDB_DATA tdb_null;
+
+/* all contexts, to ensure no double-opens (fcntl locks don't nest!) */
+static TDB_CONTEXT *tdbs = NULL;
+
+/* Drop the memory mapping of the database file, if there is one.
+   Internal (TDB_INTERNAL) databases are left untouched.
+   Returns 0 on success, or munmap()'s non-zero result on failure, in
+   which case map_ptr is left as it was. */
+static int tdb_munmap(TDB_CONTEXT *tdb)
+{
+	if (!(tdb->flags & TDB_INTERNAL)) {
+#ifdef HAVE_MMAP
+		if (tdb->map_ptr != NULL) {
+			int rc = munmap(tdb->map_ptr, tdb->map_size);
+			if (rc != 0) {
+				return rc;
+			}
+		}
+#endif
+		tdb->map_ptr = NULL;
+	}
+	return 0;
+}
+
+/* (Re)create the memory mapping of the database file.  On return
+   map_ptr is either a mapping of map_size bytes or NULL; it is NULL
+   when mmap support is compiled out, disabled with TDB_NOMMAP, or the
+   mmap call fails.  Internal databases are left untouched. */
+static void tdb_mmap(TDB_CONTEXT *tdb)
+{
+	if (tdb->flags & TDB_INTERNAL)
+		return;
+
+#ifdef HAVE_MMAP
+	if (tdb->flags & TDB_NOMMAP) {
+		tdb->map_ptr = NULL;
+		return;
+	}
+
+	{
+		/* read-only databases must not be mapped writable */
+		int prot = PROT_READ;
+
+		if (!tdb->read_only)
+			prot |= PROT_WRITE;
+		tdb->map_ptr = mmap(NULL, tdb->map_size, prot,
+				    MAP_SHARED|MAP_FILE, tdb->fd, 0);
+	}
+
+	/* mmap signals failure with MAP_FAILED, never with NULL */
+	if (tdb->map_ptr == MAP_FAILED) {
+		tdb->map_ptr = NULL;
+		TDB_LOG((tdb, 2, "tdb_mmap failed for size %d (%s)\n", 
+			 tdb->map_size, strerror(errno)));
+	}
+#else
+	tdb->map_ptr = NULL;
+#endif
+}
+
+/* Endian conversion: we only ever deal with 4 byte quantities.
+   Byte-swaps size/4 consecutive 32 bit words in place (any trailing
+   bytes beyond a multiple of 4 are left alone) and returns buf. */
+static void *convert(void *buf, u32 size)
+{
+	u32 *word = buf;
+	u32 remaining;
+
+	for (remaining = size / 4; remaining > 0; remaining--, word++)
+		*word = TDB_BYTEREV(*word);
+	return buf;
+}
+#define DOCONV() (tdb->flags & TDB_CONVERT)
+#define CONVERT(x) (DOCONV() ? convert(&x, sizeof(x)) : &x)
+
+/* the body of the database is made of one list_struct for the free space
+   plus a separate data list for each hash value.
+
+   This is the header that precedes every record in the file.  It is
+   read and written as one unit by rec_read()/rec_write(), which byte
+   swap the whole header with convert() when TDB_CONVERT is set.  The
+   magic field tells allocated, free and dead records apart (TDB_MAGIC,
+   TDB_FREE_MAGIC, TDB_DEAD_MAGIC). */
+struct list_struct {
+	tdb_off next; /* offset of the next record in the list */
+	tdb_len rec_len; /* total byte length of record */
+	tdb_len key_len; /* byte length of key */
+	tdb_len data_len; /* byte length of data */
+	u32 full_hash; /* the full 32 bit hash of the key */
+	u32 magic;   /* try to catch errors */
+	/* the following union is implied:
+		union {
+			char record[rec_len];
+			struct {
+				char key[key_len];
+				char data[data_len];
+			}
+			u32 totalsize; (tailer)
+		}
+	*/
+};
+
+/***************************************************************
+ Allow a caller to set an "alarm" flag that tdb can check to abort
+ a blocking lock on SIGALRM.
+
+ palarm points at a flag owned by the caller.  tdb_brlock() reads it
+ when fcntl() is interrupted with EINTR and, if it is non-zero, gives
+ up with TDB_ERR_LOCK_TIMEOUT instead of retrying.  Only the pointer
+ is stored, so the flag must remain valid while it is registered.
+ The setting is file-wide, not per TDB_CONTEXT.
+***************************************************************/
+
+static sig_atomic_t *palarm_fired;
+
+void tdb_set_lock_alarm(sig_atomic_t *palarm)
+{
+	palarm_fired = palarm;
+}
+
+/* a byte range locking function - return 0 on success
+   this function locks/unlocks 1 byte at the specified offset.
+
+   rw_type is the fcntl lock type (F_RDLCK, F_WRLCK or F_UNLCK) and
+   lck_type the fcntl command (F_SETLK or F_SETLKW).  A non-zero probe
+   suppresses the first failure log and the early ecode assignment.
+   A write lock on a read-only database fails with errno = EACCES
+   without touching the file.
+
+   On an fcntl failure -1 is returned, tdb->ecode is set to
+   TDB_ERR_LOCK (or TDB_ERR_LOCK_TIMEOUT when the registered alarm flag
+   fired) and errno holds the value left by fcntl() so that errors are
+   passed back properly through tdb_open(). */
+static int tdb_brlock(TDB_CONTEXT *tdb, tdb_off offset, 
+		      int rw_type, int lck_type, int probe)
+{
+	struct flock fl;
+	int ret;
+	int saved_errno;
+	int timed_out;
+
+	if (tdb->flags & TDB_NOLOCK)
+		return 0;
+	if ((rw_type == F_WRLCK) && (tdb->read_only)) {
+		errno = EACCES;
+		return -1;
+	}
+
+	fl.l_type = rw_type;
+	fl.l_whence = SEEK_SET;
+	fl.l_start = offset;
+	fl.l_len = 1;
+	fl.l_pid = 0;
+
+	/* retry when interrupted by a signal, unless the caller's alarm
+	   flag says the interruption was the lock timeout */
+	do {
+		ret = fcntl(tdb->fd,lck_type,&fl);
+		if (ret == -1 && errno == EINTR && palarm_fired && *palarm_fired)
+			break;
+	} while (ret == -1 && errno == EINTR);
+
+	if (ret != -1)
+		return 0;
+
+	/* The log function is arbitrary caller code and may change errno.
+	   Capture the failure reason once, take every decision from the
+	   saved copy and restore it before returning. */
+	saved_errno = errno;
+	timed_out = (saved_errno == EINTR && palarm_fired && *palarm_fired);
+
+	if (!probe && lck_type != F_SETLK) {
+		/* Ensure error code is set for log fun to examine. */
+		tdb->ecode = timed_out ? TDB_ERR_LOCK_TIMEOUT : TDB_ERR_LOCK;
+		TDB_LOG((tdb, 5,"tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d\n", 
+			 tdb->fd, offset, rw_type, lck_type));
+	}
+
+	/* Was it an alarm timeout ? */
+	if (timed_out) {
+		TDB_LOG((tdb, 5, "tdb_brlock timed out (fd=%d) at offset %d rw_type=%d lck_type=%d\n", 
+			 tdb->fd, offset, rw_type, lck_type));
+		errno = saved_errno;
+		return TDB_ERRCODE(TDB_ERR_LOCK_TIMEOUT, -1);
+	}
+
+	/* Otherwise - generic lock error. errno set by fcntl.
+	 * EAGAIN is an expected return from non-blocking
+	 * locks. */
+	if (saved_errno != EAGAIN) {
+		TDB_LOG((tdb, 5, "tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d: %s\n", 
+			 tdb->fd, offset, rw_type, lck_type, 
+			 strerror(saved_errno)));
+	}
+	errno = saved_errno;
+	return TDB_ERRCODE(TDB_ERR_LOCK, -1);
+}
+
+/* lock a list in the database. list -1 is the alloc list
+
+   ltype is the fcntl lock type (F_RDLCK or F_WRLCK).  Locks nest per
+   list: only the first lock on a list takes the underlying spinlock
+   or fcntl lock, later ones just bump a counter that tdb_unlock()
+   decrements again.
+   Returns 0 on success, -1 on an invalid list or a lock failure. */
+static int tdb_lock(TDB_CONTEXT *tdb, int list, int ltype)
+{
+	if (list < -1 || list >= (int)tdb->header.hash_size) {
+		TDB_LOG((tdb, 0,"tdb_lock: invalid list %d for ltype=%d\n", 
+			   list, ltype));
+		return -1;
+	}
+	if (tdb->flags & TDB_NOLOCK)
+		return 0;
+
+	/* Since fcntl locks don't nest, we do a lock for the first one,
+	   and simply bump the count for future ones */
+	if (tdb->locked[list+1].count == 0) {
+		if (!tdb->read_only && tdb->header.rwlocks) {
+			if (tdb_spinlock(tdb, list, ltype)) {
+				/* format now has one conversion per argument;
+				   it used to print the list number as ltype */
+				TDB_LOG((tdb, 0, "tdb_lock spinlock failed on list %d ltype=%d\n", 
+					   list, ltype));
+				return -1;
+			}
+		} else if (tdb_brlock(tdb,FREELIST_TOP+4*list,ltype,F_SETLKW, 0)) {
+			TDB_LOG((tdb, 0,"tdb_lock failed on list %d ltype=%d (%s)\n", 
+					   list, ltype, strerror(errno)));
+			return -1;
+		}
+		tdb->locked[list+1].ltype = ltype;
+	}
+	tdb->locked[list+1].count++;
+	return 0;
+}
+
+/* unlock a list in the database (list -1 is the alloc list).
+   Returns 0 on success and non-zero if the list is invalid, is not
+   currently locked, or the underlying unlock fails; it returns int
+   rather than void because it may be interesting to know there has
+   been an error  --simo
+   Only the outermost of a set of nested locks releases the real lock;
+   the nesting count is decremented even if that release fails. */
+static int tdb_unlock(TDB_CONTEXT *tdb, int list, int ltype)
+{
+	int rc;
+
+	if (tdb->flags & TDB_NOLOCK)
+		return 0;
+
+	/* Sanity checks */
+	if (list < -1 || list >= (int)tdb->header.hash_size) {
+		TDB_LOG((tdb, 0, "tdb_unlock: list %d invalid (%d)\n", list, tdb->header.hash_size));
+		return -1;
+	}
+
+	if (tdb->locked[list+1].count==0) {
+		TDB_LOG((tdb, 0, "tdb_unlock: count is 0\n"));
+		return -1;
+	}
+
+	if (tdb->locked[list+1].count != 1) {
+		/* still nested: nothing to release underneath */
+		rc = 0;
+	} else if (!tdb->read_only && tdb->header.rwlocks) {
+		rc = tdb_spinunlock(tdb, list, ltype);
+	} else {
+		rc = tdb_brlock(tdb, FREELIST_TOP+4*list, F_UNLCK, F_SETLKW, 0);
+	}
+	tdb->locked[list+1].count--;
+
+	if (rc)
+		TDB_LOG((tdb, 0,"tdb_unlock: An error occurred unlocking!\n"));
+	return rc;
+}
+
+/* This is based on the hash algorithm from gdbm.
+   Returns the full 32 bit hash of key; the result is stored in
+   records (full_hash), so the arithmetic must not change. */
+static u32 tdb_hash(TDB_DATA *key)
+{
+	/* Seed the accumulator from the key size. */
+	u32 acc = 0x238F13AF * key->dsize;
+	u32 pos = 0;
+
+	/* Mix in every key byte, shifted by a position dependent amount. */
+	while (pos < key->dsize) {
+		acc = (acc + (key->dptr[pos] << (pos*5 % 24)));
+		pos++;
+	}
+
+	return (1103515243 * acc + 12345);
+}
+
+/* check for an out of bounds access - if it is out of bounds then
+   see if the database has been expanded by someone else and remap
+   if necessary
+   note that "len" is the minimum length needed for the db
+
+   Returns 0 if len bytes are available (after picking up a larger file
+   size if needed) and -1 with ecode TDB_ERR_IO otherwise.  A non-zero
+   probe suppresses the log messages.
+*/
+static int tdb_oob(TDB_CONTEXT *tdb, tdb_off len, int probe)
+{
+	struct stat st;
+
+	if (len <= tdb->map_size)
+		return 0;
+
+	if (tdb->flags & TDB_INTERNAL) {
+		/* an internal database cannot have been grown by anyone else */
+		if (!probe) {
+			/* Ensure ecode is set for log fn. */
+			tdb->ecode = TDB_ERR_IO;
+			TDB_LOG((tdb, 0,"tdb_oob len %d beyond internal malloc size %d\n",
+				 (int)len, (int)tdb->map_size));
+		}
+		goto io_error;
+	}
+
+	if (fstat(tdb->fd, &st) == -1)
+		goto io_error;
+
+	if (st.st_size < (size_t)len) {
+		if (!probe) {
+			/* Ensure ecode is set for log fn. */
+			tdb->ecode = TDB_ERR_IO;
+			TDB_LOG((tdb, 0,"tdb_oob len %d beyond eof at %d\n",
+				 (int)len, (int)st.st_size));
+		}
+		goto io_error;
+	}
+
+	/* The file is big enough: unmap, adopt the new size, remap */
+	if (tdb_munmap(tdb) == -1)
+		goto io_error;
+	tdb->map_size = st.st_size;
+	tdb_mmap(tdb);
+	return 0;
+
+ io_error:
+	return TDB_ERRCODE(TDB_ERR_IO, -1);
+}
+
+/* write a lump of data at a specified offset
+   Copies len bytes from buf to file offset off, through the mapping
+   when there is one and with (p)write otherwise.
+   Returns 0 on success, -1 on failure (ecode TDB_ERR_IO). */
+static int tdb_write(TDB_CONTEXT *tdb, tdb_off off, void *buf, tdb_len len)
+{
+	if (tdb_oob(tdb, off + len, 0) != 0)
+		return -1;
+
+	/* mapped: a plain copy into the mapping is all that is needed */
+	if (tdb->map_ptr) {
+		memcpy(off + (char *)tdb->map_ptr, buf, len);
+		return 0;
+	}
+
+#ifdef HAVE_PWRITE
+	if (pwrite(tdb->fd, buf, len, off) == (ssize_t)len)
+		return 0;
+#else
+	if (lseek(tdb->fd, off, SEEK_SET) == off
+	    && write(tdb->fd, buf, len) == (ssize_t)len)
+		return 0;
+#endif
+
+	/* Ensure ecode is set for log fn. */
+	tdb->ecode = TDB_ERR_IO;
+	TDB_LOG((tdb, 0,"tdb_write failed at %d len=%d (%s)\n",
+		   off, len, strerror(errno)));
+	return TDB_ERRCODE(TDB_ERR_IO, -1);
+}
+
+/* read a lump of data at a specified offset, maybe convert
+   Copies len bytes from file offset off into buf, through the mapping
+   when there is one and with (p)read otherwise.  If cv is non-zero the
+   buffer is byte-swapped in place with convert() afterwards.
+   Returns 0 on success, -1 on failure (ecode TDB_ERR_IO). */
+static int tdb_read(TDB_CONTEXT *tdb,tdb_off off,void *buf,tdb_len len,int cv)
+{
+	if (tdb_oob(tdb, off + len, 0) != 0)
+		return -1;
+
+	if (tdb->map_ptr) {
+		memcpy(buf, off + (char *)tdb->map_ptr, len);
+	} else {
+#ifdef HAVE_PREAD
+		int ok = pread(tdb->fd, buf, len, off) == (ssize_t)len;
+#else
+		int ok = lseek(tdb->fd, off, SEEK_SET) == off
+			 && read(tdb->fd, buf, len) == (ssize_t)len;
+#endif
+		if (!ok) {
+			/* Ensure ecode is set for log fn. */
+			tdb->ecode = TDB_ERR_IO;
+			TDB_LOG((tdb, 0,"tdb_read failed at %d len=%d (%s)\n",
+				   off, len, strerror(errno)));
+			return TDB_ERRCODE(TDB_ERR_IO, -1);
+		}
+	}
+
+	if (cv)
+		convert(buf, len);
+	return 0;
+}
+
+/* read a lump of data, allocating the space for it
+   Returns a malloc'd buffer holding len bytes read from offset, which
+   the caller must free, or NULL on failure (ecode TDB_ERR_OOM if the
+   allocation failed, otherwise whatever tdb_read() set).  No endian
+   conversion is done. */
+static char *tdb_alloc_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_len len)
+{
+	char *buf;
+
+	/* malloc(0) is allowed to return NULL, which would make a zero
+	   length read (e.g. an empty key) look like an out of memory
+	   error, so always ask for at least one byte */
+	if (!(buf = malloc(len ? len : 1))) {
+		/* Ensure ecode is set for log fn. */
+		tdb->ecode = TDB_ERR_OOM;
+		TDB_LOG((tdb, 0,"tdb_alloc_read malloc failed len=%d (%s)\n",
+			   len, strerror(errno)));
+		return TDB_ERRCODE(TDB_ERR_OOM, buf);
+	}
+	if (tdb_read(tdb, offset, buf, len, 0) == -1) {
+		SAFE_FREE(buf);
+		return NULL;
+	}
+	return buf;
+}
+
+/* read/write a tdb_off */
+/* Read the offset stored at file position "offset" into *d, byte
+   swapping it when the database needs conversion.
+   Returns 0 on success, -1 on failure. */
+static int ofs_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d)
+{
+	int swap = DOCONV();
+
+	return tdb_read(tdb, offset, (char*)d, sizeof(tdb_off), swap);
+}
+/* Write *d at file position "offset".  The value is copied first so
+   that the caller's variable is never byte swapped.
+   Returns 0 on success, -1 on failure. */
+static int ofs_write(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d)
+{
+	tdb_off disk_value = *d;
+
+	if (DOCONV())
+		convert(&disk_value, sizeof(disk_value));
+	return tdb_write(tdb, offset, &disk_value, sizeof(tdb_off));
+}
+
+/* read/write a record */
+/* Read the record header at offset into *rec (converted to host byte
+   order if needed), reject it with TDB_ERR_CORRUPT if its magic is
+   neither the allocated nor the dead magic, and check that the record
+   it links to lies inside the database.
+   Returns 0 on success, -1 on failure. */
+static int rec_read(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
+{
+	int rc = tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV());
+
+	if (rc == -1)
+		return -1;
+
+	if (!TDB_BAD_MAGIC(rec))
+		return tdb_oob(tdb, rec->next+sizeof(*rec), 0);
+
+	/* Ensure ecode is set for log fn. */
+	tdb->ecode = TDB_ERR_CORRUPT;
+	TDB_LOG((tdb, 0,"rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
+	return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
+}
+/* Write the record header *rec at offset.  A private copy is byte
+   swapped when the database needs conversion, so *rec itself always
+   stays in host byte order.
+   Returns 0 on success, -1 on failure. */
+static int rec_write(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
+{
+	struct list_struct disk_rec = *rec;
+
+	if (DOCONV())
+		convert(&disk_rec, sizeof(disk_rec));
+	return tdb_write(tdb, offset, &disk_rec, sizeof(disk_rec));
+}
+
+/* read a freelist record and check for simple errors
+   Reads the header at off into *rec (host byte order).  A record that
+   still carries the allocated magic is repaired in place by rewriting
+   it with the free magic; any other non-free magic is reported as
+   TDB_ERR_CORRUPT.  Also checks that the next record lies inside the
+   database.
+   Returns 0 on success, -1 on failure. */
+static int rec_free_read(TDB_CONTEXT *tdb, tdb_off off, struct list_struct *rec)
+{
+	if (tdb_read(tdb, off, rec, sizeof(*rec),DOCONV()) == -1)
+		return -1;
+
+	if (rec->magic == TDB_MAGIC) {
+		/* this happens when an app is shut down while deleting a record - we should
+		   not completely fail when this happens */
+		TDB_LOG((tdb, 0,"rec_free_read non-free magic 0x%x at offset=%d - fixing\n", 
+			 rec->magic, off));
+		rec->magic = TDB_FREE_MAGIC;
+		/* *rec is in host byte order here, so it has to go through
+		   rec_write() to be converted back on TDB_CONVERT databases;
+		   a raw tdb_write() would store a byte-swapped header */
+		if (rec_write(tdb, off, rec) == -1)
+			return -1;
+	}
+
+	if (rec->magic != TDB_FREE_MAGIC) {
+		/* Ensure ecode is set for log fn. */
+		tdb->ecode = TDB_ERR_CORRUPT;
+		TDB_LOG((tdb, 0,"rec_free_read bad magic 0x%x at offset=%d\n", 
+			   rec->magic, off));
+		return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
+	}
+	if (tdb_oob(tdb, rec->next+sizeof(*rec), 0) != 0)
+		return -1;
+	return 0;
+}
+
+/* update a record tailer (must hold allocation lock)
+   The tailer is the last tdb_off sized slot of the record and holds
+   the record's total size: header plus rec_len.
+   Returns 0 on success, -1 on failure. */
+static int update_tailer(TDB_CONTEXT *tdb, tdb_off offset,
+			 const struct list_struct *rec)
+{
+	/* total size of the record, which is also the value stored */
+	tdb_off total = sizeof(*rec) + rec->rec_len;
+	tdb_off tailer_pos = offset + total - sizeof(tdb_off);
+
+	return ofs_write(tdb, tailer_pos, &total);
+}
+
+/* Print the record header found at offset to stdout and verify that
+   the record's tailer matches its total size.
+   Returns the record's "next" offset, or 0 if the header could not be
+   read. */
+static tdb_off tdb_dump_record(TDB_CONTEXT *tdb, tdb_off offset)
+{
+	struct list_struct rec;
+	tdb_off tailer_ofs, tailer;
+	size_t total;
+
+	if (tdb_read(tdb, offset, (char *)&rec, sizeof(rec), DOCONV()) == -1) {
+		printf("ERROR: failed to read record at %u\n", offset);
+		return 0;
+	}
+
+	printf(" rec: offset=0x%08x next=0x%08x rec_len=%d key_len=%d data_len=%d full_hash=0x%x magic=0x%x\n",
+	       offset, rec.next, rec.rec_len, rec.key_len, rec.data_len, rec.full_hash, rec.magic);
+
+	/* header plus payload; the tailer occupies the last slot of it */
+	total = rec.rec_len + sizeof(rec);
+	tailer_ofs = offset + total - sizeof(tdb_off);
+
+	if (ofs_read(tdb, tailer_ofs, &tailer) == -1) {
+		printf("ERROR: failed to read tailer at %u\n", tailer_ofs);
+	} else if (tailer != total) {
+		printf("ERROR: tailer does not match record! tailer=%u totalsize=%u\n",
+				(unsigned)tailer, (unsigned)total);
+	}
+	return rec.next;
+}
+
+/* Print every record on one chain to stdout.  i is a hash bucket
+   number, or -1 for the free list.  The chain is write-locked while it
+   is walked.
+   Returns 0 on success and non-zero if the chain could not be locked,
+   its head could not be read, or the unlock failed. */
+static int tdb_dump_chain(TDB_CONTEXT *tdb, int i)
+{
+	tdb_off rec_ptr, top;
+
+	/* TDB_HASH_TOP() reduces its argument modulo the hash size, which
+	   does not yield the free list slot for -1 if hash_size is an
+	   unsigned type (NOTE(review): its declaration is not visible
+	   here), so pick the free list head explicitly */
+	if (i == -1)
+		top = FREELIST_TOP;
+	else
+		top = TDB_HASH_TOP(i);
+
+	if (tdb_lock(tdb, i, F_WRLCK) != 0)
+		return -1;
+
+	if (ofs_read(tdb, top, &rec_ptr) == -1) {
+		/* report the read failure rather than the unlock result */
+		tdb_unlock(tdb, i, F_WRLCK);
+		return -1;
+	}
+
+	if (rec_ptr)
+		printf("hash=%d\n", i);
+
+	/* tdb_dump_record() returns 0 at the end of the chain or when a
+	   record cannot be read, which ends the walk */
+	while (rec_ptr) {
+		rec_ptr = tdb_dump_record(tdb, rec_ptr);
+	}
+
+	return tdb_unlock(tdb, i, F_WRLCK);
+}
+
+/* Print every hash chain, followed by the free list, to stdout.
+   Failures on individual chains are ignored. */
+void tdb_dump_all(TDB_CONTEXT *tdb)
+{
+	int chain = 0;
+
+	while (chain < tdb->header.hash_size) {
+		tdb_dump_chain(tdb, chain);
+		chain++;
+	}
+
+	printf("freelist:\n");
+	tdb_dump_chain(tdb, -1);
+}
+
+/* Print every entry on the free list, and the sum of their rec_len
+   values, to stdout.  The allocation lock is held while the list is
+   walked.
+   Returns 0 on success and non-zero on failure: the lock could not be
+   taken, a read failed, an entry does not carry the free magic, or the
+   final unlock failed. */
+int tdb_printfreelist(TDB_CONTEXT *tdb)
+{
+	int ret;
+	long total_free = 0;
+	tdb_off offset, rec_ptr;
+	struct list_struct rec;
+
+	if ((ret = tdb_lock(tdb, -1, F_WRLCK)) != 0)
+		return ret;
+
+	offset = FREELIST_TOP;
+
+	/* read in the freelist top; a failure here is an error like any
+	   other and must not be reported as success */
+	if (ofs_read(tdb, offset, &rec_ptr) == -1)
+		goto fail;
+
+	printf("freelist top=[0x%08x]\n", rec_ptr );
+	while (rec_ptr) {
+		if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec), DOCONV()) == -1)
+			goto fail;
+
+		if (rec.magic != TDB_FREE_MAGIC) {
+			printf("bad magic 0x%08x in free list\n", rec.magic);
+			goto fail;
+		}
+
+		printf("entry offset=[0x%08x], rec.rec_len = [0x%08x (%d)] (end = 0x%08x)\n", 
+		       rec_ptr, rec.rec_len, rec.rec_len, rec_ptr + rec.rec_len);
+		total_free += rec.rec_len;
+
+		/* move to the next record */
+		rec_ptr = rec.next;
+	}
+	printf("total rec_len = [0x%08x (%d)]\n", (int)total_free, 
+	       (int)total_free);
+
+	return tdb_unlock(tdb, -1, F_WRLCK);
+
+ fail:
+	tdb_unlock(tdb, -1, F_WRLCK);
+	return -1;
+}
+
+/* Remove an element from the freelist.  Must have alloc lock.
+   Walks the list for the link that points at off and makes it point
+   at next instead.
+   Returns the result of that write, or -1 (TDB_ERR_CORRUPT) if off is
+   not on the list or the list cannot be read. */
+static int remove_from_freelist(TDB_CONTEXT *tdb, tdb_off off, tdb_off next)
+{
+	/* prev is the offset of the link being examined, starting with
+	   the freelist top; cur is the record that link points at */
+	tdb_off prev = FREELIST_TOP;
+	tdb_off cur;
+
+	for (;;) {
+		if (ofs_read(tdb, prev, &cur) == -1 || cur == 0)
+			break;
+
+		if (cur == off) {
+			/* We've found it! */
+			return ofs_write(tdb, prev, &next);
+		}
+
+		/* Follow chain (next offset is at start of record) */
+		prev = cur;
+	}
+
+	TDB_LOG((tdb, 0,"remove_from_freelist: not on list at off=%d\n", off));
+	return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
+}
+
+/* Add an element into the freelist. Merge adjacent records if
+   necessary.
+
+   offset is the file offset of the record's header and rec its
+   in-memory copy, of which rec_len must be valid on entry.  rec_len,
+   magic and next of *rec are overwritten.  If the record to the right
+   is free it is absorbed; if the record to the left is free the two
+   are merged and the result is written at the left record's offset.
+   Failures while looking at a neighbour are logged and that merge is
+   skipped.  The allocation lock is held for the duration of the call.
+   Returns 0 on success, -1 on failure. */
+static int tdb_free(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
+{
+	tdb_off right, left;
+
+	/* Allocation and tailer lock */
+	if (tdb_lock(tdb, -1, F_WRLCK) != 0)
+		return -1;
+
+	/* set an initial tailer, so if we fail we don't leave a bogus record */
+	if (update_tailer(tdb, offset, rec) != 0) {
+		TDB_LOG((tdb, 0, "tdb_free: upfate_tailer failed!\n"));
+		goto fail;
+	}
+
+	/* Look right first (I'm an Australian, dammit) */
+	right = offset + sizeof(*rec) + rec->rec_len;
+	if (right + sizeof(*rec) <= tdb->map_size) { /* only if a whole header fits before the end of the db */
+		struct list_struct r;
+
+		if (tdb_read(tdb, right, &r, sizeof(r), DOCONV()) == -1) {
+			TDB_LOG((tdb, 0, "tdb_free: right read failed at %u\n", right));
+			goto left;
+		}
+
+		/* If it's free, expand to include it. */
+		if (r.magic == TDB_FREE_MAGIC) {
+			if (remove_from_freelist(tdb, right, r.next) == -1) {
+				TDB_LOG((tdb, 0, "tdb_free: right free failed at %u\n", right));
+				goto left;
+			}
+			rec->rec_len += sizeof(r) + r.rec_len; /* absorb the neighbour's header and payload */
+		}
+	}
+
+left:
+	/* Look left: the slot just before our header is the tailer of the
+	   record on our left */
+	left = offset - sizeof(tdb_off);
+	if (left > TDB_DATA_START(tdb->header.hash_size)) {
+		struct list_struct l;
+		tdb_off leftsize;
+		
+		/* Read in tailer and jump back to header */
+		if (ofs_read(tdb, left, &leftsize) == -1) {
+			TDB_LOG((tdb, 0, "tdb_free: left offset read failed at %u\n", left));
+			goto update;
+		}
+		left = offset - leftsize; /* the tailer holds the left record's total size */
+
+		/* Now read in record */
+		if (tdb_read(tdb, left, &l, sizeof(l), DOCONV()) == -1) {
+			TDB_LOG((tdb, 0, "tdb_free: left read failed at %u (%u)\n", left, leftsize));
+			goto update;
+		}
+
+		/* If it's free, expand to include it. */
+		if (l.magic == TDB_FREE_MAGIC) {
+			if (remove_from_freelist(tdb, left, l.next) == -1) {
+				TDB_LOG((tdb, 0, "tdb_free: left free failed at %u\n", left));
+				goto update;
+			} else {
+				offset = left; /* the merged record starts at the left neighbour */
+				rec->rec_len += leftsize;
+			}
+		}
+	}
+
+update:
+	/* rewrite the tailer for the final offset and length */
+	if (update_tailer(tdb, offset, rec) == -1) {
+		TDB_LOG((tdb, 0, "tdb_free: update_tailer failed at %u\n", offset));
+		goto fail;
+	}
+
+	/* Now, prepend to free list */
+	rec->magic = TDB_FREE_MAGIC;
+
+	if (ofs_read(tdb, FREELIST_TOP, &rec->next) == -1 ||
+	    rec_write(tdb, offset, rec) == -1 ||
+	    ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
+		TDB_LOG((tdb, 0, "tdb_free record write failed at offset=%d\n", offset));
+		goto fail;
+	}
+
+	/* And we're done. */
+	tdb_unlock(tdb, -1, F_WRLCK);
+	return 0;
+
+ fail:
+	tdb_unlock(tdb, -1, F_WRLCK);
+	return -1;
+}
+
+
+/* expand a file.  we prefer to use ftruncate, as that is what posix
+  says to use for mmap expansion
+
+  size is the offset at which the new space starts (tdb_expand passes
+  the current map size) and addition the number of bytes to add.  The
+  file is first extended to size+addition, then the new region is
+  overwritten with 0x42 bytes in chunks of at most sizeof(buf).
+  Returns 0 on success, -1 on failure. */
+static int expand_file(TDB_CONTEXT *tdb, tdb_off size, tdb_off addition)
+{
+	char buf[1024];
+#if HAVE_FTRUNCATE_EXTEND
+	if (ftruncate(tdb->fd, size+addition) != 0) {
+		TDB_LOG((tdb, 0, "expand_file ftruncate to %d failed (%s)\n", 
+			   size+addition, strerror(errno)));
+		return -1;
+	}
+#else
+	char b = 0;
+
+	/* no extending ftruncate: write a single byte at the new last offset instead */
+#ifdef HAVE_PWRITE
+	if (pwrite(tdb->fd,  &b, 1, (size+addition) - 1) != 1) {
+#else
+	if (lseek(tdb->fd, (size+addition) - 1, SEEK_SET) != (size+addition) - 1 || 
+	    write(tdb->fd, &b, 1) != 1) {
+#endif
+		TDB_LOG((tdb, 0, "expand_file to %d failed (%s)\n", 
+			   size+addition, strerror(errno)));
+		return -1;
+	}
+#endif
+
+	/* now fill the file with something. This ensures that the file isn't sparse, which would be
+	   very bad if we ran out of disk. This must be done with write, not via mmap */
+	memset(buf, 0x42, sizeof(buf));
+	while (addition) {
+		int n = addition>sizeof(buf)?sizeof(buf):addition; /* size of this chunk */
+#ifdef HAVE_PWRITE
+		int ret = pwrite(tdb->fd, buf, n, size);
+#else
+		int ret;
+		if (lseek(tdb->fd, size, SEEK_SET) != size)
+			return -1;
+		ret = write(tdb->fd, buf, n);
+#endif
+		if (ret != n) { /* a short write is treated as a failure */
+			TDB_LOG((tdb, 0, "expand_file write of %d failed (%s)\n", 
+				   n, strerror(errno)));
+			return -1;
+		}
+		addition -= n;
+		size += n;
+	}
+	return 0;
+}
+
+
+/* expand the database at least size bytes by expanding the underlying
+   file and doing the mmap again if necessary
+
+   The growth is ten times the requested size, rounded so that the
+   database ends on a TDB_PAGE_SIZE boundary, and the new space is
+   handed to tdb_free() as one free record.  The allocation lock is
+   held for the duration of the call.
+   Returns 0 on success, -1 on failure. */
+static int tdb_expand(TDB_CONTEXT *tdb, tdb_off size)
+{
+	struct list_struct rec;
+	tdb_off offset;
+
+	if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
+		TDB_LOG((tdb, 0, "lock failed in tdb_expand\n"));
+		return -1;
+	}
+
+	/* must know about any previous expansions by another process */
+	tdb_oob(tdb, tdb->map_size + 1, 1);
+
+	/* always make room for at least 10 more records, and round
+           the database up to a multiple of TDB_PAGE_SIZE */
+	size = TDB_ALIGN(tdb->map_size + size*10, TDB_PAGE_SIZE) - tdb->map_size;
+
+	if (!(tdb->flags & TDB_INTERNAL))
+		tdb_munmap(tdb);
+
+	/*
+	 * We must ensure the file is unmapped before doing this
+	 * to ensure consistency with systems like OpenBSD where
+	 * writes and mmaps are not consistent.
+	 */
+
+	/* expand the file itself */
+	if (!(tdb->flags & TDB_INTERNAL)) {
+		if (expand_file(tdb, tdb->map_size, size) != 0)
+			goto fail;
+	}
+
+	tdb->map_size += size;
+
+	if (tdb->flags & TDB_INTERNAL) {
+		/* Keep the old buffer until realloc has succeeded: assigning
+		   the result straight to map_ptr would lose (and leak) the
+		   whole in-memory database on failure. */
+		void *new_map_ptr = realloc(tdb->map_ptr, tdb->map_size);
+
+		if (!new_map_ptr) {
+			/* undo the size change so map_size still matches the buffer */
+			tdb->map_size -= size;
+			tdb->ecode = TDB_ERR_OOM;
+			goto fail;
+		}
+		tdb->map_ptr = new_map_ptr;
+	} else {
+		/*
+		 * We must ensure the file is remapped before adding the space
+		 * to ensure consistency with systems like OpenBSD where
+		 * writes and mmaps are not consistent.
+		 */
+
+		/* We're ok if the mmap fails as we'll fallback to read/write */
+		tdb_mmap(tdb);
+	}
+
+	/* form a new freelist record */
+	memset(&rec,'\0',sizeof(rec));
+	rec.rec_len = size - sizeof(rec);
+
+	/* link it into the free list */
+	offset = tdb->map_size - size;
+	if (tdb_free(tdb, offset, &rec) == -1)
+		goto fail;
+
+	tdb_unlock(tdb, -1, F_WRLCK);
+	return 0;
+ fail:
+	tdb_unlock(tdb, -1, F_WRLCK);
+	return -1;
+}
+
+
+/* 
+   the core of tdb_allocate - called when we have decided which
+   free list entry to use
+
+   rec_ptr is the offset of the chosen free record, rec its header as
+   already read by the caller, and last_ptr the offset of the link
+   that points at it (the freelist top or the previous record).
+   length is the payload size wanted, tailer included.  The record is
+   unlinked and marked allocated; if it is larger than needed by more
+   than MIN_REC_SIZE the surplus is split off and freed again.
+   Returns rec_ptr on success and 0 on failure.
+ */
+static tdb_off tdb_allocate_ofs(TDB_CONTEXT *tdb, tdb_len length, tdb_off rec_ptr,
+				struct list_struct *rec, tdb_off last_ptr)
+{
+	struct list_struct newrec;
+	tdb_off newrec_ptr;
+
+	memset(&newrec, '\0', sizeof(newrec));
+
+	/* found it - now possibly split it up  */
+	if (rec->rec_len > length + MIN_REC_SIZE) {
+		/* Length of left piece */
+		length = TDB_ALIGN(length, TDB_ALIGNMENT);
+		
+		/* Right piece to go on free list */
+		newrec.rec_len = rec->rec_len - (sizeof(*rec) + length);
+		newrec_ptr = rec_ptr + sizeof(*rec) + length;
+		
+		/* And left record is shortened */
+		rec->rec_len = length;
+	} else {
+		newrec_ptr = 0; /* no split: the whole free record is handed out */
+	}
+	
+	/* Remove allocated record from the free list */
+	if (ofs_write(tdb, last_ptr, &rec->next) == -1) {
+		return 0;
+	}
+	
+	/* Update header: do this before we drop alloc
+	   lock, otherwise tdb_free() might try to
+	   merge with us, thinking we're free.
+	   (Thanks Jeremy Allison). */
+	rec->magic = TDB_MAGIC;
+	if (rec_write(tdb, rec_ptr, rec) == -1) {
+		return 0;
+	}
+	
+	/* Did we create new block? */
+	if (newrec_ptr) {
+		/* Update allocated record tailer (we
+		   shortened it). */
+		if (update_tailer(tdb, rec_ptr, rec) == -1) {
+			return 0;
+		}
+		
+		/* Free new record */
+		if (tdb_free(tdb, newrec_ptr, &newrec) == -1) {
+			return 0;
+		}
+	}
+	
+	/* all done - return the new record offset */
+	return rec_ptr;
+}
+
+/* allocate some space from the free list. The offset returned points
+   to an unconnected list_struct within the database with room for at
+   least length bytes of total data
+
+   length excludes the tailer, which is added here.  On success *rec
+   holds the header of the allocated record as written to the file.
+   If no free record is big enough the database is expanded and the
+   search is repeated.  The allocation lock is held for the duration
+   of the call.
+
+   0 is returned if the space could not be allocated
+ */
+static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length,
+			    struct list_struct *rec)
+{
+	tdb_off rec_ptr, last_ptr, newrec_ptr;
+	struct {
+		tdb_off rec_ptr, last_ptr;
+		tdb_len rec_len;
+	} bestfit; /* best candidate so far; rec_ptr == 0 means none */
+
+	if (tdb_lock(tdb, -1, F_WRLCK) == -1)
+		return 0;
+
+	/* Extra bytes required for tailer */
+	length += sizeof(tdb_off);
+
+ again:
+	last_ptr = FREELIST_TOP;
+
+	/* read in the freelist top */
+	if (ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1)
+		goto fail;
+
+	bestfit.rec_ptr = 0;
+
+	/* 
+	   this is a best fit allocation strategy. Originally we used
+	   a first fit strategy, but it suffered from massive fragmentation
+	   issues when faced with a slowly increasing record size.
+	 */
+	while (rec_ptr) {
+		if (rec_free_read(tdb, rec_ptr, rec) == -1) {
+			goto fail;
+		}
+
+		if (rec->rec_len >= length) {
+			if (bestfit.rec_ptr == 0 ||
+			    rec->rec_len < bestfit.rec_len) {
+				bestfit.rec_len = rec->rec_len;
+				bestfit.rec_ptr = rec_ptr;
+				bestfit.last_ptr = last_ptr;
+				/* consider a fit to be good enough if we aren't wasting more than half the space */
+				if (bestfit.rec_len < 2*length) {
+					break;
+				}
+			}
+		}
+
+		/* move to the next record */
+		last_ptr = rec_ptr;
+		rec_ptr = rec->next;
+	}
+
+	if (bestfit.rec_ptr != 0) {
+		/* *rec may hold a later record by now, so read the winner again */
+		if (rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) {
+			goto fail;
+		}
+
+		newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, rec, bestfit.last_ptr);
+		tdb_unlock(tdb, -1, F_WRLCK);
+		return newrec_ptr;
+	}
+
+	/* we didn't find enough space. See if we can expand the
+	   database and if we can then try again */
+	if (tdb_expand(tdb, length + sizeof(*rec)) == 0)
+		goto again;
+ fail:
+	tdb_unlock(tdb, -1, F_WRLCK);
+	return 0;
+}
+
+/* initialise a new database with a specified hash size
+   Builds the header plus an all-zero free list head and hash table in
+   memory.  An internal database keeps that buffer as its map;
+   otherwise the file is truncated, the image is written out and the
+   rwlocks are created.
+   Returns 0 on success, -1 on failure. */
+static int tdb_new_database(TDB_CONTEXT *tdb, int hash_size)
+{
+	struct tdb_header *newdb;
+	int size, ret = -1;
+
+	/* We make it up in memory, then write it out if not internal */
+	size = sizeof(struct tdb_header) + (hash_size+1)*sizeof(tdb_off);
+	newdb = calloc(size, 1);
+	if (newdb == NULL)
+		return TDB_ERRCODE(TDB_ERR_OOM, -1);
+
+	/* Fill in the header */
+	newdb->version = TDB_VERSION;
+	newdb->hash_size = hash_size;
+#ifdef USE_SPINLOCKS
+	newdb->rwlocks = size;
+#endif
+
+	if (tdb->flags & TDB_INTERNAL) {
+		/* the context takes ownership of the buffer: do not free it */
+		tdb->map_size = size;
+		tdb->map_ptr = (char *)newdb;
+		memcpy(&tdb->header, newdb, sizeof(tdb->header));
+		/* Convert the `ondisk' version if asked. */
+		if (DOCONV())
+			convert(newdb, sizeof(*newdb));
+		return 0;
+	}
+
+	if (lseek(tdb->fd, 0, SEEK_SET) != -1 && ftruncate(tdb->fd, 0) != -1) {
+		/* This creates an endian-converted header, as if read from disk */
+		if (DOCONV())
+			convert(newdb, sizeof(*newdb));
+		memcpy(&tdb->header, newdb, sizeof(tdb->header));
+		/* Don't endian-convert the magic food! */
+		memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1);
+		/* ret stays -1 unless the whole image was written */
+		if (write(tdb->fd, newdb, size) == size)
+			ret = tdb_create_rwlocks(tdb->fd, hash_size);
+	}
+
+	SAFE_FREE(newdb);
+	return ret;
+}
+
+/* Returns 0 on fail.  On success, return offset of record, and fills
+   in r.
+   Walks the chain for hash looking for a live record whose full hash,
+   key length and key bytes all match.  ecode is TDB_ERR_NOEXIST when
+   the chain was searched without a match. */
+static tdb_off tdb_find(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash,
+			struct list_struct *r)
+{
+	tdb_off rec_ptr;
+
+	/* read in the hash top */
+	if (ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
+		return 0;
+
+	/* keep looking until we find the right record */
+	for (; rec_ptr; rec_ptr = r->next) {
+		char *stored_key;
+		int same;
+
+		if (rec_read(tdb, rec_ptr, r) == -1)
+			return 0;
+
+		/* cheap checks first: skip anything that cannot match */
+		if (TDB_DEAD(r) || hash != r->full_hash || key.dsize != r->key_len)
+			continue;
+
+		/* a very likely hit - read the key */
+		stored_key = tdb_alloc_read(tdb, rec_ptr + sizeof(*r),
+					    r->key_len);
+		if (!stored_key)
+			return 0;
+
+		same = (memcmp(key.dptr, stored_key, key.dsize) == 0);
+		SAFE_FREE(stored_key);
+		if (same)
+			return rec_ptr;
+	}
+	return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
+}
+
+/* If they do lockkeys, check that this hash is one they locked.
+   lockedkeys[0] holds the number of entries that follow it.
+   Returns 1 if no key locks are in force or hash is among them,
+   otherwise 0 with ecode TDB_ERR_NOLOCK. */
+static int tdb_keylocked(TDB_CONTEXT *tdb, u32 hash)
+{
+	u32 idx;
+
+	if (tdb->lockedkeys == NULL)
+		return 1;
+
+	for (idx = 0; idx < tdb->lockedkeys[0]; idx++) {
+		if (tdb->lockedkeys[idx+1] == hash) {
+			return 1;
+		}
+	}
+	return TDB_ERRCODE(TDB_ERR_NOLOCK, 0);
+}
+
+/* As tdb_find, but if you succeed, keep the lock.
+   The chain for hash is locked with locktype before searching; when
+   the key is not found the lock is released again.
+   Returns the record offset, or 0 on failure. */
+static tdb_off tdb_find_lock_hash(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash, int locktype,
+			     struct list_struct *rec)
+{
+	u32 rec_ptr;
+
+	if (!tdb_keylocked(tdb, hash))
+		return 0;
+
+	if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
+		return 0;
+
+	rec_ptr = tdb_find(tdb, key, hash, rec);
+	if (rec_ptr == 0) {
+		/* nothing found: the caller gets no lock to release */
+		tdb_unlock(tdb, BUCKET(hash), locktype);
+	}
+	return rec_ptr;
+}
+
+enum TDB_ERROR tdb_error(TDB_CONTEXT *tdb)
+{
+	return tdb->ecode; /* the error code most recently stored in this context */
+}
+
+static struct tdb_errname {
+	enum TDB_ERROR ecode; const char *estring;
+} emap[] = { {TDB_SUCCESS, "Success"},
+	     {TDB_ERR_CORRUPT, "Corrupt database"},
+	     {TDB_ERR_IO, "IO Error"},
+	     {TDB_ERR_LOCK, "Locking error"},
+	     {TDB_ERR_OOM, "Out of memory"},
+	     {TDB_ERR_EXISTS, "Record exists"},
+	     {TDB_ERR_NOLOCK, "Lock exists on other keys"},
+	     {TDB_ERR_NOEXIST, "Record does not exist"} };
+
+/* Map tdb->ecode to its message from emap[]; codes that are not in the
+   table yield "Invalid error code". */
+const char *tdb_errorstr(TDB_CONTEXT *tdb)
+{
+	const struct tdb_errname *entry = emap;
+	const struct tdb_errname *end = emap + sizeof(emap) / sizeof(emap[0]);
+
+	while (entry != end) {
+		if (entry->ecode == tdb->ecode)
+			return entry->estring;
+		entry++;
+	}
+	return "Invalid error code";
+}
+
+/* Overwrite the data of an existing record where it lies.
+   This is only possible when the key exists and the record is large
+   enough for key + new data + the tailer.
+   Returns 0 on success and -1 otherwise; when the record is merely too
+   small the error code is reset to TDB_SUCCESS so the caller can fall
+   back to delete-and-reinsert.
+*/
+
+static int tdb_update_hash(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash, TDB_DATA dbuf)
+{
+	struct list_struct rec;
+	tdb_off rec_ptr = tdb_find(tdb, key, hash, &rec);
+	tdb_off data_ofs;
+
+	if (rec_ptr == 0)
+		return -1;
+
+	/* the record must hold key, data and tailer */
+	if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off)) {
+		tdb->ecode = TDB_SUCCESS; /* Not really an error */
+		return -1;
+	}
+
+	/* the data starts after the header and the key */
+	data_ofs = rec_ptr + sizeof(rec) + rec.key_len;
+	if (tdb_write(tdb, data_ofs, dbuf.dptr, dbuf.dsize) == -1)
+		return -1;
+
+	/* the header only needs rewriting when the length changed */
+	if (dbuf.dsize == rec.data_len)
+		return 0;
+
+	rec.data_len = dbuf.dsize;
+	return rec_write(tdb, rec_ptr, &rec);
+}
+
+/* Look up `key' and return a copy of its data.
+ * If the key does not exist the lookup sets TDB_ERR_NOEXIST; if the
+ * key exists but carries no data no error is set.  In both cases the
+ * result has a NULL dptr and a zero dsize.
+ * The data buffer comes from tdb_alloc_read().
+ */
+
+TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key)
+{
+	struct list_struct rec;
+	TDB_DATA ret;
+	u32 hash = tdb_hash(&key);
+	tdb_off rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec);
+
+	if (rec_ptr == 0)
+		return tdb_null;
+
+	/* the chain is read-locked from here until the unlock below */
+	ret.dptr = NULL;
+	if (rec.data_len != 0) {
+		/* the data follows the record header and the key */
+		ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
+					  rec.data_len);
+	}
+	ret.dsize = rec.data_len;
+
+	tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
+	return ret;
+}
+
+/* Test whether `key' (with precomputed `hash') is present.
+
+   Returns 1 when the key is found and 0 when it is not.  This differs
+   from the 0 / -1 convention used elsewhere in this module, but is
+   compatible with gdbm.
+*/
+static int tdb_exists_hash(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash)
+{
+	struct list_struct rec;
+	tdb_off found = tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec);
+
+	if (found != 0) {
+		/* a successful lookup leaves the chain locked: release it */
+		tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
+		return 1;
+	}
+	return 0;
+}
+
+/* Public existence test: hashes the key and defers to tdb_exists_hash().
+   Returns 1 if the key is present, 0 if not. */
+int tdb_exists(TDB_CONTEXT *tdb, TDB_DATA key)
+{
+	u32 hash = tdb_hash(&key);
+	return tdb_exists_hash(tdb, key, hash);
+}
+
+/* Take a blocking read lock on the record at `off' so that it cannot be
+   deleted underneath us.  Offset 0 means "no record" and succeeds
+   without doing anything. */
+static int lock_record(TDB_CONTEXT *tdb, tdb_off off)
+{
+	if (off == 0)
+		return 0;
+	return tdb_brlock(tdb, off, F_RDLCK, F_SETLKW, 0);
+}
+/*
+  Try to write-lock the record at `off'.
+  A write lock would override fcntl read locks held by this same process,
+  so first refuse (-1) if any of our own traversals sits on the record.
+  The lock request is F_SETLK, *not* F_SETLKW, on purpose: failing to
+  get the lock here is not an error.
+*/
+
+static int write_lock_record(TDB_CONTEXT *tdb, tdb_off off)
+{
+	struct tdb_traverse_lock *trav = &tdb->travlocks;
+
+	while (trav != NULL) {
+		if (trav->off == off)
+			return -1;
+		trav = trav->next;
+	}
+	return tdb_brlock(tdb, off, F_WRLCK, F_SETLK, 1);
+}
+
+/*
+  Release the record lock at `off' (counterpart of write_lock_record()).
+  The request is issued with F_UNLCK / F_SETLK; the result of
+  tdb_brlock() is returned unchanged.
+*/
+
+static int write_unlock_record(TDB_CONTEXT *tdb, tdb_off off)
+{
+	return tdb_brlock(tdb, off, F_UNLCK, F_SETLK, 0);
+}
+/* Drop the read lock on the record at `off'.
+   fcntl locks don't stack, so the lock is really released only when
+   exactly one traversal in this context refers to that record;
+   otherwise another traversal still relies on it (or none holds it)
+   and we just report success.  Offset 0 is a no-op. */
+static int unlock_record(TDB_CONTEXT *tdb, tdb_off off)
+{
+	struct tdb_traverse_lock *trav;
+	u32 holders = 0;
+
+	if (off == 0)
+		return 0;
+
+	for (trav = &tdb->travlocks; trav != NULL; trav = trav->next) {
+		if (trav->off == off)
+			holders++;
+	}
+
+	if (holders != 1)
+		return 0;
+	return tdb_brlock(tdb, off, F_UNLCK, F_SETLKW, 0);
+}
+
+/* Delete the record at rec_ptr, whose header the caller has already read
+   into *rec.  Returns 0 on success and -1 on failure.
+   If the record cannot be write-locked (one of our traversals, or some
+   other lock holder, is on it) it is not unlinked: its header is rewritten
+   with TDB_DEAD_MAGIC so a later pass can reclaim it.
+   Otherwise it is unlinked from its hash chain and handed to tdb_free().
+   The callers in this file invoke it with the chain write-locked. */
+static int do_delete(TDB_CONTEXT *tdb, tdb_off rec_ptr, struct list_struct*rec)
+{
+	tdb_off last_ptr, i;
+	struct list_struct lastrec;
+
+	if (tdb->read_only) return -1;
+
+	if (write_lock_record(tdb, rec_ptr) == -1) {
+		/* Someone traversing here: mark it as dead */
+		rec->magic = TDB_DEAD_MAGIC;
+		return rec_write(tdb, rec_ptr, rec);
+	}
+	/* the write lock was only a probe; release it straight away */
+	if (write_unlock_record(tdb, rec_ptr) != 0)
+		return -1;
+
+	/* find previous record in hash chain */
+	if (ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
+		return -1;
+	/* after the loop last_ptr is the predecessor's offset, or 0 if
+	   rec_ptr is the first record of the chain */
+	for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
+		if (rec_read(tdb, i, &lastrec) == -1)
+			return -1;
+
+	/* unlink it: next ptr is at start of record. */
+	if (last_ptr == 0)
+		last_ptr = TDB_HASH_TOP(rec->full_hash);
+	if (ofs_write(tdb, last_ptr, &rec->next) == -1)
+		return -1;
+
+	/* recover the space */
+	if (tdb_free(tdb, rec_ptr, rec) == -1)
+		return -1;
+	return 0;
+}
+
+/* Advance a traversal to the next live record.
+   tlock->hash / tlock->off describe the current position (off == 0 means
+   "no current record").  Returns 0 when the traversal is finished, -1 on
+   error, and otherwise the offset of the next record, whose header is left
+   in *rec.  On a positive return the record is read-locked and its chain
+   (tlock->hash) is still write-locked; the caller must release the chain.
+   Dead records met on the way are deleted unless the db is read-only. */
+static int tdb_next_lock(TDB_CONTEXT *tdb, struct tdb_traverse_lock *tlock,
+			 struct list_struct *rec)
+{
+	/* a non-zero offset is the record returned last time: step past it */
+	int want_next = (tlock->off != 0);
+
+	/* No traversal allowed if you've called tdb_lockkeys() */
+	if (tdb->lockedkeys)
+		return TDB_ERRCODE(TDB_ERR_NOLOCK, -1);
+
+	/* Lock each chain from the start one. */
+	for (; tlock->hash < tdb->header.hash_size; tlock->hash++) {
+		if (tdb_lock(tdb, tlock->hash, F_WRLCK) == -1)
+			return -1;
+
+		/* No previous record?  Start at top of chain. */
+		if (!tlock->off) {
+			if (ofs_read(tdb, TDB_HASH_TOP(tlock->hash),
+				     &tlock->off) == -1)
+				goto fail;
+		} else {
+			/* Otherwise unlock the previous record. */
+			if (unlock_record(tdb, tlock->off) != 0)
+				goto fail;
+		}
+
+		if (want_next) {
+			/* We have offset of old record: grab next */
+			if (rec_read(tdb, tlock->off, rec) == -1)
+				goto fail;
+			tlock->off = rec->next;
+		}
+
+		/* Iterate through chain */
+		while( tlock->off) {
+			tdb_off current;
+			if (rec_read(tdb, tlock->off, rec) == -1)
+				goto fail;
+			if (!TDB_DEAD(rec)) {
+				/* Woohoo: we found one! */
+				if (lock_record(tdb, tlock->off) != 0)
+					goto fail;
+				/* chain lock stays held for the caller */
+				return tlock->off;
+			}
+			/* Try to clean dead ones from old traverses */
+			current = tlock->off;
+			/* step forward before the dead record is freed */
+			tlock->off = rec->next;
+			if (!tdb->read_only && 
+			    do_delete(tdb, current, rec) != 0)
+				goto fail;
+		}
+		tdb_unlock(tdb, tlock->hash, F_WRLCK);
+		/* following chains are scanned from their first record */
+		want_next = 0;
+	}
+	/* We finished iteration without finding anything */
+	return TDB_ERRCODE(TDB_SUCCESS, 0);
+
+ fail:
+	/* forget the position and drop the chain lock taken in the loop */
+	tlock->off = 0;
+	if (tdb_unlock(tdb, tlock->hash, F_WRLCK) != 0)
+		TDB_LOG((tdb, 0, "tdb_next_lock: On error unlock failed!\n"));
+	return -1;
+}
+
+/* traverse the entire database - calling fn(tdb, key, data, private) on
+   each element.
+   return -1 on error or the record count traversed
+   if fn is NULL then it is not called
+   a non-zero return value from fn() indicates that the traversal should stop
+
+   While fn runs, the current record is read-locked but its hash chain is
+   not locked.  key.dptr and data.dptr point into one buffer that is freed
+   as soon as fn returns.
+  */
+int tdb_traverse(TDB_CONTEXT *tdb, tdb_traverse_func fn, void *private)
+{
+	TDB_DATA key, dbuf;
+	struct list_struct rec;
+	struct tdb_traverse_lock tl = { NULL, 0, 0 };
+	int ret, count = 0;
+
+	/* This was in the initialization, above, but the IRIX compiler
+	 * did not like it.  crh
+	 */
+	tl.next = tdb->travlocks.next;
+
+	/* fcntl locks don't stack: beware traverse inside traverse */
+	tdb->travlocks.next = &tl;
+
+	/* tdb_next_lock places locks on the record returned, and its chain */
+	while ((ret = tdb_next_lock(tdb, &tl, &rec)) > 0) {
+		count++;
+		/* now read the full record: key and data in one buffer */
+		key.dptr = tdb_alloc_read(tdb, tl.off + sizeof(rec), 
+					  rec.key_len + rec.data_len);
+		if (!key.dptr) {
+			ret = -1;
+			if (tdb_unlock(tdb, tl.hash, F_WRLCK) != 0)
+				goto out;
+			if (unlock_record(tdb, tl.off) != 0)
+				TDB_LOG((tdb, 0, "tdb_traverse: key.dptr == NULL and unlock_record failed!\n"));
+			goto out;
+		}
+		key.dsize = rec.key_len;
+		dbuf.dptr = key.dptr + rec.key_len;
+		dbuf.dsize = rec.data_len;
+
+		/* Drop chain lock, call out */
+		if (tdb_unlock(tdb, tl.hash, F_WRLCK) != 0) {
+			ret = -1;
+			/* do not leak the record buffer on this error path */
+			SAFE_FREE(key.dptr);
+			goto out;
+		}
+		if (fn && fn(tdb, key, dbuf, private)) {
+			/* They want us to terminate traversal */
+			ret = count;
+			if (unlock_record(tdb, tl.off) != 0) {
+				TDB_LOG((tdb, 0, "tdb_traverse: unlock_record failed!\n"));
+				ret = -1;
+			}
+			SAFE_FREE(key.dptr);
+			/* leave through `out' so an unlock failure is
+			   reported as -1 rather than being dropped */
+			goto out;
+		}
+		SAFE_FREE(key.dptr);
+	}
+out:
+	/* unhook our traverse lock from the context's list */
+	tdb->travlocks.next = tl.next;
+	if (ret < 0)
+		return -1;
+	else
+		return count;
+}
+
+/* Start a key-by-key walk: reset the context's built-in traverse lock
+   and return a copy of the first key in the database.  Returns tdb_null
+   if the old record lock cannot be released or no record is found.
+   The record found stays read-locked for a following tdb_nextkey(). */
+TDB_DATA tdb_firstkey(TDB_CONTEXT *tdb)
+{
+	TDB_DATA key;
+	struct list_struct rec;
+
+	/* let go of whatever record an earlier walk left locked */
+	if (unlock_record(tdb, tdb->travlocks.off) != 0)
+		return tdb_null;
+
+	/* rewind to the first chain, no current record */
+	tdb->travlocks.hash = 0;
+	tdb->travlocks.off = 0;
+
+	if (tdb_next_lock(tdb, &tdb->travlocks, &rec) <= 0)
+		return tdb_null;
+
+	/* the key bytes sit right after the record header */
+	key.dsize = rec.key_len;
+	key.dptr = tdb_alloc_read(tdb, tdb->travlocks.off + sizeof(rec),
+				  key.dsize);
+
+	/* tdb_next_lock left the chain locked; only the record lock stays */
+	if (tdb_unlock(tdb, BUCKET(tdb->travlocks.hash), F_WRLCK) != 0) {
+		TDB_LOG((tdb, 0, "tdb_firstkey: error occurred while tdb_unlocking!\n"));
+	}
+	return key;
+}
+
+/* find the next entry in the database, returning its key.
+   `oldkey' is the key returned by the previous tdb_firstkey() or
+   tdb_nextkey() call.  If the record still locked by that call is
+   oldkey the walk simply continues from it; otherwise oldkey is looked
+   up again and the walk resumes after it.
+   Returns tdb_null at the end of the database or on error. */
+TDB_DATA tdb_nextkey(TDB_CONTEXT *tdb, TDB_DATA oldkey)
+{
+	u32 oldhash;
+	TDB_DATA key = tdb_null;
+	struct list_struct rec;
+	char *k = NULL;
+
+	/* Is locked key the old key?  If so, traverse will be reliable. */
+	if (tdb->travlocks.off) {
+		int same_key;
+
+		if (tdb_lock(tdb,tdb->travlocks.hash,F_WRLCK))
+			return tdb_null;
+
+		/* The lengths must agree before the bytes are compared:
+		   otherwise memcmp could read past the end of k, or accept
+		   oldkey as a match when it is only a prefix. */
+		same_key = (rec_read(tdb, tdb->travlocks.off, &rec) != -1
+			    && rec.key_len == oldkey.dsize
+			    && (k = tdb_alloc_read(tdb,
+						   tdb->travlocks.off+sizeof(rec),
+						   rec.key_len)) != NULL
+			    && memcmp(k, oldkey.dptr, oldkey.dsize) == 0);
+
+		/* free the copy before any early return below */
+		SAFE_FREE(k);
+
+		if (!same_key) {
+			/* No, it wasn't: unlock it and start from scratch */
+			if (unlock_record(tdb, tdb->travlocks.off) != 0)
+				return tdb_null;
+			if (tdb_unlock(tdb, tdb->travlocks.hash, F_WRLCK) != 0)
+				return tdb_null;
+			tdb->travlocks.off = 0;
+		}
+	}
+
+	if (!tdb->travlocks.off) {
+		/* No previous element: do normal find, and lock record */
+		tdb->travlocks.off = tdb_find_lock_hash(tdb, oldkey, tdb_hash(&oldkey), F_WRLCK, &rec);
+		if (!tdb->travlocks.off)
+			return tdb_null;
+		tdb->travlocks.hash = BUCKET(rec.full_hash);
+		if (lock_record(tdb, tdb->travlocks.off) != 0) {
+			TDB_LOG((tdb, 0, "tdb_nextkey: lock_record failed (%s)!\n", strerror(errno)));
+			/* the find above left the chain write-locked:
+			   release it and forget the unlocked record */
+			if (tdb_unlock(tdb, tdb->travlocks.hash, F_WRLCK) != 0)
+				TDB_LOG((tdb, 0, "tdb_nextkey: WARNING tdb_unlock failed!\n"));
+			tdb->travlocks.off = 0;
+			return tdb_null;
+		}
+	}
+	/* remember the old chain: tdb_next_lock may move to another one */
+	oldhash = tdb->travlocks.hash;
+
+	/* Grab next record: locks chain and returned record,
+	   unlocks old record */
+	if (tdb_next_lock(tdb, &tdb->travlocks, &rec) > 0) {
+		key.dsize = rec.key_len;
+		key.dptr = tdb_alloc_read(tdb, tdb->travlocks.off+sizeof(rec),
+					  key.dsize);
+		/* Unlock the chain of this new record */
+		if (tdb_unlock(tdb, tdb->travlocks.hash, F_WRLCK) != 0)
+			TDB_LOG((tdb, 0, "tdb_nextkey: WARNING tdb_unlock failed!\n"));
+	}
+	/* Unlock the chain of old record */
+	if (tdb_unlock(tdb, BUCKET(oldhash), F_WRLCK) != 0)
+		TDB_LOG((tdb, 0, "tdb_nextkey: WARNING tdb_unlock failed!\n"));
+	return key;
+}
+
+/* Delete the record for `key' (with precomputed `hash').
+   Returns -1 if the key cannot be found and locked, otherwise the
+   result of do_delete(). */
+static int tdb_delete_hash(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash)
+{
+	struct list_struct rec;
+	int ret;
+	tdb_off rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, &rec);
+
+	if (rec_ptr == 0)
+		return -1;
+
+	/* the chain is write-locked from the find until the unlock below */
+	ret = do_delete(tdb, rec_ptr, &rec);
+	if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0) {
+		TDB_LOG((tdb, 0, "tdb_delete: WARNING tdb_unlock failed!\n"));
+	}
+	return ret;
+}
+
+/* Public delete: hashes the key and defers to tdb_delete_hash().
+   Returns that function's result (-1 if the key was not found). */
+int tdb_delete(TDB_CONTEXT *tdb, TDB_DATA key)
+{
+	u32 hash = tdb_hash(&key);
+	return tdb_delete_hash(tdb, key, hash);
+}
+
+/* store an element in the database, replacing any existing element
+   with the same key
+
+   flag selects the behaviour when the key does / does not exist:
+     TDB_INSERT - fail with TDB_ERR_EXISTS if the key is already present
+     TDB_MODIFY - fail if the key is not present (TDB_ERR_NOEXIST)
+     otherwise  - replace an existing record or create a new one
+
+   return 0 on success, -1 on failure
+*/
+int tdb_store(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
+{
+	struct list_struct rec;
+	u32 hash;
+	tdb_off rec_ptr;
+	char *p = NULL;
+	int ret = 0;
+
+	/* find which hash bucket it is in */
+	hash = tdb_hash(&key);
+	if (!tdb_keylocked(tdb, hash))
+		return -1;
+	/* the chain stays write-locked until the `out' label */
+	if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
+		return -1;
+
+	/* check for it existing, on insert. */
+	if (flag == TDB_INSERT) {
+		if (tdb_exists_hash(tdb, key, hash)) {
+			tdb->ecode = TDB_ERR_EXISTS;
+			goto fail;
+		}
+	} else {
+		/* first try in-place update, on modify or replace. */
+		if (tdb_update_hash(tdb, key, hash, dbuf) == 0)
+			goto out;
+		if (tdb->ecode == TDB_ERR_NOEXIST &&
+		    flag == TDB_MODIFY) {
+			/* if the record doesn't exist and we are in TDB_MODIFY mode then
+			 we should fail the store */
+			goto fail;
+		}
+	}
+	/* reset the error code potentially set by the tdb_update() */
+	tdb->ecode = TDB_SUCCESS;
+
+	/* delete any existing record - if it doesn't exist we don't
+	   care.  Doing this first reduces fragmentation, and avoids
+	   coalescing with `allocated' block before it's updated. */
+	if (flag != TDB_INSERT)
+		tdb_delete_hash(tdb, key, hash);
+
+	/* Copy key+value *before* allocating free space in case malloc
+	   fails and we are left with a dead spot in the tdb. */
+
+	if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
+		tdb->ecode = TDB_ERR_OOM;
+		goto fail;
+	}
+
+	memcpy(p, key.dptr, key.dsize);
+	if (dbuf.dsize)
+		memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
+
+	/* we have to allocate some space */
+	if (!(rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec)))
+		goto fail;
+
+	/* Read hash top into next ptr */
+	if (ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
+		goto fail;
+
+	rec.key_len = key.dsize;
+	rec.data_len = dbuf.dsize;
+	rec.full_hash = hash;
+	rec.magic = TDB_MAGIC;
+
+	/* write out and point the top of the hash chain at it */
+	if (rec_write(tdb, rec_ptr, &rec) == -1
+	    || tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
+	    || ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
+		/* Need to tdb_unallocate() here */
+		goto fail;
+	}
+ out:
+	/* common exit: free the staging buffer and drop the chain lock */
+	SAFE_FREE(p); 
+	tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
+	return ret;
+fail:
+	ret = -1;
+	goto out;
+}
+
+/* Attempt to append data to an entry in place - this only works if the key
+   exists and the record has room for key + old data + new data + tailer.
+   Returns 0 on success (appending zero bytes to an existing key always
+   succeeds) and -1 on failure; when the only problem is lack of room the
+   error code is reset to TDB_SUCCESS.  Record must be locked before calling.
+*/
+static int tdb_append_inplace(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash, TDB_DATA new_dbuf)
+{
+	struct list_struct rec;
+	tdb_off rec_ptr;
+
+	/* find entry */
+	if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
+		return -1;
+
+	/* Append of 0 is always ok. */
+	if (new_dbuf.dsize == 0)
+		return 0;
+
+	/* must be long enough for key, old data + new data and tailer */
+	if (rec.rec_len < key.dsize + rec.data_len + new_dbuf.dsize + sizeof(tdb_off)) {
+		/* No room. */
+		tdb->ecode = TDB_SUCCESS; /* Not really an error */
+		return -1;
+	}
+
+	/* the new bytes go directly after the existing data */
+	if (tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len + rec.data_len,
+		      new_dbuf.dptr, new_dbuf.dsize) == -1)
+		return -1;
+
+	/* update size */
+	rec.data_len += new_dbuf.dsize;
+	return rec_write(tdb, rec_ptr, &rec);
+}
+
+/* Append to an entry. Create if not exist.
+   First tries to extend the record in place; if that is not possible the
+   old data and the new data are combined in memory, the old record is
+   deleted and a new one is written at the head of the hash chain.
+   Returns 0 on success, -1 on failure. */
+
+int tdb_append(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA new_dbuf)
+{
+	struct list_struct rec;
+	u32 hash;
+	tdb_off rec_ptr;
+	char *p = NULL;
+	int ret = 0;
+	size_t new_data_size = 0;
+
+	/* find which hash bucket it is in */
+	hash = tdb_hash(&key);
+	if (!tdb_keylocked(tdb, hash))
+		return -1;
+	/* the chain stays write-locked until the `out' label */
+	if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
+		return -1;
+
+	/* first try in-place. */
+	if (tdb_append_inplace(tdb, key, hash, new_dbuf) == 0)
+		goto out;
+
+	/* reset the error code potentially set by the tdb_append_inplace() */
+	tdb->ecode = TDB_SUCCESS;
+
+	/* find entry */
+	if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {
+		if (tdb->ecode != TDB_ERR_NOEXIST)
+			goto fail;
+
+		/* Not found - create. */
+
+		ret = tdb_store(tdb, key, new_dbuf, TDB_INSERT);
+		goto out;
+	}
+
+	new_data_size = rec.data_len + new_dbuf.dsize;
+
+	/* Copy key+old_value+value *before* allocating free space in case malloc
+	   fails and we are left with a dead spot in the tdb. */
+
+	if (!(p = (char *)malloc(key.dsize + new_data_size))) {
+		tdb->ecode = TDB_ERR_OOM;
+		goto fail;
+	}
+
+	/* Copy the key in place. */
+	memcpy(p, key.dptr, key.dsize);
+
+	/* Now read the old data into place. */
+	if (rec.data_len &&
+		tdb_read(tdb, rec_ptr + sizeof(rec) + rec.key_len, p + key.dsize, rec.data_len, 0) == -1)
+			goto fail;
+
+	/* Finally append the new data. */
+	if (new_dbuf.dsize)
+		memcpy(p+key.dsize+rec.data_len, new_dbuf.dptr, new_dbuf.dsize);
+
+	/* delete any existing record - if it doesn't exist we don't
+	   care.  Doing this first reduces fragmentation, and avoids
+	   coalescing with `allocated' block before it's updated. */
+
+	tdb_delete_hash(tdb, key, hash);
+
+	/* rec is reused from here on for the header of the new record */
+	if (!(rec_ptr = tdb_allocate(tdb, key.dsize + new_data_size, &rec)))
+		goto fail;
+
+	/* Read hash top into next ptr */
+	if (ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
+		goto fail;
+
+	rec.key_len = key.dsize;
+	rec.data_len = new_data_size;
+	rec.full_hash = hash;
+	rec.magic = TDB_MAGIC;
+
+	/* write out and point the top of the hash chain at it */
+	if (rec_write(tdb, rec_ptr, &rec) == -1
+	    || tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+new_data_size)==-1
+	    || ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
+		/* Need to tdb_unallocate() here */
+		goto fail;
+	}
+
+ out:
+	/* common exit: free the staging buffer and drop the chain lock */
+	SAFE_FREE(p); 
+	tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
+	return ret;
+
+fail:
+	ret = -1;
+	goto out;
+}
+
+/* Return 1 if a context on the `tdbs' list already refers to the file
+   identified by (device, ino), 0 otherwise. */
+static int tdb_already_open(dev_t device,
+			    ino_t ino)
+{
+	TDB_CONTEXT *ctx = tdbs;
+
+	while (ctx != NULL) {
+		if (ctx->device == device && ctx->inode == ino)
+			return 1;
+		ctx = ctx->next;
+	}
+	return 0;
+}
+
+/* open the database, creating it if necessary 
+
+   The open_flags and mode are passed straight to the open call on the
+   database file. A flags value of O_WRONLY is invalid. The hash size
+   is advisory, use zero for a default value.
+
+   Return is NULL on error, in which case errno is also set.  Don't 
+   try to call tdb_error or tdb_errorstr, just do strerror(errno).
+
+   This is tdb_open_ex() without a logging function.
+
+   @param name may be NULL for internal databases. */
+TDB_CONTEXT *tdb_open(const char *name, int hash_size, int tdb_flags,
+		      int open_flags, mode_t mode)
+{
+	return tdb_open_ex(name, hash_size, tdb_flags, open_flags, mode, NULL);
+}
+
+
+/* Open a database as tdb_open() does, additionally installing log_fn
+   (which may be NULL) as the context's logging function.
+
+   hash_size 0 selects DEFAULT_HASH_SIZE.  O_WRONLY is rejected with
+   EINVAL.  An O_RDONLY open marks the context read-only and turns
+   locking off.  TDB_INTERNAL creates a new database without opening
+   any file.  A file that is already open in this process (same device
+   and inode) is refused with EBUSY.
+
+   Returns the new context, which is also linked into the `tdbs' list,
+   or NULL on error with errno describing the failure. */
+TDB_CONTEXT *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
+			 int open_flags, mode_t mode,
+			 tdb_log_func log_fn)
+{
+	TDB_CONTEXT *tdb;
+	struct stat st;
+	int rev = 0, locked = 0;
+	unsigned char *vp;
+	u32 vertest;
+
+	if (!(tdb = calloc(1, sizeof *tdb))) {
+		/* Can't log this */
+		errno = ENOMEM;
+		goto fail;
+	}
+	tdb->fd = -1;
+	tdb->name = NULL;
+	tdb->map_ptr = NULL;
+	tdb->lockedkeys = NULL;
+	tdb->flags = tdb_flags;
+	tdb->open_flags = open_flags;
+	tdb->log_fn = log_fn;
+	
+	if ((open_flags & O_ACCMODE) == O_WRONLY) {
+		TDB_LOG((tdb, 0, "tdb_open_ex: can't open tdb %s write-only\n",
+			 name));
+		errno = EINVAL;
+		goto fail;
+	}
+	
+	if (hash_size == 0)
+		hash_size = DEFAULT_HASH_SIZE;
+	if ((open_flags & O_ACCMODE) == O_RDONLY) {
+		tdb->read_only = 1;
+		/* read only databases don't do locking or clear if first */
+		tdb->flags |= TDB_NOLOCK;
+		tdb->flags &= ~TDB_CLEAR_IF_FIRST;
+	}
+
+	/* internal databases don't mmap or lock, and start off cleared */
+	if (tdb->flags & TDB_INTERNAL) {
+		tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP);
+		tdb->flags &= ~TDB_CLEAR_IF_FIRST;
+		if (tdb_new_database(tdb, hash_size) != 0) {
+			TDB_LOG((tdb, 0, "tdb_open_ex: tdb_new_database failed!"));
+			goto fail;
+		}
+		goto internal;
+	}
+
+	if ((tdb->fd = open(name, open_flags, mode)) == -1) {
+		TDB_LOG((tdb, 5, "tdb_open_ex: could not open file %s: %s\n",
+			 name, strerror(errno)));
+		goto fail;	/* errno set by open(2) */
+	}
+
+	/* ensure there is only one process initialising at once */
+	if (tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0) == -1) {
+		TDB_LOG((tdb, 0, "tdb_open_ex: failed to get global lock on %s: %s\n",
+			 name, strerror(errno)));
+		goto fail;	/* errno set by tdb_brlock */
+	}
+
+	/* we need to zero database if we are the only one with it open */
+	if ((tdb_flags & TDB_CLEAR_IF_FIRST) &&
+		(locked = (tdb_brlock(tdb, ACTIVE_LOCK, F_WRLCK, F_SETLK, 0) == 0))) {
+		open_flags |= O_CREAT;
+		if (ftruncate(tdb->fd, 0) == -1) {
+			TDB_LOG((tdb, 0, "tdb_open_ex: "
+				 "failed to truncate %s: %s\n",
+				 name, strerror(errno)));
+			goto fail; /* errno set by ftruncate */
+		}
+	}
+
+	if (read(tdb->fd, &tdb->header, sizeof(tdb->header)) != sizeof(tdb->header)
+	    || strcmp(tdb->header.magic_food, TDB_MAGIC_FOOD) != 0
+	    || (tdb->header.version != TDB_VERSION
+		&& !(rev = (tdb->header.version==TDB_BYTEREV(TDB_VERSION))))) {
+		/* its not a valid database - possibly initialise it */
+		if (!(open_flags & O_CREAT) || tdb_new_database(tdb, hash_size) == -1) {
+			errno = EIO; /* ie bad format or something */
+			goto fail;
+		}
+		rev = (tdb->flags & TDB_CONVERT);
+	}
+	/* assemble the stored version bytes most-significant first to see
+	   whether the header was written in big-endian byte order */
+	vp = (unsigned char *)&tdb->header.version;
+	vertest = (((u32)vp[0]) << 24) | (((u32)vp[1]) << 16) |
+		  (((u32)vp[2]) << 8) | (u32)vp[3];
+	tdb->flags |= (vertest==TDB_VERSION) ? TDB_BIGENDIAN : 0;
+	if (!rev)
+		tdb->flags &= ~TDB_CONVERT;
+	else {
+		tdb->flags |= TDB_CONVERT;
+		convert(&tdb->header, sizeof(tdb->header));
+	}
+	if (fstat(tdb->fd, &st) == -1)
+		goto fail;
+
+	/* Is it already in the open list?  If so, fail. */
+	if (tdb_already_open(st.st_dev, st.st_ino)) {
+		/* dev_t and ino_t need not be int-sized: cast them so the
+		   variadic arguments really match the %d conversions */
+		TDB_LOG((tdb, 2, "tdb_open_ex: "
+			 "%s (%d,%d) is already open in this process\n",
+			 name, (int)st.st_dev, (int)st.st_ino));
+		errno = EBUSY;
+		goto fail;
+	}
+
+	if (!(tdb->name = (char *)strdup(name))) {
+		errno = ENOMEM;
+		goto fail;
+	}
+
+	tdb->map_size = st.st_size;
+	tdb->device = st.st_dev;
+	tdb->inode = st.st_ino;
+	tdb->locked = calloc(tdb->header.hash_size+1, sizeof(tdb->locked[0]));
+	if (!tdb->locked) {
+		TDB_LOG((tdb, 2, "tdb_open_ex: "
+			 "failed to allocate lock structure for %s\n",
+			 name));
+		errno = ENOMEM;
+		goto fail;
+	}
+	tdb_mmap(tdb);
+	if (locked) {
+		if (!tdb->read_only)
+			if (tdb_clear_spinlocks(tdb) != 0) {
+				TDB_LOG((tdb, 0, "tdb_open_ex: "
+				"failed to clear spinlock\n"));
+				goto fail;
+			}
+		/* give up the exclusive ACTIVE_LOCK taken for the truncate */
+		if (tdb_brlock(tdb, ACTIVE_LOCK, F_UNLCK, F_SETLK, 0) == -1) {
+			TDB_LOG((tdb, 0, "tdb_open_ex: "
+				 "failed to take ACTIVE_LOCK on %s: %s\n",
+				 name, strerror(errno)));
+			goto fail;
+		}
+
+	}
+
+	/* We always need to do this if the CLEAR_IF_FIRST flag is set, even if
+	   we didn't get the initial exclusive lock as we need to let all other
+	   users know we're using it. */
+
+	if (tdb_flags & TDB_CLEAR_IF_FIRST) {
+		/* leave this lock in place to indicate it's in use */
+		if (tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0) == -1)
+			goto fail;
+	}
+
+
+ internal:
+	/* Internal (memory-only) databases skip all the code above to
+	 * do with disk files, and resume here by releasing their
+	 * global lock and hooking into the active list. */
+	if (tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0) == -1)
+		goto fail;
+	tdb->next = tdbs;
+	tdbs = tdb;
+	return tdb;
+
+ fail:
+	/* the cleanup calls below may change errno: report the original */
+	{ int save_errno = errno;
+
+	if (!tdb)
+		return NULL;
+	
+	if (tdb->map_ptr) {
+		if (tdb->flags & TDB_INTERNAL)
+			SAFE_FREE(tdb->map_ptr);
+		else
+			tdb_munmap(tdb);
+	}
+	SAFE_FREE(tdb->name);
+	if (tdb->fd != -1)
+		if (close(tdb->fd) != 0)
+			TDB_LOG((tdb, 5, "tdb_open_ex: failed to close tdb->fd on error!\n"));
+	SAFE_FREE(tdb->locked);
+	SAFE_FREE(tdb);
+	errno = save_errno;
+	return NULL;
+	}
+}
+
+/**
+ * Close a database and release everything the context owns: the
+ * mapping (or the in-memory image of an internal database), the name,
+ * the file descriptor and the lock arrays.  The context is removed
+ * from the `tdbs' list, wiped and freed, so `tdb' must not be used
+ * after this call.
+ *
+ * @returns the result of close() on the file descriptor (-1 for
+ * error), or 0 when there was no descriptor to close.
+ **/
+int tdb_close(TDB_CONTEXT *tdb)
+{
+	TDB_CONTEXT **link;
+	int ret = 0;
+
+	if (tdb->map_ptr != NULL) {
+		if (!(tdb->flags & TDB_INTERNAL)) {
+			tdb_munmap(tdb);
+		} else {
+			SAFE_FREE(tdb->map_ptr);
+		}
+	}
+	SAFE_FREE(tdb->name);
+	if (tdb->fd != -1) {
+		ret = close(tdb->fd);
+	}
+	SAFE_FREE(tdb->locked);
+	SAFE_FREE(tdb->lockedkeys);
+
+	/* Remove from contexts list: find the link that points at us */
+	link = &tdbs;
+	while (*link != NULL && *link != tdb)
+		link = &(*link)->next;
+	if (*link == tdb)
+		*link = tdb->next;
+
+	memset(tdb, 0, sizeof(*tdb));
+	SAFE_FREE(tdb);
+
+	return ret;
+}
+
+/* Write-lock every hash chain of the database.
+   Fails with TDB_ERR_LOCK on a read-only database and with
+   TDB_ERR_NOLOCK while tdb_lockkeys() is in effect.  If any chain
+   cannot be locked, the chains locked so far are released again and
+   TDB_ERR_NOLOCK is reported.  Returns 0 on success, -1 on failure. */
+int tdb_lockall(TDB_CONTEXT *tdb)
+{
+	u32 chain;
+
+	/* There are no locks on read-only dbs */
+	if (tdb->read_only)
+		return TDB_ERRCODE(TDB_ERR_LOCK, -1);
+	if (tdb->lockedkeys)
+		return TDB_ERRCODE(TDB_ERR_NOLOCK, -1);
+
+	for (chain = 0; chain < tdb->header.hash_size; chain++) {
+		if (tdb_lock(tdb, chain, F_WRLCK) != 0) {
+			u32 held;
+
+			/* roll back the chains we did manage to lock */
+			for (held = 0; held < chain; held++)
+				tdb_unlock(tdb, held, F_WRLCK);
+			return TDB_ERRCODE(TDB_ERR_NOLOCK, -1);
+		}
+	}
+
+	return 0;
+}
+/* Release the write lock on every hash chain (undoes tdb_lockall()). */
+void tdb_unlockall(TDB_CONTEXT *tdb)
+{
+	u32 chain = 0;
+
+	while (chain < tdb->header.hash_size) {
+		tdb_unlock(tdb, chain, F_WRLCK);
+		chain++;
+	}
+}
+
+/* Write-lock the hash chains of the given keys and restrict the context
+   to those keys (see tdb_keylocked()) until tdb_unlockkeys() is called.
+   tdb->lockedkeys[0] holds the number of keys; the key hashes follow,
+   sorted by bucket so that chains are always locked in the same order.
+   Returns 0 on success, -1 on failure (TDB_ERR_NOLOCK if keys are
+   already locked or a chain lock fails, TDB_ERR_OOM if out of memory). */
+int tdb_lockkeys(TDB_CONTEXT *tdb, u32 number, TDB_DATA keys[])
+{
+	u32 i, j, hash;
+
+	/* Can't lock more keys if already locked */
+	if (tdb->lockedkeys)
+		return TDB_ERRCODE(TDB_ERR_NOLOCK, -1);
+	if (!(tdb->lockedkeys = malloc(sizeof(u32) * (number+1))))
+		return TDB_ERRCODE(TDB_ERR_OOM, -1);
+	/* First number in array is # keys */
+	tdb->lockedkeys[0] = number;
+
+	/* Insertion sort by bucket */
+	for (i = 0; i < number; i++) {
+		hash = tdb_hash(&keys[i]);
+		/* find the slot for this hash; the loop body is empty */
+		for (j = 0; j < i && BUCKET(tdb->lockedkeys[j+1]) < BUCKET(hash); j++)
+			;
+		/* shift the tail up by one and drop the hash in */
+		memmove(&tdb->lockedkeys[j+2], &tdb->lockedkeys[j+1], sizeof(u32) * (i-j));
+		tdb->lockedkeys[j+1] = hash;
+	}
+	/* Finally, lock in order: the chain of each sorted hash, not
+	   chain number i */
+	for (i = 0; i < number; i++)
+		if (tdb_lock(tdb, BUCKET(tdb->lockedkeys[i+1]), F_WRLCK))
+			break;
+
+	/* If error, release locks we have... */
+	if (i < number) {
+		for ( j = 0; j < i; j++)
+			tdb_unlock(tdb, BUCKET(tdb->lockedkeys[j+1]), F_WRLCK);
+		SAFE_FREE(tdb->lockedkeys);
+		return TDB_ERRCODE(TDB_ERR_NOLOCK, -1);
+	}
+	return 0;
+}
+
+/* Unlock the keys previously locked by tdb_lockkeys() and lift the key
+   restriction.  Does nothing if no keys are locked. */
+void tdb_unlockkeys(TDB_CONTEXT *tdb)
+{
+	u32 i;
+	if (!tdb->lockedkeys)
+		return;
+	/* the array stores full hashes; tdb_unlock wants the chain number */
+	for (i = 0; i < tdb->lockedkeys[0]; i++)
+		tdb_unlock(tdb, BUCKET(tdb->lockedkeys[i+1]), F_WRLCK);
+	SAFE_FREE(tdb->lockedkeys);
+}
+
+/* lock/unlock one hash chain. This is meant to be used to reduce
+   contention - it cannot guarantee how many records will be locked.
+   tdb_chainlock write-locks the chain that `key' hashes to and returns
+   the result of tdb_lock(). */
+int tdb_chainlock(TDB_CONTEXT *tdb, TDB_DATA key)
+{
+	return tdb_lock(tdb, BUCKET(tdb_hash(&key)), F_WRLCK);
+}
+
+/* Release the write lock on the chain that `key' hashes to; returns the
+   result of tdb_unlock(). */
+int tdb_chainunlock(TDB_CONTEXT *tdb, TDB_DATA key)
+{
+	return tdb_unlock(tdb, BUCKET(tdb_hash(&key)), F_WRLCK);
+}
+
+/* Read-lock the chain that `key' hashes to; returns the result of
+   tdb_lock(). */
+int tdb_chainlock_read(TDB_CONTEXT *tdb, TDB_DATA key)
+{
+	return tdb_lock(tdb, BUCKET(tdb_hash(&key)), F_RDLCK);
+}
+
+/* Release the read lock on the chain that `key' hashes to; returns the
+   result of tdb_unlock(). */
+int tdb_chainunlock_read(TDB_CONTEXT *tdb, TDB_DATA key)
+{
+	return tdb_unlock(tdb, BUCKET(tdb_hash(&key)), F_RDLCK);
+}
+
+
+/* register a logging function: fn replaces the context's current log_fn */
+void tdb_logging_function(TDB_CONTEXT *tdb, void (*fn)(TDB_CONTEXT *, int , const char *, ...))
+{
+	tdb->log_fn = fn;
+}
+
+
+/* reopen a tdb - this can be used after a fork to ensure that we have an independent
+   seek pointer from our parent and to re-establish locks.
+   Internal databases are left untouched.  Otherwise the file is unmapped,
+   closed, opened again by name (without O_CREAT / O_TRUNC), checked to be
+   the same device and inode, and mapped again; with TDB_CLEAR_IF_FIRST the
+   shared ACTIVE_LOCK is taken again.
+   Returns 0 on success.  On failure returns -1 and the context has been
+   closed and freed by tdb_close(), so `tdb' must not be used afterwards. */
+int tdb_reopen(TDB_CONTEXT *tdb)
+{
+	struct stat st;
+
+	if (tdb->flags & TDB_INTERNAL)
+		return 0; /* Nothing to do. */
+	if (tdb_munmap(tdb) != 0) {
+		TDB_LOG((tdb, 0, "tdb_reopen: munmap failed (%s)\n", strerror(errno)));
+		goto fail;
+	}
+	if (close(tdb->fd) != 0)
+		TDB_LOG((tdb, 0, "tdb_reopen: WARNING closing tdb->fd failed!\n"));
+	/* the file must already exist: never create or truncate it here */
+	tdb->fd = open(tdb->name, tdb->open_flags & ~(O_CREAT|O_TRUNC), 0);
+	if (tdb->fd == -1) {
+		TDB_LOG((tdb, 0, "tdb_reopen: open failed (%s)\n", strerror(errno)));
+		goto fail;
+	}
+	if (fstat(tdb->fd, &st) != 0) {
+		TDB_LOG((tdb, 0, "tdb_reopen: fstat failed (%s)\n", strerror(errno)));
+		goto fail;
+	}
+	/* make sure the name still refers to the file we had open */
+	if (st.st_ino != tdb->inode || st.st_dev != tdb->device) {
+		TDB_LOG((tdb, 0, "tdb_reopen: file dev/inode has changed!\n"));
+		goto fail;
+	}
+	tdb_mmap(tdb);
+	if ((tdb->flags & TDB_CLEAR_IF_FIRST) && (tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0) == -1)) {
+		TDB_LOG((tdb, 0, "tdb_reopen: failed to obtain active lock\n"));
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	/* this frees the context as well */
+	tdb_close(tdb);
+	return -1;
+}
+
+/* Reopen every database on the `tdbs' list.  TDB_CLEAR_IF_FIRST is
+   cleared on each context before it is reopened.  Stops at the first
+   failure and returns -1; returns 0 if all reopens succeed. */
+int tdb_reopen_all(void)
+{
+	TDB_CONTEXT *tdb = tdbs;
+
+	while (tdb != NULL) {
+		/* Ensure no clear-if-first. */
+		tdb->flags &= ~TDB_CLEAR_IF_FIRST;
+		if (tdb_reopen(tdb) != 0)
+			return -1;
+		tdb = tdb->next;
+	}
+
+	return 0;
+}
-- 
cgit 


From 45e93c19ef95978f908f5b14962770510634cd3b Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze@samba.org>
Date: Sat, 29 May 2004 08:11:46 +0000
Subject: r943: change samba4 to use 'uint8_t' instead of 'unsigned char'

metze
(This used to be commit b5378803fdcb3b3afe7c2932a38828e83470f61a)
---
 source4/lib/tdb/common/tdb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index c8ac7babad..b7b9631444 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -1762,7 +1762,7 @@ TDB_CONTEXT *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
 	TDB_CONTEXT *tdb;
 	struct stat st;
 	int rev = 0, locked = 0;
-	unsigned char *vp;
+	uint8_t *vp;
 	u32 vertest;
 
 	if (!(tdb = calloc(1, sizeof *tdb))) {
@@ -1841,7 +1841,7 @@ TDB_CONTEXT *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
 		}
 		rev = (tdb->flags & TDB_CONVERT);
 	}
-	vp = (unsigned char *)&tdb->header.version;
+	vp = (uint8_t *)&tdb->header.version;
 	vertest = (((u32)vp[0]) << 24) | (((u32)vp[1]) << 16) |
 		  (((u32)vp[2]) << 8) | (u32)vp[3];
 	tdb->flags |= (vertest==TDB_VERSION) ? TDB_BIGENDIAN : 0;
-- 
cgit 


From 770e3307ce3da928762e15a136c562df86a9c799 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze@samba.org>
Date: Tue, 1 Jun 2004 10:12:52 +0000
Subject: r962: convert 'unsigned' and 'unsigned int' to uint_t

metze
(This used to be commit 57151e80eb1090281401930c8fe25b20a8cf3a38)
---
 source4/lib/tdb/common/tdb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index b7b9631444..d4c0928217 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -541,7 +541,7 @@ static tdb_off tdb_dump_record(TDB_CONTEXT *tdb, tdb_off offset)
 
 	if (tailer != rec.rec_len + sizeof(rec)) {
 		printf("ERROR: tailer does not match record! tailer=%u totalsize=%u\n",
-				(unsigned)tailer, (unsigned)(rec.rec_len + sizeof(rec)));
+				(uint_t)tailer, (uint_t)(rec.rec_len + sizeof(rec)));
 	}
 	return rec.next;
 }
-- 
cgit 


From b619abb98e0c9384f75586a56e63fd3a1fc6badb Mon Sep 17 00:00:00 2001
From: Tim Potter <tpot@samba.org>
Date: Mon, 7 Jun 2004 01:51:04 +0000
Subject: r1053: Make tdb build standalone:

  - #include <stdint.h>

  - uint_t isn't a valid type, change back to unsigned int
(This used to be commit f690325565d2393bba3cb9f6e7cdf3753cbd4423)
---
 source4/lib/tdb/common/tdb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index d4c0928217..ef13955fab 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -53,6 +53,7 @@
 
 #include <stdlib.h>
 #include <stdio.h>
+#include <stdint.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <string.h>
@@ -541,7 +542,7 @@ static tdb_off tdb_dump_record(TDB_CONTEXT *tdb, tdb_off offset)
 
 	if (tailer != rec.rec_len + sizeof(rec)) {
 		printf("ERROR: tailer does not match record! tailer=%u totalsize=%u\n",
-				(uint_t)tailer, (uint_t)(rec.rec_len + sizeof(rec)));
+				(unsigned int)tailer, (unsigned int)(rec.rec_len + sizeof(rec)));
 	}
 	return rec.next;
 }
-- 
cgit 


From 85d8d24d3dd5e84c465050a2440d9a21c970c826 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra@samba.org>
Date: Wed, 25 Aug 2004 01:12:59 +0000
Subject: r2034: Allow user-selectable hash functions on open only. Jeremy.
 (This used to be commit b43320ee5c9ac14c330e61ae62e0786b088e04fa)

---
 source4/lib/tdb/common/tdb.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index ef13955fab..b9ca46c322 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -331,7 +331,7 @@ static int tdb_unlock(TDB_CONTEXT *tdb, int list, int ltype)
 }
 
 /* This is based on the hash algorithm from gdbm */
-static u32 tdb_hash(TDB_DATA *key)
+static u32 default_tdb_hash(TDB_DATA *key)
 {
 	u32 value;	/* Used to compute the hash value.  */
 	u32   i;	/* Used to cycle through random values. */
@@ -1171,7 +1171,7 @@ TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key)
 	u32 hash;
 
 	/* find which hash bucket it is in */
-	hash = tdb_hash(&key);
+	hash = tdb->hash_fn(&key);
 	if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
 		return tdb_null;
 
@@ -1203,7 +1203,7 @@ static int tdb_exists_hash(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash)
 
 int tdb_exists(TDB_CONTEXT *tdb, TDB_DATA key)
 {
-	u32 hash = tdb_hash(&key);
+	u32 hash = tdb->hash_fn(&key);
 	return tdb_exists_hash(tdb, key, hash);
 }
 
@@ -1463,7 +1463,7 @@ TDB_DATA tdb_nextkey(TDB_CONTEXT *tdb, TDB_DATA oldkey)
 
 	if (!tdb->travlocks.off) {
 		/* No previous element: do normal find, and lock record */
-		tdb->travlocks.off = tdb_find_lock_hash(tdb, oldkey, tdb_hash(&oldkey), F_WRLCK, &rec);
+		tdb->travlocks.off = tdb_find_lock_hash(tdb, oldkey, tdb->hash_fn(&oldkey), F_WRLCK, &rec);
 		if (!tdb->travlocks.off)
 			return tdb_null;
 		tdb->travlocks.hash = BUCKET(rec.full_hash);
@@ -1507,7 +1507,7 @@ static int tdb_delete_hash(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash)
 
 int tdb_delete(TDB_CONTEXT *tdb, TDB_DATA key)
 {
-	u32 hash = tdb_hash(&key);
+	u32 hash = tdb->hash_fn(&key);
 	return tdb_delete_hash(tdb, key, hash);
 }
 
@@ -1525,7 +1525,7 @@ int tdb_store(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 	int ret = 0;
 
 	/* find which hash bucket it is in */
-	hash = tdb_hash(&key);
+	hash = tdb->hash_fn(&key);
 	if (!tdb_keylocked(tdb, hash))
 		return -1;
 	if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
@@ -1643,7 +1643,7 @@ int tdb_append(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA new_dbuf)
 	size_t new_data_size = 0;
 
 	/* find which hash bucket it is in */
-	hash = tdb_hash(&key);
+	hash = tdb->hash_fn(&key);
 	if (!tdb_keylocked(tdb, hash))
 		return -1;
 	if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
@@ -1752,13 +1752,14 @@ static int tdb_already_open(dev_t device,
 TDB_CONTEXT *tdb_open(const char *name, int hash_size, int tdb_flags,
 		      int open_flags, mode_t mode)
 {
-	return tdb_open_ex(name, hash_size, tdb_flags, open_flags, mode, NULL);
+	return tdb_open_ex(name, hash_size, tdb_flags, open_flags, mode, NULL, NULL);
 }
 
 
 TDB_CONTEXT *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
 			 int open_flags, mode_t mode,
-			 tdb_log_func log_fn)
+			 tdb_log_func log_fn,
+			 tdb_hash_func hash_fn)
 {
 	TDB_CONTEXT *tdb;
 	struct stat st;
@@ -1778,7 +1779,8 @@ TDB_CONTEXT *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
 	tdb->flags = tdb_flags;
 	tdb->open_flags = open_flags;
 	tdb->log_fn = log_fn;
-	
+	tdb->hash_fn = hash_fn ? hash_fn : default_tdb_hash;
+
 	if ((open_flags & O_ACCMODE) == O_WRONLY) {
 		TDB_LOG((tdb, 0, "tdb_open_ex: can't open tdb %s write-only\n",
 			 name));
@@ -2023,7 +2025,7 @@ int tdb_lockkeys(TDB_CONTEXT *tdb, u32 number, TDB_DATA keys[])
 
 	/* Insertion sort by bucket */
 	for (i = 0; i < number; i++) {
-		hash = tdb_hash(&keys[i]);
+		hash = tdb->hash_fn(&keys[i]);
 		for (j = 0; j < i && BUCKET(tdb->lockedkeys[j+1]) < BUCKET(hash); j++);
 			memmove(&tdb->lockedkeys[j+2], &tdb->lockedkeys[j+1], sizeof(u32) * (i-j));
 		tdb->lockedkeys[j+1] = hash;
@@ -2058,22 +2060,22 @@ void tdb_unlockkeys(TDB_CONTEXT *tdb)
    contention - it cannot guarantee how many records will be locked */
 int tdb_chainlock(TDB_CONTEXT *tdb, TDB_DATA key)
 {
-	return tdb_lock(tdb, BUCKET(tdb_hash(&key)), F_WRLCK);
+	return tdb_lock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK);
 }
 
 int tdb_chainunlock(TDB_CONTEXT *tdb, TDB_DATA key)
 {
-	return tdb_unlock(tdb, BUCKET(tdb_hash(&key)), F_WRLCK);
+	return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK);
 }
 
 int tdb_chainlock_read(TDB_CONTEXT *tdb, TDB_DATA key)
 {
-	return tdb_lock(tdb, BUCKET(tdb_hash(&key)), F_RDLCK);
+	return tdb_lock(tdb, BUCKET(tdb->hash_fn(&key)), F_RDLCK);
 }
 
 int tdb_chainunlock_read(TDB_CONTEXT *tdb, TDB_DATA key)
 {
-	return tdb_unlock(tdb, BUCKET(tdb_hash(&key)), F_RDLCK);
+	return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_RDLCK);
 }
 
 
-- 
cgit 


From cfedadec2211676ed7f99685c695e1e2f0d41923 Mon Sep 17 00:00:00 2001
From: Tim Potter <tpot@samba.org>
Date: Tue, 31 Aug 2004 23:57:14 +0000
Subject: r2154: Merge of Jim's format string fixup in r2130:2131 (This used to
 be commit 17a53eaac2d0d76413c8bb27a6f1c80536ccf3a7)

---
 source4/lib/tdb/common/tdb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index b9ca46c322..aefa08cdb8 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -490,7 +490,7 @@ static int rec_free_read(TDB_CONTEXT *tdb, tdb_off off, struct list_struct *rec)
 	if (rec->magic == TDB_MAGIC) {
 		/* this happens when a app is showdown while deleting a record - we should
 		   not completely fail when this happens */
-		TDB_LOG((tdb, 0,"rec_free_read non-free magic at offset=%d - fixing\n", 
+		TDB_LOG((tdb, 0,"rec_free_read non-free magic 0x%x at offset=%d - fixing\n", 
 			 rec->magic, off));
 		rec->magic = TDB_FREE_MAGIC;
 		if (tdb_write(tdb, off, rec, sizeof(*rec)) == -1)
-- 
cgit 


From 8fa455d815ff4ad3bda57d1c7b683adfe04efd85 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Tue, 7 Sep 2004 13:44:49 +0000
Subject: r2238: the tdb_debug() function was totally bogus - remove it (you
 can't convert a ... varargs function to a va_list by just a cast!!)

also mark the tdb log function with PRINTF_ATTRIBUTE() and fixed some
bad format errors in tdb.c that jim found.
(This used to be commit c26c92eb8f538748fcbb2ae5a0a8a02bffbbbf86)
---
 source4/lib/tdb/common/tdb.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index aefa08cdb8..4ae23bead7 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -89,7 +89,7 @@
 /* NB assumes there is a local variable called "tdb" that is the
  * current context, also takes doubly-parenthesized print-style
  * argument. */
-#define TDB_LOG(x) (tdb->log_fn?((tdb->log_fn x),0) : 0)
+#define TDB_LOG(x) tdb->log_fn x
 
 /* lock offsets */
 #define GLOBAL_LOCK 0
@@ -277,7 +277,7 @@ static int tdb_lock(TDB_CONTEXT *tdb, int list, int ltype)
 	if (tdb->locked[list+1].count == 0) {
 		if (!tdb->read_only && tdb->header.rwlocks) {
 			if (tdb_spinlock(tdb, list, ltype)) {
-				TDB_LOG((tdb, 0, "tdb_lock spinlock failed on list ltype=%d\n", 
+				TDB_LOG((tdb, 0, "tdb_lock spinlock failed on list %d ltype=%d\n", 
 					   list, ltype));
 				return -1;
 			}
@@ -1755,6 +1755,11 @@ TDB_CONTEXT *tdb_open(const char *name, int hash_size, int tdb_flags,
 	return tdb_open_ex(name, hash_size, tdb_flags, open_flags, mode, NULL, NULL);
 }
 
+/* a default logging function */
+static void null_log_fn(TDB_CONTEXT *tdb, int level, const char *fmt, ...)
+{
+}
+
 
 TDB_CONTEXT *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
 			 int open_flags, mode_t mode,
@@ -1778,7 +1783,7 @@ TDB_CONTEXT *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
 	tdb->lockedkeys = NULL;
 	tdb->flags = tdb_flags;
 	tdb->open_flags = open_flags;
-	tdb->log_fn = log_fn;
+	tdb->log_fn = log_fn?log_fn:null_log_fn;
 	tdb->hash_fn = hash_fn ? hash_fn : default_tdb_hash;
 
 	if ((open_flags & O_ACCMODE) == O_WRONLY) {
@@ -1861,7 +1866,7 @@ TDB_CONTEXT *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
 	if (tdb_already_open(st.st_dev, st.st_ino)) {
 		TDB_LOG((tdb, 2, "tdb_open_ex: "
 			 "%s (%d,%d) is already open in this process\n",
-			 name, st.st_dev, st.st_ino));
+			 name, (int)st.st_dev, (int)st.st_ino));
 		errno = EBUSY;
 		goto fail;
 	}
@@ -2082,7 +2087,7 @@ int tdb_chainunlock_read(TDB_CONTEXT *tdb, TDB_DATA key)
 /* register a loging function */
 void tdb_logging_function(TDB_CONTEXT *tdb, void (*fn)(TDB_CONTEXT *, int , const char *, ...))
 {
-	tdb->log_fn = fn;
+	tdb->log_fn = fn?fn:null_log_fn;
 }
 
 
-- 
cgit 


From ddd5f1163d5dee1df34f606ac8d2aba35150337c Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra@samba.org>
Date: Fri, 15 Oct 2004 00:03:26 +0000
Subject: r2981: Fix incorrect locks/unlocks in
 tdb_lockkeys()/tdb_unlockkeys(). Spotted by Taj Khattra
 <taj.khattra@gmail.com>. Jeremy. (This used to be commit
 a7d92301bbf45cb9e475e4876fdbb37644ad5871)

---
 source4/lib/tdb/common/tdb.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 4ae23bead7..c1deef80cd 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -2037,13 +2037,13 @@ int tdb_lockkeys(TDB_CONTEXT *tdb, u32 number, TDB_DATA keys[])
 	}
 	/* Finally, lock in order */
 	for (i = 0; i < number; i++)
-		if (tdb_lock(tdb, i, F_WRLCK))
+		if (tdb_lock(tdb, BUCKET(tdb->lockedkeys[i+1]), F_WRLCK))
 			break;
 
 	/* If error, release locks we have... */
 	if (i < number) {
 		for ( j = 0; j < i; j++)
-			tdb_unlock(tdb, j, F_WRLCK);
+			tdb_unlock(tdb, BUCKET(tdb->lockedkeys[j+1]), F_WRLCK);
 		SAFE_FREE(tdb->lockedkeys);
 		return TDB_ERRCODE(TDB_ERR_NOLOCK, -1);
 	}
@@ -2057,7 +2057,7 @@ void tdb_unlockkeys(TDB_CONTEXT *tdb)
 	if (!tdb->lockedkeys)
 		return;
 	for (i = 0; i < tdb->lockedkeys[0]; i++)
-		tdb_unlock(tdb, tdb->lockedkeys[i+1], F_WRLCK);
+		tdb_unlock(tdb, BUCKET(tdb->lockedkeys[i+1]), F_WRLCK);
 	SAFE_FREE(tdb->lockedkeys);
 }
 
-- 
cgit 


From 83928ac670bb17d4a1a8204d52468e5cca7c03d6 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Fri, 15 Oct 2004 06:41:35 +0000
Subject: r2985: got rid of the unused tdb_lockkeys() and tdb_unlockkeys()
 functions

they have been broken for 4 years (ever since they were added) and
have never been used, which makes them prime candidates for
destruction.
(This used to be commit 0b53ab85aae4569c04495f07c18a65fd6b47bf4c)
---
 source4/lib/tdb/common/tdb.c | 71 --------------------------------------------
 1 file changed, 71 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index c1deef80cd..67d9f9903d 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -1070,26 +1070,12 @@ static tdb_off tdb_find(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash,
 	return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
 }
 
-/* If they do lockkeys, check that this hash is one they locked */
-static int tdb_keylocked(TDB_CONTEXT *tdb, u32 hash)
-{
-	u32 i;
-	if (!tdb->lockedkeys)
-		return 1;
-	for (i = 0; i < tdb->lockedkeys[0]; i++)
-		if (tdb->lockedkeys[i+1] == hash)
-			return 1;
-	return TDB_ERRCODE(TDB_ERR_NOLOCK, 0);
-}
-
 /* As tdb_find, but if you succeed, keep the lock */
 static tdb_off tdb_find_lock_hash(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash, int locktype,
 			     struct list_struct *rec)
 {
 	u32 rec_ptr;
 
-	if (!tdb_keylocked(tdb, hash))
-		return 0;
 	if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
 		return 0;
 	if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
@@ -1291,10 +1277,6 @@ static int tdb_next_lock(TDB_CONTEXT *tdb, struct tdb_traverse_lock *tlock,
 {
 	int want_next = (tlock->off != 0);
 
-	/* No traversal allows if you've called tdb_lockkeys() */
-	if (tdb->lockedkeys)
-		return TDB_ERRCODE(TDB_ERR_NOLOCK, -1);
-
 	/* Lock each chain from the start one. */
 	for (; tlock->hash < tdb->header.hash_size; tlock->hash++) {
 		if (tdb_lock(tdb, tlock->hash, F_WRLCK) == -1)
@@ -1526,8 +1508,6 @@ int tdb_store(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 
 	/* find which hash bucket it is in */
 	hash = tdb->hash_fn(&key);
-	if (!tdb_keylocked(tdb, hash))
-		return -1;
 	if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 		return -1;
 
@@ -1644,8 +1624,6 @@ int tdb_append(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA new_dbuf)
 
 	/* find which hash bucket it is in */
 	hash = tdb->hash_fn(&key);
-	if (!tdb_keylocked(tdb, hash))
-		return -1;
 	if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 		return -1;
 
@@ -1780,7 +1758,6 @@ TDB_CONTEXT *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
 	tdb->fd = -1;
 	tdb->name = NULL;
 	tdb->map_ptr = NULL;
-	tdb->lockedkeys = NULL;
 	tdb->flags = tdb_flags;
 	tdb->open_flags = open_flags;
 	tdb->log_fn = log_fn?log_fn:null_log_fn;
@@ -1968,7 +1945,6 @@ int tdb_close(TDB_CONTEXT *tdb)
 	if (tdb->fd != -1)
 		ret = close(tdb->fd);
 	SAFE_FREE(tdb->locked);
-	SAFE_FREE(tdb->lockedkeys);
 
 	/* Remove from contexts list */
 	for (i = &tdbs; *i; i = &(*i)->next) {
@@ -1992,8 +1968,6 @@ int tdb_lockall(TDB_CONTEXT *tdb)
 	/* There are no locks on read-only dbs */
 	if (tdb->read_only)
 		return TDB_ERRCODE(TDB_ERR_LOCK, -1);
-	if (tdb->lockedkeys)
-		return TDB_ERRCODE(TDB_ERR_NOLOCK, -1);
 	for (i = 0; i < tdb->header.hash_size; i++) 
 		if (tdb_lock(tdb, i, F_WRLCK))
 			break;
@@ -2016,51 +1990,6 @@ void tdb_unlockall(TDB_CONTEXT *tdb)
 		tdb_unlock(tdb, i, F_WRLCK);
 }
 
-int tdb_lockkeys(TDB_CONTEXT *tdb, u32 number, TDB_DATA keys[])
-{
-	u32 i, j, hash;
-
-	/* Can't lock more keys if already locked */
-	if (tdb->lockedkeys)
-		return TDB_ERRCODE(TDB_ERR_NOLOCK, -1);
-	if (!(tdb->lockedkeys = malloc(sizeof(u32) * (number+1))))
-		return TDB_ERRCODE(TDB_ERR_OOM, -1);
-	/* First number in array is # keys */
-	tdb->lockedkeys[0] = number;
-
-	/* Insertion sort by bucket */
-	for (i = 0; i < number; i++) {
-		hash = tdb->hash_fn(&keys[i]);
-		for (j = 0; j < i && BUCKET(tdb->lockedkeys[j+1]) < BUCKET(hash); j++);
-			memmove(&tdb->lockedkeys[j+2], &tdb->lockedkeys[j+1], sizeof(u32) * (i-j));
-		tdb->lockedkeys[j+1] = hash;
-	}
-	/* Finally, lock in order */
-	for (i = 0; i < number; i++)
-		if (tdb_lock(tdb, BUCKET(tdb->lockedkeys[i+1]), F_WRLCK))
-			break;
-
-	/* If error, release locks we have... */
-	if (i < number) {
-		for ( j = 0; j < i; j++)
-			tdb_unlock(tdb, BUCKET(tdb->lockedkeys[j+1]), F_WRLCK);
-		SAFE_FREE(tdb->lockedkeys);
-		return TDB_ERRCODE(TDB_ERR_NOLOCK, -1);
-	}
-	return 0;
-}
-
-/* Unlock the keys previously locked by tdb_lockkeys() */
-void tdb_unlockkeys(TDB_CONTEXT *tdb)
-{
-	u32 i;
-	if (!tdb->lockedkeys)
-		return;
-	for (i = 0; i < tdb->lockedkeys[0]; i++)
-		tdb_unlock(tdb, BUCKET(tdb->lockedkeys[i+1]), F_WRLCK);
-	SAFE_FREE(tdb->lockedkeys);
-}
-
 /* lock/unlock one hash chain. This is meant to be used to reduce
    contention - it cannot guarantee how many records will be locked */
 int tdb_chainlock(TDB_CONTEXT *tdb, TDB_DATA key)
-- 
cgit 


From ead3508ac81ff3ed2a48753f3b5e23537ba6ec73 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Tue, 2 Nov 2004 00:24:21 +0000
Subject: r3447: more include/system/XXX.h include files (This used to be
 commit 264ce9181089922547e8f6f67116f2d7277a5105)

---
 source4/lib/tdb/common/tdb.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 67d9f9903d..d41809c3d8 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -66,6 +66,8 @@
 #include "spinlock.h"
 #else
 #include "includes.h"
+#include "system/time.h"
+#include "system/shmem.h"
 #endif
 
 #define TDB_MAGIC_FOOD "TDB file\n"
-- 
cgit 


From 6148deca663f7b6504b044120b166d6c9ae28750 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Tue, 2 Nov 2004 03:13:06 +0000
Subject: r3454: moved a few more things out of includes.h into the
 include/system/ include files.

this brings us down to about 11k lines of headers included with
includes.h, while still retaining the speed of building with pch
(This used to be commit 10188869ef072309ca580b8b933e172571fcdda7)
---
 source4/lib/tdb/common/tdb.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index d41809c3d8..afe2ca1621 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -68,6 +68,7 @@
 #include "includes.h"
 #include "system/time.h"
 #include "system/shmem.h"
+#include "system/filesys.h"
 #endif
 
 #define TDB_MAGIC_FOOD "TDB file\n"
-- 
cgit 


From f9e507980e6a3340ae135e81d3106a836116d6b0 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Sat, 1 Jan 2005 05:06:22 +0000
Subject: r4466: rather than defining "STANDALONE" for building tdb, ldb and
 talloc outside the tree, instead define _SAMBA_BUILD_ inside the Samba
 build. This makes it easier to pull code out of Samba for external use. (This
 used to be commit 09e98c8745cca7ccb1ad7134c0c09b8e4c0f4f06)

---
 source4/lib/tdb/common/tdb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index afe2ca1621..46b2499179 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -46,7 +46,7 @@
  * right time.  Probably too hard -- the process just doesn't know.
  */ 
 
-#ifdef STANDALONE
+#ifndef _SAMBA_BUILD_
 #if HAVE_CONFIG_H
 #include <config.h>
 #endif
-- 
cgit 


From fedf0b0d91fdf2a6ae0ef47acd4047f662bd3374 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Thu, 10 Feb 2005 03:48:43 +0000
Subject: r5296: - only include the tdb headers where they are needed

- removed the u32 hack in events.c as I think this was only needed as
  tdb.h defines u32. Metze, can you check that this hack is indeed no
  longer needed on your suse system?
(This used to be commit 6f79432fe656164d4770dbce114a30dda5e7bf9a)
---
 source4/lib/tdb/common/tdb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 46b2499179..134ee8a4ea 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -63,9 +63,9 @@
 #include <sys/stat.h>
 #include <signal.h>
 #include "tdb.h"
-#include "spinlock.h"
 #else
 #include "includes.h"
+#include "lib/tdb/include/tdb.h"
 #include "system/time.h"
 #include "system/shmem.h"
 #include "system/filesys.h"
-- 
cgit 


From b14727583d6108f11427ec8033986e9d617320c5 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra@samba.org>
Date: Thu, 24 Feb 2005 01:10:57 +0000
Subject: r5533: Patch to detect infinite loops when traversing a tdb from
 "Shlomi Yaakobovich" <Shlomi@exanet.com> Jeremy. (This used to be commit
 84f6d2b3dd29c1aa478708db9617a79382158f64)

---
 source4/lib/tdb/common/tdb.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 134ee8a4ea..90b586d487 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -1314,6 +1314,13 @@ static int tdb_next_lock(TDB_CONTEXT *tdb, struct tdb_traverse_lock *tlock,
 					goto fail;
 				return tlock->off;
 			}
+
+			/* Detect infinite loops. From "Shlomi Yaakobovich" <Shlomi@exanet.com>. */
+			if (tlock->off == rec->next) {
+				TDB_LOG((tdb, 0, "tdb_next_lock: loop detected.\n"));
+				goto fail;
+			}
+
 			/* Try to clean dead ones from old traverses */
 			current = tlock->off;
 			tlock->off = rec->next;
-- 
cgit 


From 639edc4097d1d5f011ce642b97b522ac4cb8b4ae Mon Sep 17 00:00:00 2001
From: Tim Potter <tpot@samba.org>
Date: Sun, 13 Mar 2005 01:40:45 +0000
Subject: r5779: Remove signal and timeout gubbage from tdb. (This used to be
 commit dbb56e9a59cf00d57b09ded6d60bf9424d5f1f4c)

---
 source4/lib/tdb/common/tdb.c | 28 ++--------------------------
 1 file changed, 2 insertions(+), 26 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 90b586d487..d9fabaeda9 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -61,7 +61,6 @@
 #include <errno.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
-#include <signal.h>
 #include "tdb.h"
 #else
 #include "includes.h"
@@ -193,18 +192,6 @@ struct list_struct {
 	*/
 };
 
-/***************************************************************
- Allow a caller to set a "alarm" flag that tdb can check to abort
- a blocking lock on SIGALRM.
-***************************************************************/
-
-static sig_atomic_t *palarm_fired;
-
-void tdb_set_lock_alarm(sig_atomic_t *palarm)
-{
-	palarm_fired = palarm;
-}
-
 /* a byte range locking function - return 0 on success
    this functions locks/unlocks 1 byte at the specified offset.
 
@@ -231,27 +218,16 @@ static int tdb_brlock(TDB_CONTEXT *tdb, tdb_off offset,
 
 	do {
 		ret = fcntl(tdb->fd,lck_type,&fl);
-		if (ret == -1 && errno == EINTR && palarm_fired && *palarm_fired)
-			break;
 	} while (ret == -1 && errno == EINTR);
 
 	if (ret == -1) {
 		if (!probe && lck_type != F_SETLK) {
 			/* Ensure error code is set for log fun to examine. */
-			if (errno == EINTR && palarm_fired && *palarm_fired)
-				tdb->ecode = TDB_ERR_LOCK_TIMEOUT;
-			else
-				tdb->ecode = TDB_ERR_LOCK;
+			tdb->ecode = TDB_ERR_LOCK;
 			TDB_LOG((tdb, 5,"tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d\n", 
 				 tdb->fd, offset, rw_type, lck_type));
 		}
-		/* Was it an alarm timeout ? */
-		if (errno == EINTR && palarm_fired && *palarm_fired) {
-			TDB_LOG((tdb, 5, "tdb_brlock timed out (fd=%d) at offset %d rw_type=%d lck_type=%d\n", 
-				 tdb->fd, offset, rw_type, lck_type));
-			return TDB_ERRCODE(TDB_ERR_LOCK_TIMEOUT, -1);
-		}
-		/* Otherwise - generic lock error. errno set by fcntl.
+		/* Generic lock error. errno set by fcntl.
 		 * EAGAIN is an expected return from non-blocking
 		 * locks. */
 		if (errno != EAGAIN) {
-- 
cgit 


From e3775ee8501e2f15ee172970085c2c12e50da1cf Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra@samba.org>
Date: Thu, 7 Apr 2005 19:50:54 +0000
Subject: r6238: Ensure if realloc fails on an internal tdb we fail gracefully.
 Jeremy. (This used to be commit d69f7c05468ae54e0474b188fedabe14e7297d53)

---
 source4/lib/tdb/common/tdb.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index d9fabaeda9..6554ec5697 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -802,9 +802,14 @@ static int tdb_expand(TDB_CONTEXT *tdb, tdb_off size)
 
 	tdb->map_size += size;
 
-	if (tdb->flags & TDB_INTERNAL)
-		tdb->map_ptr = realloc(tdb->map_ptr, tdb->map_size);
-	else {
+	if (tdb->flags & TDB_INTERNAL) {
+		char *new_map_ptr = realloc(tdb->map_ptr, tdb->map_size);
+		if (!new_map_ptr) {
+			tdb->map_size -= size;
+			goto fail;
+		}
+		tdb->map_ptr = new_map_ptr;
+	} else {
 		/*
 		 * We must ensure the file is remapped before adding the space
 		 * to ensure consistency with systems like OpenBSD where
-- 
cgit 


From 98549af7bf1932bbc78a07053f193b24bd8685f7 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Sat, 30 Apr 2005 09:04:14 +0000
Subject: r6528: - in tdb_fetch() we effectively disallowed zero length records
 by   returning NULL/0, which is the same as we used for a failure. Having  
 to look at tdb->ecode (which we never do) is too error prone.

  Instead, tdb_fetch() should behave like malloc() and talloc(), where
  zero length is not special and malloc(0) returns a valid pointer.

- similarly in data_blob(), asking for data_blob(NULL, 0) should
  return a zero blob, but asking for data_blob(ptr, 0) should return a
  zero length blob with a valid pointer, just like talloc() and malloc()

This change fixes the SummaryInformation stream stored in the tdb
backend when manipulated from w2k. The w2k client was using
SET_EOF_INFORMATION to create a zero-length stream, which we return
STATUS_NOT_FOUND on, as the tdb_fetch() gave us back a NULL/0 blob,
which we returned as not-found
(This used to be commit 162bbe4402b9de6ac06103df904b9fc204fbff29)
---
 source4/lib/tdb/common/tdb.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 6554ec5697..670fdda7cd 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -1129,8 +1129,8 @@ static int tdb_update_hash(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash, TDB_DATA db
 /* find an entry in the database given a key */
 /* If an entry doesn't exist tdb_err will be set to
  * TDB_ERR_NOEXIST. If a key has no data attached
- * tdb_err will not be set. Both will return a
- * zero pptr and zero dsize.
+ * then the TDB_DATA will have zero length but
+ * a non-zero pointer
  */
 
 TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key)
@@ -1145,11 +1145,8 @@ TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key)
 	if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
 		return tdb_null;
 
-	if (rec.data_len)
-		ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
-					  rec.data_len);
-	else
-		ret.dptr = NULL;
+	ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
+				  rec.data_len);
 	ret.dsize = rec.data_len;
 	tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
 	return ret;
-- 
cgit 


From a1ba224107fbcf6f8a9a3091f42cde2a0c47f85e Mon Sep 17 00:00:00 2001
From: Derrell Lipman <derrell@samba.org>
Date: Sat, 4 Jun 2005 17:13:43 +0000
Subject: r7276: - moved static tdb function ltdb_dn_fold() into common/ so
 that it can be   called from multiple backends.  (ldb_sqlite3 needs it too.) 
 Added parameter   for a callback function that determines whether an
 attribute needs case   folding. - begin to prepare for sqlite3 in build
 process - work-in-progress updates, on ldb_sqlite3 (This used to be commit
 a80bced0b96ffb655559a43cf7f4d7a34deb5a7d)

---
 source4/lib/tdb/common/tdb.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 670fdda7cd..e9cbc12b91 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -105,9 +105,18 @@
 #define MAP_FAILED ((void *)-1)
 #endif
 
+#ifndef discard_const_p
+# if defined(__intptr_t_defined) || defined(HAVE_INTPTR_T)
+#  define discard_const(ptr) ((void *)((intptr_t)(ptr)))
+# else
+#  define discard_const(ptr) ((void *)(ptr))
+# endif
+# define discard_const_p(type, ptr) ((type *)discard_const(ptr))
+#endif
+
 /* free memory if the pointer is valid and zero the pointer */
 #ifndef SAFE_FREE
-#define SAFE_FREE(x) do { if ((x) != NULL) {free((x)); (x)=NULL;} } while(0)
+#define SAFE_FREE(x) do { if ((x) != NULL) {free(discard_const_p(void *, (x))); (x)=NULL;} } while(0)
 #endif
 
 #define BUCKET(hash) ((hash) % tdb->header.hash_size)
-- 
cgit 


From fcb339c087de567c860cc9e5e92253a642b982a4 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra@samba.org>
Date: Thu, 16 Jun 2005 16:43:23 +0000
Subject: r7641: Fix based on work from  "Shlomi Yaakobovich"
 <Shlomi@exanet.com> to catch loops in corrupted tdb files. Jeremy. (This used
 to be commit f9f3037d6855259edd56fd5a23d63dbb37f0a751)

---
 source4/lib/tdb/common/tdb.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index e9cbc12b91..44533aad27 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -1295,12 +1295,6 @@ static int tdb_next_lock(TDB_CONTEXT *tdb, struct tdb_traverse_lock *tlock,
 			tdb_off current;
 			if (rec_read(tdb, tlock->off, rec) == -1)
 				goto fail;
-			if (!TDB_DEAD(rec)) {
-				/* Woohoo: we found one! */
-				if (lock_record(tdb, tlock->off) != 0)
-					goto fail;
-				return tlock->off;
-			}
 
 			/* Detect infinite loops. From "Shlomi Yaakobovich" <Shlomi@exanet.com>. */
 			if (tlock->off == rec->next) {
@@ -1308,6 +1302,13 @@ static int tdb_next_lock(TDB_CONTEXT *tdb, struct tdb_traverse_lock *tlock,
 				goto fail;
 			}
 
+			if (!TDB_DEAD(rec)) {
+				/* Woohoo: we found one! */
+				if (lock_record(tdb, tlock->off) != 0)
+					goto fail;
+				return tlock->off;
+			}
+
 			/* Try to clean dead ones from old traverses */
 			current = tlock->off;
 			tlock->off = rec->next;
-- 
cgit 


From f650ea10120d41ceff8ea04975fea7637cb45a0a Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Wed, 20 Jul 2005 00:09:23 +0000
Subject: r8624: removed valgrind comment on tdb that no longer applies (This
 used to be commit 8f222c8c7a750c739d3288da0b1edb1efc3b1ffc)

---
 source4/lib/tdb/common/tdb.c | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 44533aad27..4c2d9a1add 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -27,25 +27,6 @@
 */
 
 
-/* NOTE: If you use tdbs under valgrind, and in particular if you run
- * tdbtorture, you may get spurious "uninitialized value" warnings.  I
- * think this is because valgrind doesn't understand that the mmap'd
- * area may be written to by other processes.  Memory can, from the
- * point of view of the grinded process, spontaneously become
- * initialized.
- *
- * I can think of a few solutions.  [mbp 20030311]
- *
- * 1 - Write suppressions for Valgrind so that it doesn't complain
- * about this.  Probably the most reasonable but people need to
- * remember to use them.
- *
- * 2 - Use IO not mmap when running under valgrind.  Not so nice.
- *
- * 3 - Use the special valgrind macros to mark memory as valid at the
- * right time.  Probably too hard -- the process just doesn't know.
- */ 
-
 #ifndef _SAMBA_BUILD_
 #if HAVE_CONFIG_H
 #include <config.h>
-- 
cgit 


From 37194224416d7509a457ee4aa18991b8bab0da7d Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Tue, 30 Aug 2005 00:36:12 +0000
Subject: r9769:  r11592@blu:  tridge | 2005-08-30 10:40:19 +1000  added a tdb
 optimisation that speeds up non-indexed ldb by a large  margin (often 10x or
 more). I'd be interested in any comments on the  safety of this optimisation.
 See the comment in the code for an  explanation. (This used to be commit
 7f9efaceb6d6dfc0c82923344cc45ec34493f2ed)

---
 source4/lib/tdb/common/tdb.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 4c2d9a1add..8e8e3ce3b3 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -1250,6 +1250,43 @@ static int tdb_next_lock(TDB_CONTEXT *tdb, struct tdb_traverse_lock *tlock,
 
 	/* Lock each chain from the start one. */
 	for (; tlock->hash < tdb->header.hash_size; tlock->hash++) {
+
+		/* this is an optimisation for the common case where
+		   the hash chain is empty, which is particularly
+		   common for the use of tdb with ldb, where large
+		   hashes are used. In that case we spend most of our
+		   time in tdb_brlock(), locking empty hash chains.
+
+		   To avoid this, we do an unlocked pre-check to see
+		   if the hash chain is empty before starting to look
+		   inside it. If it is empty then we can avoid that
+		   hash chain. If it isn't empty then we can't believe
+		   the value we get back, as we read it without a
+		   lock, so instead we get the lock and re-fetch the
+		   value below.
+
+		   Notice that not doing this optimisation on the
+		   first hash chain is critical. We must guarantee
+		   that we have done at least one fcntl lock at the
+		   start of a search to guarantee that memory is
+		   coherent on SMP systems. If records are added by
+		   others during the search then thats OK, and we
+		   could possibly miss those with this trick, but we
+		   could miss them anyway without this trick, so the
+		   semantics don't change.
+
+		   With a non-indexed ldb search this trick gains us a
+		   factor of more than 10 in speed on a linux 2.6.x
+		   system.
+		 */
+		if (!tlock->off && tlock->hash != 0) {
+			u32 off;
+			if (ofs_read(tdb, TDB_HASH_TOP(tlock->hash), &off) == 0 &&
+			    off == 0) {
+				continue;
+			}
+		}
+
 		if (tdb_lock(tdb, tlock->hash, F_WRLCK) == -1)
 			return -1;
 
-- 
cgit 


From cae788d5608b287b2c970ab28c0361350a5c7e95 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Tue, 30 Aug 2005 01:51:36 +0000
Subject: r9773:  r11599@blu:  tridge | 2005-08-30 11:55:57 +1000  optimise
 this case a bit more. The total speedup using non-indexed  ldbtest is now
 around a factor of 80x. The code is ugly as hell, but  I think this speed is
 worth it.

 Of course, if we only ever do indexed searches in ldb then this
 doesn't help, but it seems all too common that we get unindexable
 searches, so the optimisation is worthwhile.
(This used to be commit 2e14fb893dd9815cdb2488c630131dc549e5c361)
---
 source4/lib/tdb/common/tdb.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 8e8e3ce3b3..d6861efe13 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -1276,14 +1276,22 @@ static int tdb_next_lock(TDB_CONTEXT *tdb, struct tdb_traverse_lock *tlock,
 		   semantics don't change.
 
 		   With a non-indexed ldb search this trick gains us a
-		   factor of more than 10 in speed on a linux 2.6.x
-		   system.
+		   factor of around 80 in speed on a linux 2.6.x
+		   system (testing using ldbtest).
 		 */
 		if (!tlock->off && tlock->hash != 0) {
 			u32 off;
-			if (ofs_read(tdb, TDB_HASH_TOP(tlock->hash), &off) == 0 &&
-			    off == 0) {
-				continue;
+			if (tdb->map_ptr) {
+				for (;tlock->hash < tdb->header.hash_size;tlock->hash++) {
+					if (0 != *(u32 *)(TDB_HASH_TOP(tlock->hash) + (unsigned char *)tdb->map_ptr)) {
+						break;
+					}
+				}
+			} else {
+				if (ofs_read(tdb, TDB_HASH_TOP(tlock->hash), &off) == 0 &&
+				    off == 0) {
+					continue;
+				}
 			}
 		}
 
-- 
cgit 


From 8012fb2cfeede6ecae1346725141354f2897887e Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Tue, 30 Aug 2005 01:58:03 +0000
Subject: r9774:  r11605@blu:  tridge | 2005-08-30 12:02:19 +1000  make sure we
 don't walk off the end of the hash array (This used to be commit
 3c32f24e2c6a99ec294fb16e1684cd22b08f2df4)

---
 source4/lib/tdb/common/tdb.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index d6861efe13..3477d7e4be 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -1287,6 +1287,9 @@ static int tdb_next_lock(TDB_CONTEXT *tdb, struct tdb_traverse_lock *tlock,
 						break;
 					}
 				}
+				if (tlock->hash == tdb->header.hash_size) {
+					continue;
+				}
 			} else {
 				if (ofs_read(tdb, TDB_HASH_TOP(tlock->hash), &off) == 0 &&
 				    off == 0) {
-- 
cgit 


From 0868b7c77d42efd5f361f605bfc0d8d46841db95 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Fri, 16 Sep 2005 03:52:42 +0000
Subject: r10253: a fairly large tdb cleanup and re-organise. Nearly all of
 this change just involves splitting up the core tdb.c code into separate
 files on logical boundaries, but there are some minor functional changes as
 well:

 - move the 'struct tdb_context' into tdb_private.h, hiding it from
   users. This was done to allow the structure to change without
   breaking code that uses tdb.

 - added accessor functions tdb_fd(), tdb_name(), and tdb_log_fn() to
   access the elements of struct tdb_context that were used by
   external code but are no longer visible

 - simplified tdb_append() to use tdb_fetch()/tdb_store(), which is just
   as good due to the way tdb locks work

 - changed some of the types (such as tdb_off to tdb_off_t) to make
   syntax highlighting work better

 - removed the old optional spinlock code. It was a bad idea.

 - fixed a bug in tdb_reopen_all() that caused tdbtorture to sometimes
   fail or report nasty-looking errors. This is the only real bug
   fixed in this commit. Jeremy/Jerry, you might like to pick up this
   change for Samba3, as that could definitely affect smbd in
   Samba3.

The aim of all of these changes is to make the tdb
transactions/journaling code I am working on easier to write. I
started to write it on top of the existing tdb.c code and it got very
messy. Splitting up the code makes it much easier to follow.

There are more cleanups we could do in tdb, such as using uint32_t
instead of u32 (suggested by metze). I'll leave those for another day.
(This used to be commit 4673cdd0d261614e707b72a7a348bb0e7dbb2482)
---
 source4/lib/tdb/common/tdb.c | 1947 +++---------------------------------------
 1 file changed, 105 insertions(+), 1842 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 3477d7e4be..f099c2d1aa 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -3,7 +3,7 @@
 
    trivial database library
 
-   Copyright (C) Andrew Tridgell              1999-2004
+   Copyright (C) Andrew Tridgell              1999-2005
    Copyright (C) Paul `Rusty' Russell		   2000
    Copyright (C) Jeremy Allison			   2000-2003
    
@@ -26,1007 +26,28 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
 
+#include "tdb_private.h"
 
-#ifndef _SAMBA_BUILD_
-#if HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <string.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include "tdb.h"
-#else
-#include "includes.h"
-#include "lib/tdb/include/tdb.h"
-#include "system/time.h"
-#include "system/shmem.h"
-#include "system/filesys.h"
-#endif
-
-#define TDB_MAGIC_FOOD "TDB file\n"
-#define TDB_VERSION (0x26011967 + 6)
-#define TDB_MAGIC (0x26011999U)
-#define TDB_FREE_MAGIC (~TDB_MAGIC)
-#define TDB_DEAD_MAGIC (0xFEE1DEAD)
-#define TDB_ALIGNMENT 4
-#define MIN_REC_SIZE (2*sizeof(struct list_struct) + TDB_ALIGNMENT)
-#define DEFAULT_HASH_SIZE 131
-#define TDB_PAGE_SIZE 0x2000
-#define FREELIST_TOP (sizeof(struct tdb_header))
-#define TDB_ALIGN(x,a) (((x) + (a)-1) & ~((a)-1))
-#define TDB_BYTEREV(x) (((((x)&0xff)<<24)|((x)&0xFF00)<<8)|(((x)>>8)&0xFF00)|((x)>>24))
-#define TDB_DEAD(r) ((r)->magic == TDB_DEAD_MAGIC)
-#define TDB_BAD_MAGIC(r) ((r)->magic != TDB_MAGIC && !TDB_DEAD(r))
-#define TDB_HASH_TOP(hash) (FREELIST_TOP + (BUCKET(hash)+1)*sizeof(tdb_off))
-#define TDB_DATA_START(hash_size) (TDB_HASH_TOP(hash_size-1) + TDB_SPINLOCK_SIZE(hash_size))
-
-
-/* NB assumes there is a local variable called "tdb" that is the
- * current context, also takes doubly-parenthesized print-style
- * argument. */
-#define TDB_LOG(x) tdb->log_fn x
-
-/* lock offsets */
-#define GLOBAL_LOCK 0
-#define ACTIVE_LOCK 4
-
-#ifndef MAP_FILE
-#define MAP_FILE 0
-#endif
-
-#ifndef MAP_FAILED
-#define MAP_FAILED ((void *)-1)
-#endif
-
-#ifndef discard_const_p
-# if defined(__intptr_t_defined) || defined(HAVE_INTPTR_T)
-#  define discard_const(ptr) ((void *)((intptr_t)(ptr)))
-# else
-#  define discard_const(ptr) ((void *)(ptr))
-# endif
-# define discard_const_p(type, ptr) ((type *)discard_const(ptr))
-#endif
-
-/* free memory if the pointer is valid and zero the pointer */
-#ifndef SAFE_FREE
-#define SAFE_FREE(x) do { if ((x) != NULL) {free(discard_const_p(void *, (x))); (x)=NULL;} } while(0)
-#endif
-
-#define BUCKET(hash) ((hash) % tdb->header.hash_size)
 TDB_DATA tdb_null;
 
-/* all contexts, to ensure no double-opens (fcntl locks don't nest!) */
-static TDB_CONTEXT *tdbs = NULL;
-
-static int tdb_munmap(TDB_CONTEXT *tdb)
-{
-	if (tdb->flags & TDB_INTERNAL)
-		return 0;
-
-#ifdef HAVE_MMAP
-	if (tdb->map_ptr) {
-		int ret = munmap(tdb->map_ptr, tdb->map_size);
-		if (ret != 0)
-			return ret;
-	}
-#endif
-	tdb->map_ptr = NULL;
-	return 0;
-}
-
-static void tdb_mmap(TDB_CONTEXT *tdb)
-{
-	if (tdb->flags & TDB_INTERNAL)
-		return;
-
-#ifdef HAVE_MMAP
-	if (!(tdb->flags & TDB_NOMMAP)) {
-		tdb->map_ptr = mmap(NULL, tdb->map_size, 
-				    PROT_READ|(tdb->read_only? 0:PROT_WRITE), 
-				    MAP_SHARED|MAP_FILE, tdb->fd, 0);
-
-		/*
-		 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
-		 */
-
-		if (tdb->map_ptr == MAP_FAILED) {
-			tdb->map_ptr = NULL;
-			TDB_LOG((tdb, 2, "tdb_mmap failed for size %d (%s)\n", 
-				 tdb->map_size, strerror(errno)));
-		}
-	} else {
-		tdb->map_ptr = NULL;
-	}
-#else
-	tdb->map_ptr = NULL;
-#endif
-}
-
-/* Endian conversion: we only ever deal with 4 byte quantities */
-static void *convert(void *buf, u32 size)
-{
-	u32 i, *p = buf;
-	for (i = 0; i < size / 4; i++)
-		p[i] = TDB_BYTEREV(p[i]);
-	return buf;
-}
-#define DOCONV() (tdb->flags & TDB_CONVERT)
-#define CONVERT(x) (DOCONV() ? convert(&x, sizeof(x)) : &x)
-
-/* the body of the database is made of one list_struct for the free space
-   plus a separate data list for each hash value */
-struct list_struct {
-	tdb_off next; /* offset of the next record in the list */
-	tdb_len rec_len; /* total byte length of record */
-	tdb_len key_len; /* byte length of key */
-	tdb_len data_len; /* byte length of data */
-	u32 full_hash; /* the full 32 bit hash of the key */
-	u32 magic;   /* try to catch errors */
-	/* the following union is implied:
-		union {
-			char record[rec_len];
-			struct {
-				char key[key_len];
-				char data[data_len];
-			}
-			u32 totalsize; (tailer)
-		}
-	*/
-};
-
-/* a byte range locking function - return 0 on success
-   this functions locks/unlocks 1 byte at the specified offset.
-
-   On error, errno is also set so that errors are passed back properly
-   through tdb_open(). */
-static int tdb_brlock(TDB_CONTEXT *tdb, tdb_off offset, 
-		      int rw_type, int lck_type, int probe)
-{
-	struct flock fl;
-	int ret;
-
-	if (tdb->flags & TDB_NOLOCK)
-		return 0;
-	if ((rw_type == F_WRLCK) && (tdb->read_only)) {
-		errno = EACCES;
-		return -1;
-	}
-
-	fl.l_type = rw_type;
-	fl.l_whence = SEEK_SET;
-	fl.l_start = offset;
-	fl.l_len = 1;
-	fl.l_pid = 0;
-
-	do {
-		ret = fcntl(tdb->fd,lck_type,&fl);
-	} while (ret == -1 && errno == EINTR);
-
-	if (ret == -1) {
-		if (!probe && lck_type != F_SETLK) {
-			/* Ensure error code is set for log fun to examine. */
-			tdb->ecode = TDB_ERR_LOCK;
-			TDB_LOG((tdb, 5,"tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d\n", 
-				 tdb->fd, offset, rw_type, lck_type));
-		}
-		/* Generic lock error. errno set by fcntl.
-		 * EAGAIN is an expected return from non-blocking
-		 * locks. */
-		if (errno != EAGAIN) {
-		TDB_LOG((tdb, 5, "tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d: %s\n", 
-				 tdb->fd, offset, rw_type, lck_type, 
-				 strerror(errno)));
-		}
-		return TDB_ERRCODE(TDB_ERR_LOCK, -1);
-	}
-	return 0;
-}
-
-/* lock a list in the database. list -1 is the alloc list */
-static int tdb_lock(TDB_CONTEXT *tdb, int list, int ltype)
-{
-	if (list < -1 || list >= (int)tdb->header.hash_size) {
-		TDB_LOG((tdb, 0,"tdb_lock: invalid list %d for ltype=%d\n", 
-			   list, ltype));
-		return -1;
-	}
-	if (tdb->flags & TDB_NOLOCK)
-		return 0;
-
-	/* Since fcntl locks don't nest, we do a lock for the first one,
-	   and simply bump the count for future ones */
-	if (tdb->locked[list+1].count == 0) {
-		if (!tdb->read_only && tdb->header.rwlocks) {
-			if (tdb_spinlock(tdb, list, ltype)) {
-				TDB_LOG((tdb, 0, "tdb_lock spinlock failed on list %d ltype=%d\n", 
-					   list, ltype));
-				return -1;
-			}
-		} else if (tdb_brlock(tdb,FREELIST_TOP+4*list,ltype,F_SETLKW, 0)) {
-			TDB_LOG((tdb, 0,"tdb_lock failed on list %d ltype=%d (%s)\n", 
-					   list, ltype, strerror(errno)));
-			return -1;
-		}
-		tdb->locked[list+1].ltype = ltype;
-	}
-	tdb->locked[list+1].count++;
-	return 0;
-}
-
-/* unlock the database: returns void because it's too late for errors. */
-	/* changed to return int it may be interesting to know there
-	   has been an error  --simo */
-static int tdb_unlock(TDB_CONTEXT *tdb, int list, int ltype)
-{
-	int ret = -1;
-
-	if (tdb->flags & TDB_NOLOCK)
-		return 0;
-
-	/* Sanity checks */
-	if (list < -1 || list >= (int)tdb->header.hash_size) {
-		TDB_LOG((tdb, 0, "tdb_unlock: list %d invalid (%d)\n", list, tdb->header.hash_size));
-		return ret;
-	}
-
-	if (tdb->locked[list+1].count==0) {
-		TDB_LOG((tdb, 0, "tdb_unlock: count is 0\n"));
-		return ret;
-	}
-
-	if (tdb->locked[list+1].count == 1) {
-		/* Down to last nested lock: unlock underneath */
-		if (!tdb->read_only && tdb->header.rwlocks) {
-			ret = tdb_spinunlock(tdb, list, ltype);
-		} else {
-			ret = tdb_brlock(tdb, FREELIST_TOP+4*list, F_UNLCK, F_SETLKW, 0);
-		}
-	} else {
-		ret = 0;
-	}
-	tdb->locked[list+1].count--;
-
-	if (ret)
-		TDB_LOG((tdb, 0,"tdb_unlock: An error occurred unlocking!\n")); 
-	return ret;
-}
-
-/* This is based on the hash algorithm from gdbm */
-static u32 default_tdb_hash(TDB_DATA *key)
-{
-	u32 value;	/* Used to compute the hash value.  */
-	u32   i;	/* Used to cycle through random values. */
-
-	/* Set the initial value from the key size. */
-	for (value = 0x238F13AF * key->dsize, i=0; i < key->dsize; i++)
-		value = (value + (key->dptr[i] << (i*5 % 24)));
-
-	return (1103515243 * value + 12345);  
-}
-
-/* check for an out of bounds access - if it is out of bounds then
-   see if the database has been expanded by someone else and expand
-   if necessary 
-   note that "len" is the minimum length needed for the db
-*/
-static int tdb_oob(TDB_CONTEXT *tdb, tdb_off len, int probe)
-{
-	struct stat st;
-	if (len <= tdb->map_size)
-		return 0;
-	if (tdb->flags & TDB_INTERNAL) {
-		if (!probe) {
-			/* Ensure ecode is set for log fn. */
-			tdb->ecode = TDB_ERR_IO;
-			TDB_LOG((tdb, 0,"tdb_oob len %d beyond internal malloc size %d\n",
-				 (int)len, (int)tdb->map_size));
-		}
-		return TDB_ERRCODE(TDB_ERR_IO, -1);
-	}
-
-	if (fstat(tdb->fd, &st) == -1)
-		return TDB_ERRCODE(TDB_ERR_IO, -1);
-
-	if (st.st_size < (size_t)len) {
-		if (!probe) {
-			/* Ensure ecode is set for log fn. */
-			tdb->ecode = TDB_ERR_IO;
-			TDB_LOG((tdb, 0,"tdb_oob len %d beyond eof at %d\n",
-				 (int)len, (int)st.st_size));
-		}
-		return TDB_ERRCODE(TDB_ERR_IO, -1);
-	}
-
-	/* Unmap, update size, remap */
-	if (tdb_munmap(tdb) == -1)
-		return TDB_ERRCODE(TDB_ERR_IO, -1);
-	tdb->map_size = st.st_size;
-	tdb_mmap(tdb);
-	return 0;
-}
-
-/* write a lump of data at a specified offset */
-static int tdb_write(TDB_CONTEXT *tdb, tdb_off off, void *buf, tdb_len len)
-{
-	if (tdb_oob(tdb, off + len, 0) != 0)
-		return -1;
-
-	if (tdb->map_ptr)
-		memcpy(off + (char *)tdb->map_ptr, buf, len);
-#ifdef HAVE_PWRITE
-	else if (pwrite(tdb->fd, buf, len, off) != (ssize_t)len) {
-#else
-	else if (lseek(tdb->fd, off, SEEK_SET) != off
-		 || write(tdb->fd, buf, len) != (ssize_t)len) {
-#endif
-		/* Ensure ecode is set for log fn. */
-		tdb->ecode = TDB_ERR_IO;
-		TDB_LOG((tdb, 0,"tdb_write failed at %d len=%d (%s)\n",
-			   off, len, strerror(errno)));
-		return TDB_ERRCODE(TDB_ERR_IO, -1);
-	}
-	return 0;
-}
-
-/* read a lump of data at a specified offset, maybe convert */
-static int tdb_read(TDB_CONTEXT *tdb,tdb_off off,void *buf,tdb_len len,int cv)
-{
-	if (tdb_oob(tdb, off + len, 0) != 0)
-		return -1;
-
-	if (tdb->map_ptr)
-		memcpy(buf, off + (char *)tdb->map_ptr, len);
-#ifdef HAVE_PREAD
-	else if (pread(tdb->fd, buf, len, off) != (ssize_t)len) {
-#else
-	else if (lseek(tdb->fd, off, SEEK_SET) != off
-		 || read(tdb->fd, buf, len) != (ssize_t)len) {
-#endif
-		/* Ensure ecode is set for log fn. */
-		tdb->ecode = TDB_ERR_IO;
-		TDB_LOG((tdb, 0,"tdb_read failed at %d len=%d (%s)\n",
-			   off, len, strerror(errno)));
-		return TDB_ERRCODE(TDB_ERR_IO, -1);
-	}
-	if (cv)
-		convert(buf, len);
-	return 0;
-}
-
-/* read a lump of data, allocating the space for it */
-static char *tdb_alloc_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_len len)
-{
-	char *buf;
-
-	if (!(buf = malloc(len))) {
-		/* Ensure ecode is set for log fn. */
-		tdb->ecode = TDB_ERR_OOM;
-		TDB_LOG((tdb, 0,"tdb_alloc_read malloc failed len=%d (%s)\n",
-			   len, strerror(errno)));
-		return TDB_ERRCODE(TDB_ERR_OOM, buf);
-	}
-	if (tdb_read(tdb, offset, buf, len, 0) == -1) {
-		SAFE_FREE(buf);
-		return NULL;
-	}
-	return buf;
-}
-
-/* read/write a tdb_off */
-static int ofs_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d)
-{
-	return tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
-}
-static int ofs_write(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d)
-{
-	tdb_off off = *d;
-	return tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
-}
-
-/* read/write a record */
-static int rec_read(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
-{
-	if (tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
-		return -1;
-	if (TDB_BAD_MAGIC(rec)) {
-		/* Ensure ecode is set for log fn. */
-		tdb->ecode = TDB_ERR_CORRUPT;
-		TDB_LOG((tdb, 0,"rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
-		return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
-	}
-	return tdb_oob(tdb, rec->next+sizeof(*rec), 0);
-}
-static int rec_write(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
-{
-	struct list_struct r = *rec;
-	return tdb_write(tdb, offset, CONVERT(r), sizeof(r));
-}
-
-/* read a freelist record and check for simple errors */
-static int rec_free_read(TDB_CONTEXT *tdb, tdb_off off, struct list_struct *rec)
-{
-	if (tdb_read(tdb, off, rec, sizeof(*rec),DOCONV()) == -1)
-		return -1;
-
-	if (rec->magic == TDB_MAGIC) {
-		/* this happens when a app is showdown while deleting a record - we should
-		   not completely fail when this happens */
-		TDB_LOG((tdb, 0,"rec_free_read non-free magic 0x%x at offset=%d - fixing\n", 
-			 rec->magic, off));
-		rec->magic = TDB_FREE_MAGIC;
-		if (tdb_write(tdb, off, rec, sizeof(*rec)) == -1)
-			return -1;
-	}
-
-	if (rec->magic != TDB_FREE_MAGIC) {
-		/* Ensure ecode is set for log fn. */
-		tdb->ecode = TDB_ERR_CORRUPT;
-		TDB_LOG((tdb, 0,"rec_free_read bad magic 0x%x at offset=%d\n", 
-			   rec->magic, off));
-		return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
-	}
-	if (tdb_oob(tdb, rec->next+sizeof(*rec), 0) != 0)
-		return -1;
-	return 0;
-}
-
-/* update a record tailer (must hold allocation lock) */
-static int update_tailer(TDB_CONTEXT *tdb, tdb_off offset,
-			 const struct list_struct *rec)
-{
-	tdb_off totalsize;
-
-	/* Offset of tailer from record header */
-	totalsize = sizeof(*rec) + rec->rec_len;
-	return ofs_write(tdb, offset + totalsize - sizeof(tdb_off),
-			 &totalsize);
-}
-
-static tdb_off tdb_dump_record(TDB_CONTEXT *tdb, tdb_off offset)
-{
-	struct list_struct rec;
-	tdb_off tailer_ofs, tailer;
-
-	if (tdb_read(tdb, offset, (char *)&rec, sizeof(rec), DOCONV()) == -1) {
-		printf("ERROR: failed to read record at %u\n", offset);
-		return 0;
-	}
-
-	printf(" rec: offset=0x%08x next=0x%08x rec_len=%d key_len=%d data_len=%d full_hash=0x%x magic=0x%x\n",
-	       offset, rec.next, rec.rec_len, rec.key_len, rec.data_len, rec.full_hash, rec.magic);
-
-	tailer_ofs = offset + sizeof(rec) + rec.rec_len - sizeof(tdb_off);
-	if (ofs_read(tdb, tailer_ofs, &tailer) == -1) {
-		printf("ERROR: failed to read tailer at %u\n", tailer_ofs);
-		return rec.next;
-	}
-
-	if (tailer != rec.rec_len + sizeof(rec)) {
-		printf("ERROR: tailer does not match record! tailer=%u totalsize=%u\n",
-				(unsigned int)tailer, (unsigned int)(rec.rec_len + sizeof(rec)));
-	}
-	return rec.next;
-}
-
-static int tdb_dump_chain(TDB_CONTEXT *tdb, int i)
-{
-	tdb_off rec_ptr, top;
-
-	top = TDB_HASH_TOP(i);
-
-	if (tdb_lock(tdb, i, F_WRLCK) != 0)
-		return -1;
-
-	if (ofs_read(tdb, top, &rec_ptr) == -1)
-		return tdb_unlock(tdb, i, F_WRLCK);
-
-	if (rec_ptr)
-		printf("hash=%d\n", i);
-
-	while (rec_ptr) {
-		rec_ptr = tdb_dump_record(tdb, rec_ptr);
-	}
-
-	return tdb_unlock(tdb, i, F_WRLCK);
-}
-
-void tdb_dump_all(TDB_CONTEXT *tdb)
-{
-	int i;
-	for (i=0;i<tdb->header.hash_size;i++) {
-		tdb_dump_chain(tdb, i);
-	}
-	printf("freelist:\n");
-	tdb_dump_chain(tdb, -1);
-}
-
-int tdb_printfreelist(TDB_CONTEXT *tdb)
-{
-	int ret;
-	long total_free = 0;
-	tdb_off offset, rec_ptr;
-	struct list_struct rec;
-
-	if ((ret = tdb_lock(tdb, -1, F_WRLCK)) != 0)
-		return ret;
-
-	offset = FREELIST_TOP;
-
-	/* read in the freelist top */
-	if (ofs_read(tdb, offset, &rec_ptr) == -1) {
-		tdb_unlock(tdb, -1, F_WRLCK);
-		return 0;
-	}
-
-	printf("freelist top=[0x%08x]\n", rec_ptr );
-	while (rec_ptr) {
-		if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec), DOCONV()) == -1) {
-			tdb_unlock(tdb, -1, F_WRLCK);
-			return -1;
-		}
-
-		if (rec.magic != TDB_FREE_MAGIC) {
-			printf("bad magic 0x%08x in free list\n", rec.magic);
-			tdb_unlock(tdb, -1, F_WRLCK);
-			return -1;
-		}
-
-		printf("entry offset=[0x%08x], rec.rec_len = [0x%08x (%d)] (end = 0x%08x)\n", 
-		       rec_ptr, rec.rec_len, rec.rec_len, rec_ptr + rec.rec_len);
-		total_free += rec.rec_len;
-
-		/* move to the next record */
-		rec_ptr = rec.next;
-	}
-	printf("total rec_len = [0x%08x (%d)]\n", (int)total_free, 
-               (int)total_free);
-
-	return tdb_unlock(tdb, -1, F_WRLCK);
-}
-
-/* Remove an element from the freelist.  Must have alloc lock. */
-static int remove_from_freelist(TDB_CONTEXT *tdb, tdb_off off, tdb_off next)
-{
-	tdb_off last_ptr, i;
-
-	/* read in the freelist top */
-	last_ptr = FREELIST_TOP;
-	while (ofs_read(tdb, last_ptr, &i) != -1 && i != 0) {
-		if (i == off) {
-			/* We've found it! */
-			return ofs_write(tdb, last_ptr, &next);
-		}
-		/* Follow chain (next offset is at start of record) */
-		last_ptr = i;
-	}
-	TDB_LOG((tdb, 0,"remove_from_freelist: not on list at off=%d\n", off));
-	return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
-}
-
-/* Add an element into the freelist. Merge adjacent records if
-   neccessary. */
-static int tdb_free(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
-{
-	tdb_off right, left;
-
-	/* Allocation and tailer lock */
-	if (tdb_lock(tdb, -1, F_WRLCK) != 0)
-		return -1;
-
-	/* set an initial tailer, so if we fail we don't leave a bogus record */
-	if (update_tailer(tdb, offset, rec) != 0) {
-		TDB_LOG((tdb, 0, "tdb_free: upfate_tailer failed!\n"));
-		goto fail;
-	}
-
-	/* Look right first (I'm an Australian, dammit) */
-	right = offset + sizeof(*rec) + rec->rec_len;
-	if (right + sizeof(*rec) <= tdb->map_size) {
-		struct list_struct r;
-
-		if (tdb_read(tdb, right, &r, sizeof(r), DOCONV()) == -1) {
-			TDB_LOG((tdb, 0, "tdb_free: right read failed at %u\n", right));
-			goto left;
-		}
-
-		/* If it's free, expand to include it. */
-		if (r.magic == TDB_FREE_MAGIC) {
-			if (remove_from_freelist(tdb, right, r.next) == -1) {
-				TDB_LOG((tdb, 0, "tdb_free: right free failed at %u\n", right));
-				goto left;
-			}
-			rec->rec_len += sizeof(r) + r.rec_len;
-		}
-	}
-
-left:
-	/* Look left */
-	left = offset - sizeof(tdb_off);
-	if (left > TDB_DATA_START(tdb->header.hash_size)) {
-		struct list_struct l;
-		tdb_off leftsize;
-		
-		/* Read in tailer and jump back to header */
-		if (ofs_read(tdb, left, &leftsize) == -1) {
-			TDB_LOG((tdb, 0, "tdb_free: left offset read failed at %u\n", left));
-			goto update;
-		}
-		left = offset - leftsize;
-
-		/* Now read in record */
-		if (tdb_read(tdb, left, &l, sizeof(l), DOCONV()) == -1) {
-			TDB_LOG((tdb, 0, "tdb_free: left read failed at %u (%u)\n", left, leftsize));
-			goto update;
-		}
-
-		/* If it's free, expand to include it. */
-		if (l.magic == TDB_FREE_MAGIC) {
-			if (remove_from_freelist(tdb, left, l.next) == -1) {
-				TDB_LOG((tdb, 0, "tdb_free: left free failed at %u\n", left));
-				goto update;
-			} else {
-				offset = left;
-				rec->rec_len += leftsize;
-			}
-		}
-	}
-
-update:
-	if (update_tailer(tdb, offset, rec) == -1) {
-		TDB_LOG((tdb, 0, "tdb_free: update_tailer failed at %u\n", offset));
-		goto fail;
-	}
-
-	/* Now, prepend to free list */
-	rec->magic = TDB_FREE_MAGIC;
-
-	if (ofs_read(tdb, FREELIST_TOP, &rec->next) == -1 ||
-	    rec_write(tdb, offset, rec) == -1 ||
-	    ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
-		TDB_LOG((tdb, 0, "tdb_free record write failed at offset=%d\n", offset));
-		goto fail;
-	}
-
-	/* And we're done. */
-	tdb_unlock(tdb, -1, F_WRLCK);
-	return 0;
-
- fail:
-	tdb_unlock(tdb, -1, F_WRLCK);
-	return -1;
-}
-
-
-/* expand a file.  we prefer to use ftruncate, as that is what posix
-  says to use for mmap expansion */
-static int expand_file(TDB_CONTEXT *tdb, tdb_off size, tdb_off addition)
-{
-	char buf[1024];
-#if HAVE_FTRUNCATE_EXTEND
-	if (ftruncate(tdb->fd, size+addition) != 0) {
-		TDB_LOG((tdb, 0, "expand_file ftruncate to %d failed (%s)\n", 
-			   size+addition, strerror(errno)));
-		return -1;
-	}
-#else
-	char b = 0;
-
-#ifdef HAVE_PWRITE
-	if (pwrite(tdb->fd,  &b, 1, (size+addition) - 1) != 1) {
-#else
-	if (lseek(tdb->fd, (size+addition) - 1, SEEK_SET) != (size+addition) - 1 || 
-	    write(tdb->fd, &b, 1) != 1) {
-#endif
-		TDB_LOG((tdb, 0, "expand_file to %d failed (%s)\n", 
-			   size+addition, strerror(errno)));
-		return -1;
-	}
-#endif
-
-	/* now fill the file with something. This ensures that the file isn't sparse, which would be
-	   very bad if we ran out of disk. This must be done with write, not via mmap */
-	memset(buf, 0x42, sizeof(buf));
-	while (addition) {
-		int n = addition>sizeof(buf)?sizeof(buf):addition;
-#ifdef HAVE_PWRITE
-		int ret = pwrite(tdb->fd, buf, n, size);
-#else
-		int ret;
-		if (lseek(tdb->fd, size, SEEK_SET) != size)
-			return -1;
-		ret = write(tdb->fd, buf, n);
-#endif
-		if (ret != n) {
-			TDB_LOG((tdb, 0, "expand_file write of %d failed (%s)\n", 
-				   n, strerror(errno)));
-			return -1;
-		}
-		addition -= n;
-		size += n;
-	}
-	return 0;
-}
-
-
-/* expand the database at least size bytes by expanding the underlying
-   file and doing the mmap again if necessary */
-static int tdb_expand(TDB_CONTEXT *tdb, tdb_off size)
-{
-	struct list_struct rec;
-	tdb_off offset;
-
-	if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
-		TDB_LOG((tdb, 0, "lock failed in tdb_expand\n"));
-		return -1;
-	}
-
-	/* must know about any previous expansions by another process */
-	tdb_oob(tdb, tdb->map_size + 1, 1);
-
-	/* always make room for at least 10 more records, and round
-           the database up to a multiple of TDB_PAGE_SIZE */
-	size = TDB_ALIGN(tdb->map_size + size*10, TDB_PAGE_SIZE) - tdb->map_size;
-
-	if (!(tdb->flags & TDB_INTERNAL))
-		tdb_munmap(tdb);
-
-	/*
-	 * We must ensure the file is unmapped before doing this
-	 * to ensure consistency with systems like OpenBSD where
-	 * writes and mmaps are not consistent.
-	 */
-
-	/* expand the file itself */
-	if (!(tdb->flags & TDB_INTERNAL)) {
-		if (expand_file(tdb, tdb->map_size, size) != 0)
-			goto fail;
-	}
-
-	tdb->map_size += size;
-
-	if (tdb->flags & TDB_INTERNAL) {
-		char *new_map_ptr = realloc(tdb->map_ptr, tdb->map_size);
-		if (!new_map_ptr) {
-			tdb->map_size -= size;
-			goto fail;
-		}
-		tdb->map_ptr = new_map_ptr;
-	} else {
-		/*
-		 * We must ensure the file is remapped before adding the space
-		 * to ensure consistency with systems like OpenBSD where
-		 * writes and mmaps are not consistent.
-		 */
-
-		/* We're ok if the mmap fails as we'll fallback to read/write */
-		tdb_mmap(tdb);
-	}
-
-	/* form a new freelist record */
-	memset(&rec,'\0',sizeof(rec));
-	rec.rec_len = size - sizeof(rec);
-
-	/* link it into the free list */
-	offset = tdb->map_size - size;
-	if (tdb_free(tdb, offset, &rec) == -1)
-		goto fail;
-
-	tdb_unlock(tdb, -1, F_WRLCK);
-	return 0;
- fail:
-	tdb_unlock(tdb, -1, F_WRLCK);
-	return -1;
-}
-
-
-/* 
-   the core of tdb_allocate - called when we have decided which
-   free list entry to use
- */
-static tdb_off tdb_allocate_ofs(TDB_CONTEXT *tdb, tdb_len length, tdb_off rec_ptr,
-				struct list_struct *rec, tdb_off last_ptr)
-{
-	struct list_struct newrec;
-	tdb_off newrec_ptr;
-
-	memset(&newrec, '\0', sizeof(newrec));
-
-	/* found it - now possibly split it up  */
-	if (rec->rec_len > length + MIN_REC_SIZE) {
-		/* Length of left piece */
-		length = TDB_ALIGN(length, TDB_ALIGNMENT);
-		
-		/* Right piece to go on free list */
-		newrec.rec_len = rec->rec_len - (sizeof(*rec) + length);
-		newrec_ptr = rec_ptr + sizeof(*rec) + length;
-		
-		/* And left record is shortened */
-		rec->rec_len = length;
-	} else {
-		newrec_ptr = 0;
-	}
-	
-	/* Remove allocated record from the free list */
-	if (ofs_write(tdb, last_ptr, &rec->next) == -1) {
-		return 0;
-	}
-	
-	/* Update header: do this before we drop alloc
-	   lock, otherwise tdb_free() might try to
-	   merge with us, thinking we're free.
-	   (Thanks Jeremy Allison). */
-	rec->magic = TDB_MAGIC;
-	if (rec_write(tdb, rec_ptr, rec) == -1) {
-		return 0;
-	}
-	
-	/* Did we create new block? */
-	if (newrec_ptr) {
-		/* Update allocated record tailer (we
-		   shortened it). */
-		if (update_tailer(tdb, rec_ptr, rec) == -1) {
-			return 0;
-		}
-		
-		/* Free new record */
-		if (tdb_free(tdb, newrec_ptr, &newrec) == -1) {
-			return 0;
-		}
-	}
-	
-	/* all done - return the new record offset */
-	return rec_ptr;
-}
-
-/* allocate some space from the free list. The offset returned points
-   to a unconnected list_struct within the database with room for at
-   least length bytes of total data
-
-   0 is returned if the space could not be allocated
- */
-static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length,
-			    struct list_struct *rec)
-{
-	tdb_off rec_ptr, last_ptr, newrec_ptr;
-	struct {
-		tdb_off rec_ptr, last_ptr;
-		tdb_len rec_len;
-	} bestfit;
-
-	if (tdb_lock(tdb, -1, F_WRLCK) == -1)
-		return 0;
-
-	/* Extra bytes required for tailer */
-	length += sizeof(tdb_off);
-
- again:
-	last_ptr = FREELIST_TOP;
-
-	/* read in the freelist top */
-	if (ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1)
-		goto fail;
-
-	bestfit.rec_ptr = 0;
-
-	/* 
-	   this is a best fit allocation strategy. Originally we used
-	   a first fit strategy, but it suffered from massive fragmentation
-	   issues when faced with a slowly increasing record size.
-	 */
-	while (rec_ptr) {
-		if (rec_free_read(tdb, rec_ptr, rec) == -1) {
-			goto fail;
-		}
-
-		if (rec->rec_len >= length) {
-			if (bestfit.rec_ptr == 0 ||
-			    rec->rec_len < bestfit.rec_len) {
-				bestfit.rec_len = rec->rec_len;
-				bestfit.rec_ptr = rec_ptr;
-				bestfit.last_ptr = last_ptr;
-				/* consider a fit to be good enough if we aren't wasting more than half the space */
-				if (bestfit.rec_len < 2*length) {
-					break;
-				}
-			}
-		}
-
-		/* move to the next record */
-		last_ptr = rec_ptr;
-		rec_ptr = rec->next;
-	}
-
-	if (bestfit.rec_ptr != 0) {
-		if (rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) {
-			goto fail;
-		}
-
-		newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, rec, bestfit.last_ptr);
-		tdb_unlock(tdb, -1, F_WRLCK);
-		return newrec_ptr;
-	}
-
-	/* we didn't find enough space. See if we can expand the
-	   database and if we can then try again */
-	if (tdb_expand(tdb, length + sizeof(*rec)) == 0)
-		goto again;
- fail:
-	tdb_unlock(tdb, -1, F_WRLCK);
-	return 0;
-}
-
-/* initialise a new database with a specified hash size */
-static int tdb_new_database(TDB_CONTEXT *tdb, int hash_size)
-{
-	struct tdb_header *newdb;
-	int size, ret = -1;
-
-	/* We make it up in memory, then write it out if not internal */
-	size = sizeof(struct tdb_header) + (hash_size+1)*sizeof(tdb_off);
-	if (!(newdb = calloc(size, 1)))
-		return TDB_ERRCODE(TDB_ERR_OOM, -1);
-
-	/* Fill in the header */
-	newdb->version = TDB_VERSION;
-	newdb->hash_size = hash_size;
-#ifdef USE_SPINLOCKS
-	newdb->rwlocks = size;
-#endif
-	if (tdb->flags & TDB_INTERNAL) {
-		tdb->map_size = size;
-		tdb->map_ptr = (char *)newdb;
-		memcpy(&tdb->header, newdb, sizeof(tdb->header));
-		/* Convert the `ondisk' version if asked. */
-		CONVERT(*newdb);
-		return 0;
-	}
-	if (lseek(tdb->fd, 0, SEEK_SET) == -1)
-		goto fail;
-
-	if (ftruncate(tdb->fd, 0) == -1)
-		goto fail;
-
-	/* This creates an endian-converted header, as if read from disk */
-	CONVERT(*newdb);
-	memcpy(&tdb->header, newdb, sizeof(tdb->header));
-	/* Don't endian-convert the magic food! */
-	memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1);
-	if (write(tdb->fd, newdb, size) != size)
-		ret = -1;
-	else
-		ret = tdb_create_rwlocks(tdb->fd, hash_size);
-
-  fail:
-	SAFE_FREE(newdb);
-	return ret;
-}
-
 /* Returns 0 on fail.  On success, return offset of record, and fills
    in rec */
-static tdb_off tdb_find(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash,
+static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, u32 hash,
 			struct list_struct *r)
 {
-	tdb_off rec_ptr;
+	tdb_off_t rec_ptr;
 	
 	/* read in the hash top */
-	if (ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
+	if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
 		return 0;
 
 	/* keep looking until we find the right record */
 	while (rec_ptr) {
-		if (rec_read(tdb, rec_ptr, r) == -1)
+		if (tdb_rec_read(tdb, rec_ptr, r) == -1)
 			return 0;
 
 		if (!TDB_DEAD(r) && hash==r->full_hash && key.dsize==r->key_len) {
-			char *k;
+			unsigned char *k;
 			/* a very likely hit - read the key */
 			k = tdb_alloc_read(tdb, rec_ptr + sizeof(*r), 
 					   r->key_len);
@@ -1045,8 +66,8 @@ static tdb_off tdb_find(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash,
 }
 
 /* As tdb_find, but if you succeed, keep the lock */
-static tdb_off tdb_find_lock_hash(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash, int locktype,
-			     struct list_struct *rec)
+tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, int locktype,
+			   struct list_struct *rec)
 {
 	u32 rec_ptr;
 
@@ -1057,48 +78,22 @@ static tdb_off tdb_find_lock_hash(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash, int
 	return rec_ptr;
 }
 
-enum TDB_ERROR tdb_error(TDB_CONTEXT *tdb)
-{
-	return tdb->ecode;
-}
-
-static struct tdb_errname {
-	enum TDB_ERROR ecode; const char *estring;
-} emap[] = { {TDB_SUCCESS, "Success"},
-	     {TDB_ERR_CORRUPT, "Corrupt database"},
-	     {TDB_ERR_IO, "IO Error"},
-	     {TDB_ERR_LOCK, "Locking error"},
-	     {TDB_ERR_OOM, "Out of memory"},
-	     {TDB_ERR_EXISTS, "Record exists"},
-	     {TDB_ERR_NOLOCK, "Lock exists on other keys"},
-	     {TDB_ERR_NOEXIST, "Record does not exist"} };
-
-/* Error string for the last tdb error */
-const char *tdb_errorstr(TDB_CONTEXT *tdb)
-{
-	u32 i;
-	for (i = 0; i < sizeof(emap) / sizeof(struct tdb_errname); i++)
-		if (tdb->ecode == emap[i].ecode)
-			return emap[i].estring;
-	return "Invalid error code";
-}
 
 /* update an entry in place - this only works if the new data size
    is <= the old data size and the key exists.
    on failure return -1.
 */
-
-static int tdb_update_hash(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash, TDB_DATA dbuf)
+static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, TDB_DATA dbuf)
 {
 	struct list_struct rec;
-	tdb_off rec_ptr;
+	tdb_off_t rec_ptr;
 
 	/* find entry */
 	if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
 		return -1;
 
 	/* must be long enough key, data and tailer */
-	if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off)) {
+	if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
 		tdb->ecode = TDB_SUCCESS; /* Not really an error */
 		return -1;
 	}
@@ -1110,7 +105,7 @@ static int tdb_update_hash(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash, TDB_DATA db
 	if (dbuf.dsize != rec.data_len) {
 		/* update size */
 		rec.data_len = dbuf.dsize;
-		return rec_write(tdb, rec_ptr, &rec);
+		return tdb_rec_write(tdb, rec_ptr, &rec);
 	}
  
 	return 0;
@@ -1122,10 +117,9 @@ static int tdb_update_hash(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash, TDB_DATA db
  * then the TDB_DATA will have zero length but
  * a non-zero pointer
  */
-
-TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key)
+TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
 {
-	tdb_off rec_ptr;
+	tdb_off_t rec_ptr;
 	struct list_struct rec;
 	TDB_DATA ret;
 	u32 hash;
@@ -1136,385 +130,85 @@ TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key)
 		return tdb_null;
 
 	ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
-				  rec.data_len);
-	ret.dsize = rec.data_len;
-	tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
-	return ret;
-}
-
-/* check if an entry in the database exists 
-
-   note that 1 is returned if the key is found and 0 is returned if not found
-   this doesn't match the conventions in the rest of this module, but is
-   compatible with gdbm
-*/
-static int tdb_exists_hash(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash)
-{
-	struct list_struct rec;
-	
-	if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
-		return 0;
-	tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
-	return 1;
-}
-
-int tdb_exists(TDB_CONTEXT *tdb, TDB_DATA key)
-{
-	u32 hash = tdb->hash_fn(&key);
-	return tdb_exists_hash(tdb, key, hash);
-}
-
-/* record lock stops delete underneath */
-static int lock_record(TDB_CONTEXT *tdb, tdb_off off)
-{
-	return off ? tdb_brlock(tdb, off, F_RDLCK, F_SETLKW, 0) : 0;
-}
-/*
-  Write locks override our own fcntl readlocks, so check it here.
-  Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not
-  an error to fail to get the lock here.
-*/
- 
-static int write_lock_record(TDB_CONTEXT *tdb, tdb_off off)
-{
-	struct tdb_traverse_lock *i;
-	for (i = &tdb->travlocks; i; i = i->next)
-		if (i->off == off)
-			return -1;
-	return tdb_brlock(tdb, off, F_WRLCK, F_SETLK, 1);
-}
-
-/*
-  Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not
-  an error to fail to get the lock here.
-*/
-
-static int write_unlock_record(TDB_CONTEXT *tdb, tdb_off off)
-{
-	return tdb_brlock(tdb, off, F_UNLCK, F_SETLK, 0);
-}
-/* fcntl locks don't stack: avoid unlocking someone else's */
-static int unlock_record(TDB_CONTEXT *tdb, tdb_off off)
-{
-	struct tdb_traverse_lock *i;
-	u32 count = 0;
-
-	if (off == 0)
-		return 0;
-	for (i = &tdb->travlocks; i; i = i->next)
-		if (i->off == off)
-			count++;
-	return (count == 1 ? tdb_brlock(tdb, off, F_UNLCK, F_SETLKW, 0) : 0);
-}
-
-/* actually delete an entry in the database given the offset */
-static int do_delete(TDB_CONTEXT *tdb, tdb_off rec_ptr, struct list_struct*rec)
-{
-	tdb_off last_ptr, i;
-	struct list_struct lastrec;
-
-	if (tdb->read_only) return -1;
-
-	if (write_lock_record(tdb, rec_ptr) == -1) {
-		/* Someone traversing here: mark it as dead */
-		rec->magic = TDB_DEAD_MAGIC;
-		return rec_write(tdb, rec_ptr, rec);
-	}
-	if (write_unlock_record(tdb, rec_ptr) != 0)
-		return -1;
-
-	/* find previous record in hash chain */
-	if (ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
-		return -1;
-	for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
-		if (rec_read(tdb, i, &lastrec) == -1)
-			return -1;
-
-	/* unlink it: next ptr is at start of record. */
-	if (last_ptr == 0)
-		last_ptr = TDB_HASH_TOP(rec->full_hash);
-	if (ofs_write(tdb, last_ptr, &rec->next) == -1)
-		return -1;
-
-	/* recover the space */
-	if (tdb_free(tdb, rec_ptr, rec) == -1)
-		return -1;
-	return 0;
-}
-
-/* Uses traverse lock: 0 = finish, -1 = error, other = record offset */
-static int tdb_next_lock(TDB_CONTEXT *tdb, struct tdb_traverse_lock *tlock,
-			 struct list_struct *rec)
-{
-	int want_next = (tlock->off != 0);
-
-	/* Lock each chain from the start one. */
-	for (; tlock->hash < tdb->header.hash_size; tlock->hash++) {
-
-		/* this is an optimisation for the common case where
-		   the hash chain is empty, which is particularly
-		   common for the use of tdb with ldb, where large
-		   hashes are used. In that case we spend most of our
-		   time in tdb_brlock(), locking empty hash chains.
-
-		   To avoid this, we do an unlocked pre-check to see
-		   if the hash chain is empty before starting to look
-		   inside it. If it is empty then we can avoid that
-		   hash chain. If it isn't empty then we can't believe
-		   the value we get back, as we read it without a
-		   lock, so instead we get the lock and re-fetch the
-		   value below.
-
-		   Notice that not doing this optimisation on the
-		   first hash chain is critical. We must guarantee
-		   that we have done at least one fcntl lock at the
-		   start of a search to guarantee that memory is
-		   coherent on SMP systems. If records are added by
-		   others during the search then thats OK, and we
-		   could possibly miss those with this trick, but we
-		   could miss them anyway without this trick, so the
-		   semantics don't change.
-
-		   With a non-indexed ldb search this trick gains us a
-		   factor of around 80 in speed on a linux 2.6.x
-		   system (testing using ldbtest).
-		 */
-		if (!tlock->off && tlock->hash != 0) {
-			u32 off;
-			if (tdb->map_ptr) {
-				for (;tlock->hash < tdb->header.hash_size;tlock->hash++) {
-					if (0 != *(u32 *)(TDB_HASH_TOP(tlock->hash) + (unsigned char *)tdb->map_ptr)) {
-						break;
-					}
-				}
-				if (tlock->hash == tdb->header.hash_size) {
-					continue;
-				}
-			} else {
-				if (ofs_read(tdb, TDB_HASH_TOP(tlock->hash), &off) == 0 &&
-				    off == 0) {
-					continue;
-				}
-			}
-		}
-
-		if (tdb_lock(tdb, tlock->hash, F_WRLCK) == -1)
-			return -1;
-
-		/* No previous record?  Start at top of chain. */
-		if (!tlock->off) {
-			if (ofs_read(tdb, TDB_HASH_TOP(tlock->hash),
-				     &tlock->off) == -1)
-				goto fail;
-		} else {
-			/* Otherwise unlock the previous record. */
-			if (unlock_record(tdb, tlock->off) != 0)
-				goto fail;
-		}
-
-		if (want_next) {
-			/* We have offset of old record: grab next */
-			if (rec_read(tdb, tlock->off, rec) == -1)
-				goto fail;
-			tlock->off = rec->next;
-		}
-
-		/* Iterate through chain */
-		while( tlock->off) {
-			tdb_off current;
-			if (rec_read(tdb, tlock->off, rec) == -1)
-				goto fail;
-
-			/* Detect infinite loops. From "Shlomi Yaakobovich" <Shlomi@exanet.com>. */
-			if (tlock->off == rec->next) {
-				TDB_LOG((tdb, 0, "tdb_next_lock: loop detected.\n"));
-				goto fail;
-			}
-
-			if (!TDB_DEAD(rec)) {
-				/* Woohoo: we found one! */
-				if (lock_record(tdb, tlock->off) != 0)
-					goto fail;
-				return tlock->off;
-			}
-
-			/* Try to clean dead ones from old traverses */
-			current = tlock->off;
-			tlock->off = rec->next;
-			if (!tdb->read_only && 
-			    do_delete(tdb, current, rec) != 0)
-				goto fail;
-		}
-		tdb_unlock(tdb, tlock->hash, F_WRLCK);
-		want_next = 0;
-	}
-	/* We finished iteration without finding anything */
-	return TDB_ERRCODE(TDB_SUCCESS, 0);
-
- fail:
-	tlock->off = 0;
-	if (tdb_unlock(tdb, tlock->hash, F_WRLCK) != 0)
-		TDB_LOG((tdb, 0, "tdb_next_lock: On error unlock failed!\n"));
-	return -1;
+				  rec.data_len);
+	ret.dsize = rec.data_len;
+	tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
+	return ret;
 }
 
-/* traverse the entire database - calling fn(tdb, key, data) on each element.
-   return -1 on error or the record count traversed
-   if fn is NULL then it is not called
-   a non-zero return value from fn() indicates that the traversal should stop
-  */
-int tdb_traverse(TDB_CONTEXT *tdb, tdb_traverse_func fn, void *private)
+/* check if an entry in the database exists 
+
+   note that 1 is returned if the key is found and 0 is returned if not found
+   this doesn't match the conventions in the rest of this module, but is
+   compatible with gdbm
+*/
+static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
 {
-	TDB_DATA key, dbuf;
 	struct list_struct rec;
-	struct tdb_traverse_lock tl = { NULL, 0, 0 };
-	int ret, count = 0;
-
-	/* This was in the initializaton, above, but the IRIX compiler
-	 * did not like it.  crh
-	 */
-	tl.next = tdb->travlocks.next;
-
-	/* fcntl locks don't stack: beware traverse inside traverse */
-	tdb->travlocks.next = &tl;
-
-	/* tdb_next_lock places locks on the record returned, and its chain */
-	while ((ret = tdb_next_lock(tdb, &tl, &rec)) > 0) {
-		count++;
-		/* now read the full record */
-		key.dptr = tdb_alloc_read(tdb, tl.off + sizeof(rec), 
-					  rec.key_len + rec.data_len);
-		if (!key.dptr) {
-			ret = -1;
-			if (tdb_unlock(tdb, tl.hash, F_WRLCK) != 0)
-				goto out;
-			if (unlock_record(tdb, tl.off) != 0)
-				TDB_LOG((tdb, 0, "tdb_traverse: key.dptr == NULL and unlock_record failed!\n"));
-			goto out;
-		}
-		key.dsize = rec.key_len;
-		dbuf.dptr = key.dptr + rec.key_len;
-		dbuf.dsize = rec.data_len;
-
-		/* Drop chain lock, call out */
-		if (tdb_unlock(tdb, tl.hash, F_WRLCK) != 0) {
-			ret = -1;
-			goto out;
-		}
-		if (fn && fn(tdb, key, dbuf, private)) {
-			/* They want us to terminate traversal */
-			ret = count;
-			if (unlock_record(tdb, tl.off) != 0) {
-				TDB_LOG((tdb, 0, "tdb_traverse: unlock_record failed!\n"));;
-				ret = -1;
-			}
-			tdb->travlocks.next = tl.next;
-			SAFE_FREE(key.dptr);
-			return count;
-		}
-		SAFE_FREE(key.dptr);
-	}
-out:
-	tdb->travlocks.next = tl.next;
-	if (ret < 0)
-		return -1;
-	else
-		return count;
+	
+	if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
+		return 0;
+	tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
+	return 1;
 }
 
-/* find the first entry in the database and return its key */
-TDB_DATA tdb_firstkey(TDB_CONTEXT *tdb)
+int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
 {
-	TDB_DATA key;
-	struct list_struct rec;
-
-	/* release any old lock */
-	if (unlock_record(tdb, tdb->travlocks.off) != 0)
-		return tdb_null;
-	tdb->travlocks.off = tdb->travlocks.hash = 0;
-
-	if (tdb_next_lock(tdb, &tdb->travlocks, &rec) <= 0)
-		return tdb_null;
-	/* now read the key */
-	key.dsize = rec.key_len;
-	key.dptr =tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec),key.dsize);
-	if (tdb_unlock(tdb, BUCKET(tdb->travlocks.hash), F_WRLCK) != 0)
-		TDB_LOG((tdb, 0, "tdb_firstkey: error occurred while tdb_unlocking!\n"));
-	return key;
+	u32 hash = tdb->hash_fn(&key);
+	return tdb_exists_hash(tdb, key, hash);
 }
 
-/* find the next entry in the database, returning its key */
-TDB_DATA tdb_nextkey(TDB_CONTEXT *tdb, TDB_DATA oldkey)
+/* actually delete an entry in the database given the offset */
+int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct*rec)
 {
-	u32 oldhash;
-	TDB_DATA key = tdb_null;
-	struct list_struct rec;
-	char *k = NULL;
+	tdb_off_t last_ptr, i;
+	struct list_struct lastrec;
 
-	/* Is locked key the old key?  If so, traverse will be reliable. */
-	if (tdb->travlocks.off) {
-		if (tdb_lock(tdb,tdb->travlocks.hash,F_WRLCK))
-			return tdb_null;
-		if (rec_read(tdb, tdb->travlocks.off, &rec) == -1
-		    || !(k = tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec),
-					    rec.key_len))
-		    || memcmp(k, oldkey.dptr, oldkey.dsize) != 0) {
-			/* No, it wasn't: unlock it and start from scratch */
-			if (unlock_record(tdb, tdb->travlocks.off) != 0)
-				return tdb_null;
-			if (tdb_unlock(tdb, tdb->travlocks.hash, F_WRLCK) != 0)
-				return tdb_null;
-			tdb->travlocks.off = 0;
-		}
+	if (tdb->read_only) return -1;
 
-		SAFE_FREE(k);
+	if (tdb_write_lock_record(tdb, rec_ptr) == -1) {
+		/* Someone traversing here: mark it as dead */
+		rec->magic = TDB_DEAD_MAGIC;
+		return tdb_rec_write(tdb, rec_ptr, rec);
 	}
+	if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
+		return -1;
 
-	if (!tdb->travlocks.off) {
-		/* No previous element: do normal find, and lock record */
-		tdb->travlocks.off = tdb_find_lock_hash(tdb, oldkey, tdb->hash_fn(&oldkey), F_WRLCK, &rec);
-		if (!tdb->travlocks.off)
-			return tdb_null;
-		tdb->travlocks.hash = BUCKET(rec.full_hash);
-		if (lock_record(tdb, tdb->travlocks.off) != 0) {
-			TDB_LOG((tdb, 0, "tdb_nextkey: lock_record failed (%s)!\n", strerror(errno)));
-			return tdb_null;
-		}
-	}
-	oldhash = tdb->travlocks.hash;
+	/* find previous record in hash chain */
+	if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
+		return -1;
+	for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
+		if (tdb_rec_read(tdb, i, &lastrec) == -1)
+			return -1;
 
-	/* Grab next record: locks chain and returned record,
-	   unlocks old record */
-	if (tdb_next_lock(tdb, &tdb->travlocks, &rec) > 0) {
-		key.dsize = rec.key_len;
-		key.dptr = tdb_alloc_read(tdb, tdb->travlocks.off+sizeof(rec),
-					  key.dsize);
-		/* Unlock the chain of this new record */
-		if (tdb_unlock(tdb, tdb->travlocks.hash, F_WRLCK) != 0)
-			TDB_LOG((tdb, 0, "tdb_nextkey: WARNING tdb_unlock failed!\n"));
-	}
-	/* Unlock the chain of old record */
-	if (tdb_unlock(tdb, BUCKET(oldhash), F_WRLCK) != 0)
-		TDB_LOG((tdb, 0, "tdb_nextkey: WARNING tdb_unlock failed!\n"));
-	return key;
+	/* unlink it: next ptr is at start of record. */
+	if (last_ptr == 0)
+		last_ptr = TDB_HASH_TOP(rec->full_hash);
+	if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
+		return -1;
+
+	/* recover the space */
+	if (tdb_free(tdb, rec_ptr, rec) == -1)
+		return -1;
+	return 0;
 }
 
 /* delete an entry in the database given a key */
-static int tdb_delete_hash(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash)
+static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
 {
-	tdb_off rec_ptr;
+	tdb_off_t rec_ptr;
 	struct list_struct rec;
 	int ret;
 
 	if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, &rec)))
 		return -1;
-	ret = do_delete(tdb, rec_ptr, &rec);
+	ret = tdb_do_delete(tdb, rec_ptr, &rec);
 	if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0)
 		TDB_LOG((tdb, 0, "tdb_delete: WARNING tdb_unlock failed!\n"));
 	return ret;
 }
 
-int tdb_delete(TDB_CONTEXT *tdb, TDB_DATA key)
+int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
 {
 	u32 hash = tdb->hash_fn(&key);
 	return tdb_delete_hash(tdb, key, hash);
@@ -1525,11 +219,11 @@ int tdb_delete(TDB_CONTEXT *tdb, TDB_DATA key)
 
    return 0 on success, -1 on failure
 */
-int tdb_store(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
+int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 {
 	struct list_struct rec;
 	u32 hash;
-	tdb_off rec_ptr;
+	tdb_off_t rec_ptr;
 	char *p = NULL;
 	int ret = 0;
 
@@ -1581,7 +275,7 @@ int tdb_store(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 		goto fail;
 
 	/* Read hash top into next ptr */
-	if (ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
+	if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
 		goto fail;
 
 	rec.key_len = key.dsize;
@@ -1590,9 +284,9 @@ int tdb_store(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 	rec.magic = TDB_MAGIC;
 
 	/* write out and point the top of the hash chain at it */
-	if (rec_write(tdb, rec_ptr, &rec) == -1
+	if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
 	    || tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
-	    || ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
+	    || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
 		/* Need to tdb_unallocate() here */
 		goto fail;
 	}
@@ -1605,499 +299,68 @@ fail:
 	goto out;
 }
 
-/* Attempt to append data to an entry in place - this only works if the new data size
-   is <= the old data size and the key exists.
-   on failure return -1. Record must be locked before calling.
-*/
-static int tdb_append_inplace(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash, TDB_DATA new_dbuf)
-{
-	struct list_struct rec;
-	tdb_off rec_ptr;
-
-	/* find entry */
-	if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
-		return -1;
-
-	/* Append of 0 is always ok. */
-	if (new_dbuf.dsize == 0)
-		return 0;
-
-	/* must be long enough for key, old data + new data and tailer */
-	if (rec.rec_len < key.dsize + rec.data_len + new_dbuf.dsize + sizeof(tdb_off)) {
-		/* No room. */
-		tdb->ecode = TDB_SUCCESS; /* Not really an error */
-		return -1;
-	}
-
-	if (tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len + rec.data_len,
-		      new_dbuf.dptr, new_dbuf.dsize) == -1)
-		return -1;
-
-	/* update size */
-	rec.data_len += new_dbuf.dsize;
-	return rec_write(tdb, rec_ptr, &rec);
-}
 
 /* Append to an entry. Create if not exist. */
-
-int tdb_append(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA new_dbuf)
+int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
 {
-	struct list_struct rec;
 	u32 hash;
-	tdb_off rec_ptr;
-	char *p = NULL;
-	int ret = 0;
-	size_t new_data_size = 0;
+	TDB_DATA dbuf;
+	int ret = -1;
 
 	/* find which hash bucket it is in */
 	hash = tdb->hash_fn(&key);
 	if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 		return -1;
 
-	/* first try in-place. */
-	if (tdb_append_inplace(tdb, key, hash, new_dbuf) == 0)
-		goto out;
-
-	/* reset the error code potentially set by the tdb_append_inplace() */
-	tdb->ecode = TDB_SUCCESS;
-
-	/* find entry */
-	if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {
-		if (tdb->ecode != TDB_ERR_NOEXIST)
-			goto fail;
+	dbuf = tdb_fetch(tdb, key);
 
-		/* Not found - create. */
-
-		ret = tdb_store(tdb, key, new_dbuf, TDB_INSERT);
-		goto out;
+	if (dbuf.dptr == NULL) {
+		dbuf.dptr = malloc(new_dbuf.dsize);
+	} else {
+		dbuf.dptr = realloc(dbuf.dptr, dbuf.dsize + new_dbuf.dsize);
 	}
 
-	new_data_size = rec.data_len + new_dbuf.dsize;
-
-	/* Copy key+old_value+value *before* allocating free space in case malloc
-	   fails and we are left with a dead spot in the tdb. */
-
-	if (!(p = (char *)malloc(key.dsize + new_data_size))) {
+	if (dbuf.dptr == NULL) {
 		tdb->ecode = TDB_ERR_OOM;
-		goto fail;
-	}
-
-	/* Copy the key in place. */
-	memcpy(p, key.dptr, key.dsize);
-
-	/* Now read the old data into place. */
-	if (rec.data_len &&
-		tdb_read(tdb, rec_ptr + sizeof(rec) + rec.key_len, p + key.dsize, rec.data_len, 0) == -1)
-			goto fail;
-
-	/* Finally append the new data. */
-	if (new_dbuf.dsize)
-		memcpy(p+key.dsize+rec.data_len, new_dbuf.dptr, new_dbuf.dsize);
-
-	/* delete any existing record - if it doesn't exist we don't
-           care.  Doing this first reduces fragmentation, and avoids
-           coalescing with `allocated' block before it's updated. */
-
-	tdb_delete_hash(tdb, key, hash);
-
-	if (!(rec_ptr = tdb_allocate(tdb, key.dsize + new_data_size, &rec)))
-		goto fail;
-
-	/* Read hash top into next ptr */
-	if (ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
-		goto fail;
-
-	rec.key_len = key.dsize;
-	rec.data_len = new_data_size;
-	rec.full_hash = hash;
-	rec.magic = TDB_MAGIC;
-
-	/* write out and point the top of the hash chain at it */
-	if (rec_write(tdb, rec_ptr, &rec) == -1
-	    || tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+new_data_size)==-1
-	    || ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
-		/* Need to tdb_unallocate() here */
-		goto fail;
-	}
-
- out:
-	SAFE_FREE(p); 
-	tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
-	return ret;
-
-fail:
-	ret = -1;
-	goto out;
-}
-
-static int tdb_already_open(dev_t device,
-			    ino_t ino)
-{
-	TDB_CONTEXT *i;
-	
-	for (i = tdbs; i; i = i->next) {
-		if (i->device == device && i->inode == ino) {
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-/* open the database, creating it if necessary 
-
-   The open_flags and mode are passed straight to the open call on the
-   database file. A flags value of O_WRONLY is invalid. The hash size
-   is advisory, use zero for a default value.
-
-   Return is NULL on error, in which case errno is also set.  Don't 
-   try to call tdb_error or tdb_errname, just do strerror(errno).
-
-   @param name may be NULL for internal databases. */
-TDB_CONTEXT *tdb_open(const char *name, int hash_size, int tdb_flags,
-		      int open_flags, mode_t mode)
-{
-	return tdb_open_ex(name, hash_size, tdb_flags, open_flags, mode, NULL, NULL);
-}
-
-/* a default logging function */
-static void null_log_fn(TDB_CONTEXT *tdb, int level, const char *fmt, ...)
-{
-}
-
-
-TDB_CONTEXT *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
-			 int open_flags, mode_t mode,
-			 tdb_log_func log_fn,
-			 tdb_hash_func hash_fn)
-{
-	TDB_CONTEXT *tdb;
-	struct stat st;
-	int rev = 0, locked = 0;
-	uint8_t *vp;
-	u32 vertest;
-
-	if (!(tdb = calloc(1, sizeof *tdb))) {
-		/* Can't log this */
-		errno = ENOMEM;
-		goto fail;
-	}
-	tdb->fd = -1;
-	tdb->name = NULL;
-	tdb->map_ptr = NULL;
-	tdb->flags = tdb_flags;
-	tdb->open_flags = open_flags;
-	tdb->log_fn = log_fn?log_fn:null_log_fn;
-	tdb->hash_fn = hash_fn ? hash_fn : default_tdb_hash;
-
-	if ((open_flags & O_ACCMODE) == O_WRONLY) {
-		TDB_LOG((tdb, 0, "tdb_open_ex: can't open tdb %s write-only\n",
-			 name));
-		errno = EINVAL;
-		goto fail;
-	}
-	
-	if (hash_size == 0)
-		hash_size = DEFAULT_HASH_SIZE;
-	if ((open_flags & O_ACCMODE) == O_RDONLY) {
-		tdb->read_only = 1;
-		/* read only databases don't do locking or clear if first */
-		tdb->flags |= TDB_NOLOCK;
-		tdb->flags &= ~TDB_CLEAR_IF_FIRST;
-	}
-
-	/* internal databases don't mmap or lock, and start off cleared */
-	if (tdb->flags & TDB_INTERNAL) {
-		tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP);
-		tdb->flags &= ~TDB_CLEAR_IF_FIRST;
-		if (tdb_new_database(tdb, hash_size) != 0) {
-			TDB_LOG((tdb, 0, "tdb_open_ex: tdb_new_database failed!"));
-			goto fail;
-		}
-		goto internal;
-	}
-
-	if ((tdb->fd = open(name, open_flags, mode)) == -1) {
-		TDB_LOG((tdb, 5, "tdb_open_ex: could not open file %s: %s\n",
-			 name, strerror(errno)));
-		goto fail;	/* errno set by open(2) */
+		goto failed;
 	}
 
-	/* ensure there is only one process initialising at once */
-	if (tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0) == -1) {
-		TDB_LOG((tdb, 0, "tdb_open_ex: failed to get global lock on %s: %s\n",
-			 name, strerror(errno)));
-		goto fail;	/* errno set by tdb_brlock */
-	}
-
-	/* we need to zero database if we are the only one with it open */
-	if ((tdb_flags & TDB_CLEAR_IF_FIRST) &&
-		(locked = (tdb_brlock(tdb, ACTIVE_LOCK, F_WRLCK, F_SETLK, 0) == 0))) {
-		open_flags |= O_CREAT;
-		if (ftruncate(tdb->fd, 0) == -1) {
-			TDB_LOG((tdb, 0, "tdb_open_ex: "
-				 "failed to truncate %s: %s\n",
-				 name, strerror(errno)));
-			goto fail; /* errno set by ftruncate */
-		}
-	}
-
-	if (read(tdb->fd, &tdb->header, sizeof(tdb->header)) != sizeof(tdb->header)
-	    || strcmp(tdb->header.magic_food, TDB_MAGIC_FOOD) != 0
-	    || (tdb->header.version != TDB_VERSION
-		&& !(rev = (tdb->header.version==TDB_BYTEREV(TDB_VERSION))))) {
-		/* its not a valid database - possibly initialise it */
-		if (!(open_flags & O_CREAT) || tdb_new_database(tdb, hash_size) == -1) {
-			errno = EIO; /* ie bad format or something */
-			goto fail;
-		}
-		rev = (tdb->flags & TDB_CONVERT);
-	}
-	vp = (uint8_t *)&tdb->header.version;
-	vertest = (((u32)vp[0]) << 24) | (((u32)vp[1]) << 16) |
-		  (((u32)vp[2]) << 8) | (u32)vp[3];
-	tdb->flags |= (vertest==TDB_VERSION) ? TDB_BIGENDIAN : 0;
-	if (!rev)
-		tdb->flags &= ~TDB_CONVERT;
-	else {
-		tdb->flags |= TDB_CONVERT;
-		convert(&tdb->header, sizeof(tdb->header));
-	}
-	if (fstat(tdb->fd, &st) == -1)
-		goto fail;
-
-	/* Is it already in the open list?  If so, fail. */
-	if (tdb_already_open(st.st_dev, st.st_ino)) {
-		TDB_LOG((tdb, 2, "tdb_open_ex: "
-			 "%s (%d,%d) is already open in this process\n",
-			 name, (int)st.st_dev, (int)st.st_ino));
-		errno = EBUSY;
-		goto fail;
-	}
-
-	if (!(tdb->name = (char *)strdup(name))) {
-		errno = ENOMEM;
-		goto fail;
-	}
-
-	tdb->map_size = st.st_size;
-	tdb->device = st.st_dev;
-	tdb->inode = st.st_ino;
-	tdb->locked = calloc(tdb->header.hash_size+1, sizeof(tdb->locked[0]));
-	if (!tdb->locked) {
-		TDB_LOG((tdb, 2, "tdb_open_ex: "
-			 "failed to allocate lock structure for %s\n",
-			 name));
-		errno = ENOMEM;
-		goto fail;
-	}
-	tdb_mmap(tdb);
-	if (locked) {
-		if (!tdb->read_only)
-			if (tdb_clear_spinlocks(tdb) != 0) {
-				TDB_LOG((tdb, 0, "tdb_open_ex: "
-				"failed to clear spinlock\n"));
-				goto fail;
-			}
-		if (tdb_brlock(tdb, ACTIVE_LOCK, F_UNLCK, F_SETLK, 0) == -1) {
-			TDB_LOG((tdb, 0, "tdb_open_ex: "
-				 "failed to take ACTIVE_LOCK on %s: %s\n",
-				 name, strerror(errno)));
-			goto fail;
-		}
-
-	}
-
-	/* We always need to do this if the CLEAR_IF_FIRST flag is set, even if
-	   we didn't get the initial exclusive lock as we need to let all other
-	   users know we're using it. */
-
-	if (tdb_flags & TDB_CLEAR_IF_FIRST) {
-	/* leave this lock in place to indicate it's in use */
-	if (tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0) == -1)
-		goto fail;
-	}
-
-
- internal:
-	/* Internal (memory-only) databases skip all the code above to
-	 * do with disk files, and resume here by releasing their
-	 * global lock and hooking into the active list. */
-	if (tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0) == -1)
-		goto fail;
-	tdb->next = tdbs;
-	tdbs = tdb;
-	return tdb;
-
- fail:
-	{ int save_errno = errno;
+	memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
+	dbuf.dsize += new_dbuf.dsize;
 
-	if (!tdb)
-		return NULL;
+	ret = tdb_store(tdb, key, dbuf, 0);
 	
-	if (tdb->map_ptr) {
-		if (tdb->flags & TDB_INTERNAL)
-			SAFE_FREE(tdb->map_ptr);
-		else
-			tdb_munmap(tdb);
-	}
-	SAFE_FREE(tdb->name);
-	if (tdb->fd != -1)
-		if (close(tdb->fd) != 0)
-			TDB_LOG((tdb, 5, "tdb_open_ex: failed to close tdb->fd on error!\n"));
-	SAFE_FREE(tdb->locked);
-	SAFE_FREE(tdb);
-	errno = save_errno;
-	return NULL;
-	}
-}
-
-/**
- * Close a database.
- *
- * @returns -1 for error; 0 for success.
- **/
-int tdb_close(TDB_CONTEXT *tdb)
-{
-	TDB_CONTEXT **i;
-	int ret = 0;
-
-	if (tdb->map_ptr) {
-		if (tdb->flags & TDB_INTERNAL)
-			SAFE_FREE(tdb->map_ptr);
-		else
-			tdb_munmap(tdb);
-	}
-	SAFE_FREE(tdb->name);
-	if (tdb->fd != -1)
-		ret = close(tdb->fd);
-	SAFE_FREE(tdb->locked);
-
-	/* Remove from contexts list */
-	for (i = &tdbs; *i; i = &(*i)->next) {
-		if (*i == tdb) {
-			*i = tdb->next;
-			break;
-		}
-	}
-
-	memset(tdb, 0, sizeof(*tdb));
-	SAFE_FREE(tdb);
-
+failed:
+	tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
+	SAFE_FREE(dbuf.dptr);
 	return ret;
 }
 
-/* lock/unlock entire database */
-int tdb_lockall(TDB_CONTEXT *tdb)
-{
-	u32 i;
-
-	/* There are no locks on read-only dbs */
-	if (tdb->read_only)
-		return TDB_ERRCODE(TDB_ERR_LOCK, -1);
-	for (i = 0; i < tdb->header.hash_size; i++) 
-		if (tdb_lock(tdb, i, F_WRLCK))
-			break;
-
-	/* If error, release locks we have... */
-	if (i < tdb->header.hash_size) {
-		u32 j;
-
-		for ( j = 0; j < i; j++)
-			tdb_unlock(tdb, j, F_WRLCK);
-		return TDB_ERRCODE(TDB_ERR_NOLOCK, -1);
-	}
-
-	return 0;
-}
-void tdb_unlockall(TDB_CONTEXT *tdb)
-{
-	u32 i;
-	for (i=0; i < tdb->header.hash_size; i++)
-		tdb_unlock(tdb, i, F_WRLCK);
-}
-
-/* lock/unlock one hash chain. This is meant to be used to reduce
-   contention - it cannot guarantee how many records will be locked */
-int tdb_chainlock(TDB_CONTEXT *tdb, TDB_DATA key)
-{
-	return tdb_lock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK);
-}
-
-int tdb_chainunlock(TDB_CONTEXT *tdb, TDB_DATA key)
-{
-	return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK);
-}
-
-int tdb_chainlock_read(TDB_CONTEXT *tdb, TDB_DATA key)
-{
-	return tdb_lock(tdb, BUCKET(tdb->hash_fn(&key)), F_RDLCK);
-}
-
-int tdb_chainunlock_read(TDB_CONTEXT *tdb, TDB_DATA key)
-{
-	return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_RDLCK);
-}
-
 
-/* register a loging function */
-void tdb_logging_function(TDB_CONTEXT *tdb, void (*fn)(TDB_CONTEXT *, int , const char *, ...))
+/*
+  return the name of the current tdb file
+  useful for external logging functions
+*/
+const char *tdb_name(struct tdb_context *tdb)
 {
-	tdb->log_fn = fn?fn:null_log_fn;
+	return tdb->name;
 }
 
-
-/* reopen a tdb - this can be used after a fork to ensure that we have an independent
-   seek pointer from our parent and to re-establish locks */
-int tdb_reopen(TDB_CONTEXT *tdb)
+/*
+  return the underlying file descriptor being used by tdb, or -1
+  useful for external routines that want to check the device/inode
+  of the fd
+*/
+int tdb_fd(struct tdb_context *tdb)
 {
-	struct stat st;
-
-	if (tdb->flags & TDB_INTERNAL)
-		return 0; /* Nothing to do. */
-	if (tdb_munmap(tdb) != 0) {
-		TDB_LOG((tdb, 0, "tdb_reopen: munmap failed (%s)\n", strerror(errno)));
-		goto fail;
-	}
-	if (close(tdb->fd) != 0)
-		TDB_LOG((tdb, 0, "tdb_reopen: WARNING closing tdb->fd failed!\n"));
-	tdb->fd = open(tdb->name, tdb->open_flags & ~(O_CREAT|O_TRUNC), 0);
-	if (tdb->fd == -1) {
-		TDB_LOG((tdb, 0, "tdb_reopen: open failed (%s)\n", strerror(errno)));
-		goto fail;
-	}
-	if (fstat(tdb->fd, &st) != 0) {
-		TDB_LOG((tdb, 0, "tdb_reopen: fstat failed (%s)\n", strerror(errno)));
-		goto fail;
-	}
-	if (st.st_ino != tdb->inode || st.st_dev != tdb->device) {
-		TDB_LOG((tdb, 0, "tdb_reopen: file dev/inode has changed!\n"));
-		goto fail;
-	}
-	tdb_mmap(tdb);
-	if ((tdb->flags & TDB_CLEAR_IF_FIRST) && (tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0) == -1)) {
-		TDB_LOG((tdb, 0, "tdb_reopen: failed to obtain active lock\n"));
-		goto fail;
-	}
-
-	return 0;
-
-fail:
-	tdb_close(tdb);
-	return -1;
+	return tdb->fd;
 }
 
-/* reopen all tdb's */
-int tdb_reopen_all(void)
+/*
+  return the current logging function
+  useful for external tdb routines that wish to log tdb errors
+*/
+tdb_log_func tdb_log_fn(struct tdb_context *tdb)
 {
-	TDB_CONTEXT *tdb;
-
-	for (tdb=tdbs; tdb; tdb = tdb->next) {
-		/* Ensure no clear-if-first. */
-		tdb->flags &= ~TDB_CLEAR_IF_FIRST;
-		if (tdb_reopen(tdb) != 0)
-			return -1;
-	}
-
-	return 0;
+	return tdb->log_fn;
 }
-- 
cgit 


From ede8415d61b6791114c65de1c283a4e8c11f1585 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Thu, 22 Sep 2005 03:56:41 +0000
Subject: r10405: added transactions into tdb, and hook them into ldb. See my
 samba-technical posting for more details on the transactions design.

This also adds a number of command line arguments to tdbtorture,
making it more flexible, and fixes some lock deadlock conditions in
the tdbtorture code.
(This used to be commit 06bd8abba942ec9f1e23f5c5d546cbb71ca3a701)
---
 source4/lib/tdb/common/tdb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index f099c2d1aa..c37d37a4f2 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -98,7 +98,7 @@ static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, TDB_
 		return -1;
 	}
 
-	if (tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
+	if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 		      dbuf.dptr, dbuf.dsize) == -1)
 		return -1;
 
@@ -285,7 +285,7 @@ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 
 	/* write out and point the top of the hash chain at it */
 	if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
-	    || tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
+	    || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
 	    || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
 		/* Need to tdb_unallocate() here */
 		goto fail;
-- 
cgit 


From bd310b792509f7305d7dc029eb4bec109322a4bf Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Thu, 22 Sep 2005 13:12:46 +0000
Subject: r10421: following on discussions with simo, I have worked out a way
 of allowing searches to proceed while another process is in a transaction,
 then only upgrading the transaction lock to a write lock on commit.

The solution is:

 - split tdb_traverse() into two calls, called tdb_traverse() and
   tdb_traverse_read(). The _read() version only gets read locks, and
   will fail any write operations made in the callback from the
   traverse.

 - the normal tdb_traverse() call allows for read or write operations
   in the callback, but gets the transaction lock, preventing
   transactions from starting inside the traverse

In addition we enforce the following rule: you may not start a
transaction within a traverse callback, although you can start a
traverse within a transaction.

With these rules in place I believe all the deadlock possibilities are
removed, and we can now allow for searches to happen in parallel with
transactions
(This used to be commit 7dd31288a701d772e45b1960ac4ce4cc1be782ed)
---
 source4/lib/tdb/common/tdb.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index c37d37a4f2..2e229e88cc 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -227,6 +227,11 @@ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 	char *p = NULL;
 	int ret = 0;
 
+	if (tdb->read_only) {
+		tdb->ecode = TDB_ERR_RDONLY;
+		return -1;
+	}
+
 	/* find which hash bucket it is in */
 	hash = tdb->hash_fn(&key);
 	if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
-- 
cgit 


From 5860aef9cd53da572bef1b86a62a3a5e86da84b0 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Sat, 24 Sep 2005 03:43:02 +0000
Subject: r10465: separate out a read_only db from a read-only traversal to
 ensure we don't end up doing a mmap read only (This used to be commit
 294ccfd46a0c4e1af9365d028acdabec03c41ad3)

---
 source4/lib/tdb/common/tdb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 2e229e88cc..4b0d4a31c5 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -164,7 +164,7 @@ int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct
 	tdb_off_t last_ptr, i;
 	struct list_struct lastrec;
 
-	if (tdb->read_only) return -1;
+	if (tdb->read_only || tdb->traverse_read) return -1;
 
 	if (tdb_write_lock_record(tdb, rec_ptr) == -1) {
 		/* Someone traversing here: mark it as dead */
@@ -227,7 +227,7 @@ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 	char *p = NULL;
 	int ret = 0;
 
-	if (tdb->read_only) {
+	if (tdb->read_only || tdb->traverse_read) {
 		tdb->ecode = TDB_ERR_RDONLY;
 		return -1;
 	}
-- 
cgit 


From 3387746c4517d6766146080980ac467b72b62316 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Thu, 30 Mar 2006 04:52:39 +0000
Subject: r14799: added a tdb_get_seqnum() call, and the TDB_SEQNUM flag. This
 allows for an extremely lightweight test to see if a tdb has possibly
 changed. (This used to be commit f325ba605ccceca63712c0f2c98961e35e437b3d)

---
 source4/lib/tdb/common/tdb.c | 54 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 4b0d4a31c5..b0411601eb 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -30,6 +30,33 @@
 
 TDB_DATA tdb_null;
 
+/*
+  increment the tdb sequence number if the tdb has been opened using
+  the TDB_SEQNUM flag
+*/
+static void tdb_increment_seqnum(struct tdb_context *tdb)
+{
+	tdb_off_t seqnum=0;
+	
+	if (!(tdb->flags & TDB_SEQNUM)) {
+		return;
+	}
+
+	if (tdb_brlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, F_SETLKW, 1) != 0) {
+		return;
+	}
+
+	/* we ignore errors from this, as we have no sane way of
+	   dealing with them.
+	*/
+	tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
+	seqnum++;
+	tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
+
+	tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1);
+}
+
+
 /* Returns 0 on fail.  On success, return offset of record, and fills
    in rec */
 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, u32 hash,
@@ -203,6 +230,11 @@ static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
 	if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, &rec)))
 		return -1;
 	ret = tdb_do_delete(tdb, rec_ptr, &rec);
+
+	if (ret == 0) {
+		tdb_increment_seqnum(tdb);
+	}
+
 	if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0)
 		TDB_LOG((tdb, 0, "tdb_delete: WARNING tdb_unlock failed!\n"));
 	return ret;
@@ -295,6 +327,9 @@ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 		/* Need to tdb_unallocate() here */
 		goto fail;
 	}
+
+	tdb_increment_seqnum(tdb);
+
  out:
 	SAFE_FREE(p); 
 	tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
@@ -369,3 +404,22 @@ tdb_log_func tdb_log_fn(struct tdb_context *tdb)
 {
 	return tdb->log_fn;
 }
+
+
+/*
+  get the tdb sequence number. Only makes sense if the writers opened
+  with TDB_SEQNUM set. Note that this sequence number will wrap quite
+  quickly, so it should only be used for a 'has something changed'
+  test, not for code that relies on the count of the number of changes
+  made. If you want a counter then use a tdb record.
+
+  The aim of this sequence number is to allow for a very lightweight
+  test of a possible tdb change.
+*/
+int tdb_get_seqnum(struct tdb_context *tdb)
+{
+	tdb_off_t seqnum=0;
+
+	tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
+	return seqnum;
+}
-- 
cgit 


From d3fee429aee87e9c05a4a606fbf0b60b16dac782 Mon Sep 17 00:00:00 2001
From: Andrew Bartlett <abartlet@samba.org>
Date: Mon, 3 Jul 2006 06:40:56 +0000
Subject: r16774: This patch modifies the tdb API to allow the logging function
 to be used as part of ldb.

This allows tdb failures to be passed all the way up to Samba's DEBUG
system, which allowed easier debugging.

Unfortunately I had to extend the tdb API, as the logging function
didn't have a context pointer.

I've worked over the 'debug levels' in TDB.  Most of them were 0,
which didn't seem right, as some were trace-like messages.  We didn't
see any of these previously, except when accessing TDB directly.

Andrew Bartlett
(This used to be commit 58898092c1ce043f6d698db5065f372b79109e22)
---
 source4/lib/tdb/common/tdb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index b0411601eb..2513eecfb1 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -236,7 +236,7 @@ static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
 	}
 
 	if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0)
-		TDB_LOG((tdb, 0, "tdb_delete: WARNING tdb_unlock failed!\n"));
+		TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
 	return ret;
 }
 
-- 
cgit 


From 35fda6c5f344e71b1ed0bd195a62161e31401149 Mon Sep 17 00:00:00 2001
From: Andrew Bartlett <abartlet@samba.org>
Date: Mon, 10 Jul 2006 12:51:36 +0000
Subject: r16916: Implement metze's proposed changes to the tdb logging API.

This clearly links the log function with its private pointer, and
makes the argument list for tdb_open_ex a bit shorter.

Andrew Bartlett
(This used to be commit 5d5503e8d8a10ead3ef21a5ffda52cadb9a07727)
---
 source4/lib/tdb/common/tdb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 2513eecfb1..a052ffeb61 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -402,7 +402,7 @@ int tdb_fd(struct tdb_context *tdb)
 */
 tdb_log_func tdb_log_fn(struct tdb_context *tdb)
 {
-	return tdb->log_fn;
+	return tdb->log.log_fn;
 }
 
 
-- 
cgit 


From cba142f1ae71b03266210e254c251683846d7fd7 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Wed, 18 Oct 2006 21:41:59 +0000
Subject: r19401: make tdb_lockall() much more efficient, and add a
 tdb_lockall_read() call which does a read lock on all chains. These will be
 used to make ldb searches more efficient (This used to be commit
 de664ec1f8cf179f1d650563272c0de3f7636e2b)

---
 source4/lib/tdb/common/tdb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index a052ffeb61..52e2d633b0 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -42,7 +42,7 @@ static void tdb_increment_seqnum(struct tdb_context *tdb)
 		return;
 	}
 
-	if (tdb_brlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, F_SETLKW, 1) != 0) {
+	if (tdb_brlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, F_SETLKW, 1, 1) != 0) {
 		return;
 	}
 
@@ -53,7 +53,7 @@ static void tdb_increment_seqnum(struct tdb_context *tdb)
 	seqnum++;
 	tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
 
-	tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1);
+	tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1, 1);
 }
 
 
-- 
cgit 


From 118c064a473562274bff8fb47f37437db904b8fb Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Fri, 20 Oct 2006 08:06:14 +0000
Subject: r19423: merge some tdb changes from SAMBA_3_0 to SAMBA_4_0

this is in preparation of a merge in the other direction
(This used to be commit db3211079fd594aa03c3b9bb3eb6ad86bdd32837)
---
 source4/lib/tdb/common/tdb.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 52e2d633b0..8d067ebecc 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -423,3 +423,8 @@ int tdb_get_seqnum(struct tdb_context *tdb)
 	tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
 	return seqnum;
 }
+
+int tdb_hash_size(struct tdb_context *tdb)
+{
+	return tdb->header.hash_size;
+}
-- 
cgit 


From d71502b07cc0d113f4555d244bef9f06024907e4 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Fri, 20 Oct 2006 09:48:18 +0000
Subject: r19425: two more tdb functions from samba3 (This used to be commit
 c9d9d79c34e8a36a6f684b173b1cc861330adc5c)

---
 source4/lib/tdb/common/tdb.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 8d067ebecc..5810f46d56 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -428,3 +428,14 @@ int tdb_hash_size(struct tdb_context *tdb)
 {
 	return tdb->header.hash_size;
 }
+
+size_t tdb_map_size(struct tdb_context *tdb)
+{
+	return tdb->map_size;
+}
+
+int tdb_get_flags(struct tdb_context *tdb)
+{
+	return tdb->flags;
+}
+
-- 
cgit 


From 6cb2ce4275b727e220ab24bd22aa65f3f47e0557 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vlendec@samba.org>
Date: Sat, 17 Feb 2007 21:46:13 +0000
Subject: r21410: We have to increment the sequence number also when
 tdb_update_hash() succeeded. Found while testing the brlock seqnum patch.

Tridge, please check!

Volker
(This used to be commit e518c68fc5446304611d096ac2e3cab744734fc3)
---
 source4/lib/tdb/common/tdb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 5810f46d56..4a1a3b9c6f 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -328,9 +328,9 @@ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 		goto fail;
 	}
 
+ out:
 	tdb_increment_seqnum(tdb);
 
- out:
 	SAFE_FREE(p); 
 	tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
 	return ret;
-- 
cgit 


From 31b3c38c02fd1fb4849518c1d5676cbaf5a9d1f3 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vlendec@samba.org>
Date: Sat, 17 Feb 2007 23:41:45 +0000
Subject: r21412: The last patch also incremented the seqnum when tdb_store
 failed. Not as bad as not doing it at all, but needs fixing. Also simplify
 the logic, I had missed the "goto out" at the end of the function.

Volker
(This used to be commit ed30a0ff602d0a1d4409bee4faf12b6979b5f4b8)
---
 source4/lib/tdb/common/tdb.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 4a1a3b9c6f..b610cb35b2 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -257,7 +257,7 @@ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 	u32 hash;
 	tdb_off_t rec_ptr;
 	char *p = NULL;
-	int ret = 0;
+	int ret = -1;
 
 	if (tdb->read_only || tdb->traverse_read) {
 		tdb->ecode = TDB_ERR_RDONLY;
@@ -277,8 +277,10 @@ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 		}
 	} else {
 		/* first try in-place update, on modify or replace. */
-		if (tdb_update_hash(tdb, key, hash, dbuf) == 0)
-			goto out;
+		if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
+			ret = 0;
+			goto fail; /* Well, not really failed */
+		}
 		if (tdb->ecode == TDB_ERR_NOEXIST &&
 		    flag == TDB_MODIFY) {
 			/* if the record doesn't exist and we are in TDB_MODIFY mode then
@@ -328,15 +330,15 @@ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 		goto fail;
 	}
 
- out:
-	tdb_increment_seqnum(tdb);
+	ret = 0;
+ fail:
+	if (ret == 0) {
+		tdb_increment_seqnum(tdb);
+	}
 
 	SAFE_FREE(p); 
 	tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
 	return ret;
-fail:
-	ret = -1;
-	goto out;
 }
 
 
-- 
cgit 


From eaaf246d4fa42df5e590ee5bfe54e672abd26b02 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vlendec@samba.org>
Date: Mon, 19 Feb 2007 11:45:33 +0000
Subject: r21445: Apply tdb_parse_record Tridges error return, merge to 3_0_25
 and 4_0 (This used to be commit afe7d7855841066b88859976ac748cbf438a9a9f)

---
 source4/lib/tdb/common/tdb.c | 64 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 51 insertions(+), 13 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index b610cb35b2..03a66804b3 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -56,6 +56,10 @@ static void tdb_increment_seqnum(struct tdb_context *tdb)
 	tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1, 1);
 }
 
+static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
+{
+	return memcmp(data.dptr, key.dptr, data.dsize);
+}
 
 /* Returns 0 on fail.  On success, return offset of record, and fills
    in rec */
@@ -73,19 +77,12 @@ static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, u32 hash,
 		if (tdb_rec_read(tdb, rec_ptr, r) == -1)
 			return 0;
 
-		if (!TDB_DEAD(r) && hash==r->full_hash && key.dsize==r->key_len) {
-			unsigned char *k;
-			/* a very likely hit - read the key */
-			k = tdb_alloc_read(tdb, rec_ptr + sizeof(*r), 
-					   r->key_len);
-			if (!k)
-				return 0;
-
-			if (memcmp(key.dptr, k, key.dsize) == 0) {
-				SAFE_FREE(k);
-				return rec_ptr;
-			}
-			SAFE_FREE(k);
+		if (!TDB_DEAD(r) && hash==r->full_hash
+		    && key.dsize==r->key_len
+		    && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
+				      r->key_len, tdb_key_compare,
+				      NULL) == 0) {
+			return rec_ptr;
 		}
 		rec_ptr = r->next;
 	}
@@ -163,6 +160,47 @@ TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
 	return ret;
 }
 
+/*
+ * Find an entry in the database and hand the record's data to a parsing
+ * function. The parsing function is executed under the chain read lock, so it
+ * should be fast and should not block on other syscalls.
+ *
+ * DONT CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
+ *
+ * For mmapped tdb's that do not have a transaction open it points the parsing
+ * function directly at the mmap area, it avoids the malloc/memcpy in this
+ * case. If a transaction is open or no mmap is available, it has to do
+ * malloc/read/parse/free.
+ *
+ * This is interesting for all readers of potentially large data structures in
+ * the tdb records, ldb indexes being one example.
+ */
+
+int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
+		     int (*parser)(TDB_DATA key, TDB_DATA data,
+				   void *private_data),
+		     void *private_data)
+{
+	tdb_off_t rec_ptr;
+	struct list_struct rec;
+	int ret;
+	u32 hash;
+
+	/* find which hash bucket it is in */
+	hash = tdb->hash_fn(&key);
+
+	if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
+		return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
+	}
+
+	ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
+			     rec.data_len, parser, private_data);
+
+	tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
+
+	return ret;
+}
+
 /* check if an entry in the database exists 
 
    note that 1 is returned if the key is found and 0 is returned if not found
-- 
cgit 


From 0aebd296cd6834567509d0b100d486f1ef4d104e Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vlendec@samba.org>
Date: Tue, 6 Mar 2007 10:11:15 +0000
Subject: r21722: Add the dead record functionality presented on
 samba-technical@samba.org. If you do a tdb_set_max_dead(tdb, n), then for
 this tdb a delete operation will only mark a record as dead and re-use it if
 a new record is created. The parameter n allows for at most n dead records
 per hash chain. If this number is exceeded, all dead records are put on the
 central freelist.

Volker
(This used to be commit 98a27ab28a3cd554e370a9a0e3652f4dea8749e9)
---
 source4/lib/tdb/common/tdb.c | 181 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 175 insertions(+), 6 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 03a66804b3..a6b472ae94 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -258,6 +258,66 @@ int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct
 	return 0;
 }
 
+static int tdb_count_dead(struct tdb_context *tdb, u32 hash)
+{
+	int res = 0;
+	tdb_off_t rec_ptr;
+	struct list_struct rec;
+	
+	/* read in the hash top */
+	if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
+		return 0;
+
+	while (rec_ptr) {
+		if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
+			return 0;
+
+		if (rec.magic == TDB_DEAD_MAGIC) {
+			res += 1;
+		}
+		rec_ptr = rec.next;
+	}
+	return res;
+}
+
+/*
+ * Purge all DEAD records from a hash chain
+ */
+static int tdb_purge_dead(struct tdb_context *tdb, u32 hash)
+{
+	int res = -1;
+	struct list_struct rec;
+	tdb_off_t rec_ptr;
+
+	if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
+		return -1;
+	}
+	
+	/* read in the hash top */
+	if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
+		goto fail;
+
+	while (rec_ptr) {
+		tdb_off_t next;
+
+		if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
+			goto fail;
+		}
+
+		next = rec.next;
+
+		if (rec.magic == TDB_DEAD_MAGIC
+		    && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
+			goto fail;
+		}
+		rec_ptr = next;
+	}
+	res = 0;
+ fail:
+	tdb_unlock(tdb, -1, F_WRLCK);
+	return res;
+}
+
 /* delete an entry in the database given a key */
 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
 {
@@ -265,9 +325,42 @@ static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
 	struct list_struct rec;
 	int ret;
 
-	if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, &rec)))
-		return -1;
-	ret = tdb_do_delete(tdb, rec_ptr, &rec);
+	if (tdb->max_dead_records != 0) {
+
+		/*
+		 * Allow for some dead records per hash chain, mainly for
+		 * tdb's with a very high create/delete rate like locking.tdb.
+		 */
+
+		if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
+			return -1;
+
+		if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
+			/*
+			 * Don't let the per-chain freelist grow too large,
+			 * delete all existing dead records
+			 */
+			tdb_purge_dead(tdb, hash);
+		}
+
+		if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {
+			tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
+			return -1;
+		}
+
+		/*
+		 * Just mark the record as dead.
+		 */
+		rec.magic = TDB_DEAD_MAGIC;
+		ret = tdb_rec_write(tdb, rec_ptr, &rec);
+	}
+	else {
+		if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK,
+						   &rec)))
+			return -1;
+
+		ret = tdb_do_delete(tdb, rec_ptr, &rec);
+	}
 
 	if (ret == 0) {
 		tdb_increment_seqnum(tdb);
@@ -284,6 +377,35 @@ int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
 	return tdb_delete_hash(tdb, key, hash);
 }
 
+/*
+ * See if we have a dead record around with enough space
+ */
+static tdb_off_t tdb_find_dead(struct tdb_context *tdb, u32 hash,
+			       struct list_struct *r, tdb_len_t length)
+{
+	tdb_off_t rec_ptr;
+	
+	/* read in the hash top */
+	if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
+		return 0;
+
+	/* keep looking until we find the right record */
+	while (rec_ptr) {
+		if (tdb_rec_read(tdb, rec_ptr, r) == -1)
+			return 0;
+
+		if (TDB_DEAD(r) && r->rec_len >= length) {
+			/*
+			 * First fit for simple coding, TODO: change to best
+			 * fit
+			 */
+			return rec_ptr;
+		}
+		rec_ptr = r->next;
+	}
+	return 0;
+}
+
 /* store an element in the database, replacing any existing element
    with the same key 
 
@@ -316,8 +438,7 @@ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 	} else {
 		/* first try in-place update, on modify or replace. */
 		if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
-			ret = 0;
-			goto fail; /* Well, not really failed */
+			goto done;
 		}
 		if (tdb->ecode == TDB_ERR_NOEXIST &&
 		    flag == TDB_MODIFY) {
@@ -347,9 +468,56 @@ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 	if (dbuf.dsize)
 		memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
 
+	if (tdb->max_dead_records != 0) {
+		/*
+		 * Allow for some dead records per hash chain, look if we can
+		 * find one that can hold the new record. We need enough space
+		 * for key, data and tailer. If we find one, we don't have to
+		 * consult the central freelist.
+		 */
+		rec_ptr = tdb_find_dead(
+			tdb, hash, &rec,
+			key.dsize + dbuf.dsize + sizeof(tdb_off_t));
+
+		if (rec_ptr != 0) {
+			rec.key_len = key.dsize;
+			rec.data_len = dbuf.dsize;
+			rec.full_hash = hash;
+			rec.magic = TDB_MAGIC;
+			if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
+			    || tdb->methods->tdb_write(
+				    tdb, rec_ptr + sizeof(rec),
+				    p, key.dsize + dbuf.dsize) == -1) {
+				goto fail;
+			}
+			goto done;
+		}
+	}
+
+	/*
+	 * We have to allocate some space from the freelist, so this means we
+	 * have to lock it. Use the chance to purge all the DEAD records from
+	 * the hash chain under the freelist lock.
+	 */
+
+	if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
+		goto fail;
+	}
+
+	if ((tdb->max_dead_records != 0)
+	    && (tdb_purge_dead(tdb, hash) == -1)) {
+		tdb_unlock(tdb, -1, F_WRLCK);
+		goto fail;
+	}
+
 	/* we have to allocate some space */
-	if (!(rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec)))
+	rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec);
+
+	tdb_unlock(tdb, -1, F_WRLCK);
+
+	if (rec_ptr == 0) {
 		goto fail;
+	}
 
 	/* Read hash top into next ptr */
 	if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
@@ -368,6 +536,7 @@ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 		goto fail;
 	}
 
+ done:
 	ret = 0;
  fail:
 	if (ret == 0) {
-- 
cgit 


From 769efdf048d80c4081487d555649de0f31738dd1 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze@samba.org>
Date: Mon, 2 Apr 2007 18:56:25 +0000
Subject: r22041: merge trivial changes from samba3

metze
(This used to be commit 902a76ca705f07c61f86a9ef1346583ba9d3157d)
---
 source4/lib/tdb/common/tdb.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index a6b472ae94..25103d826e 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -564,9 +564,10 @@ int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
 	dbuf = tdb_fetch(tdb, key);
 
 	if (dbuf.dptr == NULL) {
-		dbuf.dptr = malloc(new_dbuf.dsize);
+		dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
 	} else {
-		dbuf.dptr = realloc(dbuf.dptr, dbuf.dsize + new_dbuf.dsize);
+		dbuf.dptr = (unsigned char *)realloc(dbuf.dptr,
+						     dbuf.dsize + new_dbuf.dsize);
 	}
 
 	if (dbuf.dptr == NULL) {
-- 
cgit 


From a2b250258502907399dbbe9f738d4212c1b0618e Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Mon, 14 May 2007 01:00:06 +0000
Subject: r22832: merged the latest tdb changes from ctdb to Samba4 (This used
 to be commit a88ab4fa3a07c31bc45c612043f9e096f384eda4)

---
 source4/lib/tdb/common/tdb.c | 33 +++++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 25103d826e..70d050e7e6 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -31,10 +31,10 @@
 TDB_DATA tdb_null;
 
 /*
-  increment the tdb sequence number if the tdb has been opened using
+  non-blocking increment of the tdb sequence number if the tdb has been opened using
   the TDB_SEQNUM flag
 */
-static void tdb_increment_seqnum(struct tdb_context *tdb)
+void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
 {
 	tdb_off_t seqnum=0;
 	
@@ -42,16 +42,29 @@ static void tdb_increment_seqnum(struct tdb_context *tdb)
 		return;
 	}
 
-	if (tdb_brlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, F_SETLKW, 1, 1) != 0) {
-		return;
-	}
-
 	/* we ignore errors from this, as we have no sane way of
 	   dealing with them.
 	*/
 	tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
 	seqnum++;
 	tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
+}
+
+/*
+  increment the tdb sequence number if the tdb has been opened using
+  the TDB_SEQNUM flag
+*/
+static void tdb_increment_seqnum(struct tdb_context *tdb)
+{
+	if (!(tdb->flags & TDB_SEQNUM)) {
+		return;
+	}
+
+	if (tdb_brlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, F_SETLKW, 1, 1) != 0) {
+		return;
+	}
+
+	tdb_increment_seqnum_nonblock(tdb);
 
 	tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1, 1);
 }
@@ -649,3 +662,11 @@ int tdb_get_flags(struct tdb_context *tdb)
 	return tdb->flags;
 }
 
+
+/*
+  enable sequence number handling on an open tdb
+*/
+void tdb_enable_seqnum(struct tdb_context *tdb)
+{
+	tdb->flags |= TDB_SEQNUM;
+}
-- 
cgit 


From 1f193bf76da65012a8529f93e6e21d558db15674 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra@samba.org>
Date: Fri, 22 Jun 2007 17:36:10 +0000
Subject: r23590: Fix realloc leak on failure case from Jim Meyering 
 <jim@meyering.net>. Jeremy. (This used to be commit
 59ba128cb61e77a830ddd8b8e1d5d0fd00f99736)

---
 source4/lib/tdb/common/tdb.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 70d050e7e6..28129a8c8e 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -579,8 +579,12 @@ int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
 	if (dbuf.dptr == NULL) {
 		dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
 	} else {
-		dbuf.dptr = (unsigned char *)realloc(dbuf.dptr,
+		unsigned char *new_dptr = (unsigned char *)realloc(dbuf.dptr,
 						     dbuf.dsize + new_dbuf.dsize);
+		if (new_dptr == NULL) {
+			free(dbuf.dptr);
+		}
+		dbuf.dptr = new_dptr;
 	}
 
 	if (dbuf.dptr == NULL) {
-- 
cgit 


From b8d69a7ea2505b706ff7c74d7c97bc89d82dfa07 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Tue, 10 Jul 2007 02:46:15 +0000
Subject: r23795: more v2->v3 conversion (This used to be commit
 84b468b2f8f2dffda89593f816e8bc6a8b6d42ac)

---
 source4/lib/tdb/common/tdb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 28129a8c8e..97749abc99 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -14,7 +14,7 @@
    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
-   version 2 of the License, or (at your option) any later version.
+   version 3 of the License, or (at your option) any later version.
 
    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-- 
cgit 


From 6c973f4e8ccbcb6c9275f8a54e26abb19df7e15a Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Tue, 10 Jul 2007 03:42:26 +0000
Subject: r23798: updated old Temple Place FSF addresses to new URL (This used
 to be commit 40c0919aaa9c1b14bbaebb95ecce53eb0380fdbb)

---
 source4/lib/tdb/common/tdb.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 97749abc99..d4e6e18664 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -22,8 +22,7 @@
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
 
 #include "tdb_private.h"
-- 
cgit 


From f3e13632813c543583fe1d04203825743aa99111 Mon Sep 17 00:00:00 2001
From: Jelmer Vernooij <jelmer@samba.org>
Date: Sat, 11 Aug 2007 21:19:24 +0000
Subject: r24336: Use standard data type uint32_t rather than tdb-specific u32.
 (This used to be commit f90a698387c53508862eb6359bd4d1fba1d2b4b0)

---
 source4/lib/tdb/common/tdb.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index d4e6e18664..0e9d1dbd74 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -75,7 +75,7 @@ static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
 
 /* Returns 0 on fail.  On success, return offset of record, and fills
    in rec */
-static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, u32 hash,
+static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
 			struct list_struct *r)
 {
 	tdb_off_t rec_ptr;
@@ -102,10 +102,11 @@ static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, u32 hash,
 }
 
 /* As tdb_find, but if you succeed, keep the lock */
-tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, int locktype,
+tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, 
+							 uint32_t hash, int locktype,
 			   struct list_struct *rec)
 {
-	u32 rec_ptr;
+	uint32_t rec_ptr;
 
 	if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
 		return 0;
@@ -119,7 +120,7 @@ tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, in
    is <= the old data size and the key exists.
    on failure return -1.
 */
-static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, TDB_DATA dbuf)
+static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
 {
 	struct list_struct rec;
 	tdb_off_t rec_ptr;
@@ -158,7 +159,7 @@ TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
 	tdb_off_t rec_ptr;
 	struct list_struct rec;
 	TDB_DATA ret;
-	u32 hash;
+	uint32_t hash;
 
 	/* find which hash bucket it is in */
 	hash = tdb->hash_fn(&key);
@@ -196,7 +197,7 @@ int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
 	tdb_off_t rec_ptr;
 	struct list_struct rec;
 	int ret;
-	u32 hash;
+	uint32_t hash;
 
 	/* find which hash bucket it is in */
 	hash = tdb->hash_fn(&key);
@@ -219,7 +220,7 @@ int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
    this doesn't match the conventions in the rest of this module, but is
    compatible with gdbm
 */
-static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
+static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 {
 	struct list_struct rec;
 	
@@ -231,7 +232,7 @@ static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
 
 int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
 {
-	u32 hash = tdb->hash_fn(&key);
+	uint32_t hash = tdb->hash_fn(&key);
 	return tdb_exists_hash(tdb, key, hash);
 }
 
@@ -270,7 +271,7 @@ int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct
 	return 0;
 }
 
-static int tdb_count_dead(struct tdb_context *tdb, u32 hash)
+static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash)
 {
 	int res = 0;
 	tdb_off_t rec_ptr;
@@ -295,7 +296,7 @@ static int tdb_count_dead(struct tdb_context *tdb, u32 hash)
 /*
  * Purge all DEAD records from a hash chain
  */
-static int tdb_purge_dead(struct tdb_context *tdb, u32 hash)
+static int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
 {
 	int res = -1;
 	struct list_struct rec;
@@ -331,7 +332,7 @@ static int tdb_purge_dead(struct tdb_context *tdb, u32 hash)
 }
 
 /* delete an entry in the database given a key */
-static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
+static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 {
 	tdb_off_t rec_ptr;
 	struct list_struct rec;
@@ -385,14 +386,14 @@ static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
 
 int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
 {
-	u32 hash = tdb->hash_fn(&key);
+	uint32_t hash = tdb->hash_fn(&key);
 	return tdb_delete_hash(tdb, key, hash);
 }
 
 /*
  * See if we have a dead record around with enough space
  */
-static tdb_off_t tdb_find_dead(struct tdb_context *tdb, u32 hash,
+static tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
 			       struct list_struct *r, tdb_len_t length)
 {
 	tdb_off_t rec_ptr;
@@ -426,7 +427,7 @@ static tdb_off_t tdb_find_dead(struct tdb_context *tdb, u32 hash,
 int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 {
 	struct list_struct rec;
-	u32 hash;
+	uint32_t hash;
 	tdb_off_t rec_ptr;
 	char *p = NULL;
 	int ret = -1;
@@ -564,7 +565,7 @@ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 /* Append to an entry. Create if not exist. */
 int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
 {
-	u32 hash;
+	uint32_t hash;
 	TDB_DATA dbuf;
 	int ret = -1;
 
-- 
cgit 


From 9170998427ebbb7abfd9b482fb6e0d051bca5205 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Tue, 15 Jan 2008 14:05:47 +1100
Subject: merged tdb from ctdb bzr tree (This used to be commit
 ed0c3a0f74c305b3b8554b05c3f97cf79db8296a)

---
 source4/lib/tdb/common/tdb.c | 119 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 115 insertions(+), 4 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 0e9d1dbd74..fd4e1cc8af 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -102,8 +102,7 @@ static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
 }
 
 /* As tdb_find, but if you succeed, keep the lock */
-tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, 
-							 uint32_t hash, int locktype,
+tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
 			   struct list_struct *rec)
 {
 	uint32_t rec_ptr;
@@ -237,14 +236,15 @@ int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
 }
 
 /* actually delete an entry in the database given the offset */
-int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct*rec)
+int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct *rec)
 {
 	tdb_off_t last_ptr, i;
 	struct list_struct lastrec;
 
 	if (tdb->read_only || tdb->traverse_read) return -1;
 
-	if (tdb_write_lock_record(tdb, rec_ptr) == -1) {
+	if (tdb->traverse_write != 0 || 
+	    tdb_write_lock_record(tdb, rec_ptr) == -1) {
 		/* Someone traversing here: mark it as dead */
 		rec->magic = TDB_DEAD_MAGIC;
 		return tdb_rec_write(tdb, rec_ptr, rec);
@@ -666,6 +666,16 @@ int tdb_get_flags(struct tdb_context *tdb)
 	return tdb->flags;
 }
 
+void tdb_add_flags(struct tdb_context *tdb, unsigned flags)
+{
+	tdb->flags |= flags;
+}
+
+void tdb_remove_flags(struct tdb_context *tdb, unsigned flags)
+{
+	tdb->flags &= ~flags;
+}
+
 
 /*
   enable sequence number handling on an open tdb
@@ -674,3 +684,104 @@ void tdb_enable_seqnum(struct tdb_context *tdb)
 {
 	tdb->flags |= TDB_SEQNUM;
 }
+
+
+/*
+  wipe the entire database, deleting all records. This can be done
+  very fast by using a global lock. The entire data portion of the
+  file becomes a single entry in the freelist.
+ */
+int tdb_wipe_all(struct tdb_context *tdb)
+{
+	int i;
+	tdb_off_t offset = 0;
+	ssize_t data_len;
+
+	if (tdb_lockall(tdb) != 0) {
+		return -1;
+	}
+
+	/* wipe the hashes */
+	for (i=0;i<tdb->header.hash_size;i++) {
+		if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
+			TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i));
+			goto failed;
+		}
+	}
+
+	/* wipe the freelist */
+	if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
+		TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n"));
+		goto failed;
+	}
+
+	if (tdb_ofs_write(tdb, TDB_RECOVERY_HEAD, &offset) == -1) {
+		TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write recovery head\n"));
+		goto failed;		
+	}
+
+	/* add all the rest of the file to the freelist */
+	data_len = (tdb->map_size - TDB_DATA_START(tdb->header.hash_size)) - sizeof(struct list_struct);
+	if (data_len > 0) {
+		struct list_struct rec;
+		memset(&rec,'\0',sizeof(rec));
+		rec.rec_len = data_len;
+		if (tdb_free(tdb, TDB_DATA_START(tdb->header.hash_size), &rec) == -1) {
+			TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to add free record\n"));
+			goto failed;
+		}
+	}
+
+	if (tdb_unlockall(tdb) != 0) {
+		TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
+		goto failed;
+	}
+
+	return 0;
+
+failed:
+	tdb_unlockall(tdb);
+	return -1;
+}
+
+
+/* 
+   validate the integrity of all tdb hash chains. Useful when debugging
+ */
+int tdb_validate(struct tdb_context *tdb)
+{
+	int h;
+	for (h=-1;h<(int)tdb->header.hash_size;h++) {
+		tdb_off_t rec_ptr;
+		uint32_t count = 0;
+		if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &rec_ptr) == -1) {
+			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_validate: failed ofs_read at top of hash %d\n", h));
+			return -1;
+		}
+		while (rec_ptr) {
+			struct list_struct r;
+			tdb_off_t size;
+
+			if (tdb_rec_read(tdb, rec_ptr, &r) == -1) {
+				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_validate: failed rec_read h=%d rec_ptr=%u count=%u\n",
+					 h, rec_ptr, count));
+				return -1;
+			}
+			if (tdb_ofs_read(tdb, rec_ptr + sizeof(r) + r.rec_len - sizeof(tdb_off_t), &size) == -1) {
+				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_validate: failed ofs_read h=%d rec_ptr=%u count=%u\n",
+					 h, rec_ptr, count));
+				return -1;
+			}
+			if (size != r.rec_len + sizeof(r)) {
+				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_validate: failed size check size=%u h=%d rec_ptr=%u count=%u\n",
+					 size, h, rec_ptr, count));
+				return -1;
+			}
+			rec_ptr = r.next;
+			count++;
+		}		
+	}
+	return 0;
+}
+
+
-- 
cgit 


From 61a015a786c52008f4471e62750ad93507bce518 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Fri, 18 Jan 2008 15:45:22 +1100
Subject: merged changes from v3-2-test (This used to be commit
 7077df3e2e3f171532f6a5ac87d45201736c9c11)

---
 source4/lib/tdb/common/tdb.c | 42 ------------------------------------------
 1 file changed, 42 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index fd4e1cc8af..ea5d9ccc60 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -743,45 +743,3 @@ failed:
 	tdb_unlockall(tdb);
 	return -1;
 }
-
-
-/* 
-   validate the integrity of all tdb hash chains. Useful when debugging
- */
-int tdb_validate(struct tdb_context *tdb)
-{
-	int h;
-	for (h=-1;h<(int)tdb->header.hash_size;h++) {
-		tdb_off_t rec_ptr;
-		uint32_t count = 0;
-		if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &rec_ptr) == -1) {
-			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_validate: failed ofs_read at top of hash %d\n", h));
-			return -1;
-		}
-		while (rec_ptr) {
-			struct list_struct r;
-			tdb_off_t size;
-
-			if (tdb_rec_read(tdb, rec_ptr, &r) == -1) {
-				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_validate: failed rec_read h=%d rec_ptr=%u count=%u\n",
-					 h, rec_ptr, count));
-				return -1;
-			}
-			if (tdb_ofs_read(tdb, rec_ptr + sizeof(r) + r.rec_len - sizeof(tdb_off_t), &size) == -1) {
-				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_validate: failed ofs_read h=%d rec_ptr=%u count=%u\n",
-					 h, rec_ptr, count));
-				return -1;
-			}
-			if (size != r.rec_len + sizeof(r)) {
-				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_validate: failed size check size=%u h=%d rec_ptr=%u count=%u\n",
-					 size, h, rec_ptr, count));
-				return -1;
-			}
-			rec_ptr = r.next;
-			count++;
-		}		
-	}
-	return 0;
-}
-
-
-- 
cgit 


From 8c4e52547bbcf8b334c0a1ba65191c8258e00132 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Thu, 7 Feb 2008 23:06:44 +1100
Subject: merge growing tdb for tdb_wipe_all() fix from ctdb (This used to be
 commit df4efb902ec5053ae9d7c6e4fd1e21255ca66914)

---
 source4/lib/tdb/common/tdb.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index ea5d9ccc60..a25c3e7aca 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -696,11 +696,31 @@ int tdb_wipe_all(struct tdb_context *tdb)
 	int i;
 	tdb_off_t offset = 0;
 	ssize_t data_len;
+	tdb_off_t recovery_head;
+	tdb_len_t recovery_size = 0;
 
 	if (tdb_lockall(tdb) != 0) {
 		return -1;
 	}
 
+	/* see if the tdb has a recovery area, and remember its size
+	   if so. We don't want to lose this as otherwise each
+	   tdb_wipe_all() in a transaction will increase the size of
+	   the tdb by the size of the recovery area */
+	if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
+		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n"));
+		goto failed;
+	}
+
+	if (recovery_head != 0) {
+		struct list_struct rec;
+		if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
+			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n"));
+			goto failed; /* tdb_lockall() succeeded above: the failed: path must unlock */
+		}	
+		recovery_size = rec.rec_len + sizeof(rec);
+	}
+
 	/* wipe the hashes */
 	for (i=0;i<tdb->header.hash_size;i++) {
 		if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
@@ -722,6 +742,11 @@ int tdb_wipe_all(struct tdb_context *tdb)
 
 	/* add all the rest of the file to the freelist */
 	data_len = (tdb->map_size - TDB_DATA_START(tdb->header.hash_size)) - sizeof(struct list_struct);
+	if (data_len < recovery_size+sizeof(tdb_off_t)) {
+		recovery_size = 0;
+	} else {
+		data_len -= recovery_size;
+	}
 	if (data_len > 0) {
 		struct list_struct rec;
 		memset(&rec,'\0',sizeof(rec));
@@ -732,6 +757,24 @@ int tdb_wipe_all(struct tdb_context *tdb)
 		}
 	}
 
+	/* possibly add the recovery record */
+	if (recovery_size != 0) {
+		struct list_struct rec;
+		
+		recovery_head = tdb->map_size - recovery_size;
+
+		ZERO_STRUCT(rec);
+		rec.rec_len = recovery_size - sizeof(rec);
+		if (tdb_rec_write(tdb, recovery_head, &rec) != 0) {
+			TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to add recovery record\n"));
+			goto failed;
+		}
+		if (tdb_ofs_write(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
+			TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write recovery head\n"));
+			goto failed;		
+		}
+	}
+
 	if (tdb_unlockall(tdb) != 0) {
 		TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
 		goto failed;
-- 
cgit 


From 77dab7f8579132468627fb2bb4641ccdf242dd11 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Fri, 8 Feb 2008 14:13:19 +1100
Subject: merge tdb changes from ctdb (This used to be commit
 b3e60a388d338ef90540007239e88563cb9ba27a)

---
 source4/lib/tdb/common/tdb.c | 80 ++++++++++++++++++++++++++------------------
 1 file changed, 47 insertions(+), 33 deletions(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index a25c3e7aca..767452c9b3 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -686,10 +686,36 @@ void tdb_enable_seqnum(struct tdb_context *tdb)
 }
 
 
+/*
+  add a region of the file to the freelist. Length is the size of the region in bytes, 
+  which includes the free list header that needs to be added
+ */
+static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t length)
+{
+	struct list_struct rec;
+	if (length <= sizeof(rec)) {
+		/* the region is not worth adding */
+		return 0;
+	}
+	if (length + offset > tdb->map_size) {
+		TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n"));
+		return -1;		
+	}
+	memset(&rec,'\0',sizeof(rec));
+	rec.rec_len = length - sizeof(rec);
+	if (tdb_free(tdb, offset, &rec) == -1) {
+		TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: failed to add free record\n"));
+		return -1;
+	}
+	return 0;
+}
+
 /*
   wipe the entire database, deleting all records. This can be done
   very fast by using a global lock. The entire data portion of the
   file becomes a single entry in the freelist.
+
+  This code carefully steps around the recovery area, leaving it alone
  */
 int tdb_wipe_all(struct tdb_context *tdb)
 {
@@ -735,43 +761,31 @@ int tdb_wipe_all(struct tdb_context *tdb)
 		goto failed;
 	}
 
-	if (tdb_ofs_write(tdb, TDB_RECOVERY_HEAD, &offset) == -1) {
-		TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write recovery head\n"));
-		goto failed;		
-	}
-
-	/* add all the rest of the file to the freelist */
-	data_len = (tdb->map_size - TDB_DATA_START(tdb->header.hash_size)) - sizeof(struct list_struct);
-	if (data_len < recovery_size+sizeof(tdb_off_t)) {
-		recovery_size = 0;
-	} else {
-		data_len -= recovery_size;
-	}
-	if (data_len > 0) {
-		struct list_struct rec;
-		memset(&rec,'\0',sizeof(rec));
-		rec.rec_len = data_len;
-		if (tdb_free(tdb, TDB_DATA_START(tdb->header.hash_size), &rec) == -1) {
-			TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to add free record\n"));
+	/* add all the rest of the file to the freelist, possibly leaving a gap 
+	   for the recovery area */
+	if (recovery_size == 0) {
+		/* the simple case - the whole file can be used as a freelist */
+		data_len = (tdb->map_size - TDB_DATA_START(tdb->header.hash_size));
+		if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
 			goto failed;
 		}
-	}
-
-	/* possibly add the recovery record */
-	if (recovery_size != 0) {
-		struct list_struct rec;
-		
-		recovery_head = tdb->map_size - recovery_size;
-
-		ZERO_STRUCT(rec);
-		rec.rec_len = recovery_size - sizeof(rec);
-		if (tdb_rec_write(tdb, recovery_head, &rec) != 0) {
-			TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to add recovery record\n"));
+	} else {
+		/* we need to add two freelist entries - one on either
+		   side of the recovery area 
+
+		   Note that we cannot shift the recovery area during
+		   this operation. Only the transaction.c code may
+		   move the recovery area or we risk subtle data
+		   corruption
+		*/
+		data_len = (recovery_head - TDB_DATA_START(tdb->header.hash_size));
+		if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
 			goto failed;
 		}
-		if (tdb_ofs_write(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
-			TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write recovery head\n"));
-			goto failed;		
+		/* and the 2nd free list entry after the recovery area - if any */
+		data_len = tdb->map_size - (recovery_head+recovery_size);
+		if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
+			goto failed;
 		}
 	}
 
-- 
cgit 


From 8c88209c6f4b57b0dbe1459bd5bc583c5d321758 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl@samba.org>
Date: Tue, 12 Aug 2008 22:31:52 +0200
Subject: Attempt to fix bug 5684

With the ctdb checkin dde9f3f006, tdb optimized out write lock checks for
write-enabled transactions. Sadly, this also removed the possibility to ever
remove dead records left over from tdb_delete calls within a transaction.

Tridge, please check this! Did dde9f3f006 have any reason beyond performance
optimizations?

Thanks,

Volker
(cherry picked from commit 3f884c4ae36f3260e63626bdd4989d9258ae6497)
(This used to be commit 1d85e0647e287d269b3f6b534da88f497d6f76c3)
---
 source4/lib/tdb/common/tdb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'source4/lib/tdb/common/tdb.c')

diff --git a/source4/lib/tdb/common/tdb.c b/source4/lib/tdb/common/tdb.c
index 767452c9b3..c7cec297f6 100644
--- a/source4/lib/tdb/common/tdb.c
+++ b/source4/lib/tdb/common/tdb.c
@@ -243,7 +243,7 @@ int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct
 
 	if (tdb->read_only || tdb->traverse_read) return -1;
 
-	if (tdb->traverse_write != 0 || 
+	if (((tdb->traverse_write != 0) && (!TDB_DEAD(rec))) ||
 	    tdb_write_lock_record(tdb, rec_ptr) == -1) {
 		/* Someone traversing here: mark it as dead */
 		rec->magic = TDB_DEAD_MAGIC;
-- 
cgit