/* Unix SMB/Netbios implementation. Version 3.0 Samba database functions Copyright (C) Andrew Tridgell 1999-2000 Copyright (C) Luke Kenneth Casson Leighton 2000 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #if STANDALONE #include <stdlib.h> #include <stdio.h> #include <fcntl.h> #include <unistd.h> #include <string.h> #include <fcntl.h> #include <errno.h> #include <sys/mman.h> #include <sys/stat.h> #include "tdb.h" #else #include "includes.h" #endif #define TDB_VERSION (0x26011967 + 3) #define TDB_MAGIC (0x26011999U) #define TDB_FREE_MAGIC (~TDB_MAGIC) #define TDB_ALIGN 4 #define MIN_REC_SIZE (2*sizeof(struct list_struct) + 200) #define DEFAULT_HASH_SIZE 131 #define TDB_PAGE_SIZE 0x2000 #define TDB_LEN_MULTIPLIER 10 #define FREELIST_TOP (sizeof(struct tdb_header)) /* lock offsets */ #define GLOBAL_LOCK 0 #define ACTIVE_LOCK 4 #define LIST_LOCK_BASE 1024 #define BUCKET(hash) ((hash) % tdb->header.hash_size) #ifndef MAP_FILE #define MAP_FILE 0 #endif /* the body of the database is made of one list_struct for the free space plus a separate data list for each hash value */ struct list_struct { tdb_len rec_len; /* total byte length of record */ tdb_off next; /* offset of the next record in the list */ tdb_len key_len; /* byte length of key */ tdb_len data_len; /* byte length of data */ unsigned full_hash; /* the full 32 bit hash of the key */ unsigned magic; /* try to catch errors */ /* the following union is implied union { char record[rec_len]; struct { char key[key_len]; char data[data_len]; } } */ }; /* a null data record - useful for error returns */ static TDB_DATA null_data; /* a byte range locking function - return 0 on success this functions locks/unlocks 1 byte at the specified offset */ static int tdb_brlock(TDB_CONTEXT *tdb, tdb_off offset, int rw_type, int lck_type) { struct flock fl; if (tdb->flags & TDB_NOLOCK) return 0; if (tdb->read_only) return -1; fl.l_type = rw_type; fl.l_whence = SEEK_SET; fl.l_start = offset; fl.l_len = 1; fl.l_pid = 0; if (fcntl(tdb->fd, lck_type, &fl) != 0) { #if TDB_DEBUG if (lck_type == F_SETLKW) { printf("lock failed at %d (%s)\n", offset, strerror(errno)); } #endif tdb->ecode = TDB_ERR_LOCK; return -1; } return 0; } /* lock a list in the database. list -1 is the alloc list */ static int tdb_lock(TDB_CONTEXT *tdb, int list, int locktype) { if (list < -1 || list >= (int)tdb->header.hash_size) { #if TDB_DEBUG printf("bad list %d\n", list); #endif return -1; } if (tdb->flags & TDB_NOLOCK) return 0; if (tdb->locked[list+1].count == 0 || (tdb->locked[list+1].ltype == F_RDLCK && locktype == F_WRLCK)) { if (tdb_brlock(tdb, LIST_LOCK_BASE + 4*list, locktype, F_SETLKW) != 0) { return -1; } tdb->locked[list+1].ltype = locktype; } tdb->locked[list+1].count++; return 0; } /* unlock the database. */ static int tdb_unlock(TDB_CONTEXT *tdb, int list) { if (list < -1 || list >= (int)tdb->header.hash_size) { #if TDB_DEBUG printf("bad unlock list %d\n", list); #endif return -1; } if (tdb->flags & TDB_NOLOCK) return 0; if (tdb->locked[list+1].count == 0) { #if TDB_DEBUG printf("not locked %d\n", list); #endif tdb->ecode = TDB_ERR_LOCK; return -1; } if (tdb->locked[list+1].count == 1) { if (tdb_brlock(tdb, LIST_LOCK_BASE + 4*list, F_UNLCK, F_SETLKW) != 0) { return -1; } } tdb->locked[list+1].count--; return 0; } /* the hash algorithm - turn a key into an integer This is based on the hash agorithm from gdbm */ static unsigned tdb_hash(TDB_DATA *key) { unsigned value; /* Used to compute the hash value. */ unsigned i; /* Used to cycle through random values. */ /* Set the initial value from the key size. */ value = 0x238F13AF * key->dsize; for (i=0; i < key->dsize; i++) { value = (value + (key->dptr[i] << (i*5 % 24))); } value = (1103515243 * value + 12345); return value; } /* find the top of the hash chain for an open database */ static tdb_off tdb_hash_top(TDB_CONTEXT *tdb, unsigned hash) { tdb_off ret; hash = BUCKET(hash); ret = FREELIST_TOP + (hash+1)*sizeof(tdb_off); return ret; } /* check for an out of bounds access - if it is out of bounds then see if the database has been expanded by someone else and expand if necessary */ static int tdb_oob(TDB_CONTEXT *tdb, tdb_off offset) { struct stat st; if (offset <= tdb->map_size) return 0; if (tdb->flags & TDB_INTERNAL) return 0; fstat(tdb->fd, &st); if (st.st_size <= (size_t)offset) { tdb->ecode = TDB_ERR_IO; return -1; } #if HAVE_MMAP if (tdb->map_ptr) { munmap(tdb->map_ptr, tdb->map_size); tdb->map_ptr = NULL; } #endif tdb->map_size = st.st_size; #if HAVE_MMAP if (!(tdb->flags & TDB_NOMMAP)) { tdb->map_ptr = (void *)mmap(NULL, tdb->map_size, tdb->read_only?PROT_READ:PROT_READ|PROT_WRITE, MAP_SHARED | MAP_FILE, tdb->fd, 0); } #endif return 0; } /* write a lump of data at a specified offset */ static int tdb_write(TDB_CONTEXT *tdb, tdb_off offset, const char *buf, tdb_len len) { if (tdb_oob(tdb, offset + len) != 0) { /* oops - trying to write beyond the end of the database! */ return -1; } if (tdb->map_ptr) { memcpy(offset + (char *)tdb->map_ptr, buf, len); } else { if (lseek(tdb->fd, offset, SEEK_SET) != offset || write(tdb->fd, buf, len) != (ssize_t)len) { tdb->ecode = TDB_ERR_IO; return -1; } } return 0; } /* read a lump of data at a specified offset */ static int tdb_read(TDB_CONTEXT *tdb, tdb_off offset, char *buf, tdb_len len) { if (tdb_oob(tdb, offset + len) != 0) { /* oops - trying to read beyond the end of the database! */ return -1; } if (tdb->map_ptr) { memcpy(buf, offset + (char *)tdb->map_ptr, len); } else { if (lseek(tdb->fd, offset, SEEK_SET) != offset || read(tdb->fd, buf, len) != (ssize_t)len) { tdb->ecode = TDB_ERR_IO; return -1; } } return 0; } /* read a lump of data, allocating the space for it */ static char *tdb_alloc_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_len len) { char *buf; buf = (char *)malloc(len); if (!buf) { tdb->ecode = TDB_ERR_OOM; return NULL; } if (tdb_read(tdb, offset, buf, len) == -1) { free(buf); return NULL; } return buf; } /* convenience routine for writing a record */ static int rec_write(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec) { return tdb_write(tdb, offset, (char *)rec, sizeof(*rec)); } /* convenience routine for writing a tdb_off */ static int ofs_write(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d) { return tdb_write(tdb, offset, (char *)d, sizeof(*d)); } /* read a tdb_off from the store */ static int ofs_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d) { return tdb_read(tdb, offset, (char *)d, sizeof(*d)); } /* read a record and check for simple errors */ static int rec_read(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec) { if (tdb_read(tdb, offset, (char *)rec, sizeof(*rec)) == -1) return -1; if (rec->magic != TDB_MAGIC) { #if TDB_DEBUG printf("bad magic 0x%08x at offset %d\n", rec->magic, offset); #endif tdb->ecode = TDB_ERR_CORRUPT; return -1; } if (tdb_oob(tdb, rec->next) != 0) { return -1; } return 0; } /* expand the database at least length bytes by expanding the underlying file and doing the mmap again if necessary */ static int tdb_expand(TDB_CONTEXT *tdb, tdb_off length) { struct list_struct rec; tdb_off offset, ptr; char b = 0; tdb_lock(tdb,-1, F_WRLCK); /* make sure we know about any previous expansions by another process */ tdb_oob(tdb,tdb->map_size + 1); /* always make room for at least 10 more records */ length *= TDB_LEN_MULTIPLIER; /* and round the database up to a multiple of TDB_PAGE_SIZE */ length = ((tdb->map_size + length + TDB_PAGE_SIZE) & ~(TDB_PAGE_SIZE - 1)) - tdb->map_size; /* expand the file itself */ if (!(tdb->flags & TDB_INTERNAL)) { lseek(tdb->fd, tdb->map_size + length - 1, SEEK_SET); if (write(tdb->fd, &b, 1) != 1) goto fail; } /* form a new freelist record */ offset = FREELIST_TOP; rec.rec_len = length - sizeof(rec); rec.magic = TDB_FREE_MAGIC; if (ofs_read(tdb, offset, &rec.next) == -1) { goto fail; } #if HAVE_MMAP if (!(tdb->flags & TDB_INTERNAL) && tdb->map_ptr) { munmap(tdb->map_ptr, tdb->map_size); tdb->map_ptr = NULL; } #endif tdb->map_size += length; if (tdb->flags & TDB_INTERNAL) { tdb->map_ptr = realloc(tdb->map_ptr, tdb->map_size); } /* write it out */ if (rec_write(tdb, tdb->map_size - length, &rec) == -1) { goto fail; } /* link it into the free list */ ptr = tdb->map_size - length; if (ofs_write(tdb, offset, &ptr) == -1) goto fail; #if HAVE_MMAP if (!(tdb->flags & TDB_NOMMAP)) { tdb->map_ptr = (void *)mmap(NULL, tdb->map_size, PROT_READ|PROT_WRITE, MAP_SHARED | MAP_FILE, tdb->fd, 0); } #endif tdb_unlock(tdb, -1); return 0; fail: tdb_unlock(tdb,-1); return -1; } /* allocate some space from the free list. The offset returned points to a unconnected list_struct within the database with room for at least length bytes of total data 0 is returned if the space could not be allocated */ static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length) { tdb_off offset, rec_ptr, last_ptr; struct list_struct rec, lastrec, newrec; tdb_lock(tdb, -1, F_WRLCK); again: last_ptr = 0; offset = FREELIST_TOP; /* read in the freelist top */ if (ofs_read(tdb, offset, &rec_ptr) == -1) { goto fail; } /* keep looking until we find a freelist record that is big enough */ while (rec_ptr) { if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec)) == -1) { goto fail; } if (rec.magic != TDB_FREE_MAGIC) { #if TDB_DEBUG printf("bad magic 0x%08x in free list\n", rec.magic); #endif goto fail; } if (rec.rec_len >= length) { /* found it - now possibly split it up */ if (rec.rec_len > length + MIN_REC_SIZE) { length = (length + TDB_ALIGN) & ~(TDB_ALIGN-1); newrec.rec_len = rec.rec_len - (sizeof(rec) + length); newrec.next = rec.next; newrec.magic = TDB_FREE_MAGIC; rec.rec_len = length; rec.next = rec_ptr + sizeof(rec) + rec.rec_len; if (rec_write(tdb, rec.next, &newrec) == -1) { goto fail; } if (rec_write(tdb, rec_ptr, &rec) == -1) { goto fail; } } /* remove it from the list */ if (last_ptr == 0) { offset = FREELIST_TOP; if (ofs_write(tdb, offset, &rec.next) == -1) { goto fail; } } else { lastrec.next = rec.next; if (rec_write(tdb, last_ptr, &lastrec) == -1) { goto fail; } } /* all done - return the new record offset */ tdb_unlock(tdb, -1); return rec_ptr; } /* move to the next record */ lastrec = rec; last_ptr = rec_ptr; rec_ptr = rec.next; } /* we didn't find enough space. See if we can expand the database and if we can then try again */ if (tdb_expand(tdb, length + sizeof(rec)) == 0) goto again; fail: #if TDB_DEBUG printf("tdb_allocate failed for size %u\n", length); #endif tdb_unlock(tdb, -1); return 0; } /* initialise a new database with a specified hash size */ static int tdb_new_database(TDB_CONTEXT *tdb, int hash_size) { struct tdb_header header; tdb_off offset; int i, size = 0; tdb_off buf[16]; /* create the header */ memset(&header, 0, sizeof(header)); memcpy(header.magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1); header.version = TDB_VERSION; header.hash_size = hash_size; if (tdb->fd != -1) { lseek(tdb->fd, 0, SEEK_SET); ftruncate(tdb->fd, 0); } if (tdb->fd != -1 && write(tdb->fd, &header, sizeof(header)) != sizeof(header)) { tdb->ecode = TDB_ERR_IO; return -1; } else size += sizeof(header); /* the freelist and hash pointers */ offset = 0; memset(buf, 0, sizeof(buf)); for (i=0;(hash_size+1)-i >= 16; i += 16) { if (tdb->fd != -1 && write(tdb->fd, buf, sizeof(buf)) != sizeof(buf)) { tdb->ecode = TDB_ERR_IO; return -1; } else size += sizeof(buf); } for (;i<hash_size+1; i++) { if (tdb->fd != -1 && write(tdb->fd, buf, sizeof(tdb_off)) != sizeof(tdb_off)) { tdb->ecode = TDB_ERR_IO; return -1; } else size += sizeof(tdb_off); } if (tdb->flags & TDB_INTERNAL) { tdb->map_ptr = calloc(size,1); tdb->map_size = size; if (tdb->map_ptr == NULL) { tdb->ecode = TDB_ERR_IO; return -1; } memcpy(&tdb->header, &header, sizeof(header)); } #if TDB_DEBUG printf("initialised database of hash_size %u\n", hash_size); #endif return 0; } /* Returns 0 on fail. On success, return offset of record, and fills in rec */ static tdb_off tdb_find(TDB_CONTEXT *tdb, TDB_DATA key, unsigned int hash, struct list_struct *rec) { tdb_off offset, rec_ptr; /* find the top of the hash chain */ offset = tdb_hash_top(tdb, hash); /* read in the hash top */ if (ofs_read(tdb, offset, &rec_ptr) == -1) return 0; /* keep looking until we find the right record */ while (rec_ptr) { if (rec_read(tdb, rec_ptr, rec) == -1) return 0; if (hash == rec->full_hash && key.dsize == rec->key_len) { char *k; /* a very likely hit - read the key */ k = tdb_alloc_read(tdb, rec_ptr + sizeof(*rec), rec->key_len); if (!k) return 0; if (memcmp(key.dptr, k, key.dsize) == 0) { free(k); return rec_ptr; } free(k); } /* move to the next record */ rec_ptr = rec->next; } return 0; } /* return an error string for the last tdb error */ char *tdb_error(TDB_CONTEXT *tdb) { int i; static struct { enum TDB_ERROR ecode; char *estring; } emap[] = { {TDB_SUCCESS, "Success"}, {TDB_ERR_CORRUPT, "Corrupt database"}, {TDB_ERR_IO, "IO Error"}, {TDB_ERR_LOCK, "Locking error"}, {TDB_ERR_OOM, "Out of memory"}, {TDB_ERR_EXISTS, "Record exists"}, {TDB_ERR_NOEXIST, "Record does not exist"}, {(enum TDB_ERROR)-1, NULL}}; if (tdb != NULL) { for (i=0;emap[i].estring;i++) { if (tdb->ecode == emap[i].ecode) return emap[i].estring; } } else { return "Invalid tdb context"; } return "Invalid error code"; } /* update an entry in place - this only works if the new data size is <= the old data size and the key exists. on failure return -1 */ int tdb_update(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf) { unsigned hash; struct list_struct rec; tdb_off rec_ptr; int ret = -1; if (tdb == NULL) { #ifdef TDB_DEBUG printf("tdb_update() called with null context\n"); #endif return -1; } /* initialise error code to ok, first */ tdb->ecode = 0; /* find which hash bucket it is in */ hash = tdb_hash(&key); tdb_lock(tdb, BUCKET(hash), F_WRLCK); rec_ptr = tdb_find(tdb, key, hash, &rec); if (!rec_ptr) { tdb->ecode = TDB_ERR_NOEXIST; goto out; } /* must be long enough */ if (rec.rec_len < key.dsize + dbuf.dsize) goto out; if (tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len, dbuf.dptr, dbuf.dsize) == -1) goto out; if (dbuf.dsize != rec.data_len) { /* update size */ rec.data_len = dbuf.dsize; ret = rec_write(tdb, rec_ptr, &rec); } else ret = 0; out: tdb_unlock(tdb, BUCKET(hash)); return ret; } /* find an entry in the database given a key */ TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key) { unsigned hash; tdb_off rec_ptr; struct list_struct rec; TDB_DATA ret = null_data; if (tdb == NULL) { #ifdef TDB_DEBUG printf("tdb_fetch() called with null context\n"); #endif return null_data; } /* find which hash bucket it is in */ hash = tdb_hash(&key); tdb_lock(tdb, BUCKET(hash), F_RDLCK); rec_ptr = tdb_find(tdb, key, hash, &rec); if (rec_ptr) { if (rec.data_len) ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len, rec.data_len); else ret.dptr = NULL; ret.dsize = rec.data_len; } tdb_unlock(tdb, BUCKET(hash)); return ret; } /* check if an entry in the database exists note that 1 is returned if the key is found and 0 is returned if not found this doesn't match the conventions in the rest of this module, but is compatible with gdbm */ int tdb_exists(TDB_CONTEXT *tdb, TDB_DATA key) { unsigned hash; tdb_off rec_ptr; struct list_struct rec; if (tdb == NULL) { #ifdef TDB_DEBUG printf("tdb_exists() called with null context\n"); #endif return 0; } /* find which hash bucket it is in */ hash = tdb_hash(&key); tdb_lock(tdb, BUCKET(hash), F_RDLCK); rec_ptr = tdb_find(tdb, key, hash, &rec); tdb_unlock(tdb, BUCKET(hash)); return rec_ptr != 0; } /* traverse the entire database - calling fn(tdb, key, data) on each element. return -1 on error or the record count traversed if fn is NULL then it is not called a non-zero return value from fn() indicates that the traversal should stop */ int tdb_traverse(TDB_CONTEXT *tdb, int (*fn)(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, void* state), void* state) { int count = 0; unsigned h; tdb_off offset, rec_ptr; struct list_struct rec; char *data; TDB_DATA key, dbuf; if (tdb == NULL) { #ifdef TDB_DEBUG printf("tdb_traverse() called with null context\n"); #endif return -1; } /* loop over all hash chains */ for (h = 0; h < tdb->header.hash_size; h++) { tdb_lock(tdb, BUCKET(h), F_WRLCK); /* read in the hash top */ offset = tdb_hash_top(tdb, h); if (ofs_read(tdb, offset, &rec_ptr) == -1) { goto fail; } /* traverse all records for this hash */ while (rec_ptr) { if (rec_read(tdb, rec_ptr, &rec) == -1) { goto fail; } /* now read the full record */ data = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len + rec.data_len); if (!data) { goto fail; } key.dptr = data; key.dsize = rec.key_len; dbuf.dptr = data + rec.key_len; dbuf.dsize = rec.data_len; count++; if (fn && fn(tdb, key, dbuf, state) != 0) { /* they want us to stop traversing */ free(data); tdb_unlock(tdb, BUCKET(h)); return count; } /* a miss - drat */ free(data); /* move to the next record */ rec_ptr = rec.next; } tdb_unlock(tdb, BUCKET(h)); } /* return the number traversed */ return count; fail: tdb_unlock(tdb, BUCKET(h)); return -1; } /* find the first entry in the database and return its key */ TDB_DATA tdb_firstkey(TDB_CONTEXT *tdb) { tdb_off offset, rec_ptr; struct list_struct rec; unsigned hash; TDB_DATA ret; if (tdb == NULL) { #ifdef TDB_DEBUG printf("tdb_firstkey() called with null context\n"); #endif return null_data; } /* look for a non-empty hash chain */ for (hash = 0, rec_ptr = 0; hash < tdb->header.hash_size; hash++) { /* find the top of the hash chain */ offset = tdb_hash_top(tdb, hash); tdb_lock(tdb, BUCKET(hash), F_RDLCK); /* read in the hash top */ if (ofs_read(tdb, offset, &rec_ptr) == -1) { goto fail; } if (rec_ptr) break; tdb_unlock(tdb, BUCKET(hash)); } if (rec_ptr == 0) return null_data; /* we've found a non-empty chain, now read the record */ if (rec_read(tdb, rec_ptr, &rec) == -1) { goto fail; } /* allocate and read the key space */ ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len); ret.dsize = rec.key_len; tdb_unlock(tdb, BUCKET(hash)); return ret; fail: tdb_unlock(tdb, BUCKET(hash)); return null_data; } /* find the next entry in the database, returning its key */ TDB_DATA tdb_nextkey(TDB_CONTEXT *tdb, TDB_DATA key) { unsigned hash, hbucket; tdb_off rec_ptr, offset; struct list_struct rec; TDB_DATA ret; if (tdb == NULL) { #ifdef TDB_DEBUG printf("tdb_nextkey() called with null context\n"); #endif return null_data; } /* find which hash bucket it is in */ hash = tdb_hash(&key); hbucket = BUCKET(hash); tdb_lock(tdb, hbucket, F_RDLCK); rec_ptr = tdb_find(tdb, key, hash, &rec); if (rec_ptr) { /* we want the next record after this one */ rec_ptr = rec.next; } /* not found or last in hash: look for next non-empty hash chain */ while (rec_ptr == 0) { tdb_unlock(tdb, hbucket); if (++hbucket >= tdb->header.hash_size - 1) return null_data; offset = tdb_hash_top(tdb, hbucket); tdb_lock(tdb, hbucket, F_RDLCK); /* read in the hash top */ if (ofs_read(tdb, offset, &rec_ptr) == -1) { tdb_unlock(tdb, hbucket); return null_data; } } /* Read the record. */ if (rec_read(tdb, rec_ptr, &rec) == -1) { tdb_unlock(tdb, hbucket); return null_data; } /* allocate and read the key */ ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len); ret.dsize = rec.key_len; tdb_unlock(tdb, hbucket); return ret; } /* delete an entry in the database given a key */ int tdb_delete(TDB_CONTEXT *tdb, TDB_DATA key) { unsigned hash; tdb_off offset, rec_ptr, last_ptr; struct list_struct rec, lastrec; char *data = NULL; if (tdb == NULL) { #ifdef TDB_DEBUG printf("tdb_delete() called with null context\n"); #endif return -1; } /* find which hash bucket it is in */ hash = tdb_hash(&key); tdb_lock(tdb, BUCKET(hash), F_WRLCK); /* find the top of the hash chain */ offset = tdb_hash_top(tdb, hash); /* read in the hash top */ if (ofs_read(tdb, offset, &rec_ptr) == -1) { goto fail; } last_ptr = 0; /* keep looking until we find the right record */ while (rec_ptr) { if (rec_read(tdb, rec_ptr, &rec) == -1) { goto fail; } if (hash == rec.full_hash && key.dsize == rec.key_len) { /* a very likely hit - read the record and full key */ data = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len); if (!data) { goto fail; } if (memcmp(key.dptr, data, key.dsize) == 0) { /* a definite match - delete it */ if (last_ptr == 0) { offset = tdb_hash_top(tdb, hash); if (ofs_write(tdb, offset, &rec.next) == -1) { goto fail; } } else { lastrec.next = rec.next; if (rec_write(tdb, last_ptr, &lastrec) == -1) { goto fail; } } tdb_unlock(tdb, BUCKET(hash)); tdb_lock(tdb, -1, F_WRLCK); /* and recover the space */ offset = FREELIST_TOP; if (ofs_read(tdb, offset, &rec.next) == -1) { goto fail2; } rec.magic = TDB_FREE_MAGIC; if (rec_write(tdb, rec_ptr, &rec) == -1) { goto fail2; } if (ofs_write(tdb, offset, &rec_ptr) == -1) { goto fail2; } /* yipee - all done */ free(data); tdb_unlock(tdb, -1); return 0; } /* a miss - drat */ free(data); data = NULL; } /* move to the next record */ last_ptr = rec_ptr; lastrec = rec; rec_ptr = rec.next; } fail: if (data) free(data); tdb_unlock(tdb, BUCKET(hash)); return -1; fail2: if (data) free(data); tdb_unlock(tdb, -1); return -1; } /* store an element in the database, replacing any existing element with the same key return 0 on success, -1 on failure */ int tdb_store(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, int flag) { struct list_struct rec; unsigned hash; tdb_off rec_ptr, offset; char *p = NULL; if (tdb == NULL) { #ifdef TDB_DEBUG printf("tdb_store() called with null context\n"); #endif return -1; } /* find which hash bucket it is in */ hash = tdb_hash(&key); tdb_lock(tdb, BUCKET(hash), F_WRLCK); /* check for it existing, on insert. */ if (flag == TDB_INSERT && tdb_exists(tdb, key)) { tdb->ecode = TDB_ERR_EXISTS; goto fail; } /* first try in-place update, on modify or replace. */ if (flag != TDB_INSERT && tdb_update(tdb, key, dbuf) == 0) { tdb_unlock(tdb, BUCKET(hash)); return 0; } /* check for it _not_ existing, from error code of the update. */ if (flag == TDB_MODIFY && tdb->ecode == TDB_ERR_NOEXIST) { goto fail; } /* reset the error code potentially set by the tdb_update() */ tdb->ecode = 0; /* * now we're into insert / modify / replace of a record * which we know could not be optimised by an in-place * store (for various reasons). */ rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize); if (rec_ptr == 0) { goto fail; } /* delete any existing record - if it doesn't exist we don't care */ if (flag != TDB_INSERT) { tdb_delete(tdb, key); } /* read the newly created record */ if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec)) == -1) { goto fail; } if (rec.magic != TDB_FREE_MAGIC) goto fail; /* find the top of the hash chain */ offset = tdb_hash_top(tdb, hash); /* read in the hash top diretcly into our next pointer */ if (ofs_read(tdb, offset, &rec.next) == -1) { goto fail; } rec.key_len = key.dsize; rec.data_len = dbuf.dsize; rec.full_hash = hash; rec.magic = TDB_MAGIC; p = (char *)malloc(sizeof(rec) + key.dsize + dbuf.dsize); if (!p) { tdb->ecode = TDB_ERR_OOM; goto fail; } memcpy(p, &rec, sizeof(rec)); memcpy(p+sizeof(rec), key.dptr, key.dsize); if (dbuf.dsize) memcpy(p+sizeof(rec)+key.dsize, dbuf.dptr, dbuf.dsize); if (tdb_write(tdb, rec_ptr, p, sizeof(rec)+key.dsize+dbuf.dsize) == -1) goto fail; free(p); p = NULL; /* and point the top of the hash chain at it */ if (ofs_write(tdb, offset, &rec_ptr) == -1) goto fail; tdb_unlock(tdb, BUCKET(hash)); return 0; fail: #if TDB_DEBUG printf("store failed for hash 0x%08x in bucket %u\n", hash, BUCKET(hash)); #endif if (p) free(p); tdb_unlock(tdb, BUCKET(hash)); return -1; } /* open the database, creating it if necessary The open_flags and mode are passed straight to the open call on the database file. A flags value of O_WRONLY is invalid The hash size is advisory, use zero for a default value. return is NULL on error */ TDB_CONTEXT *tdb_open(char *name, int hash_size, int tdb_flags, int open_flags, mode_t mode) { TDB_CONTEXT tdb, *ret; struct stat st; memset(&tdb, 0, sizeof(tdb)); tdb.fd = -1; tdb.name = NULL; tdb.map_ptr = NULL; tdb.flags = tdb_flags; if ((open_flags & O_ACCMODE) == O_WRONLY) { goto fail; } if (hash_size == 0) hash_size = DEFAULT_HASH_SIZE; tdb.read_only = ((open_flags & O_ACCMODE) == O_RDONLY); /* internal databases don't use mmap or locking, and start off cleared */ if (tdb.flags & TDB_INTERNAL) { tdb.flags |= (TDB_NOLOCK | TDB_NOMMAP); tdb.flags &= ~TDB_CLEAR_IF_FIRST; } /* read only databases don't do locking */ if (tdb.read_only) tdb.flags |= TDB_NOLOCK; if (tdb.flags & TDB_INTERNAL) { tdb_new_database(&tdb, hash_size); goto internal; } tdb.fd = open(name, open_flags, mode); if (tdb.fd == -1) { goto fail; } /* ensure there is only one process initialising at once */ tdb_brlock(&tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW); if (tdb_flags & TDB_CLEAR_IF_FIRST) { /* we need to zero the database if we are the only one with it open */ if (tdb_brlock(&tdb, ACTIVE_LOCK, F_WRLCK, F_SETLK) == 0) { ftruncate(tdb.fd, 0); tdb_brlock(&tdb, ACTIVE_LOCK, F_UNLCK, F_SETLK); } } /* leave this lock in place */ tdb_brlock(&tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW); if (read(tdb.fd, &tdb.header, sizeof(tdb.header)) != sizeof(tdb.header) || strcmp(tdb.header.magic_food, TDB_MAGIC_FOOD) != 0 || tdb.header.version != TDB_VERSION) { /* its not a valid database - possibly initialise it */ if (!(open_flags & O_CREAT)) { goto fail; } if (tdb_new_database(&tdb, hash_size) == -1) goto fail; lseek(tdb.fd, 0, SEEK_SET); if (read(tdb.fd, &tdb.header, sizeof(tdb.header)) != sizeof(tdb.header)) goto fail; } fstat(tdb.fd, &st); /* map the database and fill in the return structure */ tdb.name = (char *)strdup(name); tdb.map_size = st.st_size; tdb.locked = (struct tdb_lock_type *)calloc(tdb.header.hash_size+1, sizeof(tdb.locked[0])); if (!tdb.locked) { goto fail; } #if HAVE_MMAP if (!(tdb.flags & TDB_NOMMAP)) { tdb.map_ptr = (void *)mmap(NULL, st.st_size, tdb.read_only? PROT_READ : PROT_READ|PROT_WRITE, MAP_SHARED | MAP_FILE, tdb.fd, 0); } #endif internal: ret = (TDB_CONTEXT *)malloc(sizeof(tdb)); if (!ret) goto fail; *ret = tdb; #if TDB_DEBUG printf("mapped database of hash_size %u map_size=%u\n", hash_size, tdb.map_size); #endif tdb_brlock(&tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW); return ret; fail: if (tdb.name) free(tdb.name); if (tdb.fd != -1) close(tdb.fd); if (tdb.map_ptr) munmap(tdb.map_ptr, tdb.map_size); return NULL; } /* close a database */ int tdb_close(TDB_CONTEXT *tdb) { if (!tdb) return -1; if (tdb->name) free(tdb->name); if (tdb->fd != -1) close(tdb->fd); if (tdb->locked) free(tdb->locked); if (tdb->map_ptr) { if (tdb->flags & TDB_INTERNAL) { free(tdb->map_ptr); } else { munmap(tdb->map_ptr, tdb->map_size); } } memset(tdb, 0, sizeof(*tdb)); free(tdb); return 0; } /* lock one hash chain. This is meant to be used to reduce locking contention - it cannot guarantee how many records will be locked */ int tdb_lockchain(TDB_CONTEXT *tdb, TDB_DATA key) { if (tdb == NULL) { #ifdef TDB_DEBUG printf("tdb_lockchain() called with null context\n"); #endif return -1; } return tdb_lock(tdb, BUCKET(tdb_hash(&key)), F_WRLCK); } /* unlock one hash chain */ int tdb_unlockchain(TDB_CONTEXT *tdb, TDB_DATA key) { if (tdb == NULL) { #ifdef TDB_DEBUG printf("tdb_unlockchain() called with null context\n"); #endif return -1; } return tdb_unlock(tdb, BUCKET(tdb_hash(&key))); } #if TDB_DEBUG void tdb_printfreelist(TDB_CONTEXT *tdb) { tdb_off offset, rec_ptr, last_ptr; struct list_struct rec, lastrec, newrec; tdb_lock(tdb, -1, F_WRLCK); last_ptr = 0; offset = FREELIST_TOP; /* read in the freelist top */ if (ofs_read(tdb, offset, &rec_ptr) == -1) { return; } while (rec_ptr) { if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec)) == -1) { return; } if (rec.magic != TDB_FREE_MAGIC) { printf("bad magic 0x%08x in free list\n", rec.magic); return; } printf("entry offset=[0x%08x], rec.rec_len = [0x%08x]\n", rec.next, rec.rec_len ); /* move to the next record */ rec_ptr = rec.next; } tdb_unlock(tdb, -1); } #endif