/* Trivial Database 2: fetch, store and misc routines. Copyright (C) Rusty Russell 2010 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include "private.h" #ifndef HAVE_LIBREPLACE #include <ccan/asprintf/asprintf.h> #include <stdarg.h> #endif static enum TDB_ERROR update_rec_hdr(struct tdb_context *tdb, tdb_off_t off, tdb_len_t keylen, tdb_len_t datalen, struct tdb_used_record *rec, uint64_t h) { uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec); enum TDB_ERROR ecode; ecode = set_header(tdb, rec, TDB_USED_MAGIC, keylen, datalen, keylen + dataroom, h); if (ecode == TDB_SUCCESS) { ecode = tdb_write_convert(tdb, off, rec, sizeof(*rec)); } return ecode; } static enum TDB_ERROR replace_data(struct tdb_context *tdb, struct hash_info *h, struct tdb_data key, struct tdb_data dbuf, tdb_off_t old_off, tdb_len_t old_room, bool growing) { tdb_off_t new_off; enum TDB_ERROR ecode; /* Allocate a new record. */ new_off = alloc(tdb, key.dsize, dbuf.dsize, h->h, TDB_USED_MAGIC, growing); if (TDB_OFF_IS_ERR(new_off)) { return TDB_OFF_TO_ERR(new_off); } /* We didn't like the existing one: remove it. */ if (old_off) { tdb->stats.frees++; ecode = add_free_record(tdb, old_off, sizeof(struct tdb_used_record) + key.dsize + old_room, TDB_LOCK_WAIT, true); if (ecode == TDB_SUCCESS) ecode = replace_in_hash(tdb, h, new_off); } else { ecode = add_to_hash(tdb, h, new_off); } if (ecode != TDB_SUCCESS) { return ecode; } new_off += sizeof(struct tdb_used_record); ecode = tdb->tdb2.io->twrite(tdb, new_off, key.dptr, key.dsize); if (ecode != TDB_SUCCESS) { return ecode; } new_off += key.dsize; ecode = tdb->tdb2.io->twrite(tdb, new_off, dbuf.dptr, dbuf.dsize); if (ecode != TDB_SUCCESS) { return ecode; } if (tdb->flags & TDB_SEQNUM) tdb_inc_seqnum(tdb); return TDB_SUCCESS; } static enum TDB_ERROR update_data(struct tdb_context *tdb, tdb_off_t off, struct tdb_data dbuf, tdb_len_t extra) { enum TDB_ERROR ecode; ecode = tdb->tdb2.io->twrite(tdb, off, dbuf.dptr, dbuf.dsize); if (ecode == TDB_SUCCESS && extra) { /* Put a zero in; future versions may append other data. */ ecode = tdb->tdb2.io->twrite(tdb, off + dbuf.dsize, "", 1); } if (tdb->flags & TDB_SEQNUM) tdb_inc_seqnum(tdb); return ecode; } enum TDB_ERROR tdb_store(struct tdb_context *tdb, struct tdb_data key, struct tdb_data dbuf, int flag) { struct hash_info h; tdb_off_t off; tdb_len_t old_room = 0; struct tdb_used_record rec; enum TDB_ERROR ecode; if (tdb->flags & TDB_VERSION1) { if (tdb1_store(tdb, key, dbuf, flag) == -1) return tdb->last_error; return TDB_SUCCESS; } off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL); if (TDB_OFF_IS_ERR(off)) { return tdb->last_error = TDB_OFF_TO_ERR(off); } /* Now we have lock on this hash bucket. */ if (flag == TDB_INSERT) { if (off) { ecode = TDB_ERR_EXISTS; goto out; } } else { if (off) { old_room = rec_data_length(&rec) + rec_extra_padding(&rec); if (old_room >= dbuf.dsize) { /* Can modify in-place. Easy! */ ecode = update_rec_hdr(tdb, off, key.dsize, dbuf.dsize, &rec, h.h); if (ecode != TDB_SUCCESS) { goto out; } ecode = update_data(tdb, off + sizeof(rec) + key.dsize, dbuf, old_room - dbuf.dsize); if (ecode != TDB_SUCCESS) { goto out; } tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK); return tdb->last_error = TDB_SUCCESS; } } else { if (flag == TDB_MODIFY) { /* if the record doesn't exist and we are in TDB_MODIFY mode then we should fail the store */ ecode = TDB_ERR_NOEXIST; goto out; } } } /* If we didn't use the old record, this implies we're growing. */ ecode = replace_data(tdb, &h, key, dbuf, off, old_room, off); out: tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK); return tdb->last_error = ecode; } enum TDB_ERROR tdb_append(struct tdb_context *tdb, struct tdb_data key, struct tdb_data dbuf) { struct hash_info h; tdb_off_t off; struct tdb_used_record rec; tdb_len_t old_room = 0, old_dlen; unsigned char *newdata; struct tdb_data new_dbuf; enum TDB_ERROR ecode; if (tdb->flags & TDB_VERSION1) { if (tdb1_append(tdb, key, dbuf) == -1) return tdb->last_error; return TDB_SUCCESS; } off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL); if (TDB_OFF_IS_ERR(off)) { return tdb->last_error = TDB_OFF_TO_ERR(off); } if (off) { old_dlen = rec_data_length(&rec); old_room = old_dlen + rec_extra_padding(&rec); /* Fast path: can append in place. */ if (rec_extra_padding(&rec) >= dbuf.dsize) { ecode = update_rec_hdr(tdb, off, key.dsize, old_dlen + dbuf.dsize, &rec, h.h); if (ecode != TDB_SUCCESS) { goto out; } off += sizeof(rec) + key.dsize + old_dlen; ecode = update_data(tdb, off, dbuf, rec_extra_padding(&rec)); goto out; } /* Slow path. */ newdata = malloc(key.dsize + old_dlen + dbuf.dsize); if (!newdata) { ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR, "tdb_append:" " failed to allocate %zu bytes", (size_t)(key.dsize + old_dlen + dbuf.dsize)); goto out; } ecode = tdb->tdb2.io->tread(tdb, off + sizeof(rec) + key.dsize, newdata, old_dlen); if (ecode != TDB_SUCCESS) { goto out_free_newdata; } memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize); new_dbuf.dptr = newdata; new_dbuf.dsize = old_dlen + dbuf.dsize; } else { newdata = NULL; new_dbuf = dbuf; } /* If they're using tdb_append(), it implies they're growing record. */ ecode = replace_data(tdb, &h, key, new_dbuf, off, old_room, true); out_free_newdata: free(newdata); out: tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK); return tdb->last_error = ecode; } enum TDB_ERROR tdb_fetch(struct tdb_context *tdb, struct tdb_data key, struct tdb_data *data) { tdb_off_t off; struct tdb_used_record rec; struct hash_info h; enum TDB_ERROR ecode; if (tdb->flags & TDB_VERSION1) return tdb1_fetch(tdb, key, data); off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL); if (TDB_OFF_IS_ERR(off)) { return tdb->last_error = TDB_OFF_TO_ERR(off); } if (!off) { ecode = TDB_ERR_NOEXIST; } else { data->dsize = rec_data_length(&rec); data->dptr = tdb_alloc_read(tdb, off + sizeof(rec) + key.dsize, data->dsize); if (TDB_PTR_IS_ERR(data->dptr)) { ecode = TDB_PTR_ERR(data->dptr); } else ecode = TDB_SUCCESS; } tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK); return tdb->last_error = ecode; } bool tdb_exists(struct tdb_context *tdb, TDB_DATA key) { tdb_off_t off; struct tdb_used_record rec; struct hash_info h; if (tdb->flags & TDB_VERSION1) { return tdb1_exists(tdb, key); } off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL); if (TDB_OFF_IS_ERR(off)) { tdb->last_error = TDB_OFF_TO_ERR(off); return false; } tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK); tdb->last_error = TDB_SUCCESS; return off ? true : false; } enum TDB_ERROR tdb_delete(struct tdb_context *tdb, struct tdb_data key) { tdb_off_t off; struct tdb_used_record rec; struct hash_info h; enum TDB_ERROR ecode; if (tdb->flags & TDB_VERSION1) { if (tdb1_delete(tdb, key) == -1) return tdb->last_error; return TDB_SUCCESS; } off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL); if (TDB_OFF_IS_ERR(off)) { return tdb->last_error = TDB_OFF_TO_ERR(off); } if (!off) { ecode = TDB_ERR_NOEXIST; goto unlock; } ecode = delete_from_hash(tdb, &h); if (ecode != TDB_SUCCESS) { goto unlock; } /* Free the deleted entry. */ tdb->stats.frees++; ecode = add_free_record(tdb, off, sizeof(struct tdb_used_record) + rec_key_length(&rec) + rec_data_length(&rec) + rec_extra_padding(&rec), TDB_LOCK_WAIT, true); if (tdb->flags & TDB_SEQNUM) tdb_inc_seqnum(tdb); unlock: tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK); return tdb->last_error = ecode; } unsigned int tdb_get_flags(struct tdb_context *tdb) { return tdb->flags; } static bool inside_transaction(const struct tdb_context *tdb) { if (tdb->flags & TDB_VERSION1) return tdb->tdb1.transaction != NULL; else return tdb->tdb2.transaction != NULL; } static bool readonly_changable(struct tdb_context *tdb, const char *caller) { if (inside_transaction(tdb)) { tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR, "%s: can't change" " TDB_RDONLY inside transaction", caller); return false; } return true; } void tdb_add_flag(struct tdb_context *tdb, unsigned flag) { if (tdb->flags & TDB_INTERNAL) { tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR, "tdb_add_flag: internal db"); return; } switch (flag) { case TDB_NOLOCK: tdb->flags |= TDB_NOLOCK; break; case TDB_NOMMAP: tdb->flags |= TDB_NOMMAP; tdb_munmap(tdb->file); break; case TDB_NOSYNC: tdb->flags |= TDB_NOSYNC; break; case TDB_SEQNUM: tdb->flags |= TDB_SEQNUM; break; case TDB_ALLOW_NESTING: tdb->flags |= TDB_ALLOW_NESTING; break; case TDB_RDONLY: if (readonly_changable(tdb, "tdb_add_flag")) tdb->flags |= TDB_RDONLY; break; default: tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR, "tdb_add_flag: Unknown flag %u", flag); } } void tdb_remove_flag(struct tdb_context *tdb, unsigned flag) { if (tdb->flags & TDB_INTERNAL) { tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR, "tdb_remove_flag: internal db"); return; } switch (flag) { case TDB_NOLOCK: tdb->flags &= ~TDB_NOLOCK; break; case TDB_NOMMAP: tdb->flags &= ~TDB_NOMMAP; tdb_mmap(tdb); break; case TDB_NOSYNC: tdb->flags &= ~TDB_NOSYNC; break; case TDB_SEQNUM: tdb->flags &= ~TDB_SEQNUM; break; case TDB_ALLOW_NESTING: tdb->flags &= ~TDB_ALLOW_NESTING; break; case TDB_RDONLY: if ((tdb->open_flags & O_ACCMODE) == O_RDONLY) { tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR, "tdb_remove_flag: can't" " remove TDB_RDONLY on tdb" " opened with O_RDONLY"); break; } if (readonly_changable(tdb, "tdb_remove_flag")) tdb->flags &= ~TDB_RDONLY; break; default: tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR, "tdb_remove_flag: Unknown flag %u", flag); } } const char *tdb_errorstr(enum TDB_ERROR ecode) { /* Gcc warns if you miss a case in the switch, so use that. */ switch (TDB_ERR_TO_OFF(ecode)) { case TDB_ERR_TO_OFF(TDB_SUCCESS): return "Success"; case TDB_ERR_TO_OFF(TDB_ERR_CORRUPT): return "Corrupt database"; case TDB_ERR_TO_OFF(TDB_ERR_IO): return "IO Error"; case TDB_ERR_TO_OFF(TDB_ERR_LOCK): return "Locking error"; case TDB_ERR_TO_OFF(TDB_ERR_OOM): return "Out of memory"; case TDB_ERR_TO_OFF(TDB_ERR_EXISTS): return "Record exists"; case TDB_ERR_TO_OFF(TDB_ERR_EINVAL): return "Invalid parameter"; case TDB_ERR_TO_OFF(TDB_ERR_NOEXIST): return "Record does not exist"; case TDB_ERR_TO_OFF(TDB_ERR_RDONLY): return "write not permitted"; } return "Invalid error code"; } enum TDB_ERROR tdb_error(struct tdb_context *tdb) { return tdb->last_error; } enum TDB_ERROR COLD tdb_logerr(struct tdb_context *tdb, enum TDB_ERROR ecode, enum tdb_log_level level, const char *fmt, ...) { char *message; va_list ap; size_t len; /* tdb_open paths care about errno, so save it. */ int saved_errno = errno; if (!tdb->log_fn) return ecode; va_start(ap, fmt); len = vasprintf(&message, fmt, ap); va_end(ap); if (len < 0) { tdb->log_fn(tdb, TDB_LOG_ERROR, TDB_ERR_OOM, "out of memory formatting message:", tdb->log_data); tdb->log_fn(tdb, level, ecode, fmt, tdb->log_data); } else { tdb->log_fn(tdb, level, ecode, message, tdb->log_data); free(message); } errno = saved_errno; return ecode; } enum TDB_ERROR tdb_parse_record_(struct tdb_context *tdb, TDB_DATA key, enum TDB_ERROR (*parse)(TDB_DATA k, TDB_DATA d, void *data), void *data) { tdb_off_t off; struct tdb_used_record rec; struct hash_info h; enum TDB_ERROR ecode; if (tdb->flags & TDB_VERSION1) { return tdb->last_error = tdb1_parse_record(tdb, key, parse, data); } off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL); if (TDB_OFF_IS_ERR(off)) { return tdb->last_error = TDB_OFF_TO_ERR(off); } if (!off) { ecode = TDB_ERR_NOEXIST; } else { const void *dptr; dptr = tdb_access_read(tdb, off + sizeof(rec) + key.dsize, rec_data_length(&rec), false); if (TDB_PTR_IS_ERR(dptr)) { ecode = TDB_PTR_ERR(dptr); } else { TDB_DATA d = tdb_mkdata(dptr, rec_data_length(&rec)); ecode = parse(key, d, data); tdb_access_release(tdb, dptr); } } tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK); return tdb->last_error = ecode; } const char *tdb_name(const struct tdb_context *tdb) { return tdb->name; } int64_t tdb_get_seqnum(struct tdb_context *tdb) { tdb_off_t off; if (tdb->flags & TDB_VERSION1) { tdb1_off_t val; tdb->last_error = TDB_SUCCESS; val = tdb1_get_seqnum(tdb); if (tdb->last_error != TDB_SUCCESS) return TDB_ERR_TO_OFF(tdb->last_error); else return val; } off = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum)); if (TDB_OFF_IS_ERR(off)) tdb->last_error = TDB_OFF_TO_ERR(off); else tdb->last_error = TDB_SUCCESS; return off; } int tdb_fd(const struct tdb_context *tdb) { return tdb->file->fd; } struct traverse_state { enum TDB_ERROR error; struct tdb_context *dest_db; }; /* traverse function for repacking */ static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, struct traverse_state *state) { state->error = tdb_store(state->dest_db, key, data, TDB_INSERT); if (state->error != TDB_SUCCESS) { return -1; } return 0; } enum TDB_ERROR tdb_repack(struct tdb_context *tdb) { struct tdb_context *tmp_db; struct traverse_state state; state.error = tdb_transaction_start(tdb); if (state.error != TDB_SUCCESS) { return state.error; } tmp_db = tdb_open("tmpdb", TDB_INTERNAL, O_RDWR|O_CREAT, 0, NULL); if (tmp_db == NULL) { state.error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR, __location__ " Failed to create tmp_db"); tdb_transaction_cancel(tdb); return tdb->last_error = state.error; } state.dest_db = tmp_db; if (tdb_traverse(tdb, repack_traverse, &state) < 0) { goto fail; } state.error = tdb_wipe_all(tdb); if (state.error != TDB_SUCCESS) { goto fail; } state.dest_db = tdb; if (tdb_traverse(tmp_db, repack_traverse, &state) < 0) { goto fail; } tdb_close(tmp_db); return tdb_transaction_commit(tdb); fail: tdb_transaction_cancel(tdb); tdb_close(tmp_db); return state.error; }