/*
   Trivial Database 2: fetch, store and misc routines.
   Copyright (C) Rusty Russell 2010

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "private.h"
#ifndef HAVE_LIBREPLACE
#include <ccan/asprintf/asprintf.h>
#include <stdarg.h>
#endif

/*
 * Rewrite the header of the used record at @off with new key and data
 * lengths.
 *
 * The room currently described by @rec (data length plus extra padding)
 * is sampled first, then set_header() is asked to fill @rec using
 * keylen + that room as its final argument.  If that succeeds the header
 * is written out at @off with ntdb_write_convert().
 *
 * Returns NTDB_SUCCESS, or the first error reported by set_header() or
 * ntdb_write_convert().
 */
static enum NTDB_ERROR update_rec_hdr(struct ntdb_context *ntdb,
				     ntdb_off_t off,
				     ntdb_len_t keylen,
				     ntdb_len_t datalen,
				     struct ntdb_used_record *rec)
{
	enum NTDB_ERROR err;
	/* Sample the room before set_header() is handed *rec. */
	uint64_t room = rec_data_length(rec) + rec_extra_padding(rec);

	err = set_header(ntdb, rec, NTDB_USED_MAGIC, keylen, datalen,
			 keylen + room);
	if (err != NTDB_SUCCESS) {
		return err;
	}

	return ntdb_write_convert(ntdb, off, rec, sizeof(*rec));
}

/*
 * Store @key/@dbuf in a freshly allocated record and hook it into the hash.
 *
 * @old_off:  offset of the record being replaced, or 0 if there is none.
 * @old_room: data length plus padding of the old record (only used when
 *            @old_off is non-zero, to size the region handed back via
 *            add_free_record()).
 * @growing:  passed straight through to alloc().
 *
 * With an old record, it is released first and the hash entry is then
 * switched to the new offset; without one, the new offset is added to the
 * hash.  Only after the hash has been updated are the key and data bytes
 * written behind the new record's header.  The sequence number is bumped
 * on success when NTDB_SEQNUM is set.
 *
 * Returns NTDB_SUCCESS or the first error encountered.
 */
static enum NTDB_ERROR replace_data(struct ntdb_context *ntdb,
				   struct hash_info *h,
				   NTDB_DATA key, NTDB_DATA dbuf,
				   ntdb_off_t old_off, ntdb_len_t old_room,
				   bool growing)
{
	enum NTDB_ERROR err;
	ntdb_off_t rec_off, cursor;

	/* Get space for the new record. */
	rec_off = alloc(ntdb, key.dsize, dbuf.dsize, NTDB_USED_MAGIC, growing);
	if (NTDB_OFF_IS_ERR(rec_off)) {
		return NTDB_OFF_TO_ERR(rec_off);
	}

	if (!old_off) {
		/* Brand new key. */
		err = add_to_hash(ntdb, h, rec_off);
	} else {
		/* The existing record was unsuitable: release it. */
		ntdb->stats.frees++;
		err = add_free_record(ntdb, old_off,
				      sizeof(struct ntdb_used_record)
				      + key.dsize + old_room,
				      NTDB_LOCK_WAIT, true);
		if (err == NTDB_SUCCESS) {
			err = replace_in_hash(ntdb, h, rec_off);
		}
	}
	if (err != NTDB_SUCCESS) {
		return err;
	}

	/* Key goes directly after the record header... */
	cursor = rec_off + sizeof(struct ntdb_used_record);
	err = ntdb->io->twrite(ntdb, cursor, key.dptr, key.dsize);
	if (err != NTDB_SUCCESS) {
		return err;
	}

	/* ...and the data directly after the key. */
	cursor += key.dsize;
	err = ntdb->io->twrite(ntdb, cursor, dbuf.dptr, dbuf.dsize);
	if (err != NTDB_SUCCESS) {
		return err;
	}

	if (ntdb->flags & NTDB_SEQNUM) {
		ntdb_inc_seqnum(ntdb);
	}
	return NTDB_SUCCESS;
}

/*
 * Write @dbuf at @off and, when @extra is non-zero, a single zero byte
 * directly after it.
 *
 * The sequence number is bumped whenever NTDB_SEQNUM is set, regardless
 * of whether the writes succeeded.
 *
 * Returns the result of the last write attempted.
 */
static enum NTDB_ERROR update_data(struct ntdb_context *ntdb,
				  ntdb_off_t off,
				  NTDB_DATA dbuf,
				  ntdb_len_t extra)
{
	enum NTDB_ERROR err = ntdb->io->twrite(ntdb, off,
					       dbuf.dptr, dbuf.dsize);

	if (extra && err == NTDB_SUCCESS) {
		/* Put a zero in; future versions may append other data. */
		err = ntdb->io->twrite(ntdb, off + dbuf.dsize, "", 1);
	}

	if (ntdb->flags & NTDB_SEQNUM) {
		ntdb_inc_seqnum(ntdb);
	}
	return err;
}

/*
 * Store @dbuf under @key.
 *
 * @flag selects the policy:
 *  - NTDB_INSERT: fail with NTDB_ERR_EXISTS if the key is already present.
 *  - NTDB_MODIFY: fail with NTDB_ERR_NOEXIST if the key is absent.
 *  - anything else: create or overwrite.
 *
 * When an existing record has enough room (data length plus padding) the
 * new data is written in place; otherwise a new record is allocated via
 * replace_data().
 *
 * Returns NTDB_SUCCESS or an error code.  The hash lock taken by
 * find_and_lock() is dropped on every path after it succeeded.
 */
_PUBLIC_ enum NTDB_ERROR ntdb_store(struct ntdb_context *ntdb,
			 NTDB_DATA key, NTDB_DATA dbuf, int flag)
{
	struct hash_info h;
	struct ntdb_used_record rec;
	ntdb_len_t old_room = 0;
	ntdb_off_t off;
	enum NTDB_ERROR ecode;

	off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
	if (NTDB_OFF_IS_ERR(off)) {
		return NTDB_OFF_TO_ERR(off);
	}

	/* From here on we hold the write lock on this hash bucket. */
	if (flag != NTDB_INSERT) {
		if (!off) {
			/* Modifying a record that doesn't exist must fail. */
			if (flag == NTDB_MODIFY) {
				ecode = NTDB_ERR_NOEXIST;
				goto out;
			}
		} else {
			old_room = rec_data_length(&rec)
				+ rec_extra_padding(&rec);
			if (dbuf.dsize <= old_room) {
				/* It fits: rewrite header and data in place. */
				ecode = update_rec_hdr(ntdb, off,
						       key.dsize, dbuf.dsize,
						       &rec);
				if (ecode == NTDB_SUCCESS) {
					ecode = update_data(ntdb,
							    off + sizeof(rec)
							    + key.dsize, dbuf,
							    old_room - dbuf.dsize);
				}
				goto out;
			}
		}
	} else if (off) {
		ecode = NTDB_ERR_EXISTS;
		goto out;
	}

	/* Not reusing an old record means the record is growing. */
	ecode = replace_data(ntdb, &h, key, dbuf, off, old_room, off);
out:
	ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
	return ecode;
}

/*
 * Append @dbuf to the data stored under @key, creating the record if the
 * key is not present.
 *
 * Three cases are handled:
 *  - no existing record: @dbuf is stored as-is through replace_data();
 *  - existing record whose padding can hold @dbuf: the header is updated
 *    and @dbuf is written directly after the old data (no allocation);
 *  - otherwise: old data and @dbuf are combined in a temporary buffer and
 *    stored in a new record through replace_data().
 *
 * Returns NTDB_SUCCESS or an error code.  The hash lock taken by
 * find_and_lock() is dropped on every path after it succeeded.
 */
_PUBLIC_ enum NTDB_ERROR ntdb_append(struct ntdb_context *ntdb,
			  NTDB_DATA key, NTDB_DATA dbuf)
{
	struct hash_info h;
	ntdb_off_t off;
	struct ntdb_used_record rec;
	ntdb_len_t old_room = 0, old_dlen;
	unsigned char *newdata;
	NTDB_DATA new_dbuf;
	enum NTDB_ERROR ecode;

	off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
	if (NTDB_OFF_IS_ERR(off)) {
		return NTDB_OFF_TO_ERR(off);
	}

	if (off) {
		old_dlen = rec_data_length(&rec);
		old_room = old_dlen + rec_extra_padding(&rec);

		/* Fast path: can append in place. */
		if (rec_extra_padding(&rec) >= dbuf.dsize) {
			ecode = update_rec_hdr(ntdb, off, key.dsize,
					       old_dlen + dbuf.dsize, &rec);
			if (ecode != NTDB_SUCCESS) {
				goto out;
			}

			/* Step past header, key and old data. */
			off += sizeof(rec) + key.dsize + old_dlen;
			/*
			 * rec was handed to update_rec_hdr() above, so this
			 * padding is read from the header as it stands after
			 * that call (presumably the padding left over once
			 * dbuf has been accounted for -- confirm against
			 * set_header()).
			 */
			ecode = update_data(ntdb, off, dbuf,
					    rec_extra_padding(&rec));
			/* Nothing was allocated, so skip the free below. */
			goto out;
		}

		/* Slow path. */
		/*
		 * NOTE(review): the buffer is sized with key.dsize included,
		 * although only old_dlen + dbuf.dsize bytes are filled below.
		 */
		newdata = ntdb->alloc_fn(ntdb, key.dsize + old_dlen + dbuf.dsize,
				     ntdb->alloc_data);
		if (!newdata) {
			ecode = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
					   "ntdb_append:"
					   " failed to allocate %zu bytes",
					   (size_t)(key.dsize + old_dlen
						    + dbuf.dsize));
			goto out;
		}
		/* Old data first, then the appended bytes right behind it. */
		ecode = ntdb->io->tread(ntdb, off + sizeof(rec) + key.dsize,
				       newdata, old_dlen);
		if (ecode != NTDB_SUCCESS) {
			goto out_free_newdata;
		}
		memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize);
		new_dbuf.dptr = newdata;
		new_dbuf.dsize = old_dlen + dbuf.dsize;
	} else {
		/* No existing record: store the caller's buffer unchanged. */
		newdata = NULL;
		new_dbuf = dbuf;
	}

	/* If they're using ntdb_append(), it implies they're growing record. */
	ecode = replace_data(ntdb, &h, key, new_dbuf, off, old_room, true);

out_free_newdata:
	/* newdata is NULL here when there was no existing record. */
	ntdb->free_fn(newdata, ntdb->alloc_data);
out:
	ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
	return ecode;
}

/*
 * Look up @key and return a copy of its data in @data.
 *
 * On success @data->dptr points to a buffer obtained from the context's
 * alloc_fn and @data->dsize holds the record's data length.
 *
 * Returns NTDB_SUCCESS, NTDB_ERR_NOEXIST if the key is not present,
 * NTDB_ERR_OOM if the buffer could not be allocated, or the error
 * reported by find_and_lock().
 */
_PUBLIC_ enum NTDB_ERROR ntdb_fetch(struct ntdb_context *ntdb, NTDB_DATA key,
				    NTDB_DATA *data)
{
	struct hash_info h;
	struct ntdb_used_record rec;
	const char *keyp;
	ntdb_off_t off;
	enum NTDB_ERROR ecode = NTDB_ERR_NOEXIST;

	off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, &keyp);
	if (NTDB_OFF_IS_ERR(off)) {
		return NTDB_OFF_TO_ERR(off);
	}

	if (off) {
		data->dsize = rec_data_length(&rec);
		data->dptr = ntdb->alloc_fn(ntdb, data->dsize, ntdb->alloc_data);
		if (unlikely(!data->dptr)) {
			ecode = NTDB_ERR_OOM;
		} else {
			/* The data sits immediately after the key bytes. */
			const char *valp = keyp + key.dsize;

			memcpy(data->dptr, valp, data->dsize);
			ecode = NTDB_SUCCESS;
		}
		ntdb_access_release(ntdb, keyp);
	}

	ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
	return ecode;
}

/*
 * Report whether @key is present in the database.
 *
 * Returns false both when the key is absent and when the lookup itself
 * fails.
 */
_PUBLIC_ bool ntdb_exists(struct ntdb_context *ntdb, NTDB_DATA key)
{
	struct hash_info h;
	struct ntdb_used_record rec;
	ntdb_off_t off;
	bool found;

	off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, NULL);
	if (NTDB_OFF_IS_ERR(off)) {
		return false;
	}

	/* A zero offset means "not found". */
	found = (off != 0);
	ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
	return found;
}

/*
 * Remove the record stored under @key.
 *
 * The entry is first removed from the hash; its space (header, key, data
 * and padding) is then handed to add_free_record().  Once the hash removal
 * has succeeded the sequence number is bumped when NTDB_SEQNUM is set,
 * whatever add_free_record() returned.
 *
 * Returns NTDB_SUCCESS, NTDB_ERR_NOEXIST if the key is not present, or
 * another error code.
 */
_PUBLIC_ enum NTDB_ERROR ntdb_delete(struct ntdb_context *ntdb, NTDB_DATA key)
{
	struct hash_info h;
	struct ntdb_used_record rec;
	ntdb_off_t off;
	enum NTDB_ERROR ecode;

	off = find_and_lock(ntdb, key, F_WRLCK, &h, &rec, NULL);
	if (NTDB_OFF_IS_ERR(off)) {
		return NTDB_OFF_TO_ERR(off);
	}

	if (off) {
		ecode = delete_from_hash(ntdb, &h);
		if (ecode == NTDB_SUCCESS) {
			/* Give the whole record back to the free list. */
			ntdb->stats.frees++;
			ecode = add_free_record(ntdb, off,
						sizeof(struct ntdb_used_record)
						+ rec_key_length(&rec)
						+ rec_data_length(&rec)
						+ rec_extra_padding(&rec),
						NTDB_LOCK_WAIT, true);

			if (ntdb->flags & NTDB_SEQNUM) {
				ntdb_inc_seqnum(ntdb);
			}
		}
	} else {
		ecode = NTDB_ERR_NOEXIST;
	}

	ntdb_unlock_hash(ntdb, h.h, F_WRLCK);
	return ecode;
}

/* Return the flag bits currently set on @ntdb. */
_PUBLIC_ unsigned int ntdb_get_flags(struct ntdb_context *ntdb)
{
	const unsigned int current = ntdb->flags;

	return current;
}

/* True when @ntdb has a transaction pointer set. */
static bool inside_transaction(const struct ntdb_context *ntdb)
{
	if (ntdb->transaction == NULL) {
		return false;
	}
	return true;
}

/*
 * Check whether NTDB_RDONLY may be toggled right now.
 *
 * Toggling is refused (and a usage error naming @caller is logged) while
 * a transaction is active.  Returns true when the change is allowed.
 */
static bool readonly_changable(struct ntdb_context *ntdb, const char *caller)
{
	if (!inside_transaction(ntdb)) {
		return true;
	}

	ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
		    "%s: can't change"
		    " NTDB_RDONLY inside transaction",
		    caller);
	return false;
}

/*
 * Set a single flag on @ntdb.
 *
 * Accepted values for @flag are NTDB_NOLOCK, NTDB_NOMMAP, NTDB_NOSYNC,
 * NTDB_SEQNUM, NTDB_ALLOW_NESTING and NTDB_RDONLY; anything else is
 * logged as a usage error.  Internal databases reject every flag change.
 */
_PUBLIC_ void ntdb_add_flag(struct ntdb_context *ntdb, unsigned flag)
{
	if (ntdb->flags & NTDB_INTERNAL) {
		ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
			    "ntdb_add_flag: internal db");
		return;
	}

	switch (flag) {
	case NTDB_NOLOCK:
	case NTDB_NOSYNC:
	case NTDB_SEQNUM:
	case NTDB_ALLOW_NESTING:
		/* Plain flags: setting the bit is all there is to do. */
		ntdb->flags |= flag;
		break;

	case NTDB_RDONLY:
		if (readonly_changable(ntdb, "ntdb_add_flag")) {
			ntdb->flags |= NTDB_RDONLY;
		}
		break;

	case NTDB_NOMMAP:
		/* Refused while direct accesses are outstanding. */
		if (ntdb->file->direct_count) {
			ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
				    "ntdb_add_flag: Can't get NTDB_NOMMAP from"
				    " ntdb_parse_record!");
			return;
		}
		ntdb->flags |= NTDB_NOMMAP;
#ifndef HAVE_INCOHERENT_MMAP
		ntdb_munmap(ntdb);
#endif
		break;

	default:
		ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
			    "ntdb_add_flag: Unknown flag %u", flag);
		break;
	}
}

/*
 * Clear a single flag on @ntdb.
 *
 * Accepted values for @flag are NTDB_NOLOCK, NTDB_NOMMAP, NTDB_NOSYNC,
 * NTDB_SEQNUM, NTDB_ALLOW_NESTING and NTDB_RDONLY; anything else is
 * logged as a usage error.  Internal databases reject every flag change,
 * and NTDB_RDONLY cannot be cleared on a database opened O_RDONLY.
 */
_PUBLIC_ void ntdb_remove_flag(struct ntdb_context *ntdb, unsigned flag)
{
	if (ntdb->flags & NTDB_INTERNAL) {
		ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
			    "ntdb_remove_flag: internal db");
		return;
	}

	switch (flag) {
	case NTDB_RDONLY:
		if ((ntdb->open_flags & O_ACCMODE) == O_RDONLY) {
			ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
				    "ntdb_remove_flag: can't"
				    " remove NTDB_RDONLY on ntdb"
				    " opened with O_RDONLY");
		} else if (readonly_changable(ntdb, "ntdb_remove_flag")) {
			ntdb->flags &= ~NTDB_RDONLY;
		}
		break;

	case NTDB_NOMMAP:
		ntdb->flags &= ~NTDB_NOMMAP;
#ifndef HAVE_INCOHERENT_MMAP
		/* If mmap incoherent, we were mmaping anyway. */
		ntdb_mmap(ntdb);
#endif
		break;

	case NTDB_NOLOCK:
		ntdb->flags &= ~NTDB_NOLOCK;
		break;

	case NTDB_NOSYNC:
		ntdb->flags &= ~NTDB_NOSYNC;
		break;

	case NTDB_SEQNUM:
		ntdb->flags &= ~NTDB_SEQNUM;
		break;

	case NTDB_ALLOW_NESTING:
		ntdb->flags &= ~NTDB_ALLOW_NESTING;
		break;

	default:
		ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
			    "ntdb_remove_flag: Unknown flag %u",
			    flag);
		break;
	}
}

/*
 * Map an NTDB error code to a static, human-readable string.
 *
 * Unrecognised codes yield "Invalid error code".
 */
_PUBLIC_ const char *ntdb_errorstr(enum NTDB_ERROR ecode)
{
	const char *text = "Invalid error code";

	/* Gcc warns if you miss a case in the switch, so use that. */
	switch (NTDB_ERR_TO_OFF(ecode)) {
	case NTDB_ERR_TO_OFF(NTDB_SUCCESS):
		text = "Success";
		break;
	case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT):
		text = "Corrupt database";
		break;
	case NTDB_ERR_TO_OFF(NTDB_ERR_IO):
		text = "IO Error";
		break;
	case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK):
		text = "Locking error";
		break;
	case NTDB_ERR_TO_OFF(NTDB_ERR_OOM):
		text = "Out of memory";
		break;
	case NTDB_ERR_TO_OFF(NTDB_ERR_EXISTS):
		text = "Record exists";
		break;
	case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL):
		text = "Invalid parameter";
		break;
	case NTDB_ERR_TO_OFF(NTDB_ERR_NOEXIST):
		text = "Record does not exist";
		break;
	case NTDB_ERR_TO_OFF(NTDB_ERR_RDONLY):
		text = "write not permitted";
		break;
	}
	return text;
}

/*
 * Format a message and hand it to the context's log function.
 *
 * @ecode: error code to report; it is also the return value, so callers
 *         can write "return ntdb_logerr(...)".
 * @level: log level passed through to log_fn.
 * @fmt:   printf-style format followed by its arguments.
 *
 * Nothing is done when no log_fn is installed.  If the message cannot be
 * formatted (vsnprintf() reports an error, or the buffer cannot be
 * allocated) the raw, unexpanded @fmt is logged instead so that the event
 * is not lost.  errno is preserved across the call.
 */
enum NTDB_ERROR COLD ntdb_logerr(struct ntdb_context *ntdb,
			       enum NTDB_ERROR ecode,
			       enum ntdb_log_level level,
			       const char *fmt, ...)
{
	char *message;
	va_list ap;
	int len;
	/* ntdb_open paths care about errno, so save it. */
	int saved_errno = errno;

	if (!ntdb->log_fn)
		return ecode;

	/* First pass: measure the formatted length. */
	va_start(ap, fmt);
	len = vsnprintf(NULL, 0, fmt, ap);
	va_end(ap);

	if (len < 0) {
		/*
		 * vsnprintf() failed.  Keeping the result in a size_t would
		 * wrap len + 1 to 0 and end up logging an unterminated
		 * buffer, so fall back to the raw format string.
		 */
		ntdb->log_fn(ntdb, level, ecode, fmt, ntdb->log_data);
		errno = saved_errno;
		return ecode;
	}

	message = ntdb->alloc_fn(ntdb, (size_t)len + 1, ntdb->alloc_data);
	if (!message) {
		ntdb->log_fn(ntdb, NTDB_LOG_ERROR, NTDB_ERR_OOM,
			    "out of memory formatting message:", ntdb->log_data);
		ntdb->log_fn(ntdb, level, ecode, fmt, ntdb->log_data);
	} else {
		/* Second pass: the va_list must be restarted before reuse. */
		va_start(ap, fmt);
		vsnprintf(message, (size_t)len + 1, fmt, ap);
		va_end(ap);
		ntdb->log_fn(ntdb, level, ecode, message, ntdb->log_data);
		ntdb->free_fn(message, ntdb->alloc_data);
	}
	errno = saved_errno;
	return ecode;
}

/*
 * Look up @key and pass the record to @parse without copying it.
 *
 * @parse receives the caller's @key, a descriptor for the record's data
 * and the opaque @data pointer; its return value becomes the result of
 * this function.  While @parse runs the database is temporarily marked
 * NTDB_RDONLY unless an all-record lock is held or NTDB_NOLOCK is set;
 * the previous flags are restored afterwards.
 *
 * Returns NTDB_ERR_NOEXIST if the key is not present, the error from
 * find_and_lock(), or whatever @parse returned.
 */
_PUBLIC_ enum NTDB_ERROR ntdb_parse_record_(struct ntdb_context *ntdb,
				 NTDB_DATA key,
				 enum NTDB_ERROR (*parse)(NTDB_DATA k,
							 NTDB_DATA d,
							 void *data),
				 void *data)
{
	struct hash_info h;
	struct ntdb_used_record rec;
	const char *keyp;
	ntdb_off_t off;
	enum NTDB_ERROR ecode = NTDB_ERR_NOEXIST;

	off = find_and_lock(ntdb, key, F_RDLCK, &h, &rec, &keyp);
	if (NTDB_OFF_IS_ERR(off)) {
		return NTDB_OFF_TO_ERR(off);
	}

	if (off) {
		const unsigned int saved_flags = ntdb->flags;
		NTDB_DATA value = ntdb_mkdata(keyp + key.dsize,
					      rec_data_length(&rec));
		bool may_write;

		/*
		 * Make sure they don't try to write db, since they
		 * have read lock!  They can if they've done
		 * ntdb_lockall(): if it was ntdb_lockall_read, that'll
		 * stop them doing a write operation anyway.
		 */
		may_write = ntdb->file->allrecord_lock.count
			|| (ntdb->flags & NTDB_NOLOCK);
		if (!may_write) {
			ntdb->flags |= NTDB_RDONLY;
		}

		ecode = parse(key, value, data);

		ntdb->flags = saved_flags;
		ntdb_access_release(ntdb, keyp);
	}

	ntdb_unlock_hash(ntdb, h.h, F_RDLCK);
	return ecode;
}

/* Return the name stored in @ntdb. */
_PUBLIC_ const char *ntdb_name(const struct ntdb_context *ntdb)
{
	const char *name = ntdb->name;

	return name;
}

/*
 * Read the sequence number field of the database header.
 *
 * The value comes straight from ntdb_read_off(); presumably an error from
 * that call is passed back unchanged in the return value (confirm against
 * ntdb_read_off()).
 */
_PUBLIC_ int64_t ntdb_get_seqnum(struct ntdb_context *ntdb)
{
	const size_t seqnum_pos = offsetof(struct ntdb_header, seqnum);

	return ntdb_read_off(ntdb, seqnum_pos);
}


/* Return the file descriptor held by @ntdb's file structure. */
_PUBLIC_ int ntdb_fd(const struct ntdb_context *ntdb)
{
	const int fd = ntdb->file->fd;

	return fd;
}

/* State shared between ntdb_repack() and its traverse callback. */
struct traverse_state {
	/*
	 * Result of the latest step: set by repack_traverse() from each
	 * ntdb_store(), and by ntdb_repack() for its own calls.
	 */
	enum NTDB_ERROR error;
	/* Database that repack_traverse() stores each record into. */
	struct ntdb_context *dest_db;
};

/*
  Traverse callback used while repacking: insert the visited record into
  state->dest_db and remember the outcome in state->error.

  Returns 0 if the store succeeded, -1 otherwise.
 */
static int repack_traverse(struct ntdb_context *ntdb, NTDB_DATA key, NTDB_DATA data,
			   struct traverse_state *state)
{
	enum NTDB_ERROR result;

	result = ntdb_store(state->dest_db, key, data, NTDB_INSERT);
	state->error = result;

	return (result == NTDB_SUCCESS) ? 0 : -1;
}

_PUBLIC_ enum NTDB_ERROR ntdb_repack(struct ntdb_context *ntdb)
{
	struct ntdb_context *tmp_db;
	struct traverse_state state;

	state.error = ntdb_transaction_start(ntdb);
	if (state.error != NTDB_SUCCESS) {
		return state.error;
	}

	tmp_db = ntdb_open("tmpdb", NTDB_INTERNAL, O_RDWR|O_CREAT, 0, NULL);
	if (tmp_db == NULL) {
		state.error = ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
					 __location__
					 " Failed to create tmp_db");
		ntdb_transaction_cancel(ntdb);
		return state.error;
	}

	state.dest_db = tmp_db;
	if (ntdb_traverse(ntdb, repack_traverse, &state) < 0) {
		goto fail;
	}

	state.error = ntdb_wipe_all(ntdb);
	if (state.error != NTDB_SUCCESS) {
		goto fail;
	}

	state.dest_db = ntdb;
	if (ntdb_traverse(tmp_db, repack_traverse, &state) < 0) {
		goto fail;
	}

	ntdb_close(tmp_db);
	return ntdb_transaction_commit(ntdb);

fail:
	ntdb_transaction_cancel(ntdb);
	ntdb_close(tmp_db);
	return state.error;
}