From 16cc345d4f84367e70e133200f7aa335c2aae8c6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 18 Jun 2012 22:30:26 +0930 Subject: TDB2: Goodbye TDB2, Hello NTDB. This renames everything from tdb2 to ntdb: importantly, we no longer use the tdb_ namespace, so you can link against both ntdb and tdb if you want to. This also enables building of standalone ntdb by the autobuild script. Signed-off-by: Rusty Russell --- lib/ntdb/io.c | 650 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 650 insertions(+) create mode 100644 lib/ntdb/io.c (limited to 'lib/ntdb/io.c') diff --git a/lib/ntdb/io.c b/lib/ntdb/io.c new file mode 100644 index 0000000000..4580520fa2 --- /dev/null +++ b/lib/ntdb/io.c @@ -0,0 +1,650 @@ + /* + Unix SMB/CIFS implementation. + + trivial database library + + Copyright (C) Andrew Tridgell 1999-2005 + Copyright (C) Paul `Rusty' Russell 2000 + Copyright (C) Jeremy Allison 2000-2003 + Copyright (C) Rusty Russell 2010 + + ** NOTE! The following LGPL license applies to the ntdb + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ +#include "private.h" +#include +#include + +void ntdb_munmap(struct ntdb_file *file) +{ + if (file->fd == -1) + return; + + if (file->map_ptr) { + munmap(file->map_ptr, file->map_size); + file->map_ptr = NULL; + } +} + +enum NTDB_ERROR ntdb_mmap(struct ntdb_context *ntdb) +{ + int mmap_flags; + + if (ntdb->flags & NTDB_INTERNAL) + return NTDB_SUCCESS; + +#ifndef HAVE_INCOHERENT_MMAP + if (ntdb->flags & NTDB_NOMMAP) + return NTDB_SUCCESS; +#endif + + if ((ntdb->open_flags & O_ACCMODE) == O_RDONLY) + mmap_flags = PROT_READ; + else + mmap_flags = PROT_READ | PROT_WRITE; + + /* size_t can be smaller than off_t. */ + if ((size_t)ntdb->file->map_size == ntdb->file->map_size) { + ntdb->file->map_ptr = mmap(NULL, ntdb->file->map_size, + mmap_flags, + MAP_SHARED, ntdb->file->fd, 0); + } else + ntdb->file->map_ptr = MAP_FAILED; + + /* + * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!! + */ + if (ntdb->file->map_ptr == MAP_FAILED) { + ntdb->file->map_ptr = NULL; +#ifdef HAVE_INCOHERENT_MMAP + /* Incoherent mmap means everyone must mmap! */ + return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_mmap failed for size %lld (%s)", + (long long)ntdb->file->map_size, + strerror(errno)); +#else + ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING, + "ntdb_mmap failed for size %lld (%s)", + (long long)ntdb->file->map_size, strerror(errno)); +#endif + } + return NTDB_SUCCESS; +} + +/* check for an out of bounds access - if it is out of bounds then + see if the database has been expanded by someone else and expand + if necessary + note that "len" is the minimum length needed for the db. + + If probe is true, len being too large isn't a failure. +*/ +static enum NTDB_ERROR ntdb_oob(struct ntdb_context *ntdb, + ntdb_off_t off, ntdb_len_t len, bool probe) +{ + struct stat st; + enum NTDB_ERROR ecode; + + /* We can't hold pointers during this: we could unmap! */ + assert(!ntdb->direct_access + || (ntdb->flags & NTDB_NOLOCK) + || ntdb_has_expansion_lock(ntdb)); + + if (len + off < len) { + if (probe) + return NTDB_SUCCESS; + + return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_oob off %llu len %llu wrap\n", + (long long)off, (long long)len); + } + + if (len + off <= ntdb->file->map_size) + return NTDB_SUCCESS; + if (ntdb->flags & NTDB_INTERNAL) { + if (probe) + return NTDB_SUCCESS; + + ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_oob len %lld beyond internal" + " malloc size %lld", + (long long)(off + len), + (long long)ntdb->file->map_size); + return NTDB_ERR_IO; + } + + ecode = ntdb_lock_expand(ntdb, F_RDLCK); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + if (fstat(ntdb->file->fd, &st) != 0) { + ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "Failed to fstat file: %s", strerror(errno)); + ntdb_unlock_expand(ntdb, F_RDLCK); + return NTDB_ERR_IO; + } + + ntdb_unlock_expand(ntdb, F_RDLCK); + + if (st.st_size < off + len) { + if (probe) + return NTDB_SUCCESS; + + ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_oob len %llu beyond eof at %llu", + (long long)(off + len), (long long)st.st_size); + return NTDB_ERR_IO; + } + + /* Unmap, update size, remap */ + ntdb_munmap(ntdb->file); + + ntdb->file->map_size = st.st_size; + return ntdb_mmap(ntdb); +} + +/* Endian conversion: we only ever deal with 8 byte quantities */ +void *ntdb_convert(const struct ntdb_context *ntdb, void *buf, ntdb_len_t size) +{ + assert(size % 8 == 0); + if (unlikely((ntdb->flags & NTDB_CONVERT)) && buf) { + uint64_t i, *p = (uint64_t *)buf; + for (i = 0; i < size / 8; i++) + p[i] = bswap_64(p[i]); + } + return buf; +} + +/* Return first non-zero offset in offset array, or end, or -ve error. */ +/* FIXME: Return the off? */ +uint64_t ntdb_find_nonzero_off(struct ntdb_context *ntdb, + ntdb_off_t base, uint64_t start, uint64_t end) +{ + uint64_t i; + const uint64_t *val; + + /* Zero vs non-zero is the same unconverted: minor optimization. */ + val = ntdb_access_read(ntdb, base + start * sizeof(ntdb_off_t), + (end - start) * sizeof(ntdb_off_t), false); + if (NTDB_PTR_IS_ERR(val)) { + return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(val)); + } + + for (i = 0; i < (end - start); i++) { + if (val[i]) + break; + } + ntdb_access_release(ntdb, val); + return start + i; +} + +/* Return first zero offset in num offset array, or num, or -ve error. */ +uint64_t ntdb_find_zero_off(struct ntdb_context *ntdb, ntdb_off_t off, + uint64_t num) +{ + uint64_t i; + const uint64_t *val; + + /* Zero vs non-zero is the same unconverted: minor optimization. */ + val = ntdb_access_read(ntdb, off, num * sizeof(ntdb_off_t), false); + if (NTDB_PTR_IS_ERR(val)) { + return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(val)); + } + + for (i = 0; i < num; i++) { + if (!val[i]) + break; + } + ntdb_access_release(ntdb, val); + return i; +} + +enum NTDB_ERROR zero_out(struct ntdb_context *ntdb, ntdb_off_t off, ntdb_len_t len) +{ + char buf[8192] = { 0 }; + void *p = ntdb->io->direct(ntdb, off, len, true); + enum NTDB_ERROR ecode = NTDB_SUCCESS; + + assert(!(ntdb->flags & NTDB_RDONLY)); + if (NTDB_PTR_IS_ERR(p)) { + return NTDB_PTR_ERR(p); + } + if (p) { + memset(p, 0, len); + return ecode; + } + while (len) { + unsigned todo = len < sizeof(buf) ? len : sizeof(buf); + ecode = ntdb->io->twrite(ntdb, off, buf, todo); + if (ecode != NTDB_SUCCESS) { + break; + } + len -= todo; + off += todo; + } + return ecode; +} + +ntdb_off_t ntdb_read_off(struct ntdb_context *ntdb, ntdb_off_t off) +{ + ntdb_off_t ret; + enum NTDB_ERROR ecode; + + if (likely(!(ntdb->flags & NTDB_CONVERT))) { + ntdb_off_t *p = ntdb->io->direct(ntdb, off, sizeof(*p), false); + if (NTDB_PTR_IS_ERR(p)) { + return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(p)); + } + if (p) + return *p; + } + + ecode = ntdb_read_convert(ntdb, off, &ret, sizeof(ret)); + if (ecode != NTDB_SUCCESS) { + return NTDB_ERR_TO_OFF(ecode); + } + return ret; +} + +/* write a lump of data at a specified offset */ +static enum NTDB_ERROR ntdb_write(struct ntdb_context *ntdb, ntdb_off_t off, + const void *buf, ntdb_len_t len) +{ + enum NTDB_ERROR ecode; + + if (ntdb->flags & NTDB_RDONLY) { + return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR, + "Write to read-only database"); + } + + ecode = ntdb->io->oob(ntdb, off, len, false); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + if (ntdb->file->map_ptr) { + memcpy(off + (char *)ntdb->file->map_ptr, buf, len); + } else { +#ifdef HAVE_INCOHERENT_MMAP + return NTDB_ERR_IO; +#else + ssize_t ret; + ret = pwrite(ntdb->file->fd, buf, len, off); + if (ret != len) { + /* This shouldn't happen: we avoid sparse files. */ + if (ret >= 0) + errno = ENOSPC; + + return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_write: %zi at %zu len=%zu (%s)", + ret, (size_t)off, (size_t)len, + strerror(errno)); + } +#endif + } + return NTDB_SUCCESS; +} + +/* read a lump of data at a specified offset */ +static enum NTDB_ERROR ntdb_read(struct ntdb_context *ntdb, ntdb_off_t off, + void *buf, ntdb_len_t len) +{ + enum NTDB_ERROR ecode; + + ecode = ntdb->io->oob(ntdb, off, len, false); + if (ecode != NTDB_SUCCESS) { + return ecode; + } + + if (ntdb->file->map_ptr) { + memcpy(buf, off + (char *)ntdb->file->map_ptr, len); + } else { +#ifdef HAVE_INCOHERENT_MMAP + return NTDB_ERR_IO; +#else + ssize_t r = pread(ntdb->file->fd, buf, len, off); + if (r != len) { + return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "ntdb_read failed with %zi at %zu " + "len=%zu (%s) map_size=%zu", + r, (size_t)off, (size_t)len, + strerror(errno), + (size_t)ntdb->file->map_size); + } +#endif + } + return NTDB_SUCCESS; +} + +enum NTDB_ERROR ntdb_write_convert(struct ntdb_context *ntdb, ntdb_off_t off, + const void *rec, size_t len) +{ + enum NTDB_ERROR ecode; + + if (unlikely((ntdb->flags & NTDB_CONVERT))) { + void *conv = malloc(len); + if (!conv) { + return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + "ntdb_write: no memory converting" + " %zu bytes", len); + } + memcpy(conv, rec, len); + ecode = ntdb->io->twrite(ntdb, off, + ntdb_convert(ntdb, conv, len), len); + free(conv); + } else { + ecode = ntdb->io->twrite(ntdb, off, rec, len); + } + return ecode; +} + +enum NTDB_ERROR ntdb_read_convert(struct ntdb_context *ntdb, ntdb_off_t off, + void *rec, size_t len) +{ + enum NTDB_ERROR ecode = ntdb->io->tread(ntdb, off, rec, len); + ntdb_convert(ntdb, rec, len); + return ecode; +} + +enum NTDB_ERROR ntdb_write_off(struct ntdb_context *ntdb, + ntdb_off_t off, ntdb_off_t val) +{ + if (ntdb->flags & NTDB_RDONLY) { + return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR, + "Write to read-only database"); + } + + if (likely(!(ntdb->flags & NTDB_CONVERT))) { + ntdb_off_t *p = ntdb->io->direct(ntdb, off, sizeof(*p), true); + if (NTDB_PTR_IS_ERR(p)) { + return NTDB_PTR_ERR(p); + } + if (p) { + *p = val; + return NTDB_SUCCESS; + } + } + return ntdb_write_convert(ntdb, off, &val, sizeof(val)); +} + +static void *_ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset, + ntdb_len_t len, unsigned int prefix) +{ + unsigned char *buf; + enum NTDB_ERROR ecode; + + /* some systems don't like zero length malloc */ + buf = malloc(prefix + len ? prefix + len : 1); + if (!buf) { + ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_USE_ERROR, + "ntdb_alloc_read malloc failed len=%zu", + (size_t)(prefix + len)); + return NTDB_ERR_PTR(NTDB_ERR_OOM); + } else { + ecode = ntdb->io->tread(ntdb, offset, buf+prefix, len); + if (unlikely(ecode != NTDB_SUCCESS)) { + free(buf); + return NTDB_ERR_PTR(ecode); + } + } + return buf; +} + +/* read a lump of data, allocating the space for it */ +void *ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset, ntdb_len_t len) +{ + return _ntdb_alloc_read(ntdb, offset, len, 0); +} + +static enum NTDB_ERROR fill(struct ntdb_context *ntdb, + const void *buf, size_t size, + ntdb_off_t off, ntdb_len_t len) +{ + while (len) { + size_t n = len > size ? size : len; + ssize_t ret = pwrite(ntdb->file->fd, buf, n, off); + if (ret != n) { + if (ret >= 0) + errno = ENOSPC; + + return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR, + "fill failed:" + " %zi at %zu len=%zu (%s)", + ret, (size_t)off, (size_t)len, + strerror(errno)); + } + len -= n; + off += n; + } + return NTDB_SUCCESS; +} + +/* expand a file. we prefer to use ftruncate, as that is what posix + says to use for mmap expansion */ +static enum NTDB_ERROR ntdb_expand_file(struct ntdb_context *ntdb, + ntdb_len_t addition) +{ + char buf[8192]; + enum NTDB_ERROR ecode; + + if (ntdb->flags & NTDB_RDONLY) { + return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR, + "Expand on read-only database"); + } + + if (ntdb->flags & NTDB_INTERNAL) { + char *new = realloc(ntdb->file->map_ptr, + ntdb->file->map_size + addition); + if (!new) { + return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR, + "No memory to expand database"); + } + ntdb->file->map_ptr = new; + ntdb->file->map_size += addition; + return NTDB_SUCCESS; + } else { + /* Unmap before trying to write; old NTDB claimed OpenBSD had + * problem with this otherwise. */ + ntdb_munmap(ntdb->file); + + /* If this fails, we try to fill anyway. */ + if (ftruncate(ntdb->file->fd, ntdb->file->map_size + addition)) + ; + + /* now fill the file with something. This ensures that the + file isn't sparse, which would be very bad if we ran out of + disk. This must be done with write, not via mmap */ + memset(buf, 0x43, sizeof(buf)); + ecode = fill(ntdb, buf, sizeof(buf), ntdb->file->map_size, + addition); + if (ecode != NTDB_SUCCESS) + return ecode; + ntdb->file->map_size += addition; + return ntdb_mmap(ntdb); + } +} + +const void *ntdb_access_read(struct ntdb_context *ntdb, + ntdb_off_t off, ntdb_len_t len, bool convert) +{ + void *ret = NULL; + + if (likely(!(ntdb->flags & NTDB_CONVERT))) { + ret = ntdb->io->direct(ntdb, off, len, false); + + if (NTDB_PTR_IS_ERR(ret)) { + return ret; + } + } + if (!ret) { + struct ntdb_access_hdr *hdr; + hdr = _ntdb_alloc_read(ntdb, off, len, sizeof(*hdr)); + if (NTDB_PTR_IS_ERR(hdr)) { + return hdr; + } + hdr->next = ntdb->access; + ntdb->access = hdr; + ret = hdr + 1; + if (convert) { + ntdb_convert(ntdb, (void *)ret, len); + } + } else + ntdb->direct_access++; + + return ret; +} + +void *ntdb_access_write(struct ntdb_context *ntdb, + ntdb_off_t off, ntdb_len_t len, bool convert) +{ + void *ret = NULL; + + if (ntdb->flags & NTDB_RDONLY) { + ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR, + "Write to read-only database"); + return NTDB_ERR_PTR(NTDB_ERR_RDONLY); + } + + if (likely(!(ntdb->flags & NTDB_CONVERT))) { + ret = ntdb->io->direct(ntdb, off, len, true); + + if (NTDB_PTR_IS_ERR(ret)) { + return ret; + } + } + + if (!ret) { + struct ntdb_access_hdr *hdr; + hdr = _ntdb_alloc_read(ntdb, off, len, sizeof(*hdr)); + if (NTDB_PTR_IS_ERR(hdr)) { + return hdr; + } + hdr->next = ntdb->access; + ntdb->access = hdr; + hdr->off = off; + hdr->len = len; + hdr->convert = convert; + ret = hdr + 1; + if (convert) + ntdb_convert(ntdb, (void *)ret, len); + } else + ntdb->direct_access++; + + return ret; +} + +static struct ntdb_access_hdr **find_hdr(struct ntdb_context *ntdb, const void *p) +{ + struct ntdb_access_hdr **hp; + + for (hp = &ntdb->access; *hp; hp = &(*hp)->next) { + if (*hp + 1 == p) + return hp; + } + return NULL; +} + +void ntdb_access_release(struct ntdb_context *ntdb, const void *p) +{ + struct ntdb_access_hdr *hdr, **hp = find_hdr(ntdb, p); + + if (hp) { + hdr = *hp; + *hp = hdr->next; + free(hdr); + } else + ntdb->direct_access--; +} + +enum NTDB_ERROR ntdb_access_commit(struct ntdb_context *ntdb, void *p) +{ + struct ntdb_access_hdr *hdr, **hp = find_hdr(ntdb, p); + enum NTDB_ERROR ecode; + + if (hp) { + hdr = *hp; + if (hdr->convert) + ecode = ntdb_write_convert(ntdb, hdr->off, p, hdr->len); + else + ecode = ntdb_write(ntdb, hdr->off, p, hdr->len); + *hp = hdr->next; + free(hdr); + } else { + ntdb->direct_access--; + ecode = NTDB_SUCCESS; + } + + return ecode; +} + +static void *ntdb_direct(struct ntdb_context *ntdb, ntdb_off_t off, size_t len, + bool write_mode) +{ + enum NTDB_ERROR ecode; + + if (unlikely(!ntdb->file->map_ptr)) + return NULL; + + ecode = ntdb_oob(ntdb, off, len, false); + if (unlikely(ecode != NTDB_SUCCESS)) + return NTDB_ERR_PTR(ecode); + return (char *)ntdb->file->map_ptr + off; +} + +void ntdb_inc_seqnum(struct ntdb_context *ntdb) +{ + ntdb_off_t seq; + + if (likely(!(ntdb->flags & NTDB_CONVERT))) { + int64_t *direct; + + direct = ntdb->io->direct(ntdb, + offsetof(struct ntdb_header, seqnum), + sizeof(*direct), true); + if (likely(direct)) { + /* Don't let it go negative, even briefly */ + if (unlikely((*direct) + 1) < 0) + *direct = 0; + (*direct)++; + return; + } + } + + seq = ntdb_read_off(ntdb, offsetof(struct ntdb_header, seqnum)); + if (!NTDB_OFF_IS_ERR(seq)) { + seq++; + if (unlikely((int64_t)seq < 0)) + seq = 0; + ntdb_write_off(ntdb, offsetof(struct ntdb_header, seqnum), seq); + } +} + +static const struct ntdb_methods io_methods = { + ntdb_read, + ntdb_write, + ntdb_oob, + ntdb_expand_file, + ntdb_direct, +}; + +/* + initialise the default methods table +*/ +void ntdb_io_init(struct ntdb_context *ntdb) +{ + ntdb->io = &io_methods; +} -- cgit