/* Unix SMB/CIFS implementation. trivial database library Copyright (C) Andrew Tridgell 1999-2005 Copyright (C) Paul `Rusty' Russell 2000 Copyright (C) Jeremy Allison 2000-2003 Copyright (C) Rusty Russell 2010 ** NOTE! The following LGPL license applies to the tdb ** library. This does NOT imply that all of Samba is released ** under the LGPL This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, see . */ #include "private.h" #include #include void tdb_munmap(struct tdb_file *file) { if (file->fd == -1) return; if (file->map_ptr) { munmap(file->map_ptr, file->map_size); file->map_ptr = NULL; } } enum TDB_ERROR tdb_mmap(struct tdb_context *tdb) { int mmap_flags; if (tdb->flags & TDB_INTERNAL) return TDB_SUCCESS; #ifndef HAVE_INCOHERENT_MMAP if (tdb->flags & TDB_NOMMAP) return TDB_SUCCESS; #endif if ((tdb->open_flags & O_ACCMODE) == O_RDONLY) mmap_flags = PROT_READ; else mmap_flags = PROT_READ | PROT_WRITE; /* size_t can be smaller than off_t. */ if ((size_t)tdb->file->map_size == tdb->file->map_size) { tdb->file->map_ptr = mmap(NULL, tdb->file->map_size, mmap_flags, MAP_SHARED, tdb->file->fd, 0); } else tdb->file->map_ptr = MAP_FAILED; /* * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!! */ if (tdb->file->map_ptr == MAP_FAILED) { tdb->file->map_ptr = NULL; #ifdef HAVE_INCOHERENT_MMAP /* Incoherent mmap means everyone must mmap! */ return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, "tdb_mmap failed for size %lld (%s)", (long long)tdb->file->map_size, strerror(errno)); #else tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING, "tdb_mmap failed for size %lld (%s)", (long long)tdb->file->map_size, strerror(errno)); #endif } return TDB_SUCCESS; } /* check for an out of bounds access - if it is out of bounds then see if the database has been expanded by someone else and expand if necessary note that "len" is the minimum length needed for the db. If probe is true, len being too large isn't a failure. */ static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len, bool probe) { struct stat st; enum TDB_ERROR ecode; /* We can't hold pointers during this: we could unmap! */ assert(!tdb->tdb2.direct_access || (tdb->flags & TDB_NOLOCK) || tdb_has_expansion_lock(tdb)); if (len + off < len) { if (probe) return TDB_SUCCESS; return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, "tdb_oob off %llu len %llu wrap\n", (long long)off, (long long)len); } if (len + off <= tdb->file->map_size) return TDB_SUCCESS; if (tdb->flags & TDB_INTERNAL) { if (probe) return TDB_SUCCESS; tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, "tdb_oob len %lld beyond internal" " malloc size %lld", (long long)(off + len), (long long)tdb->file->map_size); return TDB_ERR_IO; } ecode = tdb_lock_expand(tdb, F_RDLCK); if (ecode != TDB_SUCCESS) { return ecode; } if (fstat(tdb->file->fd, &st) != 0) { tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, "Failed to fstat file: %s", strerror(errno)); tdb_unlock_expand(tdb, F_RDLCK); return TDB_ERR_IO; } tdb_unlock_expand(tdb, F_RDLCK); if (st.st_size < off + len) { if (probe) return TDB_SUCCESS; tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, "tdb_oob len %llu beyond eof at %llu", (long long)(off + len), (long long)st.st_size); return TDB_ERR_IO; } /* Unmap, update size, remap */ tdb_munmap(tdb->file); tdb->file->map_size = st.st_size; return tdb_mmap(tdb); } /* Endian conversion: we only ever deal with 8 byte quantities */ void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size) { assert(size % 8 == 0); if (unlikely((tdb->flags & TDB_CONVERT)) && buf) { uint64_t i, *p = (uint64_t *)buf; for (i = 0; i < size / 8; i++) p[i] = bswap_64(p[i]); } return buf; } /* Return first non-zero offset in offset array, or end, or -ve error. */ /* FIXME: Return the off? */ uint64_t tdb_find_nonzero_off(struct tdb_context *tdb, tdb_off_t base, uint64_t start, uint64_t end) { uint64_t i; const uint64_t *val; /* Zero vs non-zero is the same unconverted: minor optimization. */ val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t), (end - start) * sizeof(tdb_off_t), false); if (TDB_PTR_IS_ERR(val)) { return TDB_ERR_TO_OFF(TDB_PTR_ERR(val)); } for (i = 0; i < (end - start); i++) { if (val[i]) break; } tdb_access_release(tdb, val); return start + i; } /* Return first zero offset in num offset array, or num, or -ve error. */ uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off, uint64_t num) { uint64_t i; const uint64_t *val; /* Zero vs non-zero is the same unconverted: minor optimization. */ val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false); if (TDB_PTR_IS_ERR(val)) { return TDB_ERR_TO_OFF(TDB_PTR_ERR(val)); } for (i = 0; i < num; i++) { if (!val[i]) break; } tdb_access_release(tdb, val); return i; } enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len) { char buf[8192] = { 0 }; void *p = tdb->tdb2.io->direct(tdb, off, len, true); enum TDB_ERROR ecode = TDB_SUCCESS; assert(!(tdb->flags & TDB_RDONLY)); if (TDB_PTR_IS_ERR(p)) { return TDB_PTR_ERR(p); } if (p) { memset(p, 0, len); return ecode; } while (len) { unsigned todo = len < sizeof(buf) ? len : sizeof(buf); ecode = tdb->tdb2.io->twrite(tdb, off, buf, todo); if (ecode != TDB_SUCCESS) { break; } len -= todo; off += todo; } return ecode; } tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off) { tdb_off_t ret; enum TDB_ERROR ecode; if (likely(!(tdb->flags & TDB_CONVERT))) { tdb_off_t *p = tdb->tdb2.io->direct(tdb, off, sizeof(*p), false); if (TDB_PTR_IS_ERR(p)) { return TDB_ERR_TO_OFF(TDB_PTR_ERR(p)); } if (p) return *p; } ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret)); if (ecode != TDB_SUCCESS) { return TDB_ERR_TO_OFF(ecode); } return ret; } /* write a lump of data at a specified offset */ static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off, const void *buf, tdb_len_t len) { enum TDB_ERROR ecode; if (tdb->flags & TDB_RDONLY) { return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR, "Write to read-only database"); } ecode = tdb->tdb2.io->oob(tdb, off, len, false); if (ecode != TDB_SUCCESS) { return ecode; } if (tdb->file->map_ptr) { memcpy(off + (char *)tdb->file->map_ptr, buf, len); } else { #ifdef HAVE_INCOHERENT_MMAP return TDB_ERR_IO; #else ssize_t ret; ret = pwrite(tdb->file->fd, buf, len, off); if (ret != len) { /* This shouldn't happen: we avoid sparse files. */ if (ret >= 0) errno = ENOSPC; return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, "tdb_write: %zi at %zu len=%zu (%s)", ret, (size_t)off, (size_t)len, strerror(errno)); } #endif } return TDB_SUCCESS; } /* read a lump of data at a specified offset */ static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, tdb_len_t len) { enum TDB_ERROR ecode; ecode = tdb->tdb2.io->oob(tdb, off, len, false); if (ecode != TDB_SUCCESS) { return ecode; } if (tdb->file->map_ptr) { memcpy(buf, off + (char *)tdb->file->map_ptr, len); } else { #ifdef HAVE_INCOHERENT_MMAP return TDB_ERR_IO; #else ssize_t r = pread(tdb->file->fd, buf, len, off); if (r != len) { return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, "tdb_read failed with %zi at %zu " "len=%zu (%s) map_size=%zu", r, (size_t)off, (size_t)len, strerror(errno), (size_t)tdb->file->map_size); } #endif } return TDB_SUCCESS; } enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off, const void *rec, size_t len) { enum TDB_ERROR ecode; if (unlikely((tdb->flags & TDB_CONVERT))) { void *conv = malloc(len); if (!conv) { return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR, "tdb_write: no memory converting" " %zu bytes", len); } memcpy(conv, rec, len); ecode = tdb->tdb2.io->twrite(tdb, off, tdb_convert(tdb, conv, len), len); free(conv); } else { ecode = tdb->tdb2.io->twrite(tdb, off, rec, len); } return ecode; } enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off, void *rec, size_t len) { enum TDB_ERROR ecode = tdb->tdb2.io->tread(tdb, off, rec, len); tdb_convert(tdb, rec, len); return ecode; } enum TDB_ERROR tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val) { if (tdb->flags & TDB_RDONLY) { return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR, "Write to read-only database"); } if (likely(!(tdb->flags & TDB_CONVERT))) { tdb_off_t *p = tdb->tdb2.io->direct(tdb, off, sizeof(*p), true); if (TDB_PTR_IS_ERR(p)) { return TDB_PTR_ERR(p); } if (p) { *p = val; return TDB_SUCCESS; } } return tdb_write_convert(tdb, off, &val, sizeof(val)); } static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len, unsigned int prefix) { unsigned char *buf; enum TDB_ERROR ecode; /* some systems don't like zero length malloc */ buf = malloc(prefix + len ? prefix + len : 1); if (!buf) { tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR, "tdb_alloc_read malloc failed len=%zu", (size_t)(prefix + len)); return TDB_ERR_PTR(TDB_ERR_OOM); } else { ecode = tdb->tdb2.io->tread(tdb, offset, buf+prefix, len); if (unlikely(ecode != TDB_SUCCESS)) { free(buf); return TDB_ERR_PTR(ecode); } } return buf; } /* read a lump of data, allocating the space for it */ void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len) { return _tdb_alloc_read(tdb, offset, len, 0); } static enum TDB_ERROR fill(struct tdb_context *tdb, const void *buf, size_t size, tdb_off_t off, tdb_len_t len) { while (len) { size_t n = len > size ? size : len; ssize_t ret = pwrite(tdb->file->fd, buf, n, off); if (ret != n) { if (ret >= 0) errno = ENOSPC; return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, "fill failed:" " %zi at %zu len=%zu (%s)", ret, (size_t)off, (size_t)len, strerror(errno)); } len -= n; off += n; } return TDB_SUCCESS; } /* expand a file. we prefer to use ftruncate, as that is what posix says to use for mmap expansion */ static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb, tdb_len_t addition) { char buf[8192]; enum TDB_ERROR ecode; if (tdb->flags & TDB_RDONLY) { return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR, "Expand on read-only database"); } if (tdb->flags & TDB_INTERNAL) { char *new = realloc(tdb->file->map_ptr, tdb->file->map_size + addition); if (!new) { return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR, "No memory to expand database"); } tdb->file->map_ptr = new; tdb->file->map_size += addition; return TDB_SUCCESS; } else { /* Unmap before trying to write; old TDB claimed OpenBSD had * problem with this otherwise. */ tdb_munmap(tdb->file); /* If this fails, we try to fill anyway. */ if (ftruncate(tdb->file->fd, tdb->file->map_size + addition)) ; /* now fill the file with something. This ensures that the file isn't sparse, which would be very bad if we ran out of disk. This must be done with write, not via mmap */ memset(buf, 0x43, sizeof(buf)); ecode = fill(tdb, buf, sizeof(buf), tdb->file->map_size, addition); if (ecode != TDB_SUCCESS) return ecode; tdb->file->map_size += addition; return tdb_mmap(tdb); } } const void *tdb_access_read(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len, bool convert) { void *ret = NULL; if (likely(!(tdb->flags & TDB_CONVERT))) { ret = tdb->tdb2.io->direct(tdb, off, len, false); if (TDB_PTR_IS_ERR(ret)) { return ret; } } if (!ret) { struct tdb_access_hdr *hdr; hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr)); if (TDB_PTR_IS_ERR(hdr)) { return hdr; } hdr->next = tdb->tdb2.access; tdb->tdb2.access = hdr; ret = hdr + 1; if (convert) { tdb_convert(tdb, (void *)ret, len); } } else tdb->tdb2.direct_access++; return ret; } void *tdb_access_write(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len, bool convert) { void *ret = NULL; if (tdb->flags & TDB_RDONLY) { tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR, "Write to read-only database"); return TDB_ERR_PTR(TDB_ERR_RDONLY); } if (likely(!(tdb->flags & TDB_CONVERT))) { ret = tdb->tdb2.io->direct(tdb, off, len, true); if (TDB_PTR_IS_ERR(ret)) { return ret; } } if (!ret) { struct tdb_access_hdr *hdr; hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr)); if (TDB_PTR_IS_ERR(hdr)) { return hdr; } hdr->next = tdb->tdb2.access; tdb->tdb2.access = hdr; hdr->off = off; hdr->len = len; hdr->convert = convert; ret = hdr + 1; if (convert) tdb_convert(tdb, (void *)ret, len); } else tdb->tdb2.direct_access++; return ret; } static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p) { struct tdb_access_hdr **hp; for (hp = &tdb->tdb2.access; *hp; hp = &(*hp)->next) { if (*hp + 1 == p) return hp; } return NULL; } void tdb_access_release(struct tdb_context *tdb, const void *p) { struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p); if (hp) { hdr = *hp; *hp = hdr->next; free(hdr); } else tdb->tdb2.direct_access--; } enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p) { struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p); enum TDB_ERROR ecode; if (hp) { hdr = *hp; if (hdr->convert) ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len); else ecode = tdb_write(tdb, hdr->off, p, hdr->len); *hp = hdr->next; free(hdr); } else { tdb->tdb2.direct_access--; ecode = TDB_SUCCESS; } return ecode; } static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len, bool write_mode) { enum TDB_ERROR ecode; if (unlikely(!tdb->file->map_ptr)) return NULL; ecode = tdb_oob(tdb, off, len, false); if (unlikely(ecode != TDB_SUCCESS)) return TDB_ERR_PTR(ecode); return (char *)tdb->file->map_ptr + off; } void tdb_inc_seqnum(struct tdb_context *tdb) { tdb_off_t seq; if (tdb->flags & TDB_VERSION1) { tdb1_increment_seqnum_nonblock(tdb); return; } if (likely(!(tdb->flags & TDB_CONVERT))) { int64_t *direct; direct = tdb->tdb2.io->direct(tdb, offsetof(struct tdb_header, seqnum), sizeof(*direct), true); if (likely(direct)) { /* Don't let it go negative, even briefly */ if (unlikely((*direct) + 1) < 0) *direct = 0; (*direct)++; return; } } seq = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum)); if (!TDB_OFF_IS_ERR(seq)) { seq++; if (unlikely((int64_t)seq < 0)) seq = 0; tdb_write_off(tdb, offsetof(struct tdb_header, seqnum), seq); } } static const struct tdb_methods io_methods = { tdb_read, tdb_write, tdb_oob, tdb_expand_file, tdb_direct, }; /* initialise the default methods table */ void tdb_io_init(struct tdb_context *tdb) { tdb->tdb2.io = &io_methods; }