diff options
Diffstat (limited to 'source3/lib')
26 files changed, 781 insertions, 324 deletions
diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c index 8a00b235cc..eeff805459 100644 --- a/source3/lib/charcnv.c +++ b/source3/lib/charcnv.c @@ -614,10 +614,16 @@ size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to, out: destlen = destlen - o_len; - if (ctx) { - ob = (char *)TALLOC_REALLOC(ctx,ob,destlen); - } else { - ob = (char *)SMB_REALLOC(ob,destlen); + /* Don't shrink unless we're reclaiming a lot of + * space. This is in the hot codepath and these + * reallocs *cost*. JRA. + */ + if (o_len > 1024) { + if (ctx) { + ob = (char *)TALLOC_REALLOC(ctx,ob,destlen); + } else { + ob = (char *)SMB_REALLOC(ob,destlen); + } } if (destlen && !ob) { @@ -778,7 +784,7 @@ char *strdup_upper(const char *s) while (*p) { if (*p & 0x80) break; - *q++ = toupper_ascii(*p); + *q++ = toupper_ascii_fast(*p); p++; } @@ -844,7 +850,7 @@ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s) while (*p) { if (*p & 0x80) break; - *q++ = toupper_ascii(*p); + *q++ = toupper_ascii_fast(*p); p++; } diff --git a/source3/lib/dbwrap_tdb.c b/source3/lib/dbwrap_tdb.c index 83a0d111aa..710e45de6b 100644 --- a/source3/lib/dbwrap_tdb.c +++ b/source3/lib/dbwrap_tdb.c @@ -160,8 +160,7 @@ static int db_tdb_fetch(struct db_context *db, TALLOC_CTX *mem_ctx, state.mem_ctx = mem_ctx; state.result = 0; - state.data.dptr = NULL; - state.data.dsize = 0; + state.data = tdb_null; tdb_parse_record(ctx->wtdb->tdb, key, db_tdb_fetch_parse, &state); diff --git a/source3/lib/netapi/examples/getdc/getdc.c b/source3/lib/netapi/examples/getdc/getdc.c index 4f5c5332d5..cdd4d0b3b4 100644 --- a/source3/lib/netapi/examples/getdc/getdc.c +++ b/source3/lib/netapi/examples/getdc/getdc.c @@ -46,7 +46,7 @@ int main(int argc, char **argv) status = NetGetDCName(argv[1], argv[2], &buffer); if (status != 0) { - printf("GetDcName failed with: %s\n", libnetapi_errstr(ctx, status)); + printf("GetDcName failed with: %s\n", libnetapi_errstr(status)); } else { printf("%s\n", (char *)buffer); } diff --git a/source3/lib/netapi/examples/netdomjoin-gui/netdomjoin-gui.c b/source3/lib/netapi/examples/netdomjoin-gui/netdomjoin-gui.c index 4a3588e9ab..9dc2a18138 100644 --- a/source3/lib/netapi/examples/netdomjoin-gui/netdomjoin-gui.c +++ b/source3/lib/netapi/examples/netdomjoin-gui/netdomjoin-gui.c @@ -147,13 +147,13 @@ static void callback_apply_description_change(GtkWidget *widget, status = NetServerSetInfo(NULL, 1005, (uint8_t *)&info1005, &parm_err); if (status) { debug("NetServerSetInfo failed with: %s\n", - libnetapi_errstr(state->ctx, status)); + libnetapi_errstr(status)); dialog = gtk_message_dialog_new(GTK_WINDOW(state->window_main), GTK_DIALOG_DESTROY_WITH_PARENT, GTK_MESSAGE_ERROR, GTK_BUTTONS_OK, "Failed to change computer description: %s.", - libnetapi_errstr(state->ctx, status)); + libnetapi_errstr(status)); g_signal_connect_swapped(dialog, "response", G_CALLBACK(gtk_widget_destroy), dialog); @@ -439,7 +439,7 @@ static void callback_do_join(GtkWidget *widget, state->password, unjoin_flags); if (status != 0) { - err_str = libnetapi_errstr(state->ctx, status); + err_str = libnetapi_errstr(status); g_print("callback_do_join: failed to unjoin (%s)\n", err_str); @@ -463,7 +463,7 @@ static void callback_do_join(GtkWidget *widget, state->password, join_flags); if (status != 0) { - err_str = libnetapi_errstr(state->ctx, status); + err_str = libnetapi_errstr(status); g_print("callback_do_join: failed to join (%s)\n", err_str); dialog = gtk_message_dialog_new(GTK_WINDOW(state->window_parent), diff --git a/source3/lib/netapi/examples/netdomjoin/netdomjoin.c b/source3/lib/netapi/examples/netdomjoin/netdomjoin.c index a0ac0b1e56..29f66a17a2 100644 --- a/source3/lib/netapi/examples/netdomjoin/netdomjoin.c +++ b/source3/lib/netapi/examples/netdomjoin/netdomjoin.c @@ -105,9 +105,9 @@ int main(int argc, char **argv) join_flags); if (status != 0) { const char *errstr = NULL; - errstr = libnetapi_get_error_string(ctx); + errstr = libnetapi_get_error_string(ctx, status); if (!errstr) { - errstr = libnetapi_errstr(ctx, status); + errstr = libnetapi_errstr(status); } printf("Join failed with: %s\n", errstr); } else { diff --git a/source3/lib/netapi/joindomain.c b/source3/lib/netapi/joindomain.c index e4fb63eebb..b268e41a2a 100644 --- a/source3/lib/netapi/joindomain.c +++ b/source3/lib/netapi/joindomain.c @@ -49,7 +49,7 @@ static WERROR NetJoinDomainLocal(struct libnetapi_ctx *mem_ctx, uint32_t flags = DS_DIRECTORY_SERVICE_REQUIRED | DS_WRITABLE_REQUIRED | DS_RETURN_DNS_NAME; - status = DsGetDcName(mem_ctx, NULL, domain_name, + status = dsgetdcname(mem_ctx, NULL, domain_name, NULL, NULL, flags, &info); if (!NT_STATUS_IS_OK(status)) { return ntstatus_to_werror(status); @@ -244,7 +244,7 @@ static WERROR NetUnjoinDomainLocal(struct libnetapi_ctx *mem_ctx, } else { domain = lp_workgroup(); } - status = DsGetDcName(mem_ctx, NULL, domain, + status = dsgetdcname(mem_ctx, NULL, domain, NULL, NULL, flags, &info); if (!NT_STATUS_IS_OK(status)) { return ntstatus_to_werror(status); diff --git a/source3/lib/netapi/netapi.c b/source3/lib/netapi/netapi.c index d4cb3a9fe2..ce00054e6e 100644 --- a/source3/lib/netapi/netapi.c +++ b/source3/lib/netapi/netapi.c @@ -192,8 +192,7 @@ NET_API_STATUS libnetapi_set_workgroup(struct libnetapi_ctx *ctx, /**************************************************************** ****************************************************************/ -const char *libnetapi_errstr(struct libnetapi_ctx *ctx, - NET_API_STATUS status) +const char *libnetapi_errstr(NET_API_STATUS status) { if (status & 0xc0000000) { return get_friendly_nt_error_msg(NT_STATUS(status)); @@ -220,9 +219,23 @@ NET_API_STATUS libnetapi_set_error_string(struct libnetapi_ctx *ctx, /**************************************************************** ****************************************************************/ -const char *libnetapi_get_error_string(struct libnetapi_ctx *ctx) +const char *libnetapi_get_error_string(struct libnetapi_ctx *ctx, + NET_API_STATUS status) { - return ctx->error_string; + struct libnetapi_ctx *tmp_ctx = ctx; + + if (!tmp_ctx) { + status = libnetapi_getctx(&tmp_ctx); + if (status != 0) { + return NULL; + } + } + + if (tmp_ctx->error_string) { + return tmp_ctx->error_string; + } + + return libnetapi_errstr(status); } /**************************************************************** diff --git a/source3/lib/netapi/netapi.h b/source3/lib/netapi/netapi.h index 4a40b32fc9..274a167d53 100644 --- a/source3/lib/netapi/netapi.h +++ b/source3/lib/netapi/netapi.h @@ -20,6 +20,16 @@ #ifndef __LIB_NETAPI_H__ #define __LIB_NETAPI_H__ +/**************************************************************** + include some basic headers +****************************************************************/ + +#include <inttypes.h> + +/**************************************************************** + NET_API_STATUS +****************************************************************/ + #define NET_API_STATUS uint32_t #define NET_API_STATUS_SUCCESS 0 @@ -46,9 +56,10 @@ NET_API_STATUS libnetapi_get_debuglevel(struct libnetapi_ctx *ctx, char **debugl NET_API_STATUS libnetapi_set_username(struct libnetapi_ctx *ctx, const char *username); NET_API_STATUS libnetapi_set_password(struct libnetapi_ctx *ctx, const char *password); NET_API_STATUS libnetapi_set_workgroup(struct libnetapi_ctx *ctx, const char *workgroup); -const char *libnetapi_errstr(struct libnetapi_ctx *ctx, NET_API_STATUS status); +const char *libnetapi_errstr(NET_API_STATUS status); NET_API_STATUS libnetapi_set_error_string(struct libnetapi_ctx *ctx, const char *error_string); -const char *libnetapi_get_error_string(struct libnetapi_ctx *ctx); +const char *libnetapi_get_error_string(struct libnetapi_ctx *ctx, NET_API_STATUS status); + /**************************************************************** ****************************************************************/ diff --git a/source3/lib/sharesec.c b/source3/lib/sharesec.c index ba025dacc1..f6ff701d5b 100644 --- a/source3/lib/sharesec.c +++ b/source3/lib/sharesec.c @@ -228,25 +228,20 @@ bool share_access_check(const NT_USER_TOKEN *token, const char *sharename, { uint32 granted; NTSTATUS status; - TALLOC_CTX *mem_ctx = NULL; SEC_DESC *psd = NULL; size_t sd_size; bool ret = True; - if (!(mem_ctx = talloc_init("share_access_check"))) { - return False; - } - - psd = get_share_security(mem_ctx, sharename, &sd_size); + psd = get_share_security(talloc_tos(), sharename, &sd_size); if (!psd) { - TALLOC_FREE(mem_ctx); return True; } ret = se_access_check(psd, token, desired_access, &granted, &status); - talloc_destroy(mem_ctx); + TALLOC_FREE(psd); + return ret; } diff --git a/source3/lib/talloc/talloc.c b/source3/lib/talloc/talloc.c index 3e976bc0fc..9dcd8a2a83 100644 --- a/source3/lib/talloc/talloc.c +++ b/source3/lib/talloc/talloc.c @@ -60,6 +60,8 @@ #define TALLOC_MAGIC 0xe814ec70 #define TALLOC_FLAG_FREE 0x01 #define TALLOC_FLAG_LOOP 0x02 +#define TALLOC_FLAG_POOL 0x04 /* This is a talloc pool */ +#define TALLOC_FLAG_POOLMEM 0x08 /* This is allocated in a pool */ #define TALLOC_MAGIC_REFERENCE ((const char *)1) /* by default we abort when given a bad pointer (such as when talloc_free() is called @@ -109,6 +111,19 @@ struct talloc_chunk { const char *name; size_t size; unsigned flags; + + /* + * "pool" has dual use: + * + * For the talloc pool itself (i.e. TALLOC_FLAG_POOL is set), "pool" + * marks the end of the currently allocated area. + * + * For members of the pool (i.e. TALLOC_FLAG_POOLMEM is set), "pool" + * is a pointer to the struct talloc_chunk of the pool that it was + * allocated from. This way children can quickly find the pool to chew + * from. + */ + void *pool; }; /* 16 byte alignment seems to keep everyone happy */ @@ -200,12 +215,87 @@ const char *talloc_parent_name(const void *ptr) return tc? tc->name : NULL; } +/* + A pool carries an in-pool object count count in the first 16 bytes. + bytes. This is done to support talloc_steal() to a parent outside of the + pool. The count includes the pool itself, so a talloc_free() on a pool will + only destroy the pool if the count has dropped to zero. A talloc_free() of a + pool member will reduce the count, and eventually also call free(3) on the + pool memory. + + The object count is not put into "struct talloc_chunk" because it is only + relevant for talloc pools and the alignment to 16 bytes would increase the + memory footprint of each talloc chunk by those 16 bytes. +*/ + +#define TALLOC_POOL_HDR_SIZE 16 + +static unsigned int *talloc_pool_objectcount(struct talloc_chunk *tc) +{ + return (unsigned int *)((char *)tc + sizeof(struct talloc_chunk)); +} + +/* + Allocate from a pool +*/ + +static struct talloc_chunk *talloc_alloc_pool(struct talloc_chunk *parent, + size_t size) +{ + struct talloc_chunk *pool_ctx = NULL; + size_t space_left; + struct talloc_chunk *result; + size_t chunk_size; + + if (parent == NULL) { + return NULL; + } + + if (parent->flags & TALLOC_FLAG_POOL) { + pool_ctx = parent; + } + else if (parent->flags & TALLOC_FLAG_POOLMEM) { + pool_ctx = (struct talloc_chunk *)parent->pool; + } + + if (pool_ctx == NULL) { + return NULL; + } + + space_left = ((char *)pool_ctx + TC_HDR_SIZE + pool_ctx->size) + - ((char *)pool_ctx->pool); + + /* + * Align size to 16 bytes + */ + chunk_size = ((size + 15) & ~15); + + if (space_left < chunk_size) { + return NULL; + } + + result = (struct talloc_chunk *)pool_ctx->pool; + +#if defined(DEVELOPER) && defined(VALGRIND_MAKE_MEM_UNDEFINED) + VALGRIND_MAKE_MEM_UNDEFINED(result, size); +#endif + + pool_ctx->pool = (void *)((char *)result + chunk_size); + + result->flags = TALLOC_MAGIC | TALLOC_FLAG_POOLMEM; + result->pool = pool_ctx; + + *talloc_pool_objectcount(pool_ctx) += 1; + + return result; +} + /* Allocate a bit of memory as a child of an existing pointer */ static inline void *__talloc(const void *context, size_t size) { - struct talloc_chunk *tc; + struct talloc_chunk *tc = NULL; if (unlikely(context == NULL)) { context = null_context; @@ -215,11 +305,19 @@ static inline void *__talloc(const void *context, size_t size) return NULL; } - tc = (struct talloc_chunk *)malloc(TC_HDR_SIZE+size); - if (unlikely(tc == NULL)) return NULL; + if (context != NULL) { + tc = talloc_alloc_pool(talloc_chunk_from_ptr(context), + TC_HDR_SIZE+size); + } + + if (tc == NULL) { + tc = (struct talloc_chunk *)malloc(TC_HDR_SIZE+size); + if (unlikely(tc == NULL)) return NULL; + tc->flags = TALLOC_MAGIC; + tc->pool = NULL; + } tc->size = size; - tc->flags = TALLOC_MAGIC; tc->destructor = NULL; tc->child = NULL; tc->name = NULL; @@ -246,6 +344,33 @@ static inline void *__talloc(const void *context, size_t size) } /* + * Create a talloc pool + */ + +void *talloc_pool(const void *context, size_t size) +{ + void *result = __talloc(context, size + TALLOC_POOL_HDR_SIZE); + struct talloc_chunk *tc; + + if (unlikely(result == NULL)) { + return NULL; + } + + tc = talloc_chunk_from_ptr(result); + + tc->flags |= TALLOC_FLAG_POOL; + tc->pool = (char *)result + TALLOC_POOL_HDR_SIZE; + + *talloc_pool_objectcount(tc) = 1; + +#if defined(DEVELOPER) && defined(VALGRIND_MAKE_MEM_NOACCESS) + VALGRIND_MAKE_MEM_NOACCESS(tc->pool, size); +#endif + + return result; +} + +/* setup a destructor to be called on free of a pointer the destructor should return 0 on success, or -1 on failure. if the destructor fails then the free is failed, and the memory can @@ -420,7 +545,29 @@ static inline int _talloc_free(void *ptr) } tc->flags |= TALLOC_FLAG_FREE; - free(tc); + + if (tc->flags & (TALLOC_FLAG_POOL|TALLOC_FLAG_POOLMEM)) { + struct talloc_chunk *pool; + unsigned int *pool_object_count; + + pool = (tc->flags & TALLOC_FLAG_POOL) + ? tc : (struct talloc_chunk *)tc->pool; + + pool_object_count = talloc_pool_objectcount(pool); + + if (*pool_object_count == 0) { + TALLOC_ABORT("Pool object count zero!"); + } + + *pool_object_count -= 1; + + if (*pool_object_count == 0) { + free(pool); + } + } + else { + free(tc); + } return 0; } @@ -718,6 +865,15 @@ void talloc_free_children(void *ptr) talloc_steal(new_parent, child); } } + + if ((tc->flags & TALLOC_FLAG_POOL) + && (*talloc_pool_objectcount(tc) == 1)) { + tc->pool = ((char *)tc + TC_HDR_SIZE + TALLOC_POOL_HDR_SIZE); +#if defined(DEVELOPER) && defined(VALGRIND_MAKE_MEM_NOACCESS) + VALGRIND_MAKE_MEM_NOACCESS( + tc->pool, tc->size - TALLOC_POOL_HDR_SIZE); +#endif + } } /* @@ -769,6 +925,7 @@ void *_talloc_realloc(const void *context, void *ptr, size_t size, const char *n { struct talloc_chunk *tc; void *new_ptr; + bool malloced = false; /* size zero is equivalent to free() */ if (unlikely(size == 0)) { @@ -808,7 +965,24 @@ void *_talloc_realloc(const void *context, void *ptr, size_t size, const char *n free(tc); } #else - new_ptr = realloc(tc, size + TC_HDR_SIZE); + if (tc->flags & TALLOC_FLAG_POOLMEM) { + + new_ptr = talloc_alloc_pool(tc, size + TC_HDR_SIZE); + *talloc_pool_objectcount((struct talloc_chunk *) + (tc->pool)) -= 1; + + if (new_ptr == NULL) { + new_ptr = malloc(TC_HDR_SIZE+size); + malloced = true; + } + + if (new_ptr) { + memcpy(new_ptr, tc, MIN(tc->size,size) + TC_HDR_SIZE); + } + } + else { + new_ptr = realloc(tc, size + TC_HDR_SIZE); + } #endif if (unlikely(!new_ptr)) { tc->flags &= ~TALLOC_FLAG_FREE; @@ -816,7 +990,10 @@ void *_talloc_realloc(const void *context, void *ptr, size_t size, const char *n } tc = (struct talloc_chunk *)new_ptr; - tc->flags &= ~TALLOC_FLAG_FREE; + tc->flags &= ~TALLOC_FLAG_FREE; + if (malloced) { + tc->flags &= ~TALLOC_FLAG_POOLMEM; + } if (tc->parent) { tc->parent->child = tc; } diff --git a/source3/lib/talloc/talloc.h b/source3/lib/talloc/talloc.h index e103391681..5431971655 100644 --- a/source3/lib/talloc/talloc.h +++ b/source3/lib/talloc/talloc.h @@ -116,6 +116,7 @@ typedef void TALLOC_CTX; /* The following definitions come from talloc.c */ void *_talloc(const void *context, size_t size); +void *talloc_pool(const void *context, size_t size); void _talloc_set_destructor(const void *ptr, int (*destructor)(void *)); int talloc_increase_ref_count(const void *ptr); size_t talloc_reference_count(const void *ptr); diff --git a/source3/lib/talloc/testsuite.c b/source3/lib/talloc/testsuite.c index e16c91f8b9..fedbda95aa 100644 --- a/source3/lib/talloc/testsuite.c +++ b/source3/lib/talloc/testsuite.c @@ -813,6 +813,25 @@ static bool test_speed(void) talloc_free(ctx); + ctx = talloc_pool(NULL, 1024); + + tv = timeval_current(); + count = 0; + do { + void *p1, *p2, *p3; + for (i=0;i<loop;i++) { + p1 = talloc_size(ctx, loop % 100); + p2 = talloc_strdup(p1, "foo bar"); + p3 = talloc_size(p1, 300); + talloc_free_children(ctx); + } + count += 3 * loop; + } while (timeval_elapsed(&tv) < 5.0); + + talloc_free(ctx); + + fprintf(stderr, "talloc_pool: %.0f ops/sec\n", count/timeval_elapsed(&tv)); + tv = timeval_current(); count = 0; do { @@ -1066,6 +1085,23 @@ static bool test_autofree(void) return true; } +static bool test_pool(void) +{ + void *pool; + void *p1, *p2, *p3, *p4; + + pool = talloc_pool(NULL, 1024); + + p1 = talloc_size(pool, 80); + p2 = talloc_size(pool, 20); + p3 = talloc_size(p1, 50); + p4 = talloc_size(p3, 1000); + + talloc_free(pool); + + return true; +} + struct torture_context; bool torture_local_talloc(struct torture_context *tctx) { @@ -1094,6 +1130,7 @@ bool torture_local_talloc(struct torture_context *tctx) ret &= test_free_parent_deny_child(); ret &= test_talloc_ptrtype(); ret &= test_talloc_free_in_destructor(); + ret &= test_pool(); if (ret) { ret &= test_speed(); diff --git a/source3/lib/talloc_stack.c b/source3/lib/talloc_stack.c index d887b2d415..2722fb9676 100644 --- a/source3/lib/talloc_stack.c +++ b/source3/lib/talloc_stack.c @@ -64,9 +64,9 @@ static int talloc_pop(TALLOC_CTX *frame) * not explicitly freed. */ -TALLOC_CTX *talloc_stackframe(void) +static TALLOC_CTX *talloc_stackframe_internal(size_t poolsize) { - TALLOC_CTX **tmp, *top; + TALLOC_CTX **tmp, *top, *parent; if (talloc_stack_arraysize < talloc_stacksize + 1) { tmp = TALLOC_REALLOC_ARRAY(NULL, talloc_stack, TALLOC_CTX *, @@ -78,7 +78,18 @@ TALLOC_CTX *talloc_stackframe(void) talloc_stack_arraysize = talloc_stacksize + 1; } - top = talloc_new(talloc_stack); + if (talloc_stacksize == 0) { + parent = talloc_stack; + } + else { + parent = talloc_stack[talloc_stacksize-1]; + } + + if (poolsize) { + top = talloc_pool(parent, poolsize); + } else { + top = talloc_new(parent); + } if (top == NULL) { goto fail; @@ -94,6 +105,16 @@ TALLOC_CTX *talloc_stackframe(void) return NULL; } +TALLOC_CTX *talloc_stackframe(void) +{ + return talloc_stackframe_internal(0); +} + +TALLOC_CTX *talloc_stackframe_pool(size_t poolsize) +{ + return talloc_stackframe_internal(poolsize); +} + /* * Get us the current top of the talloc stack. */ diff --git a/source3/lib/tdb/common/freelist.c b/source3/lib/tdb/common/freelist.c index b109643f23..c086c151fa 100644 --- a/source3/lib/tdb/common/freelist.c +++ b/source3/lib/tdb/common/freelist.c @@ -27,6 +27,12 @@ #include "tdb_private.h" +/* 'right' merges can involve O(n^2) cost when combined with a + traverse, so they are disabled until we find a way to do them in + O(1) time +*/ +#define USE_RIGHT_MERGES 0 + /* read a freelist record and check for simple errors */ int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off, struct list_struct *rec) { @@ -56,7 +62,7 @@ int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off, struct list_struct } - +#if USE_RIGHT_MERGES /* Remove an element from the freelist. Must have alloc lock. */ static int remove_from_freelist(struct tdb_context *tdb, tdb_off_t off, tdb_off_t next) { @@ -75,6 +81,7 @@ static int remove_from_freelist(struct tdb_context *tdb, tdb_off_t off, tdb_off_ TDB_LOG((tdb, TDB_DEBUG_FATAL,"remove_from_freelist: not on list at off=%d\n", off)); return TDB_ERRCODE(TDB_ERR_CORRUPT, -1); } +#endif /* update a record tailer (must hold allocation lock) */ @@ -93,8 +100,6 @@ static int update_tailer(struct tdb_context *tdb, tdb_off_t offset, neccessary. */ int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec) { - tdb_off_t right, left; - /* Allocation and tailer lock */ if (tdb_lock(tdb, -1, F_WRLCK) != 0) return -1; @@ -105,9 +110,10 @@ int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec) goto fail; } +#if USE_RIGHT_MERGES /* Look right first (I'm an Australian, dammit) */ - right = offset + sizeof(*rec) + rec->rec_len; - if (right + sizeof(*rec) <= tdb->map_size) { + if (offset + sizeof(*rec) + rec->rec_len + sizeof(*rec) <= tdb->map_size) { + tdb_off_t right = offset + sizeof(*rec) + rec->rec_len; struct list_struct r; if (tdb->methods->tdb_read(tdb, right, &r, sizeof(r), DOCONV()) == -1) { @@ -122,13 +128,18 @@ int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec) goto left; } rec->rec_len += sizeof(r) + r.rec_len; + if (update_tailer(tdb, offset, rec) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed at %u\n", offset)); + goto fail; + } } } - left: +#endif + /* Look left */ - left = offset - sizeof(tdb_off_t); - if (left > TDB_DATA_START(tdb->header.hash_size)) { + if (offset - sizeof(tdb_off_t) > TDB_DATA_START(tdb->header.hash_size)) { + tdb_off_t left = offset - sizeof(tdb_off_t); struct list_struct l; tdb_off_t leftsize; @@ -145,7 +156,12 @@ left: left = offset - leftsize; - /* Now read in record */ + if (leftsize > offset || + left < TDB_DATA_START(tdb->header.hash_size)) { + goto update; + } + + /* Now read in the left record */ if (tdb->methods->tdb_read(tdb, left, &l, sizeof(l), DOCONV()) == -1) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left read failed at %u (%u)\n", left, leftsize)); goto update; @@ -153,21 +169,24 @@ left: /* If it's free, expand to include it. */ if (l.magic == TDB_FREE_MAGIC) { - if (remove_from_freelist(tdb, left, l.next) == -1) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left free failed at %u\n", left)); - goto update; - } else { - offset = left; - rec->rec_len += leftsize; + /* we now merge the new record into the left record, rather than the other + way around. This makes the operation O(1) instead of O(n). This change + prevents traverse from being O(n^2) after a lot of deletes */ + l.rec_len += sizeof(*rec) + rec->rec_len; + if (tdb_rec_write(tdb, left, &l) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_left failed at %u\n", left)); + goto fail; } + if (update_tailer(tdb, left, &l) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed at %u\n", offset)); + goto fail; + } + tdb_unlock(tdb, -1, F_WRLCK); + return 0; } } update: - if (update_tailer(tdb, offset, rec) == -1) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed at %u\n", offset)); - goto fail; - } /* Now, prepend to free list */ rec->magic = TDB_FREE_MAGIC; @@ -261,6 +280,7 @@ tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct list_st tdb_off_t rec_ptr, last_ptr; tdb_len_t rec_len; } bestfit; + float multiplier = 1.0; if (tdb_lock(tdb, -1, F_WRLCK) == -1) return 0; @@ -295,18 +315,27 @@ tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct list_st bestfit.rec_len = rec->rec_len; bestfit.rec_ptr = rec_ptr; bestfit.last_ptr = last_ptr; - /* consider a fit to be good enough if - we aren't wasting more than half - the space */ - if (bestfit.rec_len < 2*length) { - break; - } } } /* move to the next record */ last_ptr = rec_ptr; rec_ptr = rec->next; + + /* if we've found a record that is big enough, then + stop searching if its also not too big. The + definition of 'too big' changes as we scan + through */ + if (bestfit.rec_len > 0 && + bestfit.rec_len < length * multiplier) { + break; + } + + /* this multiplier means we only extremely rarely + search more than 50 or so records. At 50 records we + accept records up to 11 times larger than what we + want */ + multiplier *= 1.05; } if (bestfit.rec_ptr != 0) { @@ -328,3 +357,25 @@ tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct list_st return 0; } + + +/* + return the size of the freelist - used to decide if we should repack +*/ +int tdb_freelist_size(struct tdb_context *tdb) +{ + tdb_off_t ptr; + int count=0; + + if (tdb_lock(tdb, -1, F_RDLCK) == -1) { + return -1; + } + + ptr = FREELIST_TOP; + while (tdb_ofs_read(tdb, ptr, &ptr) == 0 && ptr != 0) { + count++; + } + + tdb_unlock(tdb, -1, F_RDLCK); + return count; +} diff --git a/source3/lib/tdb/common/io.c b/source3/lib/tdb/common/io.c index 8ab0768883..172ab69d8c 100644 --- a/source3/lib/tdb/common/io.c +++ b/source3/lib/tdb/common/io.c @@ -101,8 +101,8 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off, off+written); } if (written == -1) { - /* Ensure ecode is set for log fn. */ - tdb->ecode = TDB_ERR_IO; + /* Ensure ecode is set for log fn. */ + tdb->ecode = TDB_ERR_IO; TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d " "len=%d (%s)\n", off, len, strerror(errno))); return TDB_ERRCODE(TDB_ERR_IO, -1); @@ -111,8 +111,8 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off, "write %d bytes at %d in two attempts\n", len, off)); errno = ENOSPC; - return TDB_ERRCODE(TDB_ERR_IO, -1); - } + return TDB_ERRCODE(TDB_ERR_IO, -1); + } } return 0; } @@ -230,7 +230,7 @@ void tdb_mmap(struct tdb_context *tdb) says to use for mmap expansion */ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition) { - char buf[1024]; + char buf[8192]; if (tdb->read_only || tdb->traverse_read) { tdb->ecode = TDB_ERR_RDONLY; @@ -294,7 +294,7 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad int tdb_expand(struct tdb_context *tdb, tdb_off_t size) { struct list_struct rec; - tdb_off_t offset; + tdb_off_t offset, new_size; if (tdb_lock(tdb, -1, F_WRLCK) == -1) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "lock failed in tdb_expand\n")); @@ -304,9 +304,11 @@ int tdb_expand(struct tdb_context *tdb, tdb_off_t size) /* must know about any previous expansions by another process */ tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1); - /* always make room for at least 10 more records, and round - the database up to a multiple of the page size */ - size = TDB_ALIGN(tdb->map_size + size*10, tdb->page_size) - tdb->map_size; + /* always make room for at least 100 more records, and at + least 25% more space. Round the database up to a multiple + of the page size */ + new_size = MAX(tdb->map_size + size*100, tdb->map_size * 1.25); + size = TDB_ALIGN(new_size, tdb->page_size) - tdb->map_size; if (!(tdb->flags & TDB_INTERNAL)) tdb_munmap(tdb); diff --git a/source3/lib/tdb/common/lock.c b/source3/lib/tdb/common/lock.c index e3fe888c46..f156c0fa7b 100644 --- a/source3/lib/tdb/common/lock.c +++ b/source3/lib/tdb/common/lock.c @@ -505,6 +505,9 @@ int tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key) /* record lock stops delete underneath */ int tdb_lock_record(struct tdb_context *tdb, tdb_off_t off) { + if (tdb->global_lock.count) { + return 0; + } return off ? tdb->methods->tdb_brlock(tdb, off, F_RDLCK, F_SETLKW, 0, 1) : 0; } @@ -537,6 +540,10 @@ int tdb_unlock_record(struct tdb_context *tdb, tdb_off_t off) struct tdb_traverse_lock *i; uint32_t count = 0; + if (tdb->global_lock.count) { + return 0; + } + if (off == 0) return 0; for (i = &tdb->travlocks; i; i = i->next) diff --git a/source3/lib/tdb/common/open.c b/source3/lib/tdb/common/open.c index 0bd1c91a5e..6bd8fda2bf 100644 --- a/source3/lib/tdb/common/open.c +++ b/source3/lib/tdb/common/open.c @@ -35,7 +35,7 @@ static struct tdb_context *tdbs = NULL; static unsigned int default_tdb_hash(TDB_DATA *key) { uint32_t value; /* Used to compute the hash value. */ - uint32_t i; /* Used to cycle through random values. */ + uint32_t i; /* Used to cycle through random values. */ /* Set the initial value from the key size. */ for (value = 0x238F13AF * key->dsize, i=0; i < key->dsize; i++) @@ -90,7 +90,7 @@ static int tdb_new_database(struct tdb_context *tdb, int hash_size) size -= written; written = write(tdb->fd, newdb+written, size); if (written == size) { - ret = 0; + ret = 0; } else if (written >= 0) { /* a second incomplete write - we give up. * guessing the errno... */ @@ -152,6 +152,7 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, int rev = 0, locked = 0; unsigned char *vp; uint32_t vertest; + unsigned v; if (!(tdb = (struct tdb_context *)calloc(1, sizeof *tdb))) { /* Can't log this */ @@ -215,6 +216,10 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, goto fail; /* errno set by open(2) */ } + /* on exec, don't inherit the fd */ + v = fcntl(tdb->fd, F_GETFD, 0); + fcntl(tdb->fd, F_SETFD, v | FD_CLOEXEC); + /* ensure there is only one process initialising at once */ if (tdb->methods->tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to get global lock on %s: %s\n", @@ -242,7 +247,7 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, /* its not a valid database - possibly initialise it */ if (!(open_flags & O_CREAT) || tdb_new_database(tdb, hash_size) == -1) { if (errno == 0) { - errno = EIO; /* ie bad format or something */ + errno = EIO; /* ie bad format or something */ } goto fail; } diff --git a/source3/lib/tdb/common/tdb.c b/source3/lib/tdb/common/tdb.c index 0e9d1dbd74..bf3abb71ac 100644 --- a/source3/lib/tdb/common/tdb.c +++ b/source3/lib/tdb/common/tdb.c @@ -102,8 +102,7 @@ static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, } /* As tdb_find, but if you succeed, keep the lock */ -tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, - uint32_t hash, int locktype, +tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype, struct list_struct *rec) { uint32_t rec_ptr; @@ -237,14 +236,15 @@ int tdb_exists(struct tdb_context *tdb, TDB_DATA key) } /* actually delete an entry in the database given the offset */ -int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct*rec) +int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct *rec) { tdb_off_t last_ptr, i; struct list_struct lastrec; if (tdb->read_only || tdb->traverse_read) return -1; - if (tdb_write_lock_record(tdb, rec_ptr) == -1) { + if (tdb->traverse_write != 0 || + tdb_write_lock_record(tdb, rec_ptr) == -1) { /* Someone traversing here: mark it as dead */ rec->magic = TDB_DEAD_MAGIC; return tdb_rec_write(tdb, rec_ptr, rec); @@ -666,6 +666,16 @@ int tdb_get_flags(struct tdb_context *tdb) return tdb->flags; } +void tdb_add_flags(struct tdb_context *tdb, unsigned flags) +{ + tdb->flags |= flags; +} + +void tdb_remove_flags(struct tdb_context *tdb, unsigned flags) +{ + tdb->flags &= ~flags; +} + /* enable sequence number handling on an open tdb @@ -674,3 +684,57 @@ void tdb_enable_seqnum(struct tdb_context *tdb) { tdb->flags |= TDB_SEQNUM; } + + +/* + wipe the entire database, deleting all records. This can be done + very fast by using a global lock. The entire data portion of the + file becomes a single entry in the freelist. + */ +int tdb_wipe_all(struct tdb_context *tdb) +{ + int i; + tdb_off_t offset = 0; + ssize_t data_len; + + if (tdb_lockall(tdb) != 0) { + return -1; + } + + /* wipe the hashes */ + for (i=0;i<tdb->header.hash_size;i++) { + if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i)); + goto failed; + } + } + + /* wipe the freelist */ + if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n")); + goto failed; + } + + /* add all the rest of the file to the freelist */ + data_len = (tdb->map_size - TDB_DATA_START(tdb->header.hash_size)) - sizeof(struct list_struct); + if (data_len > 0) { + struct list_struct rec; + memset(&rec,'\0',sizeof(rec)); + rec.rec_len = data_len; + if (tdb_free(tdb, TDB_DATA_START(tdb->header.hash_size), &rec) == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to add free record\n")); + goto failed; + } + } + + if (tdb_unlockall(tdb) != 0) { + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n")); + goto failed; + } + + return 0; + +failed: + tdb_unlockall(tdb); + return -1; +} diff --git a/source3/lib/tdb/common/tdb_private.h b/source3/lib/tdb/common/tdb_private.h index 58c30c1706..63a6d04e72 100644 --- a/source3/lib/tdb/common/tdb_private.h +++ b/source3/lib/tdb/common/tdb_private.h @@ -38,6 +38,10 @@ typedef uint32_t tdb_len_t; typedef uint32_t tdb_off_t; +#ifndef offsetof +#define offsetof(t,f) ((unsigned int)&((t *)0)->f) +#endif + #define TDB_MAGIC_FOOD "TDB file\n" #define TDB_VERSION (0x26011967 + 6) #define TDB_MAGIC (0x26011999U) @@ -54,7 +58,7 @@ typedef uint32_t tdb_off_t; #define TDB_BAD_MAGIC(r) ((r)->magic != TDB_MAGIC && !TDB_DEAD(r)) #define TDB_HASH_TOP(hash) (FREELIST_TOP + (BUCKET(hash)+1)*sizeof(tdb_off_t)) #define TDB_HASHTABLE_SIZE(tdb) ((tdb->header.hash_size+1)*sizeof(tdb_off_t)) -#define TDB_DATA_START(hash_size) TDB_HASH_TOP(hash_size-1) +#define TDB_DATA_START(hash_size) (TDB_HASH_TOP(hash_size-1) + sizeof(tdb_off_t)) #define TDB_RECOVERY_HEAD offsetof(struct tdb_header, recovery_start) #define TDB_SEQNUM_OFS offsetof(struct tdb_header, sequence_number) #define TDB_PAD_BYTE 0x42 @@ -144,6 +148,7 @@ struct tdb_context { tdb_len_t map_size; /* how much space has been mapped */ int read_only; /* opened read-only */ int traverse_read; /* read-only traversal */ + int traverse_write; /* read-write traversal */ struct tdb_lock_type global_lock; int num_lockrecs; struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */ @@ -173,7 +178,6 @@ struct tdb_context { int tdb_munmap(struct tdb_context *tdb); void tdb_mmap(struct tdb_context *tdb); int tdb_lock(struct tdb_context *tdb, int list, int ltype); -int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype); int tdb_unlock(struct tdb_context *tdb, int list, int ltype); int tdb_brlock(struct tdb_context *tdb, tdb_off_t offset, int rw_type, int lck_type, int probe, size_t len); int tdb_transaction_lock(struct tdb_context *tdb, int ltype); diff --git a/source3/lib/tdb/common/transaction.c b/source3/lib/tdb/common/transaction.c index 7eaacf7a16..2da09face0 100644 --- a/source3/lib/tdb/common/transaction.c +++ b/source3/lib/tdb/common/transaction.c @@ -87,13 +87,6 @@ */ -struct tdb_transaction_el { - struct tdb_transaction_el *next, *prev; - tdb_off_t offset; - tdb_len_t length; - unsigned char *data; -}; - /* hold the context of any current transaction */ @@ -105,12 +98,12 @@ struct tdb_transaction { /* the original io methods - used to do IOs to the real db */ const struct tdb_methods *io_methods; - /* the list of transaction elements. We use a doubly linked - list with a last pointer to allow us to keep the list - ordered, with first element at the front of the list. It - needs to be doubly linked as the read/write traversals need - to be backwards, while the commit needs to be forwards */ - struct tdb_transaction_el *elements, *elements_last; + /* the list of transaction blocks. When a block is first + written to, it gets created in this list */ + uint8_t **blocks; + uint32_t num_blocks; + uint32_t block_size; /* bytes in each block */ + uint32_t last_block_size; /* number of valid bytes in the last block */ /* non-zero when an internal transaction error has occurred. All write operations will then fail until the @@ -134,52 +127,48 @@ struct tdb_transaction { static int transaction_read(struct tdb_context *tdb, tdb_off_t off, void *buf, tdb_len_t len, int cv) { - struct tdb_transaction_el *el; - - /* we need to walk the list backwards to get the most recent data */ - for (el=tdb->transaction->elements_last;el;el=el->prev) { - tdb_len_t partial; + uint32_t blk; - if (off+len <= el->offset) { - continue; - } - if (off >= el->offset + el->length) { - continue; + /* break it down into block sized ops */ + while (len + (off % tdb->transaction->block_size) > tdb->transaction->block_size) { + tdb_len_t len2 = tdb->transaction->block_size - (off % tdb->transaction->block_size); + if (transaction_read(tdb, off, buf, len2, cv) != 0) { + return -1; } + len -= len2; + off += len2; + buf = (void *)(len2 + (char *)buf); + } - /* an overlapping read - needs to be split into up to - 2 reads and a memcpy */ - if (off < el->offset) { - partial = el->offset - off; - if (transaction_read(tdb, off, buf, partial, cv) != 0) { - goto fail; - } - len -= partial; - off += partial; - buf = (void *)(partial + (char *)buf); - } - if (off + len <= el->offset + el->length) { - partial = len; - } else { - partial = el->offset + el->length - off; - } - memcpy(buf, el->data + (off - el->offset), partial); - if (cv) { - tdb_convert(buf, len); - } - len -= partial; - off += partial; - buf = (void *)(partial + (char *)buf); - - if (len != 0 && transaction_read(tdb, off, buf, len, cv) != 0) { + if (len == 0) { + return 0; + } + + blk = off / tdb->transaction->block_size; + + /* see if we have it in the block list */ + if (tdb->transaction->num_blocks <= blk || + tdb->transaction->blocks[blk] == NULL) { + /* nope, do a real read */ + if (tdb->transaction->io_methods->tdb_read(tdb, off, buf, len, cv) != 0) { goto fail; } - return 0; } - /* its not in the transaction elements - do a real read */ - return tdb->transaction->io_methods->tdb_read(tdb, off, buf, len, cv); + /* it is in the block list. Now check for the last block */ + if (blk == tdb->transaction->num_blocks-1) { + if (len > tdb->transaction->last_block_size) { + goto fail; + } + } + + /* now copy it out of this block */ + memcpy(buf, tdb->transaction->blocks[blk] + (off % tdb->transaction->block_size), len); + if (cv) { + tdb_convert(buf, len); + } + return 0; fail: TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_read: failed at off=%d len=%d\n", off, len)); @@ -195,12 +184,8 @@ fail: static int transaction_write(struct tdb_context *tdb, tdb_off_t off, const void *buf, tdb_len_t len) { - struct tdb_transaction_el *el, *best_el=NULL; + uint32_t blk; - if (len == 0) { - return 0; - } - /* if the write is to a hash head, then update the transaction hash heads */ if (len == sizeof(tdb_off_t) && off >= FREELIST_TOP && @@ -209,106 +194,88 @@ static int transaction_write(struct tdb_context *tdb, tdb_off_t off, memcpy(&tdb->transaction->hash_heads[chain], buf, len); } - /* first see if we can replace an existing entry */ - for (el=tdb->transaction->elements_last;el;el=el->prev) { - tdb_len_t partial; - - if (best_el == NULL && off == el->offset+el->length) { - best_el = el; + /* break it up into block sized chunks */ + while (len + (off % tdb->transaction->block_size) > tdb->transaction->block_size) { + tdb_len_t len2 = tdb->transaction->block_size - (off % tdb->transaction->block_size); + if (transaction_write(tdb, off, buf, len2) != 0) { + return -1; } - - if (off+len <= el->offset) { - continue; - } - if (off >= el->offset + el->length) { - continue; + len -= len2; + off += len2; + if (buf != NULL) { + buf = (const void *)(len2 + (const char *)buf); } + } - /* an overlapping write - needs to be split into up to - 2 writes and a memcpy */ - if (off < el->offset) { - partial = el->offset - off; - if (transaction_write(tdb, off, buf, partial) != 0) { - goto fail; - } - len -= partial; - off += partial; - buf = (const void *)(partial + (const char *)buf); - } - if (off + len <= el->offset + el->length) { - partial = len; + if (len == 0) { + return 0; + } + + blk = off / tdb->transaction->block_size; + off = off % tdb->transaction->block_size; + + if (tdb->transaction->num_blocks <= blk) { + uint8_t **new_blocks; + /* expand the blocks array */ + if (tdb->transaction->blocks == NULL) { + new_blocks = malloc((blk+1)*sizeof(uint8_t *)); } else { - partial = el->offset + el->length - off; + new_blocks = realloc(tdb->transaction->blocks, (blk+1)*sizeof(uint8_t *)); } - memcpy(el->data + (off - el->offset), buf, partial); - len -= partial; - off += partial; - buf = (const void *)(partial + (const char *)buf); - - if (len != 0 && transaction_write(tdb, off, buf, len) != 0) { + if (new_blocks == NULL) { + tdb->ecode = TDB_ERR_OOM; goto fail; } - - return 0; + memset(&new_blocks[tdb->transaction->num_blocks], 0, + (1+(blk - tdb->transaction->num_blocks))*sizeof(uint8_t *)); + tdb->transaction->blocks = new_blocks; + tdb->transaction->num_blocks = blk+1; + tdb->transaction->last_block_size = 0; } - /* see if we can append the new entry to an existing entry */ - if (best_el && best_el->offset + best_el->length == off && - (off+len < tdb->transaction->old_map_size || - off > tdb->transaction->old_map_size)) { - unsigned char *data = best_el->data; - el = best_el; - el->data = (unsigned char *)realloc(el->data, - el->length + len); - if (el->data == NULL) { + /* allocate and fill a block? */ + if (tdb->transaction->blocks[blk] == NULL) { + tdb->transaction->blocks[blk] = (uint8_t *)calloc(tdb->transaction->block_size, 1); + if (tdb->transaction->blocks[blk] == NULL) { tdb->ecode = TDB_ERR_OOM; tdb->transaction->transaction_error = 1; - el->data = data; - return -1; + return -1; } - if (buf) { - memcpy(el->data + el->length, buf, len); - } else { - memset(el->data + el->length, TDB_PAD_BYTE, len); + if (tdb->transaction->old_map_size > blk * tdb->transaction->block_size) { + tdb_len_t len2 = tdb->transaction->block_size; + if (len2 + (blk * tdb->transaction->block_size) > tdb->transaction->old_map_size) { + len2 = tdb->transaction->old_map_size - (blk * tdb->transaction->block_size); + } + if (tdb->transaction->io_methods->tdb_read(tdb, blk * tdb->transaction->block_size, + tdb->transaction->blocks[blk], + len2, 0) != 0) { + SAFE_FREE(tdb->transaction->blocks[blk]); + tdb->ecode = TDB_ERR_IO; + goto fail; + } + if (blk == tdb->transaction->num_blocks-1) { + tdb->transaction->last_block_size = len2; + } } - el->length += len; - return 0; } - - /* add a new entry at the end of the list */ - el = (struct tdb_transaction_el *)malloc(sizeof(*el)); - if (el == NULL) { - tdb->ecode = TDB_ERR_OOM; - tdb->transaction->transaction_error = 1; - return -1; - } - el->next = NULL; - el->prev = tdb->transaction->elements_last; - el->offset = off; - el->length = len; - el->data = (unsigned char *)malloc(len); - if (el->data == NULL) { - free(el); - tdb->ecode = TDB_ERR_OOM; - tdb->transaction->transaction_error = 1; - return -1; - } - if (buf) { - memcpy(el->data, buf, len); + + /* overwrite part of an existing block */ + if (buf == NULL) { + memset(tdb->transaction->blocks[blk] + off, 0, len); } else { - memset(el->data, TDB_PAD_BYTE, len); + memcpy(tdb->transaction->blocks[blk] + off, buf, len); } - if (el->prev) { - el->prev->next = el; - } else { - tdb->transaction->elements = el; + if (blk == tdb->transaction->num_blocks-1) { + if (len + off > tdb->transaction->last_block_size) { + tdb->transaction->last_block_size = len + off; + } } - tdb->transaction->elements_last = el; + return 0; fail: - TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: failed at off=%d len=%d\n", off, len)); - tdb->ecode = TDB_ERR_IO; + TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: failed at off=%d len=%d\n", + (blk*tdb->transaction->block_size) + off, len)); tdb->transaction->transaction_error = 1; return -1; } @@ -419,10 +386,14 @@ int tdb_transaction_start(struct tdb_context *tdb) return -1; } + /* a page at a time seems like a reasonable compromise between compactness and efficiency */ + tdb->transaction->block_size = tdb->page_size; + /* get the transaction write lock. This is a blocking lock. As discussed with Volker, there are a number of ways we could make this async, which we will probably do in the future */ if (tdb_transaction_lock(tdb, F_WRLCK) == -1) { + SAFE_FREE(tdb->transaction->blocks); SAFE_FREE(tdb->transaction); return -1; } @@ -460,21 +431,12 @@ int tdb_transaction_start(struct tdb_context *tdb) tdb->transaction->io_methods = tdb->methods; tdb->methods = &transaction_methods; - /* by calling this transaction write here, we ensure that we don't grow the - transaction linked list due to hash table updates */ - if (transaction_write(tdb, FREELIST_TOP, tdb->transaction->hash_heads, - TDB_HASHTABLE_SIZE(tdb)) != 0) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_start: failed to prime hash table\n")); - tdb->ecode = TDB_ERR_IO; - tdb->methods = tdb->transaction->io_methods; - goto fail; - } - return 0; fail: tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0); tdb_transaction_unlock(tdb); + SAFE_FREE(tdb->transaction->blocks); SAFE_FREE(tdb->transaction->hash_heads); SAFE_FREE(tdb->transaction); return -1; @@ -486,6 +448,8 @@ fail: */ int tdb_transaction_cancel(struct tdb_context *tdb) { + int i; + if (tdb->transaction == NULL) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_cancel: no transaction\n")); return -1; @@ -499,13 +463,13 @@ int tdb_transaction_cancel(struct tdb_context *tdb) tdb->map_size = tdb->transaction->old_map_size; - /* free all the transaction elements */ - while (tdb->transaction->elements) { - struct tdb_transaction_el *el = tdb->transaction->elements; - tdb->transaction->elements = el->next; - free(el->data); - free(el); + /* free all the transaction blocks */ + for (i=0;i<tdb->transaction->num_blocks;i++) { + if (tdb->transaction->blocks[i] != NULL) { + free(tdb->transaction->blocks[i]); + } } + SAFE_FREE(tdb->transaction->blocks); /* remove any global lock created during the transaction */ if (tdb->global_lock.count != 0) { @@ -515,7 +479,6 @@ int tdb_transaction_cancel(struct tdb_context *tdb) /* remove any locks created during the transaction */ if (tdb->num_locks != 0) { - int i; for (i=0;i<tdb->num_lockrecs;i++) { tdb_brlock(tdb,FREELIST_TOP+4*tdb->lockrecs[i].list, F_UNLCK,F_SETLKW, 0, 1); @@ -567,16 +530,24 @@ static int transaction_sync(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t */ static tdb_len_t tdb_recovery_size(struct tdb_context *tdb) { - struct tdb_transaction_el *el; tdb_len_t recovery_size = 0; + int i; recovery_size = sizeof(uint32_t); - for (el=tdb->transaction->elements;el;el=el->next) { - if (el->offset >= tdb->transaction->old_map_size) { + for (i=0;i<tdb->transaction->num_blocks;i++) { + if (i * tdb->transaction->block_size >= tdb->transaction->old_map_size) { + break; + } + if (tdb->transaction->blocks[i] == NULL) { continue; } - recovery_size += 2*sizeof(tdb_off_t) + el->length; - } + recovery_size += 2*sizeof(tdb_off_t); + if (i == tdb->transaction->num_blocks-1) { + recovery_size += tdb->transaction->last_block_size; + } else { + recovery_size += tdb->transaction->block_size; + } + } return recovery_size; } @@ -669,7 +640,6 @@ static int tdb_recovery_allocate(struct tdb_context *tdb, static int transaction_setup_recovery(struct tdb_context *tdb, tdb_off_t *magic_offset) { - struct tdb_transaction_el *el; tdb_len_t recovery_size; unsigned char *data, *p; const struct tdb_methods *methods = tdb->transaction->io_methods; @@ -677,6 +647,7 @@ static int transaction_setup_recovery(struct tdb_context *tdb, tdb_off_t recovery_offset, recovery_max_size; tdb_off_t old_map_size = tdb->transaction->old_map_size; uint32_t magic, tailer; + int i; /* check that the recovery area has enough space @@ -704,30 +675,43 @@ static int transaction_setup_recovery(struct tdb_context *tdb, /* build the recovery data into a single blob to allow us to do a single large write, which should be more efficient */ p = data + sizeof(*rec); - for (el=tdb->transaction->elements;el;el=el->next) { - if (el->offset >= old_map_size) { + for (i=0;i<tdb->transaction->num_blocks;i++) { + tdb_off_t offset; + tdb_len_t length; + + if (tdb->transaction->blocks[i] == NULL) { continue; } - if (el->offset + el->length > tdb->transaction->old_map_size) { + + offset = i * tdb->transaction->block_size; + length = tdb->transaction->block_size; + if (i == tdb->transaction->num_blocks-1) { + length = tdb->transaction->last_block_size; + } + + if (offset >= old_map_size) { + continue; + } + if (offset + length > tdb->transaction->old_map_size) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_setup_recovery: transaction data over new region boundary\n")); free(data); tdb->ecode = TDB_ERR_CORRUPT; return -1; } - memcpy(p, &el->offset, 4); - memcpy(p+4, &el->length, 4); + memcpy(p, &offset, 4); + memcpy(p+4, &length, 4); if (DOCONV()) { tdb_convert(p, 8); } /* the recovery area contains the old data, not the new data, so we have to call the original tdb_read method to get it */ - if (methods->tdb_read(tdb, el->offset, p + 8, el->length, 0) != 0) { + if (methods->tdb_read(tdb, offset, p + 8, length, 0) != 0) { free(data); tdb->ecode = TDB_ERR_IO; return -1; } - p += 8 + el->length; + p += 8 + length; } /* and the tailer */ @@ -780,6 +764,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) const struct tdb_methods *methods; tdb_off_t magic_offset = 0; uint32_t zero = 0; + int i; if (tdb->transaction == NULL) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: no transaction\n")); @@ -799,7 +784,7 @@ int tdb_transaction_commit(struct tdb_context *tdb) } /* check for a null transaction */ - if (tdb->transaction->elements == NULL) { + if (tdb->transaction->blocks == NULL) { tdb_transaction_cancel(tdb); return 0; } @@ -858,10 +843,21 @@ int tdb_transaction_commit(struct tdb_context *tdb) } /* perform all the writes */ - while (tdb->transaction->elements) { - struct tdb_transaction_el *el = tdb->transaction->elements; + for (i=0;i<tdb->transaction->num_blocks;i++) { + tdb_off_t offset; + tdb_len_t length; - if (methods->tdb_write(tdb, el->offset, el->data, el->length) == -1) { + if (tdb->transaction->blocks[i] == NULL) { + continue; + } + + offset = i * tdb->transaction->block_size; + length = tdb->transaction->block_size; + if (i == tdb->transaction->num_blocks-1) { + length = tdb->transaction->last_block_size; + } + + if (methods->tdb_write(tdb, offset, tdb->transaction->blocks[i], length) == -1) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed during commit\n")); /* we've overwritten part of the data and @@ -876,11 +872,12 @@ int tdb_transaction_commit(struct tdb_context *tdb) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed\n")); return -1; } - tdb->transaction->elements = el->next; - free(el->data); - free(el); + SAFE_FREE(tdb->transaction->blocks[i]); } + SAFE_FREE(tdb->transaction->blocks); + tdb->transaction->num_blocks = 0; + if (!(tdb->flags & TDB_NOSYNC)) { /* ensure the new data is on disk */ if (transaction_sync(tdb, 0, tdb->map_size) == -1) { diff --git a/source3/lib/tdb/common/traverse.c b/source3/lib/tdb/common/traverse.c index 27b2cfc54a..07b0c23858 100644 --- a/source3/lib/tdb/common/traverse.c +++ b/source3/lib/tdb/common/traverse.c @@ -241,7 +241,9 @@ int tdb_traverse(struct tdb_context *tdb, return -1; } + tdb->traverse_write++; ret = tdb_traverse_internal(tdb, fn, private_data, &tl); + tdb->traverse_write--; tdb_transaction_unlock(tdb); @@ -333,3 +335,4 @@ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n")); return key; } + diff --git a/source3/lib/tdb/configure.ac b/source3/lib/tdb/configure.ac index de8fc9c2b5..1085055bc9 100644 --- a/source3/lib/tdb/configure.ac +++ b/source3/lib/tdb/configure.ac @@ -2,7 +2,7 @@ AC_PREREQ(2.50) AC_DEFUN([SMB_MODULE_DEFAULT], [echo -n ""]) AC_DEFUN([SMB_LIBRARY_ENABLE], [echo -n ""]) AC_DEFUN([SMB_ENABLE], [echo -n ""]) -AC_INIT(tdb, 1.1.0) +AC_INIT(tdb, 1.1.1) AC_CONFIG_SRCDIR([common/tdb.c]) AC_CONFIG_HEADER(include/config.h) AC_LIBREPLACE_ALL_CHECKS diff --git a/source3/lib/tdb/include/tdb.h b/source3/lib/tdb/include/tdb.h index fce6628b84..0008085de5 100644 --- a/source3/lib/tdb/include/tdb.h +++ b/source3/lib/tdb/include/tdb.h @@ -135,6 +135,8 @@ int tdb_get_seqnum(struct tdb_context *tdb); int tdb_hash_size(struct tdb_context *tdb); size_t tdb_map_size(struct tdb_context *tdb); int tdb_get_flags(struct tdb_context *tdb); +void tdb_add_flags(struct tdb_context *tdb, unsigned flag); +void tdb_remove_flags(struct tdb_context *tdb, unsigned flag); void tdb_enable_seqnum(struct tdb_context *tdb); void tdb_increment_seqnum_nonblock(struct tdb_context *tdb); @@ -153,6 +155,8 @@ void tdb_setalarm_sigptr(struct tdb_context *tdb, volatile sig_atomic_t *sigptr) void tdb_dump_all(struct tdb_context *tdb); int tdb_printfreelist(struct tdb_context *tdb); int tdb_validate_freelist(struct tdb_context *tdb, int *pnum_entries); +int tdb_wipe_all(struct tdb_context *tdb); +int tdb_freelist_size(struct tdb_context *tdb); extern TDB_DATA tdb_null; diff --git a/source3/lib/util.c b/source3/lib/util.c index 25b2700ae3..0653fc9d3f 100644 --- a/source3/lib/util.c +++ b/source3/lib/util.c @@ -897,67 +897,6 @@ int set_blocking(int fd, bool set) #undef FLAG_TO_SET } -/**************************************************************************** - Transfer some data between two fd's. -****************************************************************************/ - -#ifndef TRANSFER_BUF_SIZE -#define TRANSFER_BUF_SIZE 65536 -#endif - -ssize_t transfer_file_internal(int infd, int outfd, size_t n, ssize_t (*read_fn)(int, void *, size_t), - ssize_t (*write_fn)(int, const void *, size_t)) -{ - char *buf; - size_t total = 0; - ssize_t read_ret; - ssize_t write_ret; - size_t num_to_read_thistime; - size_t num_written = 0; - - if ((buf = SMB_MALLOC_ARRAY(char, TRANSFER_BUF_SIZE)) == NULL) - return -1; - - while (total < n) { - num_to_read_thistime = MIN((n - total), TRANSFER_BUF_SIZE); - - read_ret = (*read_fn)(infd, buf, num_to_read_thistime); - if (read_ret == -1) { - DEBUG(0,("transfer_file_internal: read failure. Error = %s\n", strerror(errno) )); - SAFE_FREE(buf); - return -1; - } - if (read_ret == 0) - break; - - num_written = 0; - - while (num_written < read_ret) { - write_ret = (*write_fn)(outfd,buf + num_written, read_ret - num_written); - - if (write_ret == -1) { - DEBUG(0,("transfer_file_internal: write failure. Error = %s\n", strerror(errno) )); - SAFE_FREE(buf); - return -1; - } - if (write_ret == 0) - return (ssize_t)total; - - num_written += (size_t)write_ret; - } - - total += (size_t)read_ret; - } - - SAFE_FREE(buf); - return (ssize_t)total; -} - -SMB_OFF_T transfer_file(int infd,int outfd,SMB_OFF_T n) -{ - return (SMB_OFF_T)transfer_file_internal(infd, outfd, (size_t)n, sys_read, sys_write); -} - /******************************************************************* Sleep for a specified number of milliseconds. ********************************************************************/ diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c index 7e21fe1195..3e3268104c 100644 --- a/source3/lib/util_str.c +++ b/source3/lib/util_str.c @@ -24,6 +24,17 @@ #include "includes.h" +char toupper_ascii_fast_table[128] = { + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f +}; + /** * @file * @brief String utilities. @@ -187,8 +198,8 @@ int StrCaseCmp(const char *s, const char *t) * from here on in */ break; - us = toupper_ascii(*ps); - ut = toupper_ascii(*pt); + us = toupper_ascii_fast(*ps); + ut = toupper_ascii_fast(*pt); if (us == ut) continue; else if (us < ut) @@ -246,8 +257,8 @@ int StrnCaseCmp(const char *s, const char *t, size_t len) * hard way from here on in */ break; - us = toupper_ascii(*ps); - ut = toupper_ascii(*pt); + us = toupper_ascii_fast(*ps); + ut = toupper_ascii_fast(*pt); if (us == ut) continue; else if (us < ut) @@ -1679,7 +1690,7 @@ void strupper_m(char *s) (ie. they match for the first 128 chars) */ while (*s && !(((unsigned char)s[0]) & 0x80)) { - *s = toupper_ascii((unsigned char)*s); + *s = toupper_ascii_fast((unsigned char)*s); s++; } diff --git a/source3/lib/util_transfer_file.c b/source3/lib/util_transfer_file.c new file mode 100644 index 0000000000..1e3b76fc77 --- /dev/null +++ b/source3/lib/util_transfer_file.c @@ -0,0 +1,110 @@ +/* + * Unix SMB/CIFS implementation. + * Utility functions to transfer files. + * + * Copyright (C) Jeremy Allison 2001-2002 + * Copyright (C) Michael Adam 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +#include <includes.h> + +/**************************************************************************** + Transfer some data between two fd's. +****************************************************************************/ + +#ifndef TRANSFER_BUF_SIZE +#define TRANSFER_BUF_SIZE 65536 +#endif + + +ssize_t transfer_file_internal(void *in_file, + void *out_file, + size_t n, + ssize_t (*read_fn)(void *, void *, size_t), + ssize_t (*write_fn)(void *, const void *, size_t)) +{ + char *buf; + size_t total = 0; + ssize_t read_ret; + ssize_t write_ret; + size_t num_to_read_thistime; + size_t num_written = 0; + + if ((buf = SMB_MALLOC_ARRAY(char, TRANSFER_BUF_SIZE)) == NULL) { + return -1; + } + + while (total < n) { + num_to_read_thistime = MIN((n - total), TRANSFER_BUF_SIZE); + + read_ret = (*read_fn)(in_file, buf, num_to_read_thistime); + if (read_ret == -1) { + DEBUG(0,("transfer_file_internal: read failure. " + "Error = %s\n", strerror(errno) )); + SAFE_FREE(buf); + return -1; + } + if (read_ret == 0) { + break; + } + + num_written = 0; + + while (num_written < read_ret) { + write_ret = (*write_fn)(out_file, buf + num_written, + read_ret - num_written); + + if (write_ret == -1) { + DEBUG(0,("transfer_file_internal: " + "write failure. Error = %s\n", + strerror(errno) )); + SAFE_FREE(buf); + return -1; + } + if (write_ret == 0) { + return (ssize_t)total; + } + + num_written += (size_t)write_ret; + } + + total += (size_t)read_ret; + } + + SAFE_FREE(buf); + return (ssize_t)total; +} + +static ssize_t sys_read_fn(void *file, void *buf, size_t len) +{ + int *fd = (int *)file; + + return sys_read(*fd, buf, len); +} + +static ssize_t sys_write_fn(void *file, const void *buf, size_t len) +{ + int *fd = (int *)file; + + return sys_write(*fd, buf, len); +} + +SMB_OFF_T transfer_file(int infd, int outfd, SMB_OFF_T n) +{ + return (SMB_OFF_T)transfer_file_internal(&infd, &outfd, (size_t)n, + sys_read_fn, sys_write_fn); +} |