From 1be6d849ab9d2c992dfa94419260fc28cf573d87 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Wed, 15 Feb 2012 16:38:43 +0100 Subject: s3-g_lock: Use dbwrap_record_watch_send/recv This simplifies the g_lock implementation. The new implementation tries to acquire a lock. If that fails due to a lock conflict, wait for the g_lock record to change. Upon change, just try again. The old logic had to cope with pending records and an ugly hack into ctdb itself. As a bonus, we now get a really clean async g_lock_lock_send/recv that can asynchronously wait for a global lock. This would have been almost impossible to do without the dbwrap_record_watch infrastructure. --- source3/lib/g_lock.c | 616 ++++++++++++++------------------------------------- 1 file changed, 170 insertions(+), 446 deletions(-) (limited to 'source3/lib') diff --git a/source3/lib/g_lock.c b/source3/lib/g_lock.c index 1011584a25..06a14fac6c 100644 --- a/source3/lib/g_lock.c +++ b/source3/lib/g_lock.c @@ -21,16 +21,15 @@ #include "system/filesys.h" #include "dbwrap/dbwrap.h" #include "dbwrap/dbwrap_open.h" +#include "dbwrap/dbwrap_watch.h" #include "g_lock.h" #include "util_tdb.h" #include "ctdbd_conn.h" #include "../lib/util/select.h" +#include "../lib/util/tevent_ntstatus.h" #include "system/select.h" #include "messages.h" -static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name, - struct server_id pid); - struct g_lock_ctx { struct db_context *db; struct messaging_context *msg; @@ -39,7 +38,7 @@ struct g_lock_ctx { /* * The "g_lock.tdb" file contains records, indexed by the 0-terminated * lockname. The record contains an array of "struct g_lock_rec" - * structures. Waiters have the lock_type with G_LOCK_PENDING or'ed. + * structures. */ struct g_lock_rec { @@ -67,453 +66,248 @@ struct g_lock_ctx *g_lock_ctx_init(TALLOC_CTX *mem_ctx, TALLOC_FREE(result); return NULL; } + dbwrap_watch_db(result->db, msg); return result; } -static bool g_lock_conflicts(enum g_lock_type lock_type, - const struct g_lock_rec *rec) +static bool g_lock_conflicts(enum g_lock_type l1, enum g_lock_type l2) { - enum g_lock_type rec_lock = rec->lock_type; - - if ((rec_lock & G_LOCK_PENDING) != 0) { - return false; - } - /* * Only tested write locks so far. Very likely this routine * needs to be fixed for read locks.... */ - if ((lock_type == G_LOCK_READ) && (rec_lock == G_LOCK_READ)) { + if ((l1 == G_LOCK_READ) && (l2 == G_LOCK_READ)) { return false; } return true; } static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data, - int *pnum_locks, struct g_lock_rec **plocks) + unsigned *pnum_locks, struct g_lock_rec **plocks) { - int i, num_locks; + unsigned num_locks; struct g_lock_rec *locks; if ((data.dsize % sizeof(struct g_lock_rec)) != 0) { DEBUG(1, ("invalid lock record length %d\n", (int)data.dsize)); return false; } - num_locks = data.dsize / sizeof(struct g_lock_rec); - locks = talloc_array(mem_ctx, struct g_lock_rec, num_locks); + locks = talloc_memdup(mem_ctx, data.dptr, data.dsize); if (locks == NULL) { - DEBUG(1, ("talloc failed\n")); + DEBUG(1, ("talloc_memdup failed\n")); return false; } - - memcpy(locks, data.dptr, data.dsize); - - DEBUG(10, ("locks:\n")); - for (i=0; istore failed: %s\n", + nt_errstr(store_status))); + status = store_status; + } + } + TALLOC_FREE(locks); + return status; } -static void g_lock_got_retry(struct messaging_context *msg, - void *private_data, - uint32_t msg_type, - struct server_id server_id, - DATA_BLOB *data); +struct g_lock_lock_state { + struct tevent_context *ev; + struct g_lock_ctx *ctx; + const char *name; + enum g_lock_type type; +}; + +static void g_lock_lock_retry(struct tevent_req *subreq); -static NTSTATUS g_lock_trylock(struct g_lock_ctx *ctx, const char *name, - enum g_lock_type lock_type) +struct tevent_req *g_lock_lock_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct g_lock_ctx *ctx, + const char *name, + enum g_lock_type type) { - struct db_record *rec = NULL; - struct g_lock_rec *locks = NULL; - int i, num_locks; + struct tevent_req *req, *subreq; + struct g_lock_lock_state *state; + struct db_record *rec; struct server_id self; - int our_index; - TDB_DATA data; - NTSTATUS status = NT_STATUS_OK; - NTSTATUS store_status; - TDB_DATA value; + NTSTATUS status; + + req = tevent_req_create(mem_ctx, &state, struct g_lock_lock_state); + if (req == NULL) { + return NULL; + } + state->ev = ev; + state->ctx = ctx; + state->name = name; + state->type = type; -again: rec = dbwrap_fetch_locked(ctx->db, talloc_tos(), - string_term_tdb_data(name)); + string_term_tdb_data(state->name)); if (rec == NULL) { DEBUG(10, ("fetch_locked(\"%s\") failed\n", name)); - status = NT_STATUS_LOCK_NOT_GRANTED; - goto done; + tevent_req_nterror(req, NT_STATUS_LOCK_NOT_GRANTED); + return tevent_req_post(req, ev); } - value = dbwrap_record_get_value(rec); - if (!g_lock_parse(talloc_tos(), value, &num_locks, &locks)) { - DEBUG(10, ("g_lock_parse for %s failed\n", name)); - status = NT_STATUS_INTERNAL_ERROR; - goto done; - } - - self = messaging_server_id(ctx->msg); - our_index = -1; + self = messaging_server_id(state->ctx->msg); - for (i=0; itype); + if (NT_STATUS_IS_OK(status)) { + TALLOC_FREE(rec); + tevent_req_done(req); + return tevent_req_post(req, ev); } - - if (NT_STATUS_IS_OK(status) && ((lock_type & G_LOCK_PENDING) == 0)) { - /* - * Walk through the list of locks, search for dead entries - */ - g_lock_cleanup(&num_locks, locks); - } - - data = make_tdb_data((uint8_t *)locks, num_locks * sizeof(*locks)); - store_status = dbwrap_record_store(rec, data, 0); - if (!NT_STATUS_IS_OK(store_status)) { - DEBUG(1, ("rec->store failed: %s\n", - nt_errstr(store_status))); - status = store_status; + if (!NT_STATUS_EQUAL(status, NT_STATUS_LOCK_NOT_GRANTED)) { + TALLOC_FREE(rec); + tevent_req_nterror(req, status); + return tevent_req_post(req, ev); } - -done: - TALLOC_FREE(locks); + subreq = dbwrap_record_watch_send(state, state->ev, rec, + state->ctx->msg); TALLOC_FREE(rec); - - if (NT_STATUS_IS_OK(status) && (lock_type & G_LOCK_PENDING) != 0) { - return STATUS_PENDING; + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); } - - return NT_STATUS_OK; + tevent_req_set_callback(subreq, g_lock_lock_retry, req); + return req; } -NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, - enum g_lock_type lock_type, struct timeval timeout) +static void g_lock_lock_retry(struct tevent_req *subreq) { - struct tevent_timer *te = NULL; + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct g_lock_lock_state *state = tevent_req_data( + req, struct g_lock_lock_state); + struct server_id self = messaging_server_id(state->ctx->msg); + struct db_record *rec; NTSTATUS status; - bool retry = false; - struct timeval timeout_end; - struct timeval time_now; - - DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type, - name)); - if (lock_type & ~1) { - DEBUG(1, ("Got invalid lock type %d for %s\n", - (int)lock_type, name)); - return NT_STATUS_INVALID_PARAMETER; + status = dbwrap_record_watch_recv(subreq, talloc_tos(), &rec); + TALLOC_FREE(subreq); + if (tevent_req_nterror(req, status)) { + return; } - -#ifdef CLUSTER_SUPPORT - if (lp_clustering()) { - status = ctdb_watch_us(messaging_ctdbd_connection()); - if (!NT_STATUS_IS_OK(status)) { - DEBUG(10, ("could not register retry with ctdb: %s\n", - nt_errstr(status))); - goto done; - } + status = g_lock_trylock(rec, self, state->type); + if (NT_STATUS_IS_OK(status)) { + TALLOC_FREE(rec); + tevent_req_done(req); + return; } -#endif - - status = messaging_register(ctx->msg, &retry, MSG_DBWRAP_G_LOCK_RETRY, - g_lock_got_retry); - if (!NT_STATUS_IS_OK(status)) { - DEBUG(10, ("messaging_register failed: %s\n", - nt_errstr(status))); - return status; + if (!NT_STATUS_EQUAL(status, NT_STATUS_LOCK_NOT_GRANTED)) { + TALLOC_FREE(rec); + tevent_req_nterror(req, status); + return; } - - time_now = timeval_current(); - timeout_end = timeval_sum(&time_now, &timeout); - - while (true) { - struct pollfd *pollfds; - int num_pollfds; - int saved_errno; - int ret; - struct timeval timeout_remaining, select_timeout; - - status = g_lock_trylock(ctx, name, lock_type); - if (NT_STATUS_IS_OK(status)) { - DEBUG(10, ("Got lock %s\n", name)); - break; - } - if (!NT_STATUS_EQUAL(status, STATUS_PENDING)) { - DEBUG(10, ("g_lock_trylock failed: %s\n", - nt_errstr(status))); - break; - } - - DEBUG(10, ("g_lock_trylock: Did not get lock, waiting...\n")); - - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * !!! HACK ALERT --- FIX ME !!! - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * What we really want to do here is to react to - * MSG_DBWRAP_G_LOCK_RETRY messages that are either sent - * by a client doing g_lock_unlock or by ourselves when - * we receive a CTDB_SRVID_SAMBA_NOTIFY or - * CTDB_SRVID_RECONFIGURE message from ctdbd, i.e. when - * either a client holding a lock or a complete node - * has died. - * - * Doing this properly involves calling tevent_loop_once(), - * but doing this here with the main ctdbd messaging context - * creates a nested event loop when g_lock_lock() is called - * from the main event loop, e.g. in a tcon_and_X where the - * share_info.tdb needs to be initialized and is locked by - * another process, or when the remore registry is accessed - * for writing and some other process already holds a lock - * on the registry.tdb. - * - * So as a quick fix, we act a little coarsely here: we do - * a select on the ctdb connection fd and when it is readable - * or we get EINTR, then we retry without actually parsing - * any ctdb packages or dispatching messages. This means that - * we retry more often than intended by design, but this does - * not harm and it is unobtrusive. When we have finished, - * the main loop will pick up all the messages and ctdb - * packets. The only extra twist is that we cannot use timed - * events here but have to handcode a timeout. - */ - - /* - * We allocate 1 entries here. In the clustering case - * we might have to add the ctdb fd. This avoids the - * realloc then. - */ - pollfds = talloc_array(talloc_tos(), struct pollfd, 1); - if (pollfds == NULL) { - status = NT_STATUS_NO_MEMORY; - break; - } - num_pollfds = 0; - -#ifdef CLUSTER_SUPPORT - if (lp_clustering()) { - struct ctdbd_connection *conn; - conn = messaging_ctdbd_connection(); - - pollfds[0].fd = ctdbd_conn_get_fd(conn); - pollfds[0].events = POLLIN|POLLHUP; - - num_pollfds += 1; - } -#endif - - time_now = timeval_current(); - timeout_remaining = timeval_until(&time_now, &timeout_end); - select_timeout = timeval_set(60, 0); - - select_timeout = timeval_min(&select_timeout, - &timeout_remaining); - - ret = poll(pollfds, num_pollfds, - timeval_to_msec(select_timeout)); - - /* - * We're not *really interested in the actual flags. We just - * need to retry this whole thing. - */ - saved_errno = errno; - TALLOC_FREE(pollfds); - errno = saved_errno; - - if (ret == -1) { - if (errno != EINTR) { - DEBUG(1, ("error calling select: %s\n", - strerror(errno))); - status = NT_STATUS_INTERNAL_ERROR; - break; - } - /* - * errno == EINTR: - * This means a signal was received. - * It might have been a MSG_DBWRAP_G_LOCK_RETRY message. - * ==> retry - */ - } else if (ret == 0) { - if (timeval_expired(&timeout_end)) { - DEBUG(10, ("g_lock_lock timed out\n")); - status = NT_STATUS_LOCK_NOT_GRANTED; - break; - } else { - DEBUG(10, ("select returned 0 but timeout not " - "not expired, retrying\n")); - } - } else if (ret != 1) { - DEBUG(1, ("invalid return code of select: %d\n", ret)); - status = NT_STATUS_INTERNAL_ERROR; - break; - } - /* - * ret == 1: - * This means ctdbd has sent us some data. - * Might be a CTDB_SRVID_RECONFIGURE or a - * CTDB_SRVID_SAMBA_NOTIFY message. - * ==> retry - */ - } - -#ifdef CLUSTER_SUPPORT -done: -#endif - - if (!NT_STATUS_IS_OK(status)) { - NTSTATUS unlock_status; - - unlock_status = g_lock_unlock(ctx, name); - - if (!NT_STATUS_IS_OK(unlock_status)) { - DEBUG(1, ("Could not remove ourself from the locking " - "db: %s\n", nt_errstr(status))); - } + subreq = dbwrap_record_watch_send(state, state->ev, rec, + state->ctx->msg); + if (tevent_req_nomem(subreq, req)) { + return; } + tevent_req_set_callback(subreq, g_lock_lock_retry, req); + return; - messaging_deregister(ctx->msg, MSG_DBWRAP_G_LOCK_RETRY, &retry); - TALLOC_FREE(te); - - return status; } -static void g_lock_got_retry(struct messaging_context *msg, - void *private_data, - uint32_t msg_type, - struct server_id server_id, - DATA_BLOB *data) +NTSTATUS g_lock_lock_recv(struct tevent_req *req) { - bool *pretry = (bool *)private_data; + return tevent_req_simple_recv_ntstatus(req); +} - DEBUG(10, ("Got retry message from pid %s\n", - server_id_str(talloc_tos(), &server_id))); +NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name, + enum g_lock_type type, struct timeval timeout) +{ + TALLOC_CTX *frame = talloc_stackframe(); + struct tevent_context *ev; + struct tevent_req *req; + struct timeval end; + NTSTATUS status = NT_STATUS_NO_MEMORY; - *pretry = true; + ev = tevent_context_init(frame); + if (ev == NULL) { + goto fail; + } + req = g_lock_lock_send(frame, ev, ctx, name, type); + if (req == NULL) { + goto fail; + } + end = timeval_current_ofs(timeout.tv_sec, timeout.tv_usec); + if (!tevent_req_set_endtime(req, ev, end)) { + goto fail; + } + if (!tevent_req_poll_ntstatus(req, ev, &status)) { + goto fail; + } + status = g_lock_lock_recv(req); + fail: + TALLOC_FREE(frame); + return status; } -static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name, - struct server_id pid) +NTSTATUS g_lock_unlock(struct g_lock_ctx *ctx, const char *name) { + struct server_id self = messaging_server_id(ctx->msg); struct db_record *rec = NULL; struct g_lock_rec *locks = NULL; - int i, num_locks; - enum g_lock_type lock_type; + unsigned i, num_locks; NTSTATUS status; TDB_DATA value; @@ -532,24 +326,18 @@ static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name, status = NT_STATUS_FILE_INVALID; goto done; } - for (i=0; imsg, locks[i].pid, - MSG_DBWRAP_G_LOCK_RETRY, - &data_blob_null); - if (!NT_STATUS_IS_OK(status)) { - DEBUG(1, ("sending retry to %s failed: %s\n", - server_id_str(talloc_tos(), - &locks[i].pid), - nt_errstr(status))); - } else { - num_wakeups += 1; - } - if (num_wakeups > 5) { - break; - } - } - } + status = NT_STATUS_OK; done: - /* - * For the error path, TALLOC_FREE(rec) as well. In the good - * path we have already freed it. - */ TALLOC_FREE(rec); - TALLOC_FREE(locks); return status; } -NTSTATUS g_lock_unlock(struct g_lock_ctx *ctx, const char *name) -{ - NTSTATUS status; - - status = g_lock_force_unlock(ctx, name, messaging_server_id(ctx->msg)); - -#ifdef CLUSTER_SUPPORT - if (lp_clustering()) { - ctdb_unwatch(messaging_ctdbd_connection()); - } -#endif - return status; -} - struct g_lock_locks_state { int (*fn)(const char *name, void *private_data); void *private_data; @@ -676,7 +405,7 @@ NTSTATUS g_lock_dump(struct g_lock_ctx *ctx, const char *name, void *private_data) { TDB_DATA data; - int i, num_locks; + unsigned i, num_locks; struct g_lock_rec *locks = NULL; bool ret; NTSTATUS status; @@ -717,11 +446,6 @@ static int g_lock_get_fn(struct server_id pid, enum g_lock_type lock_type, void *priv) { struct g_lock_get_state *state = (struct g_lock_get_state *)priv; - - if ((lock_type & G_LOCK_PENDING) != 0) { - return 0; - } - state->found = true; *state->pid = pid; return 1; -- cgit