From 650d81b252cc669ef848448afad7e9bb79c4f20e Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Sat, 21 Apr 2007 07:23:42 +0000 Subject: r22421: merged in latest ctdb changes from bzr (This used to be commit 3633f862b966866819c9a0a6ad0238a858e15e62) --- source4/cluster/ctdb/common/ctdb_ltdb.c | 144 ++++++++++++++++++++++++++++++-- 1 file changed, 136 insertions(+), 8 deletions(-) (limited to 'source4/cluster/ctdb/common/ctdb_ltdb.c') diff --git a/source4/cluster/ctdb/common/ctdb_ltdb.c b/source4/cluster/ctdb/common/ctdb_ltdb.c index 785ccad9b3..cb07a72375 100644 --- a/source4/cluster/ctdb/common/ctdb_ltdb.c +++ b/source4/cluster/ctdb/common/ctdb_ltdb.c @@ -45,9 +45,8 @@ struct ctdb_db_context *ctdb_db_handle(struct ctdb_context *ctdb, const char *na /* this is the dummy null procedure that all databases support */ -static int ctdb_fetch_func(struct ctdb_call_info *call) +static int ctdb_null_func(struct ctdb_call_info *call) { - call->reply_data = &call->record_data; return 0; } @@ -82,10 +81,21 @@ struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, const char *name, } } + if (mkdir(ctdb->db_directory, 0700) == -1 && errno != EEXIST) { + DEBUG(0,(__location__ " Unable to create ctdb directory '%s'\n", + ctdb->db_directory)); + talloc_free(ctdb_db); + return NULL; + } + + /* prefix the database name with ctdb->db_directory, so when we run on loopback + we don't conflict in the local filesystem. NOTE(review): the talloc_asprintf() result is not checked for NULL before it is passed to tdb_wrap_open() - confirm */ + name = talloc_asprintf(ctdb_db, "%s/%s", ctdb->db_directory, name); + /* when we have a separate daemon this will need to be a real file, not a TDB_INTERNAL, so the parent can access it to for ltdb bypass */ - ctdb_db->ltdb = tdb_wrap_open(ctdb, name, 0, TDB_INTERNAL, open_flags, mode); + ctdb_db->ltdb = tdb_wrap_open(ctdb, name, 0, TDB_CLEAR_IF_FIRST, open_flags, mode); if (ctdb_db->ltdb == NULL) { ctdb_set_error(ctdb, "Failed to open tdb %s\n", name); talloc_free(ctdb_db); @@ -94,9 +104,10 @@ struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, const char 
*name, /* - all databases support the "fetch" function. we need this in order to do forced migration of records + all databases support the "null" function. we need this in + order to do forced migration of records */ - ret = ctdb_set_call(ctdb_db, ctdb_fetch_func, CTDB_FETCH_FUNC); + ret = ctdb_set_call(ctdb_db, ctdb_null_func, CTDB_NULL_FUNC); if (ret != 0) { talloc_free(ctdb_db); return NULL; @@ -145,13 +156,15 @@ int ctdb_ltdb_fetch(struct ctdb_db_context *ctdb_db, rec = tdb_fetch(ctdb_db->ltdb->tdb, key); if (rec.dsize < sizeof(*header)) { + TDB_DATA d2; /* return an initial header */ - free(rec.dptr); + if (rec.dptr) free(rec.dptr); ltdb_initial_header(ctdb_db, key, header); + ZERO_STRUCT(d2); if (data) { - data->dptr = NULL; - data->dsize = 0; + *data = d2; } + ctdb_ltdb_store(ctdb_db, key, header, d2); return 0; } @@ -215,3 +228,118 @@ int ctdb_ltdb_unlock(struct ctdb_db_context *ctdb_db, TDB_DATA key) return tdb_chainunlock(ctdb_db->ltdb->tdb, key); } +struct lock_fetch_state { /* filled in by ctdb_ltdb_lock_requeue and handed to lock_fetch_callback for a deferred request */ + struct ctdb_context *ctdb; + void (*recv_pkt)(void *, uint8_t *, uint32_t); /* packet receive function the request is re-submitted to */ + void *recv_context; /* first argument passed to recv_pkt */ + struct ctdb_req_header *hdr; /* the deferred request packet */ +}; + +/* + called when we should retry the operation: re-submits the saved packet to state->recv_pkt and then frees the state + */ +static void lock_fetch_callback(void *p) +{ + struct lock_fetch_state *state = talloc_get_type(p, struct lock_fetch_state); + state->recv_pkt(state->recv_context, (uint8_t *)state->hdr, state->hdr->length); + talloc_free(state); + DEBUG(2,(__location__ " PACKET REQUEUED\n")); +} + + +/* + do a non-blocking ltdb_lock, deferring this ctdb request until we + have the chainlock + + It does the following: + + 1) tries to get the chainlock. If it succeeds, then it returns 0 + + 2) if it fails to get a chainlock immediately then it sets up a + non-blocking chainlock via ctdb_lockwait, and when it gets the + chainlock it re-submits this ctdb request to the main packet + receive function + + This effectively queues all ctdb requests that cannot be + immediately satisfied until it can get the lock. 
This means that + the main ctdb daemon will not block waiting for a chainlock held by + a client + + There are 3 possible return values: + + 0: means that it got the lock immediately. + -1: means that it failed to get the lock, and won't retry + -2: means that it failed to get the lock immediately, but will retry + */ +int ctdb_ltdb_lock_requeue(struct ctdb_db_context *ctdb_db, + TDB_DATA key, struct ctdb_req_header *hdr, + void (*recv_pkt)(void *, uint8_t *, uint32_t ), + void *recv_context) +{ + int ret; + struct tdb_context *tdb = ctdb_db->ltdb->tdb; + struct lockwait_handle *h; + struct lock_fetch_state *state; + + ret = tdb_chainlock_nonblock(tdb, key); + + if (ret != 0 && + !(errno == EACCES || errno == EAGAIN || errno == EDEADLK)) { + /* a hard failure - don't try again */ + return -1; + } + + /* when torturing, ensure we test the contended path. NOTE(review): this branch does not test ret, so tdb_chainunlock is also called when the nonblocking lock was not obtained - confirm that is intended */ + if ((ctdb_db->ctdb->flags & CTDB_FLAG_TORTURE) && + random() % 5 == 0) { + ret = -1; + tdb_chainunlock(tdb, key); + } + + /* first the non-contended path */ + if (ret == 0) { + return 0; + } + + state = talloc(ctdb_db, struct lock_fetch_state); /* NOTE(review): result is not checked for NULL before the stores below */ + state->ctdb = ctdb_db->ctdb; + state->hdr = hdr; + state->recv_pkt = recv_pkt; + state->recv_context = recv_context; + + /* now the contended path. NOTE(review): if ctdb_lockwait fails, state is not freed here and tdb_chainunlock is called although the chainlock is not held on this path - confirm */ + h = ctdb_lockwait(ctdb_db, key, lock_fetch_callback, state); + if (h == NULL) { + tdb_chainunlock(tdb, key); + return -1; + } + + /* we need to move the packet off the temporary context in ctdb_recv_pkt(), + so it won't be freed yet */ + talloc_steal(state, hdr); + talloc_steal(state, h); + + /* now tell the caller that we will retry asynchronously */ + return -2; +} + +/* + a variant of ctdb_ltdb_lock_requeue that also fetches the record; if the fetch fails the chainlock is released again and the fetch result is returned, otherwise the ctdb_ltdb_lock_requeue result is returned + */ +int ctdb_ltdb_lock_fetch_requeue(struct ctdb_db_context *ctdb_db, + TDB_DATA key, struct ctdb_ltdb_header *header, + struct ctdb_req_header *hdr, TDB_DATA *data, + void (*recv_pkt)(void *, uint8_t *, uint32_t ), + void *recv_context) +{ + int ret; + + ret = 
ctdb_ltdb_lock_requeue(ctdb_db, key, hdr, recv_pkt, recv_context); + if (ret == 0) { + ret = ctdb_ltdb_fetch(ctdb_db, key, header, hdr, data); + if (ret != 0) { + ctdb_ltdb_unlock(ctdb_db, key); + } + } + return ret; +} -- cgit