From 5dcf20961ee52eca4eb0bb4ab0e20ebd547c8058 Mon Sep 17 00:00:00 2001 From: Michael Adam Date: Tue, 5 Aug 2008 11:32:20 +0200 Subject: dbwrap ctdb: add a retry loop to the persistent store operation. This is because ctdbd can fail in performing the persistent_store due to race conditions, and this does not mean it can't succeed the next time. To not loop infinitely, this makes use of a new parametric option: "dbwrap ctdb:max store retries" (integer) which defaults to 5 and sets the upper limit for the number or repeats of the fetch/store cycle. Michael (This used to be commit 2bcc9e6ecef876030e552a607d92597f60203db2) --- source3/lib/dbwrap_ctdb.c | 171 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 120 insertions(+), 51 deletions(-) (limited to 'source3') diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c index bb56b66bdc..bdcec3985d 100644 --- a/source3/lib/dbwrap_ctdb.c +++ b/source3/lib/dbwrap_ctdb.c @@ -33,6 +33,11 @@ struct db_ctdb_rec { struct ctdb_ltdb_header header; }; +static struct db_record *fetch_locked_internal(struct db_ctdb_ctx *ctx, + TALLOC_CTX *mem_ctx, + TDB_DATA key, + bool persistent); + static NTSTATUS db_ctdb_store(struct db_record *rec, TDB_DATA data, int flag) { struct db_ctdb_rec *crec = talloc_get_type_abort( @@ -62,61 +67,116 @@ static NTSTATUS db_ctdb_store(struct db_record *rec, TDB_DATA data, int flag) store */ static NTSTATUS db_ctdb_store_persistent(struct db_record *rec, TDB_DATA data, int flag) { - struct db_ctdb_rec *crec = talloc_get_type_abort( - rec->private_data, struct db_ctdb_rec); + struct db_ctdb_rec *crec; + struct db_record *record; TDB_DATA cdata; int ret; NTSTATUS status; + uint32_t count; + int max_retries = lp_parm_int(-1, "dbwrap ctdb", "max store retries", 5); + + for (count = 0, status = NT_STATUS_UNSUCCESSFUL, record = rec; + (count < max_retries) && !NT_STATUS_IS_OK(status); + count++) + { + if (count > 0) { + /* retry */ + /* + * There is a hack here: We use rec as a memory + * context and re-use it as the record struct ptr. + * We don't free the record data allocated + * in each turn. So all gets freed when the caller + * releases the original record. This is because + * we don't get the record passed in by reference + * in the first place and the caller relies on + * having to free the record himself. + */ + record = fetch_locked_internal(crec->ctdb_ctx, + rec, + rec->key, + true /* persistent */); + if (record == NULL) { + DEBUG(5, ("fetch_locked_internal failed.\n")); + status = NT_STATUS_NO_MEMORY; + break; + } + } - cdata.dsize = sizeof(crec->header) + data.dsize; + crec = talloc_get_type_abort(record->private_data, + struct db_ctdb_rec); - if (!(cdata.dptr = SMB_MALLOC_ARRAY(uint8, cdata.dsize))) { - return NT_STATUS_NO_MEMORY; - } + cdata.dsize = sizeof(crec->header) + data.dsize; - crec->header.rsn++; + if (!(cdata.dptr = SMB_MALLOC_ARRAY(uint8, cdata.dsize))) { + return NT_STATUS_NO_MEMORY; + } - memcpy(cdata.dptr, &crec->header, sizeof(crec->header)); - memcpy(cdata.dptr + sizeof(crec->header), data.dptr, data.dsize); + crec->header.rsn++; - status = ctdbd_start_persistent_update(messaging_ctdbd_connection(), crec->ctdb_ctx->db_id, rec->key, cdata); - - if (NT_STATUS_IS_OK(status)) { - ret = tdb_store(crec->ctdb_ctx->wtdb->tdb, rec->key, cdata, TDB_REPLACE); - status = (ret == 0) ? NT_STATUS_OK : NT_STATUS_INTERNAL_DB_CORRUPTION; - } + memcpy(cdata.dptr, &crec->header, sizeof(crec->header)); + memcpy(cdata.dptr + sizeof(crec->header), data.dptr, data.dsize); - /* - * release the lock *now* in order to prevent deadlocks. - * - * There is a tradeoff: Usually, the record is still locked - * after db->store operation. This lock is usually released - * via the talloc destructor with the TALLOC_FREE to - * the record. So we have two choices: - * - * - Either re-lock the record after the call to persistent_store - * or cancel_persistent update and this way not changing any - * assumptions callers may have about the state, but possibly - * introducing new race conditions. - * - * - Or don't lock the record again but just remove the - * talloc_destructor. This is less racy but assumes that - * the lock is always released via TALLOC_FREE of the record. - * - * I choose the first variant for now since it seems less racy. - * We can't guarantee that we succeed in getting the lock - * anyways. The only real danger here is that a caller - * performs multiple store operations after a fetch_locked() - * which is currently not the case. - */ - tdb_chainunlock(crec->ctdb_ctx->wtdb->tdb, rec->key); - talloc_set_destructor(rec, NULL); + status = ctdbd_start_persistent_update( + messaging_ctdbd_connection(), + crec->ctdb_ctx->db_id, + rec->key, + cdata); - /* now tell ctdbd to update this record on all other nodes */ - if (NT_STATUS_IS_OK(status)) { - status = ctdbd_persistent_store(messaging_ctdbd_connection(), crec->ctdb_ctx->db_id, rec->key, cdata); - } else { - ctdbd_cancel_persistent_update(messaging_ctdbd_connection(), crec->ctdb_ctx->db_id, rec->key, cdata); + if (NT_STATUS_IS_OK(status)) { + ret = tdb_store(crec->ctdb_ctx->wtdb->tdb, rec->key, + cdata, TDB_REPLACE); + status = (ret == 0) ? NT_STATUS_OK + : NT_STATUS_INTERNAL_DB_CORRUPTION; + } + + /* + * release the lock *now* in order to prevent deadlocks. + * + * There is a tradeoff: Usually, the record is still locked + * after db->store operation. This lock is usually released + * via the talloc destructor with the TALLOC_FREE to + * the record. So we have two choices: + * + * - Either re-lock the record after the call to persistent_store + * or cancel_persistent update and this way not changing any + * assumptions callers may have about the state, but possibly + * introducing new race conditions. + * + * - Or don't lock the record again but just remove the + * talloc_destructor. This is less racy but assumes that + * the lock is always released via TALLOC_FREE of the record. + * + * I choose the first variant for now since it seems less racy. + * We can't guarantee that we succeed in getting the lock + * anyways. The only real danger here is that a caller + * performs multiple store operations after a fetch_locked() + * which is currently not the case. + */ + tdb_chainunlock(crec->ctdb_ctx->wtdb->tdb, rec->key); + talloc_set_destructor(record, NULL); + + /* now tell ctdbd to update this record on all other nodes */ + if (NT_STATUS_IS_OK(status)) { + status = ctdbd_persistent_store( + messaging_ctdbd_connection(), + crec->ctdb_ctx->db_id, + rec->key, + cdata); + } else { + ctdbd_cancel_persistent_update( + messaging_ctdbd_connection(), + crec->ctdb_ctx->db_id, + rec->key, + cdata); + } + + SAFE_FREE(cdata.dptr); + } /* retry-loop */ + + if (!NT_STATUS_IS_OK(status)) { + DEBUG(5, ("ctdbd_persistent_store still failed after " + "%d retries with error %s - giving up.\n", + count, nt_errstr(status))); } SAFE_FREE(cdata.dptr); @@ -164,12 +224,11 @@ static int db_ctdb_record_destr(struct db_record* data) return 0; } -static struct db_record *db_ctdb_fetch_locked(struct db_context *db, - TALLOC_CTX *mem_ctx, - TDB_DATA key) +static struct db_record *fetch_locked_internal(struct db_ctdb_ctx *ctx, + TALLOC_CTX *mem_ctx, + TDB_DATA key, + bool persistent) { - struct db_ctdb_ctx *ctx = talloc_get_type_abort(db->private_data, - struct db_ctdb_ctx); struct db_record *result; struct db_ctdb_rec *crec; NTSTATUS status; @@ -218,7 +277,7 @@ again: return NULL; } - if (db->persistent) { + if (persistent) { result->store = db_ctdb_store_persistent; } else { result->store = db_ctdb_store; @@ -285,6 +344,16 @@ again: return result; } +static struct db_record *db_ctdb_fetch_locked(struct db_context *db, + TALLOC_CTX *mem_ctx, + TDB_DATA key) +{ + struct db_ctdb_ctx *ctx = talloc_get_type_abort(db->private_data, + struct db_ctdb_ctx); + + return fetch_locked_internal(ctx, mem_ctx, key, db->persistent); +} + /* fetch (unlocked, no migration) operation on ctdb */ -- cgit