author     Michael Adam <obnox@samba.org>    2008-08-05 11:32:20 +0200
committer  Michael Adam <obnox@samba.org>    2008-08-13 11:54:06 +0200
commit     5dcf20961ee52eca4eb0bb4ab0e20ebd547c8058 (patch)
tree       5be26c5f95266da05aebee444aa817546d9412f8
parent     ed6692964768fd14d1b6c3378e40e23c68e9dd63 (diff)
download   samba-5dcf20961ee52eca4eb0bb4ab0e20ebd547c8058.tar.gz
           samba-5dcf20961ee52eca4eb0bb4ab0e20ebd547c8058.tar.bz2
           samba-5dcf20961ee52eca4eb0bb4ab0e20ebd547c8058.zip
dbwrap ctdb: add a retry loop to the persistent store operation.
This is needed because ctdbd can fail to perform the persistent store due to race conditions, and such a failure does not mean the store cannot succeed on the next attempt. To avoid looping infinitely, the number of fetch/store cycles is bounded by a new parametric option "dbwrap ctdb:max store retries" (integer), which defaults to 5.

Michael

(This used to be commit 2bcc9e6ecef876030e552a607d92597f60203db2)
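As an aside (not part of this commit), the new setting is an ordinary Samba parametric option, so it could be configured in the [global] section of smb.conf as sketched below; the value 10 is only an example, the built-in default being 5:

    [global]
        dbwrap ctdb:max store retries = 10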
-rw-r--r--   source3/lib/dbwrap_ctdb.c   171
1 file changed, 120 insertions, 51 deletions
diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c
index bb56b66bdc..bdcec3985d 100644
--- a/source3/lib/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap_ctdb.c
@@ -33,6 +33,11 @@ struct db_ctdb_rec {
struct ctdb_ltdb_header header;
};
+static struct db_record *fetch_locked_internal(struct db_ctdb_ctx *ctx,
+ TALLOC_CTX *mem_ctx,
+ TDB_DATA key,
+ bool persistent);
+
static NTSTATUS db_ctdb_store(struct db_record *rec, TDB_DATA data, int flag)
{
struct db_ctdb_rec *crec = talloc_get_type_abort(
@@ -62,61 +67,116 @@ static NTSTATUS db_ctdb_store(struct db_record *rec, TDB_DATA data, int flag)
store */
static NTSTATUS db_ctdb_store_persistent(struct db_record *rec, TDB_DATA data, int flag)
{
- struct db_ctdb_rec *crec = talloc_get_type_abort(
- rec->private_data, struct db_ctdb_rec);
+ struct db_ctdb_rec *crec;
+ struct db_record *record;
TDB_DATA cdata;
int ret;
NTSTATUS status;
+ uint32_t count;
+ int max_retries = lp_parm_int(-1, "dbwrap ctdb", "max store retries", 5);
+
+ for (count = 0, status = NT_STATUS_UNSUCCESSFUL, record = rec;
+ (count < max_retries) && !NT_STATUS_IS_OK(status);
+ count++)
+ {
+ if (count > 0) {
+ /* retry */
+ /*
+ * There is a hack here: We use rec as a memory
+ * context and re-use it as the record struct ptr.
+ * We don't free the record data allocated
+ * in each turn. So all gets freed when the caller
+ * releases the original record. This is because
+ * we don't get the record passed in by reference
+ * in the first place and the caller relies on
+ * having to free the record himself.
+ */
+ record = fetch_locked_internal(crec->ctdb_ctx,
+ rec,
+ rec->key,
+ true /* persistent */);
+ if (record == NULL) {
+ DEBUG(5, ("fetch_locked_internal failed.\n"));
+ status = NT_STATUS_NO_MEMORY;
+ break;
+ }
+ }
- cdata.dsize = sizeof(crec->header) + data.dsize;
+ crec = talloc_get_type_abort(record->private_data,
+ struct db_ctdb_rec);
- if (!(cdata.dptr = SMB_MALLOC_ARRAY(uint8, cdata.dsize))) {
- return NT_STATUS_NO_MEMORY;
- }
+ cdata.dsize = sizeof(crec->header) + data.dsize;
- crec->header.rsn++;
+ if (!(cdata.dptr = SMB_MALLOC_ARRAY(uint8, cdata.dsize))) {
+ return NT_STATUS_NO_MEMORY;
+ }
- memcpy(cdata.dptr, &crec->header, sizeof(crec->header));
- memcpy(cdata.dptr + sizeof(crec->header), data.dptr, data.dsize);
+ crec->header.rsn++;
- status = ctdbd_start_persistent_update(messaging_ctdbd_connection(), crec->ctdb_ctx->db_id, rec->key, cdata);
-
- if (NT_STATUS_IS_OK(status)) {
- ret = tdb_store(crec->ctdb_ctx->wtdb->tdb, rec->key, cdata, TDB_REPLACE);
- status = (ret == 0) ? NT_STATUS_OK : NT_STATUS_INTERNAL_DB_CORRUPTION;
- }
+ memcpy(cdata.dptr, &crec->header, sizeof(crec->header));
+ memcpy(cdata.dptr + sizeof(crec->header), data.dptr, data.dsize);
- /*
- * release the lock *now* in order to prevent deadlocks.
- *
- * There is a tradeoff: Usually, the record is still locked
- * after db->store operation. This lock is usually released
- * via the talloc destructor with the TALLOC_FREE to
- * the record. So we have two choices:
- *
- * - Either re-lock the record after the call to persistent_store
- * or cancel_persistent update and this way not changing any
- * assumptions callers may have about the state, but possibly
- * introducing new race conditions.
- *
- * - Or don't lock the record again but just remove the
- * talloc_destructor. This is less racy but assumes that
- * the lock is always released via TALLOC_FREE of the record.
- *
- * I choose the first variant for now since it seems less racy.
- * We can't guarantee that we succeed in getting the lock
- * anyways. The only real danger here is that a caller
- * performs multiple store operations after a fetch_locked()
- * which is currently not the case.
- */
- tdb_chainunlock(crec->ctdb_ctx->wtdb->tdb, rec->key);
- talloc_set_destructor(rec, NULL);
+ status = ctdbd_start_persistent_update(
+ messaging_ctdbd_connection(),
+ crec->ctdb_ctx->db_id,
+ rec->key,
+ cdata);
- /* now tell ctdbd to update this record on all other nodes */
- if (NT_STATUS_IS_OK(status)) {
- status = ctdbd_persistent_store(messaging_ctdbd_connection(), crec->ctdb_ctx->db_id, rec->key, cdata);
- } else {
- ctdbd_cancel_persistent_update(messaging_ctdbd_connection(), crec->ctdb_ctx->db_id, rec->key, cdata);
+ if (NT_STATUS_IS_OK(status)) {
+ ret = tdb_store(crec->ctdb_ctx->wtdb->tdb, rec->key,
+ cdata, TDB_REPLACE);
+ status = (ret == 0) ? NT_STATUS_OK
+ : NT_STATUS_INTERNAL_DB_CORRUPTION;
+ }
+
+ /*
+ * release the lock *now* in order to prevent deadlocks.
+ *
+ * There is a tradeoff: Usually, the record is still locked
+ * after db->store operation. This lock is usually released
+ * via the talloc destructor with the TALLOC_FREE to
+ * the record. So we have two choices:
+ *
+ * - Either re-lock the record after the call to persistent_store
+ * or cancel_persistent update and this way not changing any
+ * assumptions callers may have about the state, but possibly
+ * introducing new race conditions.
+ *
+ * - Or don't lock the record again but just remove the
+ * talloc_destructor. This is less racy but assumes that
+ * the lock is always released via TALLOC_FREE of the record.
+ *
+ * I choose the first variant for now since it seems less racy.
+ * We can't guarantee that we succeed in getting the lock
+ * anyways. The only real danger here is that a caller
+ * performs multiple store operations after a fetch_locked()
+ * which is currently not the case.
+ */
+ tdb_chainunlock(crec->ctdb_ctx->wtdb->tdb, rec->key);
+ talloc_set_destructor(record, NULL);
+
+ /* now tell ctdbd to update this record on all other nodes */
+ if (NT_STATUS_IS_OK(status)) {
+ status = ctdbd_persistent_store(
+ messaging_ctdbd_connection(),
+ crec->ctdb_ctx->db_id,
+ rec->key,
+ cdata);
+ } else {
+ ctdbd_cancel_persistent_update(
+ messaging_ctdbd_connection(),
+ crec->ctdb_ctx->db_id,
+ rec->key,
+ cdata);
+ }
+
+ SAFE_FREE(cdata.dptr);
+ } /* retry-loop */
+
+ if (!NT_STATUS_IS_OK(status)) {
+ DEBUG(5, ("ctdbd_persistent_store still failed after "
+ "%d retries with error %s - giving up.\n",
+ count, nt_errstr(status)));
}
SAFE_FREE(cdata.dptr);
@@ -164,12 +224,11 @@ static int db_ctdb_record_destr(struct db_record* data)
return 0;
}
-static struct db_record *db_ctdb_fetch_locked(struct db_context *db,
- TALLOC_CTX *mem_ctx,
- TDB_DATA key)
+static struct db_record *fetch_locked_internal(struct db_ctdb_ctx *ctx,
+ TALLOC_CTX *mem_ctx,
+ TDB_DATA key,
+ bool persistent)
{
- struct db_ctdb_ctx *ctx = talloc_get_type_abort(db->private_data,
- struct db_ctdb_ctx);
struct db_record *result;
struct db_ctdb_rec *crec;
NTSTATUS status;
@@ -218,7 +277,7 @@ again:
return NULL;
}
- if (db->persistent) {
+ if (persistent) {
result->store = db_ctdb_store_persistent;
} else {
result->store = db_ctdb_store;
@@ -285,6 +344,16 @@ again:
return result;
}
+static struct db_record *db_ctdb_fetch_locked(struct db_context *db,
+ TALLOC_CTX *mem_ctx,
+ TDB_DATA key)
+{
+ struct db_ctdb_ctx *ctx = talloc_get_type_abort(db->private_data,
+ struct db_ctdb_ctx);
+
+ return fetch_locked_internal(ctx, mem_ctx, key, db->persistent);
+}
+
/*
fetch (unlocked, no migration) operation on ctdb
*/
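
For illustration only, the bounded fetch/store retry pattern introduced by this patch can be sketched in isolation. The following self-contained C program is not Samba code: try_store() is a hypothetical stand-in for the real ctdbd persistent store call (it simulates two lost races before a success), and only the loop shape mirrors db_ctdb_store_persistent() above.

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the persistent store call: pretend the
     * first two attempts lose the race and the third one succeeds. */
    static bool try_store(int attempt)
    {
            return (attempt >= 2);
    }

    int main(void)
    {
            const int max_retries = 5;  /* mirrors the option's default of 5 */
            bool ok = false;
            int count;

            for (count = 0; (count < max_retries) && !ok; count++) {
                    if (count > 0) {
                            /* before a retry the record is re-fetched, as the
                             * patch does via fetch_locked_internal() */
                            printf("retry %d: re-fetching record\n", count);
                    }
                    ok = try_store(count);
            }

            if (!ok) {
                    printf("still failed after %d retries - giving up\n", count);
                    return 1;
            }
            printf("store succeeded after %d attempt(s)\n", count);
            return 0;
    }

Like the patch, the sketch gives up with a diagnostic once the retry budget is exhausted instead of blocking the caller indefinitely.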