From 2592565bde6faa8ecc0524c4130c22e51c75f536 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 8 Aug 2008 09:58:15 +1000 Subject: handle two special cases 1) when all nodes write the same value to the record, or when writing a value that is already there, we can skip the write and save ourselves a network transaction 2) when all remote nodes fail an update, and we then fail a replay, we don't need to trigger a recovery. This solves a corner case where we could get into a recovery loop. (This used to be commit 2481bfce4307274806584b0d8e295cc7f638e184) --- source3/lib/dbwrap_ctdb.c | 58 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 16 deletions(-) (limited to 'source3/lib/dbwrap_ctdb.c') diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c index 645b93846b..bd062b368a 100644 --- a/source3/lib/dbwrap_ctdb.c +++ b/source3/lib/dbwrap_ctdb.c @@ -460,6 +460,14 @@ static int db_ctdb_transaction_store(struct db_ctdb_transaction_handle *h, header.dmaster = get_my_vnn(); } else { memcpy(&header, rec.dptr, sizeof(struct ctdb_ltdb_header)); + rec.dsize -= sizeof(struct ctdb_ltdb_header); + /* a special case, we are writing the same data that is there now */ + if (data.dsize == rec.dsize && + memcmp(data.dptr, rec.dptr + sizeof(struct ctdb_ltdb_header), data.dsize) == 0) { + SAFE_FREE(rec.dptr); + talloc_free(tmp_ctx); + return 0; + } SAFE_FREE(rec.dptr); } @@ -472,13 +480,13 @@ static int db_ctdb_transaction_store(struct db_ctdb_transaction_handle *h, talloc_free(tmp_ctx); return -1; } + } - h->m_write = db_ctdb_marshall_add(h, h->m_write, h->ctx->db_id, 0, key, &header, data); - if (h->m_write == NULL) { - DEBUG(0,(__location__ " Failed to add to marshalling record\n")); - talloc_free(tmp_ctx); - return -1; - } + h->m_write = db_ctdb_marshall_add(h, h->m_write, h->ctx->db_id, 0, key, &header, data); + if (h->m_write == NULL) { + DEBUG(0,(__location__ " Failed to add to marshalling record\n")); + talloc_free(tmp_ctx); + 
return -1; } rec.dsize = data.dsize + sizeof(struct ctdb_ltdb_header); @@ -541,6 +549,8 @@ static int ctdb_replay_transaction(struct db_ctdb_transaction_handle *h) struct ctdb_rec_data *rec = NULL; h->in_replay = true; + talloc_free(h->m_write); + h->m_write = NULL; ret = db_ctdb_transaction_fetch_start(h); if (ret != 0) { @@ -599,6 +609,7 @@ static int db_ctdb_transaction_commit(struct db_context *db) int status; int retries = 0; struct db_ctdb_transaction_handle *h = ctx->transaction; + enum ctdb_controls failure_control = CTDB_CONTROL_TRANS2_ERROR; if (h == NULL) { DEBUG(0,(__location__ " transaction commit with no open transaction on db 0x%08x\n", ctx->db_id)); @@ -607,13 +618,6 @@ static int db_ctdb_transaction_commit(struct db_context *db) DEBUG(5,(__location__ " Commit transaction on db 0x%08x\n", ctx->db_id)); - if (h->m_write == NULL) { - /* no changes were made */ - talloc_free(h); - ctx->transaction = NULL; - return 0; - } - talloc_set_destructor(h, NULL); /* our commit strategy is quite complex. 
@@ -632,6 +636,14 @@ static int db_ctdb_transaction_commit(struct db_context *db) */ again: + if (h->m_write == NULL) { + /* no changes were made, potentially after a retry */ + tdb_transaction_cancel(h->ctx->wtdb->tdb); + talloc_free(h); + ctx->transaction = NULL; + return 0; + } + /* tell ctdbd to commit to the other nodes */ rets = ctdbd_control_local(messaging_ctdbd_connection(), CTDB_CONTROL_TRANS2_COMMIT, h->ctx->db_id, 0, @@ -639,9 +651,23 @@ again: if (!NT_STATUS_IS_OK(rets) || status != 0) { tdb_transaction_cancel(h->ctx->wtdb->tdb); sleep(1); + + /* work out what error code we will give if we + have to fail the operation */ + switch ((enum ctdb_trans2_commit_error)status) { + case CTDB_TRANS2_COMMIT_SUCCESS: + case CTDB_TRANS2_COMMIT_SOMEFAIL: + case CTDB_TRANS2_COMMIT_TIMEOUT: + failure_control = CTDB_CONTROL_TRANS2_ERROR; + break; + case CTDB_TRANS2_COMMIT_ALLFAIL: + failure_control = CTDB_CONTROL_TRANS2_FINISHED; + break; + } + if (ctdb_replay_transaction(h) != 0) { DEBUG(0,(__location__ " Failed to replay transaction\n")); - ctdbd_control_local(messaging_ctdbd_connection(), CTDB_CONTROL_TRANS2_ERROR, + ctdbd_control_local(messaging_ctdbd_connection(), failure_control, h->ctx->db_id, CTDB_CTRL_FLAG_NOREPLY, tdb_null, NULL, NULL, NULL); h->ctx->transaction = NULL; @@ -649,10 +675,10 @@ again: ctx->transaction = NULL; return -1; } - if (retries++ == 10) { + if (++retries == 10) { DEBUG(0,(__location__ " Giving up transaction on db 0x%08x after %d retries\n", h->ctx->db_id, retries)); - ctdbd_control_local(messaging_ctdbd_connection(), CTDB_CONTROL_TRANS2_ERROR, + ctdbd_control_local(messaging_ctdbd_connection(), failure_control, h->ctx->db_id, CTDB_CTRL_FLAG_NOREPLY, tdb_null, NULL, NULL, NULL); h->ctx->transaction = NULL; -- cgit