From 2592565bde6faa8ecc0524c4130c22e51c75f536 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <tridge@samba.org>
Date: Fri, 8 Aug 2008 09:58:15 +1000
Subject: handle two special cases

1) when all nodes write the same value to the record, or when writing
   a value that is already there, we can skip the write and save
   ourselves a network transactions

2) when all remote nodes fail an update, and we then fail a replay, we
   don't need to trigger a recovery. This solves a corner case where
   we could get into a recovery loop
(This used to be commit 2481bfce4307274806584b0d8e295cc7f638e184)
---
 source3/lib/dbwrap_ctdb.c | 58 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 42 insertions(+), 16 deletions(-)

(limited to 'source3/lib/dbwrap_ctdb.c')

diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c
index 645b93846b..bd062b368a 100644
--- a/source3/lib/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap_ctdb.c
@@ -460,6 +460,14 @@ static int db_ctdb_transaction_store(struct db_ctdb_transaction_handle *h,
 		header.dmaster = get_my_vnn();
 	} else {
 		memcpy(&header, rec.dptr, sizeof(struct ctdb_ltdb_header));
+		rec.dsize -= sizeof(struct ctdb_ltdb_header);
+		/* a special case, we are writing the same data that is there now */
+		if (data.dsize == rec.dsize &&
+		    memcmp(data.dptr, rec.dptr + sizeof(struct ctdb_ltdb_header), data.dsize) == 0) {
+			SAFE_FREE(rec.dptr);
+			talloc_free(tmp_ctx);
+			return 0;
+		}
 		SAFE_FREE(rec.dptr);
 	}
 
@@ -472,13 +480,13 @@ static int db_ctdb_transaction_store(struct db_ctdb_transaction_handle *h,
 			talloc_free(tmp_ctx);
 			return -1;
 		}
+	}
 		
-		h->m_write = db_ctdb_marshall_add(h, h->m_write, h->ctx->db_id, 0, key, &header, data);
-		if (h->m_write == NULL) {
-			DEBUG(0,(__location__ " Failed to add to marshalling record\n"));
-			talloc_free(tmp_ctx);
-			return -1;
-		}
+	h->m_write = db_ctdb_marshall_add(h, h->m_write, h->ctx->db_id, 0, key, &header, data);
+	if (h->m_write == NULL) {
+		DEBUG(0,(__location__ " Failed to add to marshalling record\n"));
+		talloc_free(tmp_ctx);
+		return -1;
 	}
 	
 	rec.dsize = data.dsize + sizeof(struct ctdb_ltdb_header);
@@ -541,6 +549,8 @@ static int ctdb_replay_transaction(struct db_ctdb_transaction_handle *h)
 	struct ctdb_rec_data *rec = NULL;
 
 	h->in_replay = true;
+	talloc_free(h->m_write);
+	h->m_write = NULL;
 
 	ret = db_ctdb_transaction_fetch_start(h);
 	if (ret != 0) {
@@ -599,6 +609,7 @@ static int db_ctdb_transaction_commit(struct db_context *db)
 	int status;
 	int retries = 0;
 	struct db_ctdb_transaction_handle *h = ctx->transaction;
+	enum ctdb_controls failure_control = CTDB_CONTROL_TRANS2_ERROR;
 
 	if (h == NULL) {
 		DEBUG(0,(__location__ " transaction commit with no open transaction on db 0x%08x\n", ctx->db_id));
@@ -607,13 +618,6 @@ static int db_ctdb_transaction_commit(struct db_context *db)
 
 	DEBUG(5,(__location__ " Commit transaction on db 0x%08x\n", ctx->db_id));
 
-	if (h->m_write == NULL) {
-		/* no changes were made */
-		talloc_free(h);
-		ctx->transaction = NULL;
-		return 0;
-	}
-
 	talloc_set_destructor(h, NULL);
 
 	/* our commit strategy is quite complex.
@@ -632,6 +636,14 @@ static int db_ctdb_transaction_commit(struct db_context *db)
 	*/
 
 again:
+	if (h->m_write == NULL) {
+		/* no changes were made, potentially after a retry */
+		tdb_transaction_cancel(h->ctx->wtdb->tdb);
+		talloc_free(h);
+		ctx->transaction = NULL;
+		return 0;
+	}
+
 	/* tell ctdbd to commit to the other nodes */
 	rets = ctdbd_control_local(messaging_ctdbd_connection(), 
 				  CTDB_CONTROL_TRANS2_COMMIT, h->ctx->db_id, 0,
@@ -639,9 +651,23 @@ again:
 	if (!NT_STATUS_IS_OK(rets) || status != 0) {
 		tdb_transaction_cancel(h->ctx->wtdb->tdb);
 		sleep(1);
+
+		/* work out what error code we will give if we 
+		   have to fail the operation */
+		switch ((enum ctdb_trans2_commit_error)status) {
+		case CTDB_TRANS2_COMMIT_SUCCESS:
+		case CTDB_TRANS2_COMMIT_SOMEFAIL:
+		case CTDB_TRANS2_COMMIT_TIMEOUT:
+			failure_control = CTDB_CONTROL_TRANS2_ERROR;
+			break;
+		case CTDB_TRANS2_COMMIT_ALLFAIL:
+			failure_control = CTDB_CONTROL_TRANS2_FINISHED;
+			break;
+		}
+
 		if (ctdb_replay_transaction(h) != 0) {
 			DEBUG(0,(__location__ " Failed to replay transaction\n"));
-			ctdbd_control_local(messaging_ctdbd_connection(), CTDB_CONTROL_TRANS2_ERROR,
+			ctdbd_control_local(messaging_ctdbd_connection(), failure_control,
 					    h->ctx->db_id, CTDB_CTRL_FLAG_NOREPLY, 
 					    tdb_null, NULL, NULL, NULL);
 			h->ctx->transaction = NULL;
@@ -649,10 +675,10 @@ again:
 			ctx->transaction = NULL;
 			return -1;
 		}
-		if (retries++ == 10) {
+		if (++retries == 10) {
 			DEBUG(0,(__location__ " Giving up transaction on db 0x%08x after %d retries\n", 
 				 h->ctx->db_id, retries));
-			ctdbd_control_local(messaging_ctdbd_connection(), CTDB_CONTROL_TRANS2_ERROR,
+			ctdbd_control_local(messaging_ctdbd_connection(), failure_control,
 					    h->ctx->db_id, CTDB_CTRL_FLAG_NOREPLY, 
 					    tdb_null, NULL, NULL, NULL);
 			h->ctx->transaction = NULL;
-- 
cgit