From 70f9338bf2e6081916ffe5bb7cddf50b4e958b24 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Fri, 5 Mar 2010 16:46:36 +0100 Subject: s3: Add the "ctdb locktime warn threshold" parameter This is mainly a debugging aid for post-mortem analysis in case a cluster file system is slow. --- docs-xml/smbdotconf/misc/ctdblocktimewarnthreshold.xml | 16 ++++++++++++++++ source3/include/proto.h | 1 + source3/lib/dbwrap_ctdb.c | 12 ++++++++++++ source3/param/loadparm.c | 12 ++++++++++++ 4 files changed, 41 insertions(+) create mode 100644 docs-xml/smbdotconf/misc/ctdblocktimewarnthreshold.xml diff --git a/docs-xml/smbdotconf/misc/ctdblocktimewarnthreshold.xml b/docs-xml/smbdotconf/misc/ctdblocktimewarnthreshold.xml new file mode 100644 index 0000000000..149d8d67e3 --- /dev/null +++ b/docs-xml/smbdotconf/misc/ctdblocktimewarnthreshold.xml @@ -0,0 +1,16 @@ + + + In a cluster, ctdb is very unhappy if tdb database locks + are held for extended periods of time. This parameter adds a + warning threshold in milliseconds. If Samba holds a lock for + longer than ctdb locktime warn threshold milliseconds, a debug + level 0 message is printed when the lock is released. This is + mainly a debugging aid for post-mortem analysis. + If this parameter is set to 0, no message is printed. 
+ +0 + diff --git a/source3/include/proto.h b/source3/include/proto.h index 06d324eea4..346da39749 100644 --- a/source3/include/proto.h +++ b/source3/include/proto.h @@ -4145,6 +4145,7 @@ const char *lp_ctdbd_socket(void); const char **lp_cluster_addresses(void); bool lp_clustering(void); int lp_ctdb_timeout(void); +int lp_ctdb_locktime_warn_threshold(void); char *lp_printcommand(int ); char *lp_lpqcommand(int ); char *lp_lprmcommand(int ); diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c index 938a312657..bef91cb1ec 100644 --- a/source3/lib/dbwrap_ctdb.c +++ b/source3/lib/dbwrap_ctdb.c @@ -50,6 +50,7 @@ struct db_ctdb_ctx { struct db_ctdb_rec { struct db_ctdb_ctx *ctdb_ctx; struct ctdb_ltdb_header header; + struct timeval lock_time; }; static NTSTATUS tdb_error_to_ntstatus(struct tdb_context *tdb) @@ -898,6 +899,7 @@ static int db_ctdb_record_destr(struct db_record* data) { struct db_ctdb_rec *crec = talloc_get_type_abort( data->private_data, struct db_ctdb_rec); + int threshold; DEBUG(10, (DEBUGLEVEL > 10 ? 
"Unlocking db %u key %s\n" @@ -911,6 +913,14 @@ static int db_ctdb_record_destr(struct db_record* data) return -1; } + threshold = lp_ctdb_locktime_warn_threshold(); + if (threshold != 0) { + double timediff = timeval_elapsed(&crec->lock_time); + if ((timediff * 1000) > threshold) { + DEBUG(0, ("Held tdb lock %f seconds\n", timediff)); + } + } + return 0; } @@ -1011,6 +1021,8 @@ again: migrate_attempts)); } + GetTimeOfDay(&crec->lock_time); + memcpy(&crec->header, ctdb_data.dptr, sizeof(crec->header)); result->value.dsize = ctdb_data.dsize - sizeof(crec->header); diff --git a/source3/param/loadparm.c b/source3/param/loadparm.c index 76b0d35066..a6e096e09f 100644 --- a/source3/param/loadparm.c +++ b/source3/param/loadparm.c @@ -274,6 +274,7 @@ struct global { char **szClusterAddresses; bool clustering; int ctdb_timeout; + int ctdb_locktime_warn_threshold; int ldap_passwd_sync; int ldap_replication_sleep; int ldap_timeout; /* This is initialised in init_globals */ @@ -2577,6 +2578,15 @@ static struct parm_struct parm_table[] = { .enum_list = NULL, .flags = FLAG_ADVANCED | FLAG_GLOBAL, }, + { + .label = "ctdb locktime warn threshold", + .type = P_INTEGER, + .p_class = P_GLOBAL, + .ptr = &Globals.ctdb_locktime_warn_threshold, + .special = NULL, + .enum_list = NULL, + .flags = FLAG_ADVANCED | FLAG_GLOBAL, + }, {N_("Printing Options"), P_SEP, P_SEPARATOR}, @@ -5185,6 +5195,7 @@ static void init_globals(bool first_time_only) Globals.szClusterAddresses = NULL; Globals.clustering = False; Globals.ctdb_timeout = 0; + Globals.ctdb_locktime_warn_threshold = 0; Globals.winbind_cache_time = 300; /* 5 minutes */ Globals.winbind_reconnect_delay = 30; /* 30 seconds */ @@ -5640,6 +5651,7 @@ FN_GLOBAL_CONST_STRING(lp_ctdbd_socket, &Globals.ctdbdSocket) FN_GLOBAL_LIST(lp_cluster_addresses, &Globals.szClusterAddresses) FN_GLOBAL_BOOL(lp_clustering, &Globals.clustering) FN_GLOBAL_INTEGER(lp_ctdb_timeout, &Globals.ctdb_timeout) +FN_GLOBAL_INTEGER(lp_ctdb_locktime_warn_threshold, 
&Globals.ctdb_locktime_warn_threshold) FN_LOCAL_STRING(lp_printcommand, szPrintcommand) FN_LOCAL_STRING(lp_lpqcommand, szLpqcommand) FN_LOCAL_STRING(lp_lprmcommand, szLprmcommand) -- cgit