summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs-xml/smbdotconf/misc/ctdblocktimewarnthreshold.xml16
-rw-r--r--source3/include/proto.h1
-rw-r--r--source3/lib/dbwrap_ctdb.c12
-rw-r--r--source3/param/loadparm.c12
4 files changed, 41 insertions, 0 deletions
diff --git a/docs-xml/smbdotconf/misc/ctdblocktimewarnthreshold.xml b/docs-xml/smbdotconf/misc/ctdblocktimewarnthreshold.xml
new file mode 100644
index 0000000000..149d8d67e3
--- /dev/null
+++ b/docs-xml/smbdotconf/misc/ctdblocktimewarnthreshold.xml
@@ -0,0 +1,16 @@
+<samba:parameter name="ctdb locktime warn threshold"
+ context="G"
+ type="integer"
+ advanced="1"
+ xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
+<description>
+ <para>In a cluster, ctdb is very unhappy if tdb database locks
+ are held for extended periods of time. This parameter adds a
+ warning threshold in milliseconds. If Samba holds a lock for
+ longer than ctdb locktime warn threshold milliseconds, a debug
+ level 0 message is printed when the lock is released. This is
+ mainly a debugging aid for post-mortem analysis.</para>
+ <para>If this parameter is set to 0, no message is printed.</para>
+</description>
+<value type="default">0</value>
+</samba:parameter>
diff --git a/source3/include/proto.h b/source3/include/proto.h
index 06d324eea4..346da39749 100644
--- a/source3/include/proto.h
+++ b/source3/include/proto.h
@@ -4145,6 +4145,7 @@ const char *lp_ctdbd_socket(void);
const char **lp_cluster_addresses(void);
bool lp_clustering(void);
int lp_ctdb_timeout(void);
+int lp_ctdb_locktime_warn_threshold(void);
char *lp_printcommand(int );
char *lp_lpqcommand(int );
char *lp_lprmcommand(int );
diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c
index 938a312657..bef91cb1ec 100644
--- a/source3/lib/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap_ctdb.c
@@ -50,6 +50,7 @@ struct db_ctdb_ctx {
struct db_ctdb_rec {
struct db_ctdb_ctx *ctdb_ctx;
struct ctdb_ltdb_header header;
+ struct timeval lock_time;
};
static NTSTATUS tdb_error_to_ntstatus(struct tdb_context *tdb)
@@ -898,6 +899,7 @@ static int db_ctdb_record_destr(struct db_record* data)
{
struct db_ctdb_rec *crec = talloc_get_type_abort(
data->private_data, struct db_ctdb_rec);
+ int threshold;
DEBUG(10, (DEBUGLEVEL > 10
? "Unlocking db %u key %s\n"
@@ -911,6 +913,14 @@ static int db_ctdb_record_destr(struct db_record* data)
return -1;
}
+ threshold = lp_ctdb_locktime_warn_threshold();
+ if (threshold != 0) {
+ double timediff = timeval_elapsed(&crec->lock_time);
+ if ((timediff * 1000) > threshold) {
+ DEBUG(0, ("Held tdb lock %f seconds\n", timediff));
+ }
+ }
+
return 0;
}
@@ -1011,6 +1021,8 @@ again:
migrate_attempts));
}
+ GetTimeOfDay(&crec->lock_time);
+
memcpy(&crec->header, ctdb_data.dptr, sizeof(crec->header));
result->value.dsize = ctdb_data.dsize - sizeof(crec->header);
diff --git a/source3/param/loadparm.c b/source3/param/loadparm.c
index 76b0d35066..a6e096e09f 100644
--- a/source3/param/loadparm.c
+++ b/source3/param/loadparm.c
@@ -274,6 +274,7 @@ struct global {
char **szClusterAddresses;
bool clustering;
int ctdb_timeout;
+ int ctdb_locktime_warn_threshold;
int ldap_passwd_sync;
int ldap_replication_sleep;
int ldap_timeout; /* This is initialised in init_globals */
@@ -2577,6 +2578,15 @@ static struct parm_struct parm_table[] = {
.enum_list = NULL,
.flags = FLAG_ADVANCED | FLAG_GLOBAL,
},
+ {
+ .label = "ctdb locktime warn threshold",
+ .type = P_INTEGER,
+ .p_class = P_GLOBAL,
+ .ptr = &Globals.ctdb_locktime_warn_threshold,
+ .special = NULL,
+ .enum_list = NULL,
+ .flags = FLAG_ADVANCED | FLAG_GLOBAL,
+ },
{N_("Printing Options"), P_SEP, P_SEPARATOR},
@@ -5185,6 +5195,7 @@ static void init_globals(bool first_time_only)
Globals.szClusterAddresses = NULL;
Globals.clustering = False;
Globals.ctdb_timeout = 0;
+ Globals.ctdb_locktime_warn_threshold = 0;
Globals.winbind_cache_time = 300; /* 5 minutes */
Globals.winbind_reconnect_delay = 30; /* 30 seconds */
@@ -5640,6 +5651,7 @@ FN_GLOBAL_CONST_STRING(lp_ctdbd_socket, &Globals.ctdbdSocket)
FN_GLOBAL_LIST(lp_cluster_addresses, &Globals.szClusterAddresses)
FN_GLOBAL_BOOL(lp_clustering, &Globals.clustering)
FN_GLOBAL_INTEGER(lp_ctdb_timeout, &Globals.ctdb_timeout)
+FN_GLOBAL_INTEGER(lp_ctdb_locktime_warn_threshold, &Globals.ctdb_locktime_warn_threshold)
FN_LOCAL_STRING(lp_printcommand, szPrintcommand)
FN_LOCAL_STRING(lp_lpqcommand, szLpqcommand)
FN_LOCAL_STRING(lp_lprmcommand, szLprmcommand)