summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs-xml/smbdotconf/misc/ctdbtimeout.xml37
-rw-r--r--source3/include/packet.h3
-rw-r--r--source3/include/proto.h1
-rw-r--r--source3/lib/ctdbd_conn.c15
-rw-r--r--source3/lib/packet.c10
-rw-r--r--source3/param/loadparm.c12
6 files changed, 73 insertions, 5 deletions
diff --git a/docs-xml/smbdotconf/misc/ctdbtimeout.xml b/docs-xml/smbdotconf/misc/ctdbtimeout.xml
new file mode 100644
index 0000000000..97d5039000
--- /dev/null
+++ b/docs-xml/smbdotconf/misc/ctdbtimeout.xml
@@ -0,0 +1,37 @@
+<samba:parameter name="ctdb timeout"
+ context="G"
+ type="integer"
+ advanced="1"
+ xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
+<description>
+ <para>This parameter specifies a timeout in seconds for the
+ connection between Samba and ctdb. It is only valid if you
+ have compiled Samba with clustering and if you have
+ set <parameter>clustering=yes</parameter>.
+ </para>
+ <para>When something in the cluster blocks, it can happen that
+ we wait indefinitely long for ctdb, just adding to the
+ blocking condition. In a well-running cluster this should
+ never happen, but there are too many components in a cluster
+ that might have hiccups. Choosing the right balance for this
+ value is very tricky, because on a busy cluster long service
+ times to transfer something across the cluster might be
+ valid. Setting it too short will degrade the service your
+ cluster presents, setting it too long might make the cluster
+ itself not recover from something severely broken for too
+ long.
+ </para>
+ <para>
+ Be aware that if you set this parameter, it needs to be in
+ the file smb.conf. It is not really helpful to put it into
+ a registry configuration (typical on a cluster), because
+ accessing the registry itself requires contact with ctdb.
+ </para>
+ <para>Setting <parameter>ctdb timeout</parameter> to n makes
+ any process waiting longer than n seconds for a reply from the
+ cluster panic. Setting it to 0 (the default) makes Samba
+ block forever, which is the highly recommended default.
+ </para>
+</description>
+<value type="default">0</value>
+</samba:parameter>
diff --git a/source3/include/packet.h b/source3/include/packet.h
index 03331da750..45a9bc2ef1 100644
--- a/source3/include/packet.h
+++ b/source3/include/packet.h
@@ -38,7 +38,8 @@ NTSTATUS packet_fd_read(struct packet_context *ctx);
/*
* Sync read, wait for the next chunk
*/
-NTSTATUS packet_fd_read_sync(struct packet_context *ctx);
+NTSTATUS packet_fd_read_sync(struct packet_context *ctx,
+ struct timeval *timeout);
/*
* Handle an incoming packet:
diff --git a/source3/include/proto.h b/source3/include/proto.h
index 6a90c87f84..d08ed79050 100644
--- a/source3/include/proto.h
+++ b/source3/include/proto.h
@@ -4130,6 +4130,7 @@ int lp_cups_connection_timeout(void);
const char *lp_ctdbd_socket(void);
const char **lp_cluster_addresses(void);
bool lp_clustering(void);
+int lp_ctdb_timeout(void);
char *lp_printcommand(int );
char *lp_lpqcommand(int );
char *lp_lprmcommand(int );
diff --git a/source3/lib/ctdbd_conn.c b/source3/lib/ctdbd_conn.c
index ffd79c9fe1..84bba3bea3 100644
--- a/source3/lib/ctdbd_conn.c
+++ b/source3/lib/ctdbd_conn.c
@@ -275,6 +275,17 @@ static struct messaging_rec *ctdb_pull_messaging_rec(TALLOC_CTX *mem_ctx,
return result;
}
+static NTSTATUS ctdb_packet_fd_read_sync(struct packet_context *ctx)
+{
+ struct timeval timeout;
+ struct timeval *ptimeout;
+
+ timeout = timeval_set(lp_ctdb_timeout(), 0);
+ ptimeout = (timeout.tv_sec != 0) ? &timeout : NULL;
+
+ return packet_fd_read_sync(ctx, ptimeout);
+}
+
/*
* Read a full ctdbd request. If we have a messaging context, defer incoming
* messages that might come in between.
@@ -289,7 +300,7 @@ static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid,
again:
- status = packet_fd_read_sync(conn->pkt);
+ status = ctdb_packet_fd_read_sync(conn->pkt);
if (NT_STATUS_EQUAL(status, NT_STATUS_NETWORK_BUSY)) {
/* EAGAIN */
@@ -1156,7 +1167,7 @@ NTSTATUS ctdbd_traverse(uint32 db_id,
break;
}
- status = packet_fd_read_sync(conn->pkt);
+ status = ctdb_packet_fd_read_sync(conn->pkt);
if (NT_STATUS_EQUAL(status, NT_STATUS_RETRY)) {
/*
diff --git a/source3/lib/packet.c b/source3/lib/packet.c
index ef28bf9f62..c131b973bc 100644
--- a/source3/lib/packet.c
+++ b/source3/lib/packet.c
@@ -101,7 +101,8 @@ NTSTATUS packet_fd_read(struct packet_context *ctx)
return NT_STATUS_OK;
}
-NTSTATUS packet_fd_read_sync(struct packet_context *ctx)
+NTSTATUS packet_fd_read_sync(struct packet_context *ctx,
+ struct timeval *timeout)
{
int res;
fd_set r_fds;
@@ -109,7 +110,12 @@ NTSTATUS packet_fd_read_sync(struct packet_context *ctx)
FD_ZERO(&r_fds);
FD_SET(ctx->fd, &r_fds);
- res = sys_select(ctx->fd+1, &r_fds, NULL, NULL, NULL);
+ res = sys_select(ctx->fd+1, &r_fds, NULL, NULL, timeout);
+
+ if (res == 0) {
+ DEBUG(10, ("select timed out\n"));
+ return NT_STATUS_IO_TIMEOUT;
+ }
if (res == -1) {
DEBUG(10, ("select returned %s\n", strerror(errno)));
diff --git a/source3/param/loadparm.c b/source3/param/loadparm.c
index 06fb962576..c62deb5eda 100644
--- a/source3/param/loadparm.c
+++ b/source3/param/loadparm.c
@@ -271,6 +271,7 @@ struct global {
char *ctdbdSocket;
char **szClusterAddresses;
bool clustering;
+ int ctdb_timeout;
int ldap_passwd_sync;
int ldap_replication_sleep;
int ldap_timeout; /* This is initialised in init_globals */
@@ -2541,6 +2542,15 @@ static struct parm_struct parm_table[] = {
.enum_list = NULL,
.flags = FLAG_ADVANCED | FLAG_GLOBAL,
},
+ {
+ .label = "ctdb timeout",
+ .type = P_INTEGER,
+ .p_class = P_GLOBAL,
+ .ptr = &Globals.ctdb_timeout,
+ .special = NULL,
+ .enum_list = NULL,
+ .flags = FLAG_ADVANCED | FLAG_GLOBAL,
+ },
{N_("Printing Options"), P_SEP, P_SEPARATOR},
@@ -5107,6 +5117,7 @@ static void init_globals(bool first_time_only)
string_set(&Globals.ctdbdSocket, "");
Globals.szClusterAddresses = NULL;
Globals.clustering = False;
+ Globals.ctdb_timeout = 0;
Globals.winbind_cache_time = 300; /* 5 minutes */
Globals.winbind_reconnect_delay = 30; /* 30 seconds */
@@ -5557,6 +5568,7 @@ FN_GLOBAL_INTEGER(lp_cups_connection_timeout, &Globals.cups_connection_timeout)
FN_GLOBAL_CONST_STRING(lp_ctdbd_socket, &Globals.ctdbdSocket)
FN_GLOBAL_LIST(lp_cluster_addresses, &Globals.szClusterAddresses)
FN_GLOBAL_BOOL(lp_clustering, &Globals.clustering)
+FN_GLOBAL_INTEGER(lp_ctdb_timeout, &Globals.ctdb_timeout)
FN_LOCAL_STRING(lp_printcommand, szPrintcommand)
FN_LOCAL_STRING(lp_lpqcommand, szLpqcommand)
FN_LOCAL_STRING(lp_lprmcommand, szLprmcommand)