summary | refs | log | tree | commit | diff
path: root/source3
diff options
context:
space:
mode:
author Volker Lendecke <vl@samba.org> 2009-11-03 05:41:02 +0100
committer Volker Lendecke <vl@samba.org> 2009-11-05 12:05:36 +0100
commit d415d4d32f2e8e61de21abfdfce02e1b1ea1e1d3 (patch)
tree 03469f109c8e19e15318e5fa4da6d7526b6a575f /source3
parent 49397a8b3e30b23a4723125986f306fff502a144 (diff)
download samba-d415d4d32f2e8e61de21abfdfce02e1b1ea1e1d3.tar.gz
samba-d415d4d32f2e8e61de21abfdfce02e1b1ea1e1d3.tar.bz2
samba-d415d4d32f2e8e61de21abfdfce02e1b1ea1e1d3.zip
s3: Add parameter "ctdb timeout"
When something in the cluster blocks, it can happen that we wait indefinitely for ctdb, which only adds to the blocking condition. In theory, nothing should block, but as someone said: "In practice the difference between theory and practice is larger than in theory". This adds a timeout parameter in seconds, after which we stop waiting for ctdb and panic.
Diffstat (limited to 'source3')
-rw-r--r-- source3/include/packet.h 3
-rw-r--r-- source3/include/proto.h 1
-rw-r--r-- source3/lib/ctdbd_conn.c 15
-rw-r--r-- source3/lib/packet.c 10
-rw-r--r-- source3/param/loadparm.c 12
5 files changed, 36 insertions, 5 deletions
diff --git a/source3/include/packet.h b/source3/include/packet.h
index 03331da750..45a9bc2ef1 100644
--- a/source3/include/packet.h
+++ b/source3/include/packet.h
@@ -38,7 +38,8 @@ NTSTATUS packet_fd_read(struct packet_context *ctx);
/*
* Sync read, wait for the next chunk
*/
-NTSTATUS packet_fd_read_sync(struct packet_context *ctx);
+NTSTATUS packet_fd_read_sync(struct packet_context *ctx,
+ struct timeval *timeout);
/*
* Handle an incoming packet:
diff --git a/source3/include/proto.h b/source3/include/proto.h
index 6a90c87f84..d08ed79050 100644
--- a/source3/include/proto.h
+++ b/source3/include/proto.h
@@ -4130,6 +4130,7 @@ int lp_cups_connection_timeout(void);
const char *lp_ctdbd_socket(void);
const char **lp_cluster_addresses(void);
bool lp_clustering(void);
+int lp_ctdb_timeout(void);
char *lp_printcommand(int );
char *lp_lpqcommand(int );
char *lp_lprmcommand(int );
diff --git a/source3/lib/ctdbd_conn.c b/source3/lib/ctdbd_conn.c
index ffd79c9fe1..84bba3bea3 100644
--- a/source3/lib/ctdbd_conn.c
+++ b/source3/lib/ctdbd_conn.c
@@ -275,6 +275,17 @@ static struct messaging_rec *ctdb_pull_messaging_rec(TALLOC_CTX *mem_ctx,
return result;
}
+/*
+ * Sync read of the next chunk from ctdbd, applying the "ctdb timeout"
+ * parameter (in seconds) as read by lp_ctdb_timeout(). If the parameter
+ * is not a positive number, no timeout is passed on to
+ * packet_fd_read_sync(). The status of packet_fd_read_sync() is returned
+ * unchanged.
+ */
+static NTSTATUS ctdb_packet_fd_read_sync(struct packet_context *ctx)
+{
+ struct timeval timeout;
+ struct timeval *ptimeout;
+
+ timeout = timeval_set(lp_ctdb_timeout(), 0);
+
+ /*
+  * Only a positive number of seconds is a usable timeout. A negative
+  * tv_sec is not a valid interval for select() (EINVAL), so a negative
+  * setting is treated like 0: pass NULL, i.e. no timeout.
+  */
+ ptimeout = (timeout.tv_sec > 0) ? &timeout : NULL;
+
+ return packet_fd_read_sync(ctx, ptimeout);
+}
+
/*
* Read a full ctdbd request. If we have a messaging context, defer incoming
* messages that might come in between.
@@ -289,7 +300,7 @@ static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid,
again:
- status = packet_fd_read_sync(conn->pkt);
+ status = ctdb_packet_fd_read_sync(conn->pkt);
if (NT_STATUS_EQUAL(status, NT_STATUS_NETWORK_BUSY)) {
/* EAGAIN */
@@ -1156,7 +1167,7 @@ NTSTATUS ctdbd_traverse(uint32 db_id,
break;
}
- status = packet_fd_read_sync(conn->pkt);
+ status = ctdb_packet_fd_read_sync(conn->pkt);
if (NT_STATUS_EQUAL(status, NT_STATUS_RETRY)) {
/*
diff --git a/source3/lib/packet.c b/source3/lib/packet.c
index ef28bf9f62..c131b973bc 100644
--- a/source3/lib/packet.c
+++ b/source3/lib/packet.c
@@ -101,7 +101,8 @@ NTSTATUS packet_fd_read(struct packet_context *ctx)
return NT_STATUS_OK;
}
-NTSTATUS packet_fd_read_sync(struct packet_context *ctx)
+NTSTATUS packet_fd_read_sync(struct packet_context *ctx,
+ struct timeval *timeout)
{
int res;
fd_set r_fds;
@@ -109,7 +110,12 @@ NTSTATUS packet_fd_read_sync(struct packet_context *ctx)
FD_ZERO(&r_fds);
FD_SET(ctx->fd, &r_fds);
- res = sys_select(ctx->fd+1, &r_fds, NULL, NULL, NULL);
+ res = sys_select(ctx->fd+1, &r_fds, NULL, NULL, timeout);
+
+ if (res == 0) {
+ DEBUG(10, ("select timed out\n"));
+ return NT_STATUS_IO_TIMEOUT;
+ }
if (res == -1) {
DEBUG(10, ("select returned %s\n", strerror(errno)));
diff --git a/source3/param/loadparm.c b/source3/param/loadparm.c
index 06fb962576..c62deb5eda 100644
--- a/source3/param/loadparm.c
+++ b/source3/param/loadparm.c
@@ -271,6 +271,7 @@ struct global {
char *ctdbdSocket;
char **szClusterAddresses;
bool clustering;
+ int ctdb_timeout;
int ldap_passwd_sync;
int ldap_replication_sleep;
int ldap_timeout; /* This is initialised in init_globals */
@@ -2541,6 +2542,15 @@ static struct parm_struct parm_table[] = {
.enum_list = NULL,
.flags = FLAG_ADVANCED | FLAG_GLOBAL,
},
+ {
+ .label = "ctdb timeout",
+ .type = P_INTEGER,
+ .p_class = P_GLOBAL,
+ .ptr = &Globals.ctdb_timeout,
+ .special = NULL,
+ .enum_list = NULL,
+ .flags = FLAG_ADVANCED | FLAG_GLOBAL,
+ },
{N_("Printing Options"), P_SEP, P_SEPARATOR},
@@ -5107,6 +5117,7 @@ static void init_globals(bool first_time_only)
string_set(&Globals.ctdbdSocket, "");
Globals.szClusterAddresses = NULL;
Globals.clustering = False;
+ Globals.ctdb_timeout = 0;
Globals.winbind_cache_time = 300; /* 5 minutes */
Globals.winbind_reconnect_delay = 30; /* 30 seconds */
@@ -5557,6 +5568,7 @@ FN_GLOBAL_INTEGER(lp_cups_connection_timeout, &Globals.cups_connection_timeout)
FN_GLOBAL_CONST_STRING(lp_ctdbd_socket, &Globals.ctdbdSocket)
FN_GLOBAL_LIST(lp_cluster_addresses, &Globals.szClusterAddresses)
FN_GLOBAL_BOOL(lp_clustering, &Globals.clustering)
+FN_GLOBAL_INTEGER(lp_ctdb_timeout, &Globals.ctdb_timeout)
FN_LOCAL_STRING(lp_printcommand, szPrintcommand)
FN_LOCAL_STRING(lp_lpqcommand, szLpqcommand)
FN_LOCAL_STRING(lp_lprmcommand, szLprmcommand)