diff options
-rw-r--r-- | docs-xml/smbdotconf/misc/ctdbtimeout.xml | 37 | ||||
-rw-r--r-- | source3/include/packet.h | 3 | ||||
-rw-r--r-- | source3/include/proto.h | 1 | ||||
-rw-r--r-- | source3/lib/ctdbd_conn.c | 15 | ||||
-rw-r--r-- | source3/lib/packet.c | 10 | ||||
-rw-r--r-- | source3/param/loadparm.c | 12 |
6 files changed, 73 insertions, 5 deletions
diff --git a/docs-xml/smbdotconf/misc/ctdbtimeout.xml b/docs-xml/smbdotconf/misc/ctdbtimeout.xml new file mode 100644 index 0000000000..97d5039000 --- /dev/null +++ b/docs-xml/smbdotconf/misc/ctdbtimeout.xml @@ -0,0 +1,37 @@ +<samba:parameter name="ctdb timeout" + context="G" + type="integer" + advanced="1" + xmlns:samba="http://www.samba.org/samba/DTD/samba-doc"> +<description> + <para>This parameter specifies a timeout in seconds for the + connection between Samba and ctdb. It is only valid if you + have compiled Samba with clustering and if you have + set <parameter>clustering=yes</parameter>. + </para> + <para>When something in the cluster blocks, it can happen that + we wait indefinitely long for ctdb, just adding to the + blocking condition. In a well-running cluster this should + never happen, but there are too many components in a cluster + that might have hiccups. Choosing the right balance for this + value is very tricky, because on a busy cluster long service + times to transfer something across the cluster might be + valid. Setting it too short will degrade the service your + cluster presents, setting it too long might make the cluster + itself not recover from something severely broken for too + long. + </para> + <para> + Be aware that if you set this parameter, this needs to be in + the file smb.conf, it is not really helpful to put this into + a registry configuration (typical on a cluster), because to + access the registry contact to ctdb is required. + </para> + <para>Setting <parameter>ctdb timeout</parameter> to n makes + any process waiting longer than n seconds for a reply by the + cluster panic. Setting it to 0 (the default) makes Samba + block forever, which is the highly recommended default. 
+ </para> +</description> +<value type="default">0</value> +</samba:parameter> diff --git a/source3/include/packet.h b/source3/include/packet.h index 03331da750..45a9bc2ef1 100644 --- a/source3/include/packet.h +++ b/source3/include/packet.h @@ -38,7 +38,8 @@ NTSTATUS packet_fd_read(struct packet_context *ctx); /* * Sync read, wait for the next chunk */ -NTSTATUS packet_fd_read_sync(struct packet_context *ctx); +NTSTATUS packet_fd_read_sync(struct packet_context *ctx, + struct timeval *timeout); /* * Handle an incoming packet: diff --git a/source3/include/proto.h b/source3/include/proto.h index 6a90c87f84..d08ed79050 100644 --- a/source3/include/proto.h +++ b/source3/include/proto.h @@ -4130,6 +4130,7 @@ int lp_cups_connection_timeout(void); const char *lp_ctdbd_socket(void); const char **lp_cluster_addresses(void); bool lp_clustering(void); +int lp_ctdb_timeout(void); char *lp_printcommand(int ); char *lp_lpqcommand(int ); char *lp_lprmcommand(int ); diff --git a/source3/lib/ctdbd_conn.c b/source3/lib/ctdbd_conn.c index ffd79c9fe1..84bba3bea3 100644 --- a/source3/lib/ctdbd_conn.c +++ b/source3/lib/ctdbd_conn.c @@ -275,6 +275,17 @@ static struct messaging_rec *ctdb_pull_messaging_rec(TALLOC_CTX *mem_ctx, return result; } +static NTSTATUS ctdb_packet_fd_read_sync(struct packet_context *ctx) +{ + struct timeval timeout; + struct timeval *ptimeout; + + timeout = timeval_set(lp_ctdb_timeout(), 0); + ptimeout = (timeout.tv_sec != 0) ? &timeout : NULL; + + return packet_fd_read_sync(ctx, ptimeout); +} + /* * Read a full ctdbd request. If we have a messaging context, defer incoming * messages that might come in between. 
@@ -289,7 +300,7 @@ static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid, again: - status = packet_fd_read_sync(conn->pkt); + status = ctdb_packet_fd_read_sync(conn->pkt); if (NT_STATUS_EQUAL(status, NT_STATUS_NETWORK_BUSY)) { /* EAGAIN */ @@ -1156,7 +1167,7 @@ NTSTATUS ctdbd_traverse(uint32 db_id, break; } - status = packet_fd_read_sync(conn->pkt); + status = ctdb_packet_fd_read_sync(conn->pkt); if (NT_STATUS_EQUAL(status, NT_STATUS_RETRY)) { /* diff --git a/source3/lib/packet.c b/source3/lib/packet.c index ef28bf9f62..c131b973bc 100644 --- a/source3/lib/packet.c +++ b/source3/lib/packet.c @@ -101,7 +101,8 @@ NTSTATUS packet_fd_read(struct packet_context *ctx) return NT_STATUS_OK; } -NTSTATUS packet_fd_read_sync(struct packet_context *ctx) +NTSTATUS packet_fd_read_sync(struct packet_context *ctx, + struct timeval *timeout) { int res; fd_set r_fds; @@ -109,7 +110,12 @@ NTSTATUS packet_fd_read_sync(struct packet_context *ctx) FD_ZERO(&r_fds); FD_SET(ctx->fd, &r_fds); - res = sys_select(ctx->fd+1, &r_fds, NULL, NULL, NULL); + res = sys_select(ctx->fd+1, &r_fds, NULL, NULL, timeout); + + if (res == 0) { + DEBUG(10, ("select timed out\n")); + return NT_STATUS_IO_TIMEOUT; + } if (res == -1) { DEBUG(10, ("select returned %s\n", strerror(errno))); diff --git a/source3/param/loadparm.c b/source3/param/loadparm.c index 06fb962576..c62deb5eda 100644 --- a/source3/param/loadparm.c +++ b/source3/param/loadparm.c @@ -271,6 +271,7 @@ struct global { char *ctdbdSocket; char **szClusterAddresses; bool clustering; + int ctdb_timeout; int ldap_passwd_sync; int ldap_replication_sleep; int ldap_timeout; /* This is initialised in init_globals */ @@ -2541,6 +2542,15 @@ static struct parm_struct parm_table[] = { .enum_list = NULL, .flags = FLAG_ADVANCED | FLAG_GLOBAL, }, + { + .label = "ctdb timeout", + .type = P_INTEGER, + .p_class = P_GLOBAL, + .ptr = &Globals.ctdb_timeout, + .special = NULL, + .enum_list = NULL, + .flags = FLAG_ADVANCED | FLAG_GLOBAL, + }, 
{N_("Printing Options"), P_SEP, P_SEPARATOR}, @@ -5107,6 +5117,7 @@ static void init_globals(bool first_time_only) string_set(&Globals.ctdbdSocket, ""); Globals.szClusterAddresses = NULL; Globals.clustering = False; + Globals.ctdb_timeout = 0; Globals.winbind_cache_time = 300; /* 5 minutes */ Globals.winbind_reconnect_delay = 30; /* 30 seconds */ @@ -5557,6 +5568,7 @@ FN_GLOBAL_INTEGER(lp_cups_connection_timeout, &Globals.cups_connection_timeout) FN_GLOBAL_CONST_STRING(lp_ctdbd_socket, &Globals.ctdbdSocket) FN_GLOBAL_LIST(lp_cluster_addresses, &Globals.szClusterAddresses) FN_GLOBAL_BOOL(lp_clustering, &Globals.clustering) +FN_GLOBAL_INTEGER(lp_ctdb_timeout, &Globals.ctdb_timeout) FN_LOCAL_STRING(lp_printcommand, szPrintcommand) FN_LOCAL_STRING(lp_lpqcommand, szLpqcommand) FN_LOCAL_STRING(lp_lprmcommand, szLprmcommand) |