summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jeremy Allison <jra@samba.org> 2008-08-20 16:24:22 -0700
committer Jeremy Allison <jra@samba.org> 2008-08-20 16:24:22 -0700
commit 79150da70bbfddea3dc4013212fc7314b1004534 (patch)
tree 2d815dd132fc52648a1075fd30b245bccfc727aa
parent 2fdcaab8c49259427df8ea00e47583772ea06602 (diff)
download: samba-79150da70bbfddea3dc4013212fc7314b1004534.tar.gz
samba-79150da70bbfddea3dc4013212fc7314b1004534.tar.bz2
samba-79150da70bbfddea3dc4013212fc7314b1004534.zip
Here is a re-working of the winbindd reconnect code to cope with rebooting a DC. This replaces the code I asked Volker to revert.

The logic is pretty simple. It adds a new parameter, "winbind reconnect delay", set to 30 seconds by default, which determines how long to wait between connection attempts. To avoid overwhelming the box with DC-probe forked children, the code now keeps track of the DC probe child per winbindd_domain struct and only starts a new one if the existing one has died. I also added a little logic to make sure the DC probe child always sends a message, whatever the reason for exit, so we will always reschedule another connect attempt. Also added documentation.

Jeremy.

(This used to be commit 8027197635b988b3dcf9d3d00126a024e768fa62)
-rw-r--r-- docs-xml/smbdotconf/winbind/winbindreconnectdelay.xml 15
-rw-r--r-- source3/include/proto.h 1
-rw-r--r-- source3/param/loadparm.c 12
-rw-r--r-- source3/winbindd/winbindd.h 1
-rw-r--r-- source3/winbindd/winbindd_cm.c 39
-rw-r--r-- source3/winbindd/winbindd_util.c 2
6 files changed, 61 insertions, 9 deletions
diff --git a/docs-xml/smbdotconf/winbind/winbindreconnectdelay.xml b/docs-xml/smbdotconf/winbind/winbindreconnectdelay.xml
new file mode 100644
index 0000000000..2da263e5ce
--- /dev/null
+++ b/docs-xml/smbdotconf/winbind/winbindreconnectdelay.xml
@@ -0,0 +1,15 @@
+<samba:parameter name="winbind reconnect delay"
+ context="G"
+ type="integer"
+ advanced="1" developer="1"
+ xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
+<description>
+ <para>This parameter specifies the number of
+ seconds the <citerefentry><refentrytitle>winbindd</refentrytitle>
+ <manvolnum>8</manvolnum></citerefentry> daemon will wait between
+ attempts to contact a Domain controller for a domain that is
+ determined to be down or not contactable.</para>
+</description>
+
+<value type="default">30</value>
+</samba:parameter>
diff --git a/source3/include/proto.h b/source3/include/proto.h
index 3d0d419022..d3a8dbbc7f 100644
--- a/source3/include/proto.h
+++ b/source3/include/proto.h
@@ -6049,6 +6049,7 @@ int lp_directory_name_cache_size(int );
int lp_smb_encrypt(int );
char lp_magicchar(const struct share_params *p );
int lp_winbind_cache_time(void);
+int lp_winbind_reconnect_delay(void);
const char **lp_winbind_nss_info(void);
int lp_algorithmic_rid_base(void);
int lp_name_cache_timeout(void);
diff --git a/source3/param/loadparm.c b/source3/param/loadparm.c
index bc111df4e1..6817eca1d1 100644
--- a/source3/param/loadparm.c
+++ b/source3/param/loadparm.c
@@ -240,6 +240,7 @@ struct global {
int map_to_guest;
int oplock_break_wait_time;
int winbind_cache_time;
+ int winbind_reconnect_delay;
int winbind_max_idle_children;
char **szWinbindNssInfo;
int iLockSpinTime;
@@ -4363,6 +4364,15 @@ static struct parm_struct parm_table[] = {
.flags = FLAG_ADVANCED,
},
{
+ .label = "winbind reconnect delay",
+ .type = P_INTEGER,
+ .p_class = P_GLOBAL,
+ .ptr = &Globals.winbind_reconnect_delay,
+ .special = NULL,
+ .enum_list = NULL,
+ .flags = FLAG_ADVANCED,
+ },
+ {
.label = "winbind enum users",
.type = P_BOOL,
.p_class = P_GLOBAL,
@@ -4829,6 +4839,7 @@ static void init_globals(bool first_time_only)
Globals.clustering = False;
Globals.winbind_cache_time = 300; /* 5 minutes */
+ Globals.winbind_reconnect_delay = 30; /* 30 seconds */
Globals.bWinbindEnumUsers = False;
Globals.bWinbindEnumGroups = False;
Globals.bWinbindUseDefaultDomain = False;
@@ -5341,6 +5352,7 @@ FN_LOCAL_INTEGER(lp_directory_name_cache_size, iDirectoryNameCacheSize)
FN_LOCAL_INTEGER(lp_smb_encrypt, ismb_encrypt)
FN_LOCAL_CHAR(lp_magicchar, magic_char)
FN_GLOBAL_INTEGER(lp_winbind_cache_time, &Globals.winbind_cache_time)
+FN_GLOBAL_INTEGER(lp_winbind_reconnect_delay, &Globals.winbind_reconnect_delay)
FN_GLOBAL_LIST(lp_winbind_nss_info, &Globals.szWinbindNssInfo)
FN_GLOBAL_INTEGER(lp_algorithmic_rid_base, &Globals.AlgorithmicRidBase)
FN_GLOBAL_INTEGER(lp_name_cache_timeout, &Globals.name_cache_timeout)
diff --git a/source3/winbindd/winbindd.h b/source3/winbindd/winbindd.h
index fe0c076209..1b8cd9163f 100644
--- a/source3/winbindd/winbindd.h
+++ b/source3/winbindd/winbindd.h
@@ -204,6 +204,7 @@ struct winbindd_domain {
uint32_t id_range_low, id_range_high;
/* A working DC */
+ pid_t dc_probe_pid; /* Child we're using to detect the DC. */
fstring dcname;
struct sockaddr_storage dcaddr;
diff --git a/source3/winbindd/winbindd_cm.c b/source3/winbindd/winbindd_cm.c
index 47df4e4058..69e95b1c05 100644
--- a/source3/winbindd/winbindd_cm.c
+++ b/source3/winbindd/winbindd_cm.c
@@ -171,20 +171,33 @@ static bool fork_child_dc_connect(struct winbindd_domain *domain)
struct dc_name_ip *dcs = NULL;
int num_dcs = 0;
TALLOC_CTX *mem_ctx = NULL;
- pid_t child_pid;
pid_t parent_pid = sys_getpid();
/* Stop zombies */
CatchChild();
- child_pid = sys_fork();
+ if (domain->dc_probe_pid != (pid_t)-1) {
+ /*
+ * We might already have a DC probe
+ * child working, check.
+ */
+ if (process_exists_by_pid(domain->dc_probe_pid)) {
+ DEBUG(10,("fork_child_dc_connect: pid %u already "
+ "checking for DC's.\n",
+ (unsigned int)domain->dc_probe_pid));
+ return true;
+ }
+ domain->dc_probe_pid = (pid_t)-1;
+ }
- if (child_pid == -1) {
+ domain->dc_probe_pid = sys_fork();
+
+ if (domain->dc_probe_pid == (pid_t)-1) {
DEBUG(0, ("fork_child_dc_connect: Could not fork: %s\n", strerror(errno)));
return False;
}
- if (child_pid != 0) {
+ if (domain->dc_probe_pid != (pid_t)0) {
/* Parent */
messaging_register(winbind_messaging_context(), NULL,
MSG_WINBIND_TRY_TO_GO_ONLINE,
@@ -201,6 +214,11 @@ static bool fork_child_dc_connect(struct winbindd_domain *domain)
if (!reinit_after_fork(winbind_messaging_context(), true)) {
DEBUG(0,("reinit_after_fork() failed\n"));
+ messaging_send_buf(winbind_messaging_context(),
+ pid_to_procid(parent_pid),
+ MSG_WINBIND_FAILED_TO_GO_ONLINE,
+ (uint8 *)domain->name,
+ strlen(domain->name)+1);
_exit(0);
}
@@ -218,6 +236,11 @@ static bool fork_child_dc_connect(struct winbindd_domain *domain)
mem_ctx = talloc_init("fork_child_dc_connect");
if (!mem_ctx) {
DEBUG(0,("talloc_init failed.\n"));
+ messaging_send_buf(winbind_messaging_context(),
+ pid_to_procid(parent_pid),
+ MSG_WINBIND_FAILED_TO_GO_ONLINE,
+ (uint8 *)domain->name,
+ strlen(domain->name)+1);
_exit(0);
}
@@ -291,12 +314,12 @@ static void check_domain_online_handler(struct event_context *ctx,
static void calc_new_online_timeout_check(struct winbindd_domain *domain)
{
- int wbc = lp_winbind_cache_time();
+ int wbr = lp_winbind_reconnect_delay();
if (domain->startup) {
domain->check_online_timeout = 10;
- } else if (domain->check_online_timeout < wbc) {
- domain->check_online_timeout = wbc;
+ } else if (domain->check_online_timeout < wbr) {
+ domain->check_online_timeout = wbr;
}
}
@@ -336,7 +359,7 @@ void set_domain_offline(struct winbindd_domain *domain)
}
/* If we're in statup mode, check again in 10 seconds, not in
- lp_winbind_cache_time() seconds (which is 5 mins by default). */
+ lp_winbind_reconnect_delay() seconds (which is 30 seconds by default). */
calc_new_online_timeout_check(domain);
diff --git a/source3/winbindd/winbindd_util.c b/source3/winbindd/winbindd_util.c
index 77b17787c9..4668d3725d 100644
--- a/source3/winbindd/winbindd_util.c
+++ b/source3/winbindd/winbindd_util.c
@@ -180,11 +180,11 @@ static struct winbindd_domain *add_trusted_domain(const char *domain_name, const
domain->initialized = False;
domain->online = is_internal_domain(sid);
domain->check_online_timeout = 0;
+ domain->dc_probe_pid = (pid_t)-1;
if (sid) {
sid_copy(&domain->sid, sid);
}
-
/* Link to domain list */
DLIST_ADD_END(_domain_list, domain, struct winbindd_domain *);