From 79150da70bbfddea3dc4013212fc7314b1004534 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Wed, 20 Aug 2008 16:24:22 -0700 Subject: Here is a re-working of the winbindd reconnect code to cope with rebooting a DC. This replaces the code I asked Volker to revert. The logic is pretty simple. It adds a new parameter, "winbind reconnect delay", set to 30 seconds by default, which determines how long to wait between connection attempts. To avoid overwhelming the box with DC-probe forked children, the code now keeps track of the DC probe child per winbindd_domain struct and only starts a new one if the existing one has died. I also added a little logic to make sure the dc probe child always sends a message whatever the reason for exit so we will always reschedule another connect attempt. Also added documentation. Jeremy. (This used to be commit 8027197635b988b3dcf9d3d00126a024e768fa62) --- .../smbdotconf/winbind/winbindreconnectdelay.xml | 15 +++++++++ source3/include/proto.h | 1 + source3/param/loadparm.c | 12 +++++++ source3/winbindd/winbindd.h | 1 + source3/winbindd/winbindd_cm.c | 39 +++++++++++++++++----- source3/winbindd/winbindd_util.c | 2 +- 6 files changed, 61 insertions(+), 9 deletions(-) create mode 100644 docs-xml/smbdotconf/winbind/winbindreconnectdelay.xml diff --git a/docs-xml/smbdotconf/winbind/winbindreconnectdelay.xml b/docs-xml/smbdotconf/winbind/winbindreconnectdelay.xml new file mode 100644 index 0000000000..2da263e5ce --- /dev/null +++ b/docs-xml/smbdotconf/winbind/winbindreconnectdelay.xml @@ -0,0 +1,15 @@ + + + This parameter specifies the number of + seconds the winbindd + 8 daemon will wait between + attempts to contact a Domain controller for a domain that is + determined to be down or not contactable. + + +30 + diff --git a/source3/include/proto.h b/source3/include/proto.h index 3d0d419022..d3a8dbbc7f 100644 --- a/source3/include/proto.h +++ b/source3/include/proto.h @@ -6049,6 +6049,7 @@ int lp_directory_name_cache_size(int ); int lp_smb_encrypt(int ); char lp_magicchar(const struct share_params *p ); int lp_winbind_cache_time(void); +int lp_winbind_reconnect_delay(void); const char **lp_winbind_nss_info(void); int lp_algorithmic_rid_base(void); int lp_name_cache_timeout(void); diff --git a/source3/param/loadparm.c b/source3/param/loadparm.c index bc111df4e1..6817eca1d1 100644 --- a/source3/param/loadparm.c +++ b/source3/param/loadparm.c @@ -240,6 +240,7 @@ struct global { int map_to_guest; int oplock_break_wait_time; int winbind_cache_time; + int winbind_reconnect_delay; int winbind_max_idle_children; char **szWinbindNssInfo; int iLockSpinTime; @@ -4362,6 +4363,15 @@ static struct parm_struct parm_table[] = { .enum_list = NULL, .flags = FLAG_ADVANCED, }, + { + .label = "winbind reconnect delay", + .type = P_INTEGER, + .p_class = P_GLOBAL, + .ptr = &Globals.winbind_reconnect_delay, + .special = NULL, + .enum_list = NULL, + .flags = FLAG_ADVANCED, + }, { .label = "winbind enum users", .type = P_BOOL, @@ -4829,6 +4839,7 @@ static void init_globals(bool first_time_only) Globals.clustering = False; Globals.winbind_cache_time = 300; /* 5 minutes */ + Globals.winbind_reconnect_delay = 30; /* 30 seconds */ Globals.bWinbindEnumUsers = False; Globals.bWinbindEnumGroups = False; Globals.bWinbindUseDefaultDomain = False; @@ -5341,6 +5352,7 @@ FN_LOCAL_INTEGER(lp_directory_name_cache_size, iDirectoryNameCacheSize) FN_LOCAL_INTEGER(lp_smb_encrypt, ismb_encrypt) FN_LOCAL_CHAR(lp_magicchar, magic_char) FN_GLOBAL_INTEGER(lp_winbind_cache_time, &Globals.winbind_cache_time) +FN_GLOBAL_INTEGER(lp_winbind_reconnect_delay, &Globals.winbind_reconnect_delay) FN_GLOBAL_LIST(lp_winbind_nss_info, &Globals.szWinbindNssInfo) FN_GLOBAL_INTEGER(lp_algorithmic_rid_base, &Globals.AlgorithmicRidBase) FN_GLOBAL_INTEGER(lp_name_cache_timeout, &Globals.name_cache_timeout) diff --git a/source3/winbindd/winbindd.h b/source3/winbindd/winbindd.h index fe0c076209..1b8cd9163f 100644 --- a/source3/winbindd/winbindd.h +++ b/source3/winbindd/winbindd.h @@ -204,6 +204,7 @@ struct winbindd_domain { uint32_t id_range_low, id_range_high; /* A working DC */ + pid_t dc_probe_pid; /* Child we're using to detect the DC. */ fstring dcname; struct sockaddr_storage dcaddr; diff --git a/source3/winbindd/winbindd_cm.c b/source3/winbindd/winbindd_cm.c index 47df4e4058..69e95b1c05 100644 --- a/source3/winbindd/winbindd_cm.c +++ b/source3/winbindd/winbindd_cm.c @@ -171,20 +171,33 @@ static bool fork_child_dc_connect(struct winbindd_domain *domain) struct dc_name_ip *dcs = NULL; int num_dcs = 0; TALLOC_CTX *mem_ctx = NULL; - pid_t child_pid; pid_t parent_pid = sys_getpid(); /* Stop zombies */ CatchChild(); - child_pid = sys_fork(); + if (domain->dc_probe_pid != (pid_t)-1) { + /* + * We might already have a DC probe + * child working, check. + */ + if (process_exists_by_pid(domain->dc_probe_pid)) { + DEBUG(10,("fork_child_dc_connect: pid %u already " + "checking for DC's.\n", + (unsigned int)domain->dc_probe_pid)); + return true; + } + domain->dc_probe_pid = (pid_t)-1; + } - if (child_pid == -1) { + domain->dc_probe_pid = sys_fork(); + + if (domain->dc_probe_pid == (pid_t)-1) { DEBUG(0, ("fork_child_dc_connect: Could not fork: %s\n", strerror(errno))); return False; } - if (child_pid != 0) { + if (domain->dc_probe_pid != (pid_t)0) { /* Parent */ messaging_register(winbind_messaging_context(), NULL, MSG_WINBIND_TRY_TO_GO_ONLINE, @@ -201,6 +214,11 @@ static bool fork_child_dc_connect(struct winbindd_domain *domain) if (!reinit_after_fork(winbind_messaging_context(), true)) { DEBUG(0,("reinit_after_fork() failed\n")); + messaging_send_buf(winbind_messaging_context(), + pid_to_procid(parent_pid), + MSG_WINBIND_FAILED_TO_GO_ONLINE, + (uint8 *)domain->name, + strlen(domain->name)+1); _exit(0); } @@ -218,6 +236,11 @@ static bool fork_child_dc_connect(struct winbindd_domain *domain) mem_ctx = talloc_init("fork_child_dc_connect"); if (!mem_ctx) { DEBUG(0,("talloc_init failed.\n")); + messaging_send_buf(winbind_messaging_context(), + pid_to_procid(parent_pid), + MSG_WINBIND_FAILED_TO_GO_ONLINE, + (uint8 *)domain->name, + strlen(domain->name)+1); _exit(0); } @@ -291,12 +314,12 @@ static void check_domain_online_handler(struct event_context *ctx, static void calc_new_online_timeout_check(struct winbindd_domain *domain) { - int wbc = lp_winbind_cache_time(); + int wbr = lp_winbind_reconnect_delay(); if (domain->startup) { domain->check_online_timeout = 10; - } else if (domain->check_online_timeout < wbc) { - domain->check_online_timeout = wbc; + } else if (domain->check_online_timeout < wbr) { + domain->check_online_timeout = wbr; } } @@ -336,7 +359,7 @@ void set_domain_offline(struct winbindd_domain *domain) } /* If we're in statup mode, check again in 10 seconds, not in - lp_winbind_cache_time() seconds (which is 5 mins by default). */ + lp_winbind_reconnect_delay() seconds (which is 30 seconds by default). */ calc_new_online_timeout_check(domain); diff --git a/source3/winbindd/winbindd_util.c b/source3/winbindd/winbindd_util.c index 77b17787c9..4668d3725d 100644 --- a/source3/winbindd/winbindd_util.c +++ b/source3/winbindd/winbindd_util.c @@ -180,11 +180,11 @@ static struct winbindd_domain *add_trusted_domain(const char *domain_name, const domain->initialized = False; domain->online = is_internal_domain(sid); domain->check_online_timeout = 0; + domain->dc_probe_pid = (pid_t)-1; if (sid) { sid_copy(&domain->sid, sid); } - /* Link to domain list */ DLIST_ADD_END(_domain_list, domain, struct winbindd_domain *); -- cgit