diff options
author | Stephen Gallagher <sgallagh@redhat.com> | 2009-01-13 14:13:06 -0500 |
---|---|---|
committer | Simo Sorce <idra@samba.org> | 2009-01-13 16:41:02 -0500 |
commit | 9ca78970073ca9466cb85dceb560e2b925c9580b (patch) | |
tree | 9aeb6d3d7647554d703c5d64037a849334ce9f1e | |
parent | be1a20d965497c96b6cf1ca14832a30fa7c140b7 (diff) | |
download | sssd-9ca78970073ca9466cb85dceb560e2b925c9580b.tar.gz sssd-9ca78970073ca9466cb85dceb560e2b925c9580b.tar.bz2 sssd-9ca78970073ca9466cb85dceb560e2b925c9580b.zip |
Monitor will now start services asynchronously. This means that forked services can no longer start before the monitor is running its mainloop. This avoids the race condition where the child services attempted to connect to the monitor SBUS before it was able to answer requests.
-rw-r--r-- | server/monitor.c | 78 |
1 files changed, 57 insertions, 21 deletions
diff --git a/server/monitor.c b/server/monitor.c index 18fb3255..921e8b59 100644 --- a/server/monitor.c +++ b/server/monitor.c @@ -35,8 +35,6 @@ #include "sbus/sssd_dbus.h" #include "sbus_interfaces.h" -static int start_service(const char *name, const char *command, pid_t *retpid); - /* ping time cannot be less then once every few seconds or the * monitor will get crazy hammering children with messages */ #define MONITOR_MIN_PING_TIME 10 @@ -73,6 +71,8 @@ struct mt_ctx { int service_ping_time; }; +static int start_service(struct mt_svc *mt_svc); + static int dbus_service_init(struct sbus_conn_ctx *conn_ctx, void *data); static void identity_check(DBusPendingCall *pending, void *data); @@ -223,6 +223,7 @@ static void tasks_check_handler(struct event_context *ev, } if (!process_alive) { + DLIST_REMOVE(svc->mt_ctx->svc_list, svc); if (svc->last_restart != 0) { if ((now - svc->last_restart) > 30) { /* TODO: get val from config */ /* it was long ago reset restart threshold */ @@ -237,7 +238,7 @@ static void tasks_check_handler(struct event_context *ev, return; } - ret = start_service(svc->name, svc->command, &svc->pid); + ret = start_service(svc); if (ret != EOK) { DEBUG(0,("Failed to restart service '%s'\n", svc->name)); talloc_free(svc); @@ -246,7 +247,7 @@ static void tasks_check_handler(struct event_context *ev, svc->restarts++; svc->last_restart = now; - svc->last_pong = 0; + return; } /* all fine, set up the task checker again */ @@ -356,16 +357,15 @@ int monitor_process_init(TALLOC_CTX *mem_ctx, } talloc_free(path); - ret = start_service(svc->name, svc->command, &svc->pid); + /* Add this service to the queue to be started once the monitor + * enters its mainloop. + */ + ret = start_service(svc); if (ret != EOK) { DEBUG(0,("Failed to start service '%s'\n", svc->name)); talloc_free(svc); continue; } - - DLIST_ADD(ctx->svc_list, svc); - - set_tasks_checker(svc); } /* now start the data providers */ @@ -407,7 +407,7 @@ int monitor_process_init(TALLOC_CTX *mem_ctx, continue; } - ret = start_service(doms[i], svc->command, &svc->pid); + ret = start_service(svc); if (ret != EOK) { DEBUG(0,("Failed to start provider for '%s'\n", doms[i])); talloc_free(svc); @@ -862,32 +862,68 @@ fail: return NULL; } -static int start_service(const char *name, const char *command, pid_t *retpid) +static void service_startup_handler(struct event_context *ev, + struct timed_event *te, + struct timeval t, void *ptr); + +static int start_service(struct mt_svc *svc) { + struct timed_event *te; + struct timeval tv; + + DEBUG(4,("Queueing service %s for startup\n", svc->name)); + + /* Add a timed event to start up the service. + * We have to do this in order to avoid a race + * condition where the service being started forks + * and attempts to connect to the SBUS before + * the monitor is serving it. + */ + gettimeofday(&tv, NULL); + te = event_add_timed(svc->mt_ctx->ev, svc, tv, + service_startup_handler, svc); + if (te == NULL) { + DEBUG(0, ("Unable to queue service %s for startup\n", svc->name)); + return ENOMEM; + } + return EOK; +} + +static void service_startup_handler(struct event_context *ev, + struct timed_event *te, + struct timeval t, void *ptr) +{ + struct mt_svc *mt_svc; char **args; - pid_t pid; - DEBUG(4,("Starting service %s\n", name)); + mt_svc = talloc_get_type(ptr, struct mt_svc); + if (mt_svc == NULL) { + return; + } - pid = fork(); - if (pid != 0) { - if (pid == -1) { - return ECHILD; + mt_svc->pid = fork(); + if (mt_svc->pid != 0) { + if (mt_svc->pid == -1) { + DEBUG(0, ("Could not fork child to start service [%s]. Continuing.\n", mt_svc->name)) + return; } - *retpid = pid; + /* Parent */ + mt_svc->last_pong = time(NULL); + DLIST_ADD(mt_svc->mt_ctx->svc_list, mt_svc); + set_tasks_checker(mt_svc); - return EOK; + return; } /* child */ - args = parse_args(command); + args = parse_args(mt_svc->command); execvp(args[0], args); /* If we are here, exec() has failed * Print errno and abort quickly */ - DEBUG(0,("Could not exec %s, reason: %s\n", command, strerror(errno))); + DEBUG(0,("Could not exec %s, reason: %s\n", mt_svc->command, strerror(errno))); /* We have to call _exit() instead of exit() here * because a bug in D-BUS will cause the server to |