summaryrefslogtreecommitdiff
path: root/server/monitor
diff options
context:
space:
mode:
authorSimo Sorce <ssorce@redhat.com>2009-10-05 10:09:09 -0400
committerSimo Sorce <ssorce@redhat.com>2009-10-09 16:11:21 -0400
commite2d17ea806d273784b621583dd0490c2f69f237d (patch)
tree90f8e0989f7b93c134b082e4282df85d54cd9c9a /server/monitor
parentdc55d62f4d9f6520ffcb4aa8c6beb308896d9fbb (diff)
downloadsssd-e2d17ea806d273784b621583dd0490c2f69f237d.tar.gz
sssd-e2d17ea806d273784b621583dd0490c2f69f237d.tar.bz2
sssd-e2d17ea806d273784b621583dd0490c2f69f237d.zip
Start responders predictably after providers
Instead of waiting for an arbitrary timeout, start all providers first, and wait for all of them to reply to the monitor before starting the other services. Add a timeout handler so that services are started even if one of the providers fails to actually register back to the monitor. Also fix the service destructors: delist_service was overriding the natural svc destructor. Remove the offending code and make svc_destructor always try to remove a service from the service list; if the service is not listed, this is just a no-op.
Diffstat (limited to 'server/monitor')
-rw-r--r--server/monitor/monitor.c199
1 files changed, 147 insertions, 52 deletions
diff --git a/server/monitor/monitor.c b/server/monitor/monitor.c
index abd881e0..e5ee65a0 100644
--- a/server/monitor/monitor.c
+++ b/server/monitor/monitor.c
@@ -63,7 +63,6 @@ struct svc_spy;
struct mt_svc {
struct mt_svc *prev;
struct mt_svc *next;
-
struct sbus_connection *conn;
struct svc_spy *conn_spy;
@@ -77,6 +76,8 @@ struct mt_svc {
int ping_time;
+ bool svc_started;
+
int restarts;
time_t last_restart;
time_t last_ping;
@@ -118,9 +119,10 @@ struct mt_ctx {
int inotify_fd;
int service_id_timeout;
bool check_children;
+ bool services_started;
};
-static int start_service(struct mt_svc *mt_svc, bool startup);
+static int start_service(struct mt_svc *mt_svc);
static int monitor_service_init(struct sbus_connection *conn, void *data);
@@ -137,8 +139,10 @@ static int get_service_config(struct mt_ctx *ctx, const char *name,
struct mt_svc **svc_cfg);
static int get_provider_config(struct mt_ctx *ctx, const char *name,
struct mt_svc **svc_cfg);
-static int add_new_service(struct mt_ctx *ctx, const char *name, bool startup);
-static int add_new_provider(struct mt_ctx *ctx, const char *name, bool startup);
+static int add_new_service(struct mt_ctx *ctx, const char *name);
+static int add_new_provider(struct mt_ctx *ctx, const char *name);
+
+static int mark_service_as_started(struct mt_svc *svc);
static int monitor_signal_reconf(struct config_file_ctx *file_ctx,
const char *filename);
@@ -241,12 +245,9 @@ static int client_registration(DBusMessage *message,
/* Fill in svc structure with connection data */
svc->conn = mini->conn;
- /* we need to attach a spy to the connection structure so that if some code
- * frees it we can zero it out in the service structure. Otherwise we may
- * try to access or even free, freed memory. */
- ret = add_svc_conn_spy(svc);
+ ret = mark_service_as_started(svc);
if (ret) {
- DEBUG(0, ("Failed to attch spy\n"));
+ DEBUG(1, ("Failed to mark service [%s]!\n", svc_name));
goto done;
}
@@ -285,6 +286,9 @@ static int svc_destructor(void *mem)
return 0;
}
+ /* always try to delist service */
+ DLIST_REMOVE(svc->mt_ctx->svc_list, svc);
+
/* svc is beeing freed, neutralize the spy */
if (svc->conn_spy) {
talloc_set_destructor((TALLOC_CTX *)svc->conn_spy, NULL);
@@ -320,6 +324,93 @@ static int add_svc_conn_spy(struct mt_svc *svc)
return EOK;
}
+/*
+ * Record that a service has registered with the monitor and, once no
+ * provider on the service list is still pending, start the services named
+ * in ctx->services.
+ *
+ * svc: the service that just registered; svc->mt_ctx and svc->name are read.
+ *
+ * Returns the result of add_svc_conn_spy(): zero on success, non-zero if the
+ * connection spy could not be attached. Return values of add_new_service()
+ * are not propagated.
+ */
+static int mark_service_as_started(struct mt_svc *svc)
+{
+    struct mt_ctx *ctx = svc->mt_ctx;
+    struct mt_svc *iter;
+    int ret;
+    int i;
+
+    DEBUG(5, ("Marking %s as started.\n", svc->name));
+    /* The flag is set before the spy is attached, so it stays set even if
+     * attaching the spy fails below. */
+    svc->svc_started = true;
+
+    /* we need to attach a spy to the connection structure so that if some code
+     * frees it we can zero it out in the service structure. Otherwise we may
+     * try to access or even free, freed memory. */
+    ret = add_svc_conn_spy(svc);
+    if (ret) {
+        DEBUG(0, ("Failed to attch spy\n"));
+        goto done;
+    }
+
+    if (!ctx->services_started) {
+
+        /* check if all providers are up */
+        for (iter = ctx->svc_list; iter; iter = iter->next) {
+            if (iter->provider && !iter->svc_started) {
+                /* Terminate the message with a newline like every other
+                 * DEBUG call, so log entries do not run together. */
+                DEBUG(5, ("Still waiting on %s provider.\n", iter->name));
+                break;
+            }
+        }
+
+        if (iter) {
+            /* there are still unstarted providers */
+            goto done;
+        }
+
+        /* Set the flag first so that neither a later registration nor the
+         * startup timeout handler starts the services a second time. */
+        ctx->services_started = true;
+
+        DEBUG(4, ("Now starting services!\n"));
+        /* then start all services; ctx->services is walked until its
+         * first NULL entry */
+        for (i = 0; ctx->services[i]; i++) {
+            add_new_service(ctx, ctx->services[i]);
+        }
+    }
+
+done:
+    return ret;
+}
+
+/*
+ * Timer handler: if the services have not been started by the time the
+ * timer fires, start every entry of ctx->services regardless of whether
+ * the providers have registered.
+ *
+ * ptr: the struct mt_ctx handed to the timer when it was registered.
+ * ev, te and t are not used.
+ */
+static void services_startup_timeout(struct tevent_context *ev,
+                                     struct tevent_timer *te,
+                                     struct timeval t, void *ptr)
+{
+    struct mt_ctx *mon = talloc_get_type(ptr, struct mt_ctx);
+    int idx = 0;
+
+    DEBUG(6, ("Handling timeout\n"));
+
+    /* Nothing to do when the services were already started. */
+    if (mon->services_started) {
+        return;
+    }
+
+    DEBUG(1, ("Providers did not start in time, "
+              "forcing services startup!\n"));
+
+    mon->services_started = true;
+
+    DEBUG(4, ("Now starting services!\n"));
+    /* walk the service names until the first NULL entry */
+    while (mon->services[idx]) {
+        add_new_service(mon, mon->services[idx]);
+        idx++;
+    }
+}
+
+/*
+ * Register services_startup_timeout() as a timer on ctx->ev, set to fire
+ * 5 seconds from now, with ctx passed as the handler's argument.
+ *
+ * Returns EOK when the timer was added, ENOMEM when tevent_add_timer()
+ * returned NULL.
+ */
+static int add_services_startup_timeout(struct mt_ctx *ctx)
+{
+    struct tevent_timer *timer;
+    struct timeval deadline;
+
+    /* 5 seconds should be plenty */
+    deadline = tevent_timeval_current_ofs(5, 0);
+
+    timer = tevent_add_timer(ctx->ev, ctx, deadline,
+                             services_startup_timeout, ctx);
+    if (timer) {
+        return EOK;
+    }
+
+    DEBUG(0,("Out of memory?!\n"));
+    return ENOMEM;
+}
+
struct sbus_method monitor_methods[] = {
{ MON_SRV_METHOD_VERSION, get_monitor_version },
{ MON_SRV_METHOD_REGISTER, client_registration },
@@ -379,7 +470,7 @@ static void svc_try_restart(struct mt_svc *svc, time_t now)
*/
talloc_free(svc->ping_ev);
- ret = start_service(svc, false);
+ ret = start_service(svc);
if (ret != EOK) {
DEBUG(0,("Failed to restart service '%s'\n", svc->name));
talloc_free(svc);
@@ -939,14 +1030,14 @@ static int get_service_config(struct mt_ctx *ctx, const char *name,
return EOK;
}
-static int add_new_service(struct mt_ctx *ctx, const char *name, bool startup)
+static int add_new_service(struct mt_ctx *ctx, const char *name)
{
int ret;
struct mt_svc *svc;
ret = get_service_config(ctx, name, &svc);
- ret = start_service(svc, startup);
+ ret = start_service(svc);
if (ret != EOK) {
DEBUG(0,("Failed to start service '%s'\n", svc->name));
talloc_free(svc);
@@ -1043,7 +1134,7 @@ static int get_provider_config(struct mt_ctx *ctx, const char *name,
return EOK;
}
-static int add_new_provider(struct mt_ctx *ctx, const char *name, bool startup)
+static int add_new_provider(struct mt_ctx *ctx, const char *name)
{
int ret;
struct mt_svc *svc;
@@ -1055,7 +1146,17 @@ static int add_new_provider(struct mt_ctx *ctx, const char *name, bool startup)
return ret;
}
- ret = start_service(svc, startup);
+ if (strcasecmp(svc->provider, "local") == 0) {
+ /* The LOCAL provider requires no back-end currently
+ * We'll add it to the service list, but we don't need
+ * to poll it.
+ */
+ svc->svc_started = true;
+ DLIST_ADD(ctx->svc_list, svc);
+ return ENOENT;
+ }
+
+ ret = start_service(svc);
if (ret != EOK) {
DEBUG(0,("Failed to start service '%s'\n", svc->name));
talloc_free(svc);
@@ -1135,7 +1236,7 @@ static int update_monitor_config(struct mt_ctx *ctx)
if (ctx->services[j] == NULL) {
/* New service added */
- add_new_service(ctx, new_config->services[i], false);
+ add_new_service(ctx, new_config->services[i]);
}
else {
/* Service already enabled, check for changes */
@@ -1220,7 +1321,7 @@ static int update_monitor_config(struct mt_ctx *ctx)
if (dom == NULL) {
/* New provider added */
- add_new_provider(ctx, new_dom->name, false);
+ add_new_provider(ctx, new_dom->name);
}
else {
/* Provider is already in the list.
@@ -1849,8 +1950,9 @@ int monitor_process_init(struct mt_ctx *ctx,
TALLOC_CTX *tmp_ctx;
struct sysdb_ctx_list *db_list;
struct tevent_signal *tes;
- int ret, i;
struct sss_domain_info *dom;
+ int num_providers;
+ int ret;
#if 0
This feature is incomplete and can leave the SSSD in a bad state if the
@@ -1893,14 +1995,34 @@ int monitor_process_init(struct mt_ctx *ctx,
return ret;
}
- /* then start all services */
- for (i = 0; ctx->services[i]; i++) {
- add_new_service(ctx, ctx->services[i], true);
+ /* start providers */
+ num_providers = 0;
+ for (dom = ctx->domains; dom; dom = dom->next) {
+ ret = add_new_provider(ctx, dom->name);
+ if (ret != EOK && ret != ENOENT) {
+ return ret;
+ }
+ if (ret != ENOENT) {
+ num_providers++;
+ }
}
- /* now start the data providers */
- for (dom = ctx->domains; dom; dom = dom->next) {
- add_new_provider(ctx, dom->name, true);
+ if (num_providers > 0) {
+ /* now set the services startup timeout *
+ * (responders will be started automatically when all
+ * providers are up and running or when the timeout
+ * expires) */
+ ret = add_services_startup_timeout(ctx);
+ if (ret != EOK) {
+ return ret;
+ }
+ } else {
+ int i;
+ /* No providers start services immediately
+ * Normally this means only LOCAL is configured */
+ for (i = 0; ctx->services[i]; i++) {
+ add_new_service(ctx, ctx->services[i]);
+ }
}
/* now start checking for global events */
@@ -2239,7 +2361,7 @@ static void service_startup_handler(struct tevent_context *ev,
struct tevent_timer *te,
struct timeval t, void *ptr);
-static int start_service(struct mt_svc *svc, bool startup)
+static int start_service(struct mt_svc *svc)
{
struct tevent_timer *te;
struct timeval tv;
@@ -2251,16 +2373,7 @@ static int start_service(struct mt_svc *svc, bool startup)
* to accept connections. So if startup is true delay by 2 seconds any
* process that is not a data provider */
- /* FIXME: use stat to check the pipes are available instead and rescheduleif
- * not */
-
- if (startup &&
- ((strcasecmp(svc->name, "nss") == 0) ||
- (strcasecmp(svc->name, "pam") == 0))) {
- tv = tevent_timeval_current_ofs(2, 0);
- } else {
- tv = tevent_timeval_current();
- }
+ tv = tevent_timeval_current();
/* Add a timed event to start up the service.
* We have to do this in order to avoid a race
@@ -2277,13 +2390,6 @@ static int start_service(struct mt_svc *svc, bool startup)
return EOK;
}
-static int delist_service(void *ptr) {
- struct mt_svc *svc =
- talloc_get_type(ptr, struct mt_svc);
- DLIST_REMOVE(svc->mt_ctx->svc_list, svc);
- return 0;
-}
-
static void service_startup_handler(struct tevent_context *ev,
struct tevent_timer *te,
struct timeval t, void *ptr)
@@ -2296,16 +2402,6 @@ static void service_startup_handler(struct tevent_context *ev,
return;
}
- if (mt_svc->provider && strcasecmp(mt_svc->provider, "local") == 0) {
- /* The LOCAL provider requires no back-end currently
- * We'll add it to the service list, but we don't need
- * to poll it.
- */
- DLIST_ADD(mt_svc->mt_ctx->svc_list, mt_svc);
- talloc_set_destructor((TALLOC_CTX *)mt_svc, delist_service);
- return;
- }
-
mt_svc->pid = fork();
if (mt_svc->pid != 0) {
if (mt_svc->pid == -1) {
@@ -2317,7 +2413,6 @@ static void service_startup_handler(struct tevent_context *ev,
mt_svc->mt_ctx->check_children = true;
mt_svc->failed_pongs = 0;
DLIST_ADD(mt_svc->mt_ctx->svc_list, mt_svc);
- talloc_set_destructor((TALLOC_CTX *)mt_svc, delist_service);
set_tasks_checker(mt_svc);
return;