/*
SSSD
Service monitor
Copyright (C) Simo Sorce 2008
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <time.h>
#include "util/util.h"
#include "popt.h"
#include "tevent.h"
#include "confdb/confdb.h"
#include "db/sysdb.h"
#include "monitor/monitor.h"
#include "dbus/dbus.h"
#include "sbus/sssd_dbus.h"
#include "monitor/monitor_interfaces.h"
/* ping time cannot be less than once every few seconds or the
 * monitor will go crazy hammering children with messages */
#define MONITOR_DEF_PING_TIME 10
#define MONITOR_CONF_ENTRY "config/services/monitor"
/* Glue between one sbus connection and the service using it.
 * It is allocated as a talloc child of the connection context in
 * dbus_service_init(), and its destructor clears the back pointer held
 * by the service. */
struct mt_conn {
/* connection this record hangs off */
struct sbus_conn_ctx *conn_ctx;
/* service attached to the connection: first the temporary service made
 * in dbus_service_init(), then the real one once identity_check()
 * has matched it */
struct mt_svc *svc_ptr;
};
/* State kept by the monitor for one child process (a service or a
 * data provider). */
struct mt_svc {
/* links for mt_ctx->svc_list (handled with DLIST_ADD/DLIST_REMOVE) */
struct mt_svc *prev;
struct mt_svc *next;
/* connection record of the service, NULL while no connection is
 * attached (see mt_conn_destructor() and service_send_ping()) */
struct mt_conn *mt_conn;
/* monitor context this service belongs to */
struct mt_ctx *mt_ctx;
/* "provider" value read from the domain configuration; only set by
 * get_provider_config() */
char *provider;
/* command line that is split by parse_args() and passed to execvp() */
char *command;
/* service or domain name as found in the configuration */
char *name;
/* string compared with the name returned by the identity request:
 * the service name, or "%BE_<name>" for providers */
char *identity;
/* pid returned by fork() when the service was started */
pid_t pid;
/* seconds between two runs of tasks_check_handler() */
int ping_time;
/* restart counter and time of the last restart, used by
 * svc_try_restart() to limit restarts */
int restarts;
time_t last_restart;
/* time of the last ping reply (initialised at start-up) */
time_t last_pong;
/* NOTE(review): not referenced anywhere in this file */
int debug_level;
/* timer scheduled by set_tasks_checker() */
struct tevent_timer *ping_ev;
};
struct mt_ctx {
struct tevent_context *ev;
struct confdb_ctx *cdb;
struct sss_domain_info *domains;
char **services;
struct mt_svc *svc_list;
struct sbus_srv_ctx *sbus_srv;
int service_id_timeout;
};
/* Forward declarations of helpers that are used before their
 * definition further down in this file. */
static int start_service(struct mt_svc *mt_svc);
static int dbus_service_init(struct sbus_conn_ctx *conn_ctx, void *data);
static void identity_check(DBusPendingCall *pending, void *data);
static int service_send_ping(struct mt_svc *svc);
static void ping_check(DBusPendingCall *pending, void *data);
static int service_check_alive(struct mt_svc *svc);
static void set_tasks_checker(struct mt_svc *srv);
static void set_global_checker(struct mt_ctx *ctx);
static int monitor_kill_service (struct mt_svc *svc);
/* configuration readers: on success *svc_cfg receives a new mt_svc */
static int get_service_config(struct mt_ctx *ctx, const char *name,
struct mt_svc **svc_cfg);
static int get_provider_config(struct mt_ctx *ctx, const char *name,
struct mt_svc **svc_cfg);
/* read the configuration of a service/provider and queue it for start */
static int add_new_service (struct mt_ctx *ctx, const char *name);
static int add_new_provider (struct mt_ctx *ctx, const char *name);
/* dbus_get_monitor_version
 * D-BUS method handler: answers the caller with the monitor version
 * string (MONITOR_VERSION).
 * Returns EOK on success, ENOMEM if the reply message cannot be created
 * and EIO if the version cannot be appended to it. */
static int dbus_get_monitor_version(DBusMessage *message,
                                    struct sbus_conn_ctx *sconn)
{
    const char *monitor_version = MONITOR_VERSION;
    DBusMessage *answer;
    int result = EOK;

    answer = dbus_message_new_method_return(message);
    if (answer == NULL) {
        return ENOMEM;
    }

    if (dbus_message_append_args(answer,
                                 DBUS_TYPE_STRING, &monitor_version,
                                 DBUS_TYPE_INVALID)) {
        /* the reply is complete, hand it to the connection */
        sbus_conn_send_reply(sconn, answer);
    } else {
        result = EIO;
    }

    /* our reference is dropped whether or not the reply was sent */
    dbus_message_unref(answer);
    return result;
}
/* Methods exported by the monitor on its D-BUS interface; the table is
 * terminated by a {NULL, NULL} entry and installed in
 * monitor_dbus_init(). */
struct sbus_method monitor_methods[] = {
{ MONITOR_METHOD_VERSION, dbus_get_monitor_version},
{NULL, NULL}
};
/* monitor_dbus_init
* Set up the monitor service as a D-BUS Server */
static int monitor_dbus_init(struct mt_ctx *ctx)
{
struct sbus_method_ctx *sd_ctx;
struct sbus_srv_ctx *sbus_srv;
char *monitor_address;
int ret;
sd_ctx = talloc_zero(ctx, struct sbus_method_ctx);
if (!sd_ctx) {
return ENOMEM;
}
monitor_address = talloc_asprintf(sd_ctx, "unix:path=%s/%s",
PIPE_PATH, SSSD_SERVICE_PIPE);
if (!monitor_address) {
talloc_free(sd_ctx);
return ENOMEM;
}
/* Set up globally-available D-BUS methods */
sd_ctx->interface = talloc_strdup(sd_ctx, MONITOR_DBUS_INTERFACE);
if (!sd_ctx->interface) {
talloc_free(sd_ctx);
return ENOMEM;
}
sd_ctx->path = talloc_strdup(sd_ctx, MONITOR_DBUS_PATH);
if (!sd_ctx->path) {
talloc_free(sd_ctx);
return ENOMEM;
}
sd_ctx->methods = monitor_methods;
sd_ctx->message_handler = sbus_message_handler;
ret = sbus_new_server(ctx, ctx->ev, sd_ctx, &sbus_srv, monitor_address, dbus_service_init, ctx);
ctx->sbus_srv = sbus_srv;
talloc_free(monitor_address);
return ret;
}
/* svc_try_restart
 * Take a service whose process is gone off the monitored list and queue
 * it for a new start, unless it has already been restarted more than
 * 3 times with less than 30 seconds between restarts.
 *
 * svc is freed when the restart limit is exceeded or when the restart
 * cannot be queued, so the caller must not touch svc after this call. */
static void svc_try_restart(struct mt_svc *svc, time_t now)
{
    int ret;

    /* the service is put back on the list by service_startup_handler() */
    DLIST_REMOVE(svc->mt_ctx->svc_list, svc);

    if (svc->last_restart != 0) {
        if ((now - svc->last_restart) > 30) { /* TODO: get val from config */
            /* it was long ago reset restart threshold */
            svc->restarts = 0;
        }
    }

    /* restart the process */
    if (svc->restarts > 3) { /* TODO: get val from config */
        DEBUG(0, ("Process [%s], definitely stopped!\n", svc->name));
        talloc_free(svc);
        return;
    }

    /* Shut down the current ping timer so it will restart
     * cleanly in start_service(). Clear the pointer as well so that
     * svc never carries a reference to a freed timer.
     */
    talloc_free(svc->ping_ev);
    svc->ping_ev = NULL;

    ret = start_service(svc);
    if (ret != EOK) {
        DEBUG(0,("Failed to restart service '%s'\n", svc->name));
        talloc_free(svc);
        return;
    }

    svc->restarts++;
    svc->last_restart = now;
}
/* tasks_check_handler
 * Timer callback scheduled by set_tasks_checker() for one service.
 * It checks that the child process still exists, sends it a ping, and
 * kills it when the last ping reply is older than 30 seconds. A dead or
 * killed service is handed to svc_try_restart(); otherwise the timer is
 * armed again. */
static void tasks_check_handler(struct tevent_context *ev,
                                struct tevent_timer *te,
                                struct timeval t, void *ptr)
{
    struct mt_svc *svc = talloc_get_type(ptr, struct mt_svc);
    time_t now = time(NULL);
    bool alive;
    int rc;

    rc = service_check_alive(svc);
    /* only ECHILD means the process is gone; other errors are logged
     * and the service is still treated as running */
    alive = (rc != ECHILD);
    if (rc == ECHILD) {
        DEBUG(1,("Process (%s) is stopped!\n", svc->name));
    } else if (rc != EOK) {
        /* TODO: should we tear down it ? */
        DEBUG(1,("Checking for service %s(%d) failed!!\n",
                 svc->name, svc->pid));
    }

    if (alive) {
        rc = service_send_ping(svc);
        if (rc == ENXIO) {
            DEBUG(1,("Child (%s) not responding! (yet)\n", svc->name));
        } else if (rc != EOK) {
            /* TODO: should we tear it down ? */
            DEBUG(1,("Sending a message to service (%s) failed!!\n", svc->name));
        }

        /* TODO: get val from config */
        if (svc->last_pong != 0 && (now - svc->last_pong) > 30) {
            /* too long since we last heard of this process */
            monitor_kill_service(svc);
            alive = false;
        }
    }

    if (alive) {
        /* all fine, set up the task checker again */
        set_tasks_checker(svc);
    } else {
        svc_try_restart(svc, now);
    }
}
/* set_tasks_checker
 * Arm the per-service timer: tasks_check_handler() will run for svc
 * ping_time seconds from now (rounded down to a whole second). The
 * timer is stored in svc->ping_ev, which is NULL if it could not be
 * created. */
static void set_tasks_checker(struct mt_svc *svc)
{
    struct timeval when;

    gettimeofday(&when, NULL);
    when.tv_sec += svc->ping_time;
    when.tv_usec = 0;

    svc->ping_ev = tevent_add_timer(svc->mt_ctx->ev, svc, when,
                                    tasks_check_handler, svc);
    if (svc->ping_ev == NULL) {
        DEBUG(0, ("failed to add event, monitor offline for [%s]!\n",
                  svc->name));
        /* FIXME: shutdown ? */
    }
}
/* global_checks_handler
 * Timer callback run once a second (see set_global_checker()).
 * Collects at most one exited child with a non-blocking waitpid(); if
 * the pid belongs to a monitored service, a restart is attempted.
 * The timer is always armed again before returning. */
static void global_checks_handler(struct tevent_context *ev,
                                  struct tevent_timer *te,
                                  struct timeval t, void *ptr)
{
    struct mt_ctx *ctx = talloc_get_type(ptr, struct mt_ctx);
    struct mt_svc *cur;
    int status;
    pid_t child;

    errno = 0;
    child = waitpid(0, &status, WNOHANG);

    if (child == -1) {
        DEBUG(0, ("waitpid returned -1 (errno:%d[%s])\n",
                  errno, strerror(errno)));
    } else if (child != 0) {
        /* let's see if it is a known service, and try to restart it */
        cur = ctx->svc_list;
        while (cur != NULL && cur->pid != child) {
            cur = cur->next;
        }

        if (cur != NULL) {
            time_t now = time(NULL);
            DEBUG(1, ("Service [%s] did exit\n", cur->name));
            svc_try_restart(cur, now);
        } else {
            DEBUG(0, ("Unknown child (%d) did exit\n", child));
        }
    }

    set_global_checker(ctx);
}
/* set_global_checker
 * Schedule global_checks_handler() to run one second from now (rounded
 * down to a whole second). The process exits if the timer cannot be
 * created. */
static void set_global_checker(struct mt_ctx *ctx)
{
    struct timeval next_run;

    gettimeofday(&next_run, NULL);
    next_run.tv_sec += 1; /* once a second */
    next_run.tv_usec = 0;

    if (tevent_add_timer(ctx->ev, ctx, next_run,
                         global_checks_handler, ctx) == NULL) {
        DEBUG(0, ("failed to add global checker event! PANIC TIME!\n"));
        /* FIXME: is this right ? shoulkd we try to clean up first ?*/
        exit(-1);
    }
}
/* monitor_kill_service
 * Send SIGTERM to the process of svc.
 * Returns the result of kill(): 0 on success, non-zero when the signal
 * could not be delivered (which is only logged). */
static int monitor_kill_service (struct mt_svc *svc)
{
    int rc = kill(svc->pid, SIGTERM);

    if (rc == EOK) {
        return rc;
    }

    DEBUG(0,("Sending signal to child (%s:%d) failed! "
             "Ignore and pretend child is dead.\n",
             svc->name, svc->pid));
    return rc;
}
/* shutdown_reply
 * Completion callback of the shutdown request sent by
 * monitor_shutdown_service(). data is the mt_svc being shut down.
 *
 * If the service did not confirm the shutdown (no reply at all, or a
 * reply that is not a method return) it is killed with SIGTERM.
 * In every case the service structure is freed here, as promised by
 * monitor_shutdown_service(). */
static void shutdown_reply(DBusPendingCall *pending, void *data)
{
    DBusMessage *reply;
    struct mt_svc *svc = talloc_get_type(data, struct mt_svc);

    /* NOTE: svc->mt_conn is deliberately not touched here; it may be
     * NULL if the connection went away while the call was pending. */

    reply = dbus_pending_call_steal_reply(pending);
    if (!reply) {
        /* reply should never be null. This function shouldn't be called
         * until reply is valid or timeout has occurred. If reply is NULL
         * here, something is seriously wrong and we should bail out.
         */
        DEBUG(0, ("A reply callback was called but no reply was received"
                  " and no timeout occurred\n"));
        monitor_kill_service(svc);
    } else {
        if (dbus_message_get_type(reply) != DBUS_MESSAGE_TYPE_METHOD_RETURN) {
            /* Something went wrong on the client side
             * Time to forcibly kill the service
             */
            DEBUG(0, ("Received an error shutting down service.\n"));
            monitor_kill_service(svc);
        }
        /* only unref a reply we actually own */
        dbus_message_unref(reply);
    }

    /* No matter what happened here, we need to free the service */
    talloc_free(svc);

    dbus_pending_call_unref(pending);
}
/* monitor_shutdown_service
 * Orders a monitored service to shut down cleanly by sending it the
 * SERVICE_METHOD_SHUTDOWN request.
 *
 * Ownership: svc is always consumed. On success it is freed by
 * shutdown_reply() once the request completes; on failure the process
 * is killed and svc is freed before returning.
 *
 * Returns EOK if the request was queued, ENXIO if the service has no
 * connection, ENOMEM or EIO if the request could not be built or sent.
 */
static int monitor_shutdown_service(struct mt_svc *svc)
{
    DBusConnection *conn;
    DBusMessage *msg;
    DBusPendingCall *pending_reply = NULL;
    dbus_bool_t dbret;

    if (!svc->mt_conn) {
        /* The service never connected or its connection is gone, so
         * there is nobody to send the request to. */
        DEBUG(0, ("Service [%s] is not connected, killing it.\n",
                  svc->name));
        monitor_kill_service(svc);
        talloc_free(svc);
        return ENXIO;
    }

    conn = sbus_get_connection(svc->mt_conn->conn_ctx);

    /* Construct a shutdown message */
    msg = dbus_message_new_method_call(NULL,
                                       SERVICE_PATH,
                                       SERVICE_INTERFACE,
                                       SERVICE_METHOD_SHUTDOWN);
    if (!msg) {
        DEBUG(0,("Out of memory?!\n"));
        monitor_kill_service(svc);
        talloc_free(svc);
        return ENOMEM;
    }

    dbret = dbus_connection_send_with_reply(conn, msg, &pending_reply,
                                            svc->mt_ctx->service_id_timeout);
    /* libdbus reports success with a NULL pending call when the
     * connection is already disconnected */
    if (!dbret || pending_reply == NULL) {
        DEBUG(0, ("D-BUS send failed.\n"));
        dbus_message_unref(msg);
        monitor_kill_service(svc);
        talloc_free(svc);
        return EIO;
    }

    /* Set up the reply handler */
    dbus_pending_call_set_notify(pending_reply, shutdown_reply, svc, NULL);
    dbus_message_unref(msg);

    return EOK;
}
/* reload_reply
 * Completion callback of the reload request sent by
 * service_signal_reload(). data is the mt_svc that was signalled.
 * The content of the reply is currently ignored; a missing reply drops
 * the connection of the service (if it still has one). */
static void reload_reply(DBusPendingCall *pending, void *data)
{
    DBusMessage *reply;
    struct mt_svc *svc = talloc_get_type(data, struct mt_svc);

    reply = dbus_pending_call_steal_reply(pending);
    if (!reply) {
        /* reply should never be null. This function shouldn't be called
         * until reply is valid or timeout has occurred. If reply is NULL
         * here, something is seriously wrong and we should bail out.
         */
        DEBUG(0, ("A reply callback was called but no reply was received"
                  " and no timeout occurred\n"));

        /* Destroy this connection; mt_conn is NULL when the connection
         * has already been torn down */
        if (svc->mt_conn) {
            sbus_disconnect(svc->mt_conn->conn_ctx);
        }
        goto done;
    }

    /* TODO: Handle cases where the call has timed out or returned
     * with an error.
     */
    dbus_message_unref(reply);

done:
    dbus_pending_call_unref(pending);
}
/* service_signal_reload
 * Ask a service to reload its configuration by sending it the
 * SERVICE_METHOD_RELOAD request; the answer is handled by
 * reload_reply().
 *
 * Returns EOK if the request was queued and ENXIO if the service has no
 * connection (svc is left untouched in that case).
 * If the request cannot be built or sent the service is killed, svc is
 * FREED and ENOMEM/EIO is returned: callers walking the service list
 * must fetch svc->next before calling this function. */
static int service_signal_reload(struct mt_svc *svc)
{
    DBusMessage *msg;
    dbus_bool_t dbret;
    DBusConnection *conn;
    DBusPendingCall *pending_reply = NULL;

    if (!svc->mt_conn) {
        /* not connected (yet): there is nobody to notify */
        return ENXIO;
    }

    conn = sbus_get_connection(svc->mt_conn->conn_ctx);

    msg = dbus_message_new_method_call(NULL,
                                       SERVICE_PATH,
                                       SERVICE_INTERFACE,
                                       SERVICE_METHOD_RELOAD);
    if (!msg) {
        DEBUG(0,("Out of memory?!\n"));
        monitor_kill_service(svc);
        talloc_free(svc);
        return ENOMEM;
    }

    dbret = dbus_connection_send_with_reply(conn, msg, &pending_reply,
                                            svc->mt_ctx->service_id_timeout);
    /* libdbus reports success with a NULL pending call when the
     * connection is already disconnected */
    if (!dbret || pending_reply == NULL) {
        DEBUG(0, ("D-BUS send failed.\n"));
        dbus_message_unref(msg);
        monitor_kill_service(svc);
        talloc_free(svc);
        return EIO;
    }

    /* Set up the reply handler */
    dbus_pending_call_set_notify(pending_reply, reload_reply, svc, NULL);
    dbus_message_unref(msg);

    return EOK;
}
/* get_monitor_config
 * Read the monitor settings from the configuration database into ctx:
 *  - "sbusTimeout" (default -1) into ctx->service_id_timeout,
 *  - "activeServices" into ctx->services,
 *  - the domain list into ctx->domains.
 * All data is allocated with ctx as talloc parent.
 *
 * Returns EOK on success, EINVAL if the list of services is empty, or
 * the error reported by the configuration database. */
int get_monitor_config(struct mt_ctx *ctx)
{
    int ret;

    ret = confdb_get_int(ctx->cdb, ctx,
                         MONITOR_CONF_ENTRY, "sbusTimeout",
                         -1, &ctx->service_id_timeout);
    if (ret != EOK) {
        return ret;
    }

    ret = confdb_get_param(ctx->cdb, ctx,
                           "config/services", "activeServices",
                           &ctx->services);
    if (ret != EOK) {
        /* ctx->services cannot be trusted after a failed lookup */
        DEBUG(0, ("No services configured!\n"));
        return ret;
    }
    if (ctx->services == NULL || ctx->services[0] == NULL) {
        DEBUG(0, ("No services configured!\n"));
        return EINVAL;
    }

    ret = confdb_get_domains(ctx->cdb, ctx, &ctx->domains);
    if (ret != EOK) {
        DEBUG(2, ("No domains configured. LOCAL should always exist!\n"));
        return ret;
    }

    return EOK;
}
/* get_service_config
 * Build a new mt_svc for the service called name from the settings
 * found under "config/services/<name>":
 *  - "command": command line to run; when it is not configured the
 *    default "SSSD_LIBEXEC_PATH/sssd_<name> -d <debug_level>" is used,
 *  - "timeout": ping interval (default MONITOR_DEF_PING_TIME).
 * The identity of a service is its name.
 *
 * On success *svc_cfg receives the new structure (talloc child of ctx)
 * and EOK is returned. On failure *svc_cfg is NULL and ENOMEM or the
 * configuration database error is returned. */
static int get_service_config(struct mt_ctx *ctx, const char *name,
                              struct mt_svc **svc_cfg)
{
    struct mt_svc *cfg;
    char *conf_path;
    int rc;

    *svc_cfg = NULL;

    cfg = talloc_zero(ctx, struct mt_svc);
    if (cfg == NULL) {
        return ENOMEM;
    }
    cfg->mt_ctx = ctx;

    /* everything below is allocated on cfg, so freeing cfg at the
     * fail label releases all of it */
    rc = ENOMEM;

    cfg->name = talloc_strdup(cfg, name);
    if (cfg->name == NULL) {
        goto fail;
    }

    cfg->identity = talloc_strdup(cfg, name);
    if (cfg->identity == NULL) {
        goto fail;
    }

    conf_path = talloc_asprintf(cfg, "config/services/%s", cfg->name);
    if (conf_path == NULL) {
        goto fail;
    }

    rc = confdb_get_string(ctx->cdb, cfg, conf_path, "command",
                           NULL, &cfg->command);
    if (rc != EOK) {
        DEBUG(0,("Failed to start service '%s'\n", cfg->name));
        goto fail;
    }

    if (cfg->command == NULL) {
        /* no custom command: fall back to the default binary */
        cfg->command = talloc_asprintf(cfg, "%s/sssd_%s -d %d",
                                       SSSD_LIBEXEC_PATH, cfg->name,
                                       debug_level);
        if (cfg->command == NULL) {
            rc = ENOMEM;
            goto fail;
        }
    }

    rc = confdb_get_int(ctx->cdb, cfg, conf_path, "timeout",
                        MONITOR_DEF_PING_TIME, &cfg->ping_time);
    if (rc != EOK) {
        DEBUG(0,("Failed to start service '%s'\n", cfg->name));
        goto fail;
    }

    talloc_free(conf_path);
    *svc_cfg = cfg;
    return EOK;

fail:
    talloc_free(cfg);
    return rc;
}
/* add_new_service
 * Read the configuration of the service called name and queue it for
 * start-up.
 * Returns EOK on success, or the error from get_service_config() or
 * start_service(); nothing is left allocated on failure. */
static int add_new_service (struct mt_ctx *ctx, const char *name)
{
    int ret;
    struct mt_svc *svc;

    ret = get_service_config(ctx, name, &svc);
    if (ret != EOK) {
        /* svc is NULL here and must not be passed to start_service() */
        DEBUG(0, ("Could not get service configuration for [%s]\n",
                  name));
        return ret;
    }

    ret = start_service(svc);
    if (ret != EOK) {
        DEBUG(0,("Failed to start service '%s'\n", svc->name));
        talloc_free(svc);
    }

    return ret;
}
/* get_provider_config
 * Build a new mt_svc for the data provider of the domain called name
 * from the settings found under "config/domains/<name>":
 *  - "provider": mandatory, a domain without it is not run (EIO),
 *  - "command": command line to run; when it is not configured a default
 *    sssd_be command line is built from the provider and domain name,
 *  - "timeout": ping interval (default MONITOR_DEF_PING_TIME).
 * The identity of a provider is "%BE_<name>".
 *
 * On success *svc_cfg receives the new structure (talloc child of ctx)
 * and EOK is returned. On failure *svc_cfg is NULL and ENOMEM, EIO or
 * the configuration database error is returned. */
static int get_provider_config(struct mt_ctx *ctx, const char *name,
                               struct mt_svc **svc_cfg)
{
    struct mt_svc *cfg;
    char *conf_path;
    int rc;

    *svc_cfg = NULL;

    cfg = talloc_zero(ctx, struct mt_svc);
    if (cfg == NULL) {
        return ENOMEM;
    }
    cfg->mt_ctx = ctx;

    /* everything below is allocated on cfg, so freeing cfg at the
     * fail label releases all of it */
    rc = ENOMEM;

    cfg->name = talloc_strdup(cfg, name);
    if (cfg->name == NULL) {
        goto fail;
    }

    cfg->identity = talloc_asprintf(cfg, "%%BE_%s", cfg->name);
    if (cfg->identity == NULL) {
        goto fail;
    }

    conf_path = talloc_asprintf(cfg, "config/domains/%s", name);
    if (conf_path == NULL) {
        goto fail;
    }

    rc = confdb_get_string(ctx->cdb, cfg, conf_path,
                           "provider", NULL, &cfg->provider);
    if (rc != EOK) {
        DEBUG(0, ("Failed to find provider from [%s] configuration\n", name));
        goto fail;
    }

    rc = confdb_get_string(ctx->cdb, cfg, conf_path,
                           "command", NULL, &cfg->command);
    if (rc != EOK) {
        DEBUG(0, ("Failed to find command from [%s] configuration\n", name));
        goto fail;
    }

    rc = confdb_get_int(ctx->cdb, cfg, conf_path, "timeout",
                        MONITOR_DEF_PING_TIME, &cfg->ping_time);
    if (rc != EOK) {
        DEBUG(0,("Failed to start service '%s'\n", cfg->name));
        goto fail;
    }

    talloc_free(conf_path);

    /* if no provider is present do not run the domain */
    if (cfg->provider == NULL) {
        rc = EIO;
        goto fail;
    }

    /* if there are no custom commands, build a default one */
    if (cfg->command == NULL) {
        cfg->command = talloc_asprintf(cfg,
                            "%s/sssd_be -d %d --provider %s --domain %s",
                            SSSD_LIBEXEC_PATH, debug_level,
                            cfg->provider, cfg->name);
        if (cfg->command == NULL) {
            rc = ENOMEM;
            goto fail;
        }
    }

    *svc_cfg = cfg;
    return EOK;

fail:
    talloc_free(cfg);
    return rc;
}
/* add_new_provider
 * Read the configuration of the data provider for the domain called
 * name and queue it for start-up.
 * Returns EOK on success, or the error from get_provider_config() or
 * start_service(). */
static int add_new_provider (struct mt_ctx *ctx, const char *name)
{
    struct mt_svc *provider_svc;
    int rc;

    rc = get_provider_config(ctx, name, &provider_svc);
    if (rc != EOK) {
        DEBUG(0, ("Could not get provider configuration for [%s]\n",
                  name));
        return rc;
    }

    rc = start_service(provider_svc);
    if (rc == EOK) {
        return rc;
    }

    DEBUG(0,("Failed to start service '%s'\n", provider_svc->name));
    talloc_free(provider_svc);
    return rc;
}
/* remove_service
 * Find the monitored service called name (case-insensitive), take it
 * off the service list and order it to shut down. Nothing happens if
 * no such service is on the list. */
static void remove_service(struct mt_ctx *ctx, const char *name)
{
    struct mt_svc *victim;

    /* Locate the service object in the list */
    for (victim = ctx->svc_list; victim != NULL; victim = victim->next) {
        if (strcasecmp(name, victim->name) == 0) {
            break;
        }
    }

    if (victim == NULL) {
        return;
    }

    /* Remove the service from the list */
    DLIST_REMOVE(ctx->svc_list, victim);

    /* Shut it down */
    if (monitor_shutdown_service(victim) != EOK) {
        DEBUG(0, ("Unable to shut down service [%s]!",
                  name));
        /* TODO: Handle this better */
    }
}
static int update_monitor_config(struct mt_ctx *ctx)
{
int ret, i, j;
struct mt_svc *cur_svc;
struct mt_svc *new_svc;
struct sss_domain_info *dom, *new_dom;
struct mt_ctx *new_config = talloc_zero(NULL, struct mt_ctx);
new_config->ev = ctx->ev;
new_config->cdb = ctx->cdb;
ret = get_monitor_config(new_config);
ctx->service_id_timeout = new_config->service_id_timeout;
/* Compare the old and new active services */
/* Have any services been shut down? */
for (i = 0; ctx->services[i]; i++) {
/* Search for this service in the new config */
for (j = 0; new_config->services[j]; j++) {
if (strcasecmp(ctx->services[i], new_config->services[j]) == 0)
break;
}
if (new_config->services[j] == NULL) {
/* This service is no longer configured.
* Shut it down.
*/
remove_service(ctx, ctx->services[i]);
}
}
/* Have any services been added or changed? */
for (i = 0; new_config->services[i]; i++) {
/* Search for this service in the old config */
for (j = 0; ctx->services[j]; j++) {
if (strcasecmp(new_config->services[i], ctx->services[j]) == 0)
break;
}
if (ctx->services[j] == NULL) {
/* New service added */
add_new_service(ctx, new_config->services[i]);
}
else {
/* Service already enabled, check for changes */
/* Locate the service object in the list */
cur_svc = ctx->svc_list;
for (cur_svc = ctx->svc_list; cur_svc; cur_svc = cur_svc->next) {
if (strcasecmp(ctx->services[i], cur_svc->name) == 0)
break;
}
if (cur_svc == NULL) {
DEBUG(0, ("Service entry missing data\n"));
/* This shouldn't be possible, but if it happens
* we'll throw an error
*/
talloc_free(new_config);
return EIO;
}
/* Read in the new configuration and compare it with the
* old one.
*/
ret = get_service_config(ctx, new_config->services[i], &new_svc);
if (ret != EOK) {
DEBUG(0, ("Unable to determine if service has changed.\n"));
DEBUG(0, ("Disabling service [%s].\n",
new_config->services[i]));
/* Not much we can do here, no way to know whether the
* current configuration is safe, and restarting the
* service won't work because the new startup requires
* this function to work. The only safe thing to do
* is stop the service.
*/
remove_service(ctx, new_config->services[i]);
continue;
}
if (strcmp(cur_svc->command, new_svc->command) != 0) {
/* The executable path has changed. We need to
* restart the binary completely. If we send a
* shutdown command, the monitor will automatically
* reload the process with the new command.
*/
talloc_free(cur_svc->command);
talloc_steal(cur_svc, new_svc->command);
cur_svc->command = new_svc->command;
/* TODO: be more graceful about this */
monitor_kill_service(cur_svc);
}
cur_svc->ping_time = new_svc->ping_time;
talloc_free(new_svc);
}
}
/* Replace the old service list with the new one */
talloc_free(ctx->services);
ctx->services = talloc_steal(ctx, new_config->services);
/* Compare data providers */
/* Have any providers been disabled? */
for (dom = ctx->domains; dom; dom = dom->next) {
for (new_dom = new_config->domains; new_dom; new_dom = new_dom->next) {
if (strcasecmp(dom->name, new_dom->name) == 0) break;
}
if (new_dom == NULL) {
/* This provider is no longer configured
* Shut it down
*/
remove_service(ctx, dom->name);
}
}
/* Have we added or changed any providers? */
for (new_dom = new_config->domains; new_dom; new_dom = new_dom->next) {
/* Search for this service in the old config */
for (dom = ctx->domains; dom; dom = dom->next) {
if (strcasecmp(dom->name, new_dom->name) == 0) break;
}
if (dom == NULL) {
/* New provider added */
add_new_provider(ctx, new_dom->name);
}
else {
/* Provider is already in the list.
* Check for changes.
*/
/* Locate the service object in the list */
cur_svc = ctx->svc_list;
while (cur_svc != NULL) {
if (strcasecmp(new_dom->name, cur_svc->name) == 0)
break;
cur_svc = cur_svc->next;
}
if (cur_svc == NULL) {
DEBUG(0, ("Service entry missing data\n"));
/* This shouldn't be possible
*/
talloc_free(new_config);
return EIO;
}
/* Read in the new configuration and compare it with
* the old one.
*/
ret = get_provider_config(ctx, new_dom->name, &new_svc);
if (ret != EOK) {
DEBUG(0, ("Unable to determine if service has changed.\n"));
DEBUG(0, ("Disabling service [%s].\n",
new_config->services[i]));
/* Not much we can do here, no way to know whether the
* current configuration is safe, and restarting the
* service won't work because the new startup requires
* this function to work. The only safe thing to do
* is stop the service.
*/
remove_service(ctx, dom->name);
continue;
}
if ((strcmp(cur_svc->command, new_svc->command) != 0) ||
(strcmp(cur_svc->provider, new_svc->provider) != 0)) {
/* The executable path or the provider has changed.
* We need to restart the binary completely. If we
* send a shutdown command, the monitor will
* automatically reload the process with the new
* command.
*/
talloc_free(cur_svc->command);
talloc_steal(cur_svc, new_svc->command);
cur_svc->command = new_svc->command;
/* TODO: be more graceful about this */
monitor_kill_service(cur_svc);
}
cur_svc->ping_time = new_svc->ping_time;
}
}
/* Replace the old domain list with the new one */
talloc_free(ctx->domains);
ctx->domains = talloc_steal(ctx, new_config->domains);
/* Signal all services to reload their configuration */
for(cur_svc = ctx->svc_list; cur_svc; cur_svc = cur_svc->next) {
service_signal_reload(cur_svc);
}
talloc_free(new_config);
return EOK;
}
/* monitor_hup
 * SIGHUP handler registered in monitor_process_init(): re-reads the
 * configuration and applies it to the running services. private_data
 * is the monitor context. */
static void monitor_hup(struct tevent_context *ev,
                        struct tevent_signal *se,
                        int signum,
                        int count,
                        void *siginfo,
                        void *private_data)
{
    struct mt_ctx *monitor_ctx;

    DEBUG(1, ("Received SIGHUP. Rereading configuration.\n"));

    monitor_ctx = talloc_get_type(private_data, struct mt_ctx);
    update_monitor_config(monitor_ctx);
}
/* monitor_process_init
 * Set up the monitor: read its configuration, make sure the sysdb
 * exists, start the D-BUS server, queue all services and data providers
 * for start-up, arm the global child checker and install the SIGHUP
 * handler.
 *
 * The monitor context is allocated on mem_ctx. It is freed again on
 * every failure path.
 * Returns EOK on success, ENOMEM/EIO or the error of the failing step
 * otherwise. */
int monitor_process_init(TALLOC_CTX *mem_ctx,
                         struct tevent_context *event_ctx,
                         struct confdb_ctx *cdb)
{
    struct mt_ctx *ctx;
    struct sysdb_ctx *sysdb;
    struct tevent_signal *tes;
    int ret, i;
    struct sss_domain_info *dom;

    ctx = talloc_zero(mem_ctx, struct mt_ctx);
    if (!ctx) {
        DEBUG(0, ("fatal error initializing monitor!\n"));
        return ENOMEM;
    }
    ctx->ev = event_ctx;
    ctx->cdb = cdb;

    ret = get_monitor_config(ctx);
    if (ret != EOK) {
        goto fail;
    }

    /* Avoid a startup race condition between InfoPipe
     * and NSS. If the sysdb doesn't exist yet, both
     * will try to create it at the same time. So
     * we'll have the monitor create it before either of
     * those processes start.
     */
    ret = sysdb_init(mem_ctx, ctx->ev, ctx->cdb,
                     NULL, &sysdb);
    if (ret != EOK) {
        goto fail;
    }
    /* the handle itself is not needed by the monitor */
    talloc_free(sysdb);

    /* Initialize D-BUS Server
     * The monitor will act as a D-BUS server for all
     * SSSD processes */
    ret = monitor_dbus_init(ctx);
    if (ret != EOK) {
        goto fail;
    }

    /* start all services; a service that fails to start is logged by
     * add_new_service() and does not stop the others */
    for (i = 0; ctx->services[i]; i++) {
        add_new_service(ctx, ctx->services[i]);
    }

    /* now start the data providers */
    for (dom = ctx->domains; dom; dom = dom->next) {
        add_new_provider(ctx, dom->name);
    }

    /* now start checking for global events */
    set_global_checker(ctx);

    /* Set up an event handler for a SIGHUP */
    tes = tevent_add_signal(ctx->ev, ctx, SIGHUP, 0,
                            monitor_hup, ctx);
    if (tes == NULL) {
        ret = EIO;
        goto fail;
    }

    return EOK;

fail:
    talloc_free(ctx);
    return ret;
}
/* mt_conn_destructor
 * talloc destructor of struct mt_conn: detaches the connection record
 * from its service so that the rest of the code sees that the service
 * has no connection anymore. Always returns 0. */
static int mt_conn_destructor(void *ptr)
{
    struct mt_conn *dying = talloc_get_type(ptr, struct mt_conn);

    dying->svc_ptr->mt_conn = NULL;

    return 0;
}
/*
 * dbus_service_init
 * Connection callback of the sbus server (data is the monitor context).
 * It initiates a query to the newly connected service to discover the
 * service's identity (the SERVICE_METHOD_IDENTITY method on the new
 * client). The reply is handled by identity_check(), which attaches the
 * connection to the matching service.
 *
 * Returns EOK if the request was queued. On any failure the connection
 * context is freed and ENOMEM or EIO is returned.
 */
static int dbus_service_init(struct sbus_conn_ctx *conn_ctx, void *data)
{
    struct mt_ctx *ctx;
    struct mt_svc *svc;
    struct mt_conn *mt_conn;
    DBusMessage *msg;
    DBusPendingCall *pending_reply = NULL;
    DBusConnection *conn;
    dbus_bool_t dbret;

    DEBUG(3, ("Initializing D-BUS Service\n"));

    ctx = talloc_get_type(data, struct mt_ctx);
    conn = sbus_get_connection(conn_ctx);

    /* hang off this memory to the connection so that when the connection
     * is freed we can call a destructor to clear up the structure and
     * have a way to know we need to restart the service */
    mt_conn = talloc(conn_ctx, struct mt_conn);
    if (!mt_conn) {
        DEBUG(0,("Out of memory?!\n"));
        talloc_free(conn_ctx);
        return ENOMEM;
    }
    mt_conn->conn_ctx = conn_ctx;

    /* at this stage we still do not know what service is this
     * we will know only after we get its identity, so we make
     * up a temporary fake service and complete the operation
     * when we receive the reply */
    svc = talloc_zero(mt_conn, struct mt_svc);
    if (!svc) {
        DEBUG(0,("Out of memory?!\n"));
        talloc_free(conn_ctx);
        return ENOMEM;
    }
    svc->mt_ctx = ctx;
    svc->mt_conn = mt_conn;

    mt_conn->svc_ptr = svc;
    /* the destructor is only installed once svc_ptr is valid */
    talloc_set_destructor((TALLOC_CTX *)mt_conn, mt_conn_destructor);

    /*
     * Set up identity request
     * This should be a well-known path and method
     * for all services
     */
    msg = dbus_message_new_method_call(NULL,
                                       SERVICE_PATH,
                                       SERVICE_INTERFACE,
                                       SERVICE_METHOD_IDENTITY);
    if (msg == NULL) {
        DEBUG(0,("Out of memory?!\n"));
        talloc_free(conn_ctx);
        return ENOMEM;
    }

    dbret = dbus_connection_send_with_reply(conn, msg, &pending_reply,
                                            ctx->service_id_timeout);
    /* libdbus reports success with a NULL pending call when the
     * connection is already disconnected */
    if (!dbret || pending_reply == NULL) {
        /*
         * Critical Failure
         * We can't communicate on this connection
         * We'll drop it using the default destructor.
         */
        DEBUG(0, ("D-BUS send failed.\n"));
        dbus_message_unref(msg);
        talloc_free(conn_ctx);
        return EIO;
    }

    /* Set up the reply handler */
    dbus_pending_call_set_notify(pending_reply, identity_check, svc, NULL);
    dbus_message_unref(msg);

    return EOK;
}
/* identity_check
 * Completion callback of the identity request sent by
 * dbus_service_init(). data is the temporary ("fake") service created
 * for the new connection.
 *
 * On a valid reply the name returned by the peer is looked up
 * (case-insensitive) among the identities of the monitored services;
 * the connection is then attached to the matching service and the fake
 * service is freed. In every other case (no reply, error reply,
 * unparsable reply, unknown peer) the connection is dropped.
 * The pending call and the reply are released on all paths. */
static void identity_check(DBusPendingCall *pending, void *data)
{
    struct mt_svc *fake_svc;
    struct mt_svc *svc;
    struct sbus_conn_ctx *conn_ctx;
    DBusMessage *reply;
    DBusError dbus_error;
    dbus_uint16_t svc_ver;
    char *svc_name;
    dbus_bool_t ret;
    int type;

    fake_svc = talloc_get_type(data, struct mt_svc);
    conn_ctx = fake_svc->mt_conn->conn_ctx;
    dbus_error_init(&dbus_error);

    reply = dbus_pending_call_steal_reply(pending);
    if (!reply) {
        /* reply should never be null. This function shouldn't be called
         * until reply is valid or timeout has occurred. If reply is NULL
         * here, something is seriously wrong and we should bail out.
         */
        DEBUG(0, ("Serious error. A reply callback was called but no reply was received and no timeout occurred\n"));

        /* Destroy this connection */
        sbus_disconnect(conn_ctx);
        goto done;
    }

    type = dbus_message_get_type(reply);
    switch (type) {
    case DBUS_MESSAGE_TYPE_METHOD_RETURN:
        ret = dbus_message_get_args(reply, &dbus_error,
                                    DBUS_TYPE_STRING, &svc_name,
                                    DBUS_TYPE_UINT16, &svc_ver,
                                    DBUS_TYPE_INVALID);
        if (!ret) {
            DEBUG(1,("Failed, to parse message, killing connection\n"));
            if (dbus_error_is_set(&dbus_error)) dbus_error_free(&dbus_error);
            sbus_disconnect(conn_ctx);
            goto done;
        }

        /* search this service in the list */
        svc = fake_svc->mt_ctx->svc_list;
        while (svc) {
            if (strcasecmp(svc->identity, svc_name) == 0) {
                break;
            }
            svc = svc->next;
        }
        if (!svc) {
            DEBUG(0,("Unable to find peer [%s] in list of services, killing connection!\n", svc_name));
            sbus_disconnect(conn_ctx);
            goto done;
        }

        /* transfer all from the fake service and get rid of it */
        fake_svc->mt_conn->svc_ptr = svc;
        svc->mt_conn = fake_svc->mt_conn;
        talloc_free(fake_svc);

        DEBUG(1, ("Service %s connected\n", svc->name));
        break;

    case DBUS_MESSAGE_TYPE_ERROR:
        DEBUG(0,("getIdentity returned an error [%s], closing connection.\n",
                 dbus_message_get_error_name(reply)));
        /* Falling through to default intentionally*/
    default:
        /*
         * Timeout or other error occurred or something
         * unexpected happened.
         * It doesn't matter which, because either way we
         * know that this connection isn't trustworthy.
         * We'll destroy it now.
         */
        sbus_disconnect(conn_ctx);
        /* fall out of the switch so that pending and reply are
         * released below instead of being leaked */
        break;
    }

done:
    dbus_pending_call_unref(pending);
    if (reply) {
        dbus_message_unref(reply);
    }
}
/* service_send_ping
 * Send a D-BUS ping (SERVICE_METHOD_PING) to a service; the answer is
 * handled by ping_check().
 *
 * Returns EOK if the ping was queued, ENXIO if the connection is not
 * available (either not yet set up or torn down), ENOMEM or EIO if the
 * message could not be built or sent. In the last two cases the
 * connection of the service is freed.
 */
static int service_send_ping(struct mt_svc *svc)
{
    DBusMessage *msg;
    DBusPendingCall *pending_reply = NULL;
    DBusConnection *conn;
    dbus_bool_t dbret;

    if (!svc->mt_conn) {
        return ENXIO;
    }

    DEBUG(4,("Pinging %s\n", svc->name));

    conn = sbus_get_connection(svc->mt_conn->conn_ctx);

    /*
     * Set up the ping request
     * This should be a well-known path and method
     * for all services
     */
    msg = dbus_message_new_method_call(NULL,
                                       SERVICE_PATH,
                                       SERVICE_INTERFACE,
                                       SERVICE_METHOD_PING);
    if (!msg) {
        DEBUG(0,("Out of memory?!\n"));
        talloc_free(svc->mt_conn->conn_ctx);
        return ENOMEM;
    }

    dbret = dbus_connection_send_with_reply(conn, msg, &pending_reply,
                                            svc->mt_ctx->service_id_timeout);
    /* libdbus reports success with a NULL pending call when the
     * connection is already disconnected */
    if (!dbret || pending_reply == NULL) {
        /*
         * Critical Failure
         * We can't communicate on this connection
         * We'll drop it using the default destructor.
         */
        DEBUG(0, ("D-BUS send failed.\n"));
        /* the message was never handed over, release it here */
        dbus_message_unref(msg);
        talloc_free(svc->mt_conn->conn_ctx);
        return EIO;
    }

    /* Set up the reply handler */
    dbus_pending_call_set_notify(pending_reply, ping_check, svc, NULL);
    dbus_message_unref(msg);

    return EOK;
}
/* ping_check
 * Completion callback of the ping sent by service_send_ping(). data is
 * the mt_svc that was pinged.
 *
 * A method return updates svc->last_pong. A timeout error is ignored
 * here (tasks_check_handler() acts on stale last_pong values). Any other
 * outcome drops the connection of the service, if it still has one. */
static void ping_check(DBusPendingCall *pending, void *data)
{
    struct mt_svc *svc;
    struct sbus_conn_ctx *conn_ctx;
    DBusMessage *reply;
    const char *dbus_error_name;
    int type;

    svc = talloc_get_type(data, struct mt_svc);
    /* mt_conn is cleared by mt_conn_destructor() when the connection
     * goes away, which may happen while the ping is pending */
    conn_ctx = svc->mt_conn ? svc->mt_conn->conn_ctx : NULL;

    reply = dbus_pending_call_steal_reply(pending);
    if (!reply) {
        /* reply should never be null. This function shouldn't be called
         * until reply is valid or timeout has occurred. If reply is NULL
         * here, something is seriously wrong and we should bail out.
         */
        DEBUG(0, ("A reply callback was called but no reply was received"
                  " and no timeout occurred\n"));

        /* Destroy this connection */
        if (conn_ctx) {
            sbus_disconnect(conn_ctx);
        }
        goto done;
    }

    type = dbus_message_get_type(reply);
    switch (type) {
    case DBUS_MESSAGE_TYPE_METHOD_RETURN:
        /* ok peer replied,
         * set the reply timestamp into the service structure */
        DEBUG(4,("Service %s replied to ping\n", svc->name));
        svc->last_pong = time(NULL);
        break;

    case DBUS_MESSAGE_TYPE_ERROR:
        dbus_error_name = dbus_message_get_error_name(reply);

        /* timeouts are handled in the main service check function */
        if (dbus_error_name != NULL &&
            strcmp(dbus_error_name, DBUS_ERROR_TIMEOUT) == 0) {
            break;
        }

        DEBUG(0,("A service PING returned an error [%s], closing connection.\n",
                 dbus_error_name ? dbus_error_name : "unknown"));
        /* Falling through to default intentionally*/
    default:
        /*
         * Timeout or other error occurred or something
         * unexpected happened.
         * It doesn't matter which, because either way we
         * know that this connection isn't trustworthy.
         * We'll destroy it now.
         */
        if (conn_ctx) {
            sbus_disconnect(conn_ctx);
        }
        break;
    }

done:
    dbus_pending_call_unref(pending);
    if (reply) {
        dbus_message_unref(reply);
    }
}
/* service_check_alive
 * Non-blocking check of the child process of a service.
 * Returns EOK while the child is still running, ECHILD once waitpid()
 * has collected it, and EINVAL for any other waitpid() result.
 */
static int service_check_alive(struct mt_svc *svc)
{
    int status;
    pid_t waited;

    DEBUG(4,("Checking service %s(%d) is still alive\n", svc->name, svc->pid));

    waited = waitpid(svc->pid, &status, WNOHANG);
    if (waited == 0) {
        /* no state change: the child is still there */
        return EOK;
    }

    if (waited == svc->pid) {
        if (WIFEXITED(status)) { /* children exited on it's own */
            /* TODO: check configuration to see if it was removed
             * from the list of process to run */
            DEBUG(0,("Process [%s] exited\n", svc->name));
        }
        return ECHILD;
    }

    DEBUG(1, ("bad return (%d) from waitpid() waiting for %d\n",
              waited, svc->pid));
    /* TODO: what do we do now ? */
    return EINVAL;
}
/* free_args
 * Release a NULL-terminated argument vector returned by parse_args().
 * Passing NULL is allowed and does nothing. */
static void free_args(char **args)
{
    int i;

    if (args) {
        for (i = 0; args[i]; i++) free(args[i]);
        free(args);
    }
}

/* parse a string into arguments.
 * arguments are separated by one or more spaces; leading, trailing and
 * repeated spaces never produce empty arguments.
 * '\' is an escape character and can be used only to escape
 * itself or the white space. A backslash in front of any other
 * character (or at the end of the string) is kept literally.
 *
 * Returns a malloc()ed, NULL-terminated vector of malloc()ed strings
 * that must be released with free_args(). Returns NULL if str is NULL,
 * contains no argument at all, or if memory runs out.
 */
static char **parse_args(const char *str)
{
    const char *p;
    char **ret = NULL;
    char **r;
    char *tmp;
    char *arg;
    size_t num = 0;     /* arguments stored in ret so far */
    size_t len = 0;     /* bytes of the current argument held in tmp */
    int escaped = 0;    /* previous character was an unconsumed '\' */
    int end_of_arg;

    if (!str) return NULL;

    /* every input character adds at most one byte to tmp, so the
     * length of the input is a safe upper bound for any argument */
    tmp = malloc(strlen(str) + 1);
    if (!tmp) return NULL;

    /* the loop also visits the terminating '\0' so that the last
     * argument is flushed by the same code as all the others */
    for (p = str; ; p++) {
        end_of_arg = 0;

        switch (*p) {
        case '\0':
            if (escaped) {
                /* trailing backslash: keep it */
                tmp[len++] = '\\';
                escaped = 0;
            }
            end_of_arg = 1;
            break;

        case '\\':
            if (escaped) {
                tmp[len++] = '\\';
                escaped = 0;
            } else {
                escaped = 1;
            }
            break;

        case ' ':
            if (escaped) {
                tmp[len++] = ' ';
                escaped = 0;
            } else {
                end_of_arg = 1;
            }
            break;

        default:
            if (escaped) {
                /* not an escape sequence: keep the backslash */
                tmp[len++] = '\\';
                escaped = 0;
            }
            tmp[len++] = *p;
            break;
        }

        /* len == 0 means we only saw separators: nothing to store */
        if (end_of_arg && len > 0) {
            r = realloc(ret, (num + 2) * sizeof(char *));
            if (!r) goto fail;
            ret = r;

            arg = malloc(len + 1);
            /* keep the vector terminated even on failure so that
             * free_args() stops at the right place */
            ret[num] = arg;
            ret[num + 1] = NULL;
            if (!arg) goto fail;

            memcpy(arg, tmp, len);
            arg[len] = '\0';

            num++;
            len = 0;
        }

        if (*p == '\0') break;
    }

    free(tmp);
    return ret;

fail:
    free(tmp);
    free_args(ret);
    return NULL;
}
static void service_startup_handler(struct tevent_context *ev,
                                    struct tevent_timer *te,
                                    struct timeval t, void *ptr);

/* start_service
 * Queue a service for start-up: service_startup_handler() is scheduled
 * as a timer event due immediately, with svc as talloc parent of the
 * timer.
 * Returns EOK if the event was queued, ENOMEM otherwise. */
static int start_service(struct mt_svc *svc)
{
    struct timeval due;

    DEBUG(4,("Queueing service %s for startup\n", svc->name));

    /* Add a timed event to start up the service.
     * We have to do this in order to avoid a race
     * condition where the service being started forks
     * and attempts to connect to the SBUS before
     * the monitor is serving it.
     */
    gettimeofday(&due, NULL);

    if (tevent_add_timer(svc->mt_ctx->ev, svc, due,
                         service_startup_handler, svc) != NULL) {
        return EOK;
    }

    DEBUG(0, ("Unable to queue service %s for startup\n", svc->name));
    return ENOMEM;
}
/* delist_service
 * talloc destructor installed on started services: removes the service
 * from the list of its monitor context. Always returns 0. */
static int delist_service(void *ptr)
{
    struct mt_svc *going_away;

    going_away = talloc_get_type(ptr, struct mt_svc);
    DLIST_REMOVE(going_away->mt_ctx->svc_list, going_away);

    return 0;
}
/* service_startup_handler
 * Timer callback queued by start_service(): forks and executes the
 * command of the service passed in ptr.
 *
 * In the parent the service is added to the monitored list, gets its
 * delist destructor and its first check timer. If fork() fails the
 * service is only logged and not started.
 * In the child the command line is split with parse_args() and
 * executed; the child exits with status 1 if that is not possible. */
static void service_startup_handler(struct tevent_context *ev,
                                    struct tevent_timer *te,
                                    struct timeval t, void *ptr)
{
    struct mt_svc *mt_svc;
    char **args;

    mt_svc = talloc_get_type(ptr, struct mt_svc);
    if (mt_svc == NULL) {
        return;
    }

    mt_svc->pid = fork();
    if (mt_svc->pid != 0) {
        if (mt_svc->pid == -1) {
            DEBUG(0, ("Could not fork child to start service [%s]. Continuing.\n", mt_svc->name));
            return;
        }

        /* Parent */
        /* start the pong clock now so that a service that never
         * answers is still detected by tasks_check_handler() */
        mt_svc->last_pong = time(NULL);
        DLIST_ADD(mt_svc->mt_ctx->svc_list, mt_svc);
        talloc_set_destructor((TALLOC_CTX *)mt_svc, delist_service);

        set_tasks_checker(mt_svc);

        return;
    }

    /* child */

    args = parse_args(mt_svc->command);
    if (args == NULL || args[0] == NULL) {
        /* empty command or out of memory: nothing to execute */
        DEBUG(0,("Could not parse command [%s]\n", mt_svc->command));
        _exit(1);
    }

    execvp(args[0], args);

    /* If we are here, exec() has failed
     * Print errno and abort quickly */
    DEBUG(0,("Could not exec %s, reason: %s\n", mt_svc->command, strerror(errno)));

    /* We have to call _exit() instead of exit() here
     * because a bug in D-BUS will cause the server to
     * close its socket at exit() */
    _exit(1);
}
/* main
 * Entry point of the monitor: parses the command line, performs the
 * generic server set-up (always with a pid file), initialises the
 * monitor and enters the main loop.
 * Exit codes: 1 for command line errors, 2 if server_setup() fails,
 * 3 if monitor_process_init() fails, 0 when the main loop returns. */
int main(int argc, const char *argv[])
{
    int opt;
    poptContext pc;
    int opt_daemon = 0;
    int opt_interactive = 0;
    int flags = 0;
    struct main_context *main_ctx;
    int ret;

    struct poptOption long_options[] = {
        POPT_AUTOHELP
        SSSD_MAIN_OPTS
        {"daemon", 'D', POPT_ARG_NONE, &opt_daemon, 0,
         "Become a daemon (default)", NULL },
        {"interactive", 'i', POPT_ARG_NONE, &opt_interactive, 0,
         "Run interactive (not a daemon)", NULL},
        { NULL }
    };

    pc = poptGetContext(argv[0], argc, argv, long_options, 0);

    /* no option needs special handling, so any value other than the
     * end marker (-1) is reported as an error */
    opt = poptGetNextOpt(pc);
    if (opt != -1) {
        fprintf(stderr, "\nInvalid option %s: %s\n\n",
                poptBadOption(pc, 0), poptStrerror(opt));
        poptPrintUsage(pc, stderr, 0);
        return 1;
    }

    if (opt_daemon && opt_interactive) {
        fprintf(stderr, "Option -i|--interactive is not allowed together with -D|--daemon\n");
        poptPrintUsage(pc, stderr, 0);
        return 1;
    }

    poptFreeContext(pc);

    if (opt_daemon) {
        flags |= FLAGS_DAEMON;
    }
    if (opt_interactive) {
        flags |= FLAGS_INTERACTIVE;
    }

    /* we want a pid file check */
    flags |= FLAGS_PID_FILE;

    /* set up things like debug , signals, daemonization, etc... */
    ret = server_setup("sssd", flags, MONITOR_CONF_ENTRY, &main_ctx);
    if (ret != EOK) {
        return 2;
    }

    ret = monitor_process_init(main_ctx,
                               main_ctx->event_ctx,
                               main_ctx->confdb_ctx);
    if (ret != EOK) {
        return 3;
    }

    /* loop on main */
    server_loop(main_ctx);

    return 0;
}