From c38fcbf3df0131caebec0ce0414fc409b63e9d93 Mon Sep 17 00:00:00 2001 From: Simo Sorce Date: Tue, 21 Oct 2008 13:57:35 -0400 Subject: Make the service startup functions return the pid when new processes are started. Monitor each service and restart it conditionally if it fails. This monitoring is extremely simple at the moment and just uses waitpid() to check if the client is alive; there is no active probing, as that will require dbus. Make nsssrv.c read the sss pipe config option from the config db. --- server/examples/config.ldif | 2 +- server/monitor.c | 206 +++++++++++++++++++++----------------------- server/monitor.h | 4 +- server/nss/nsssrv.c | 38 ++++++-- server/process.c | 7 +- server/process.h | 3 +- server/server.c | 19 ++-- server/service.c | 8 +- server/service.h | 6 +- server/service_task.c | 5 +- 10 files changed, 162 insertions(+), 136 deletions(-) (limited to 'server') diff --git a/server/examples/config.ldif b/server/examples/config.ldif index 0a0b929d..01811689 100644 --- a/server/examples/config.ldif +++ b/server/examples/config.ldif @@ -12,5 +12,5 @@ activeServices: nss dn: cn=nss,cn=services,cn=config cn: nss description: NSS Responder Configuration -unix-socket: /var/lib/sssd/nss/pipe +unixSocket: /var/lib/sss/pipes/nss diff --git a/server/monitor.c b/server/monitor.c index 93ebeddc..2d3283fd 100644 --- a/server/monitor.c +++ b/server/monitor.c @@ -19,55 +19,88 @@ along with this program. If not, see . 
*/ -#include -#include -#include #include -#include -#include -#include +#include #include +#include #include "../events/events.h" #include "util/util.h" #include "service.h" +#include "confdb/confdb.h" struct mt_ctx { - struct task_server *task; - struct fd_event *test_fde; - int test_fd; + struct event_context *ev; + struct confdb_ctx *cdb; + char **services; }; -static void set_nonblocking(int fd) -{ - unsigned v; - v = fcntl(fd, F_GETFL, 0); - fcntl(fd, F_SETFL, v | O_NONBLOCK); -} +struct mt_srv { + const char *name; + struct mt_ctx *mt_ctx; + pid_t pid; + time_t last_restart; + int restarts; +}; -static void set_close_on_exec(int fd) +static void set_tasks_checker(struct mt_srv *srv); + +static void tasks_check_handler(struct event_context *ev, + struct timed_event *te, + struct timeval t, void *ptr) { - unsigned v; - v = fcntl(fd, F_GETFD, 0); - fcntl(fd, F_SETFD, v | FD_CLOEXEC); -} + struct mt_srv *srv = talloc_get_type(ptr, struct mt_srv); + time_t now = time(NULL); + int status; + pid_t pid; + int ret; + + pid = waitpid(srv->pid, &status, WNOHANG); + if (pid == 0) { + set_tasks_checker(srv); + return; + } -static void set_test_timed_event(struct event_context *ev, - struct mt_ctx *ctx); + if (pid != srv->pid) { + DEBUG(1, ("bad return (%d) from waitpid() waiting for %d\n", + pid, srv->pid)); + /* TODO: what do we do now ? */ + } -static void test_timed_handler(struct event_context *ev, - struct timed_event *te, - struct timeval t, void *ptr) -{ - struct mt_ctx *ctx = talloc_get_type(ptr, struct mt_ctx); + if (WIFEXITED(status)) { /* children exited on it's own ?? 
*/ + /* TODO: check configuration to see if it was removed + * from the list of process to run */ + DEBUG(0,("Process [%s] exited on it's own ?!\n", srv->name)); + } - fprintf(stdout, "."); - fflush(stdout); + if (srv->last_restart != 0) { + if ((now - srv->last_restart) > 30) { /* TODO: get val from config */ + /* it was long ago reset restart threshold */ + srv->restarts = 0; + } + } + + /* restart the process */ + if (srv->restarts < 3) { /* TODO: get val from config */ + + ret = server_service_init(srv->name, srv->mt_ctx->ev, &srv->pid); + if (ret != EOK) { + DEBUG(0,("Failed to restart service '%s'\n", srv->name)); + talloc_free(srv); + return; + } - set_test_timed_event(ev, ctx); + srv->restarts++; + srv->last_restart = now; + + set_tasks_checker(srv); + return; + } + + DEBUG(0, ("Process [%s], definitely stopped!\n", srv->name)); + talloc_free(srv); } -static void set_test_timed_event(struct event_context *ev, - struct mt_ctx *ctx) +static void set_tasks_checker(struct mt_srv *srv) { struct timed_event *te = NULL; struct timeval tv; @@ -75,94 +108,55 @@ static void set_test_timed_event(struct event_context *ev, gettimeofday(&tv, NULL); tv.tv_sec += 2; tv.tv_usec = 0; - te = event_add_timed(ev, ctx, tv, test_timed_handler, ctx); + te = event_add_timed(srv->mt_ctx->ev, srv, tv, tasks_check_handler, srv); if (te == NULL) { - DEBUG(0, ("failed to add event!\n")); - task_server_terminate(ctx->task, "fatal error initializing service\n"); - } -} - -static void test_fd_handler(struct event_context *ev, - struct fd_event *fde, - uint16_t flags, void *ptr) -{ - /* accept and close */ - struct mt_ctx *ctx = talloc_get_type(ptr, struct mt_ctx); - struct sockaddr_un addr; - socklen_t len; - int fd; - - memset(&addr, 0, sizeof(addr)); - len = sizeof(addr); - fd = accept(ctx->test_fd, (struct sockaddr *)&addr, &len); - if (fd == -1) { - return; + DEBUG(0, ("failed to add event, monitor offline for [%s]!\n", + srv->name)); + /* FIXME: shutdown ? 
*/ } - - close(fd); - return; } -/* create a unix socket and listen to it */ -static void set_test_fd_event(struct event_context *ev, - struct mt_ctx *ctx) +int start_monitor(TALLOC_CTX *mem_ctx, + struct event_context *event_ctx, + struct confdb_ctx *cdb) { - struct sockaddr_un addr; - const char *sock_name = "/tmp/foo/test_sock"; - - /* make sure we have no old sockets around */ - unlink(sock_name); + struct mt_ctx *ctx; + struct mt_srv *srv; + int ret, i; - ctx->test_fd = socket(AF_UNIX, SOCK_STREAM, 0); - if (ctx->test_fd == -1) { - return; + ctx = talloc_zero(mem_ctx, struct mt_ctx); + if (!ctx) { + DEBUG(0, ("fatal error initializing monitor!\n")); + return ENOMEM; } + ctx->ev = event_ctx; - set_nonblocking(ctx->test_fd); - set_close_on_exec(ctx->test_fd); + ret = confdb_get_param(cdb, mem_ctx, "config.services", + "activeServices", &ctx->services); - memset(&addr, 0, sizeof(addr)); - addr.sun_family = AF_UNIX; - strncpy(addr.sun_path, sock_name, sizeof(addr.sun_path)); - - if (bind(ctx->test_fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) { - DEBUG(0,("Unable to bind on socket '%s'\n", sock_name)); - goto failed; - } - if (listen(ctx->test_fd, 10) != 0) { - DEBUG(0,("Unable to listen on socket '%s'\n", sock_name)); - goto failed; + if (ctx->services[0] == NULL) { + DEBUG(0, ("No services configured!\n")); + return EINVAL; } - ctx->test_fde = event_add_fd(ev, ctx, ctx->test_fd, - EVENT_FD_READ, test_fd_handler, ctx); + for (i = 0; ctx->services[i]; i++) { - return; + srv = talloc_zero(ctx, struct mt_srv); + if (!srv) { + talloc_free(ctx); + return ENOMEM; + } + srv->name = ctx->services[i]; + srv->mt_ctx = ctx; -failed: - close(ctx->test_fd); -} - -void monitor_task_init(struct task_server *task) -{ - struct mt_ctx *ctx; - - task_server_set_title(task, "sssd[monitor]"); + ret = server_service_init(srv->name, event_ctx, &srv->pid); + if (ret != EOK) { + DEBUG(0,("Failed to restart service '%s'\n", srv->name)); + talloc_free(srv); + } - ctx = 
talloc_zero(task, struct mt_ctx); - if (!ctx) { - task_server_terminate(task, "fatal error initializing mt_ctx\n"); - return; + set_tasks_checker(srv); } - ctx->task = task; - - /* without an fd event the event system just exits. - * We must always have at least one file base event around - */ - set_test_fd_event(task->event_ctx, ctx); - - /* our test timed event */ - set_test_timed_event(task->event_ctx, ctx); - fprintf(stdout, "test monitor process started!\n"); + return EOK; } diff --git a/server/monitor.h b/server/monitor.h index 024c29c2..17094a85 100644 --- a/server/monitor.h +++ b/server/monitor.h @@ -1 +1,3 @@ -void monitor_task_init(struct task_server *task); +int start_monitor(TALLOC_CTX *mem_ctx, + struct event_context *event_ctx, + struct confdb_ctx *cdb); diff --git a/server/nss/nsssrv.c b/server/nss/nsssrv.c index ae03c579..d1dfa344 100644 --- a/server/nss/nsssrv.c +++ b/server/nss/nsssrv.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -169,7 +170,7 @@ static void accept_fd_handler(struct event_context *ev, len = sizeof(cctx->addr); cctx->cfd = accept(nctx->lfd, (struct sockaddr *)&cctx->addr, &len); if (cctx->cfd == -1) { - DEBUG(0, ("Accept failed [%s]", strerror(errno))); + DEBUG(1, ("Accept failed [%s]", strerror(errno))); talloc_free(cctx); return; } @@ -179,7 +180,7 @@ static void accept_fd_handler(struct event_context *ev, if (!cctx->cfde) { close(cctx->cfd); talloc_free(cctx); - DEBUG(0, ("Failed to queue client handler\n")); + DEBUG(2, ("Failed to queue client handler\n")); } cctx->ev = ev; @@ -199,14 +200,15 @@ static void set_unix_socket(struct event_context *ev, { struct sockaddr_un addr; - /* make sure we have no old sockets around */ - unlink(sock_name); - nctx->lfd = socket(AF_UNIX, SOCK_STREAM, 0); if (nctx->lfd == -1) { return; } + /* Set the umask so that permissions are set right on the socket. + * It must be readable and writable by anybody on the system. 
*/ + umask(0111); + set_nonblocking(nctx->lfd); set_close_on_exec(nctx->lfd); @@ -214,6 +216,9 @@ static void set_unix_socket(struct event_context *ev, addr.sun_family = AF_UNIX; strncpy(addr.sun_path, sock_name, sizeof(addr.sun_path)); + /* make sure we have no old sockets around */ + unlink(sock_name); + if (bind(nctx->lfd, (struct sockaddr *)&addr, sizeof(addr)) == -1) { DEBUG(0,("Unable to bind on socket '%s'\n", sock_name)); goto failed; @@ -226,9 +231,15 @@ static void set_unix_socket(struct event_context *ev, nctx->lfde = event_add_fd(ev, nctx, nctx->lfd, EVENT_FD_READ, accept_fd_handler, nctx); + /* we want default permissions on created files to be very strict, + so set our umask to 0177 */ + umask(0177); return; failed: + /* we want default permissions on created files to be very strict, + so set our umask to 0177 */ + umask(0177); close(nctx->lfd); } @@ -236,6 +247,8 @@ void nss_task_init(struct task_server *task) { struct confdb_ctx *cdb; struct nss_ctx *nctx; + const char *sock_name; + char **values; int ret; task_server_set_title(task, "sssd[nsssrv]"); @@ -253,7 +266,20 @@ void nss_task_init(struct task_server *task) } nctx->task = task; - set_unix_socket(task->event_ctx, nctx, SSS_NSS_SOCKET_NAME); + ret = confdb_get_param(cdb, nctx, + "config.services.nss", "unixSocket", &values); + if (ret != EOK) { + task_server_terminate(task, "fatal error reading configuration\n"); + return; + } + if (values[0]) { + sock_name = talloc_steal(nctx, values[0]); + } else { + sock_name = talloc_strdup(nctx, SSS_NSS_SOCKET_NAME); + } + talloc_free(values); + + set_unix_socket(task->event_ctx, nctx, sock_name); ret = nss_ldb_init(nctx, task->event_ctx, cdb, &nctx->lctx); if (ret != EOK) { diff --git a/server/process.c b/server/process.c index e2600a5d..ebe6ba1e 100644 --- a/server/process.c +++ b/server/process.c @@ -44,7 +44,8 @@ static int none_setproctitle(const char *fmt, ...) 
int process_new_task(struct event_context *ev, const char *service_name, void (*new_task)(struct event_context *, void *), - void *private) + void *private, + pid_t *rpid) { pid_t pid; struct event_context *ev2; @@ -62,6 +63,10 @@ int process_new_task(struct event_context *ev, res = ECHILD; } + if (rpid) { + *rpid = pid; + } + /* ... go back to the event loop */ return res; } diff --git a/server/process.h b/server/process.h index e5eeb5bc..be22a562 100644 --- a/server/process.h +++ b/server/process.h @@ -4,7 +4,8 @@ int process_new_task(struct event_context *ev, const char *service_name, void (*new_task)(struct event_context *, void *), - void *private); + void *private, + pid_t *rpid); void process_set_title(struct event_context *ev, const char *title); void process_terminate(struct event_context *ev, const char *reason); diff --git a/server/server.c b/server/server.c index 683b039d..b6664e1d 100644 --- a/server/server.c +++ b/server/server.c @@ -33,6 +33,7 @@ #include "../ldb/include/ldb.h" #include "service.h" #include "confdb/confdb.h" +#include "monitor.h" extern void monitor_task_init(struct task_server *task); extern void nss_task_init(struct task_server *task); @@ -119,7 +120,6 @@ int main(int argc, const char *argv[]) TALLOC_CTX *mem_ctx; uint16_t stdin_event_flags; int status; - char **services; enum { OPT_DAEMON = 1000, @@ -213,23 +213,14 @@ int main(int argc, const char *argv[]) discard_const(argv[0])); /* Services */ - register_server_service("monitor", monitor_task_init); register_server_service("nss", nss_task_init); - status = confdb_get_param(confdb_ctx, mem_ctx, "config.services", - "activeServices", &services); - - if (services[0] == NULL) { - DEBUG(0, ("No services configured!\n")); - return 2; + /* the monitor starts the services */ + status = start_monitor(mem_ctx, event_ctx, confdb_ctx); + if (status != EOK) { + return 1; } - status = server_service_startup(event_ctx, (const char **)services); - if (status != EOK) { - DEBUG(0,("Starting 
Services failed - %d\n", status)); - return 1; - } - /* wait for events - this is where smbd sits for most of its life */ event_loop_wait(event_ctx); diff --git a/server/service.c b/server/service.c index eaaa2c98..56a09712 100644 --- a/server/service.c +++ b/server/service.c @@ -53,14 +53,16 @@ int register_server_service(const char *name, /* initialise a server service */ -static int server_service_init(const char *name, struct event_context *ev) +int server_service_init(const char *name, + struct event_context *ev, + pid_t *rpid) { struct registered_server *srv; for (srv=registered_servers; srv; srv=srv->next) { if (strcasecmp(name, srv->service_name) == 0) { return task_server_startup(ev, srv->service_name, - srv->task_init); + srv->task_init, rpid); } } return EINVAL; @@ -83,7 +85,7 @@ int server_service_startup(struct event_context *event_ctx, for (i = 0; server_services[i]; i++) { int status; - status = server_service_init(server_services[i], event_ctx); + status = server_service_init(server_services[i], event_ctx, NULL); if (status != EOK) { DEBUG(0,("Failed to start service '%s'\n", server_services[i])); diff --git a/server/service.h b/server/service.h index 815493ba..6f1a1388 100644 --- a/server/service.h +++ b/server/service.h @@ -31,12 +31,16 @@ int register_server_service(const char *name, void (*task_init)(struct task_server *)); int server_service_startup(struct event_context *event_ctx, const char **server_services); +int server_service_init(const char *name, + struct event_context *ev, + pid_t *rpid); /* The following definitions come from service_task.c */ int task_server_startup(struct event_context *event_ctx, const char *service_name, - void (*task_init)(struct task_server *)); + void (*task_init)(struct task_server *), + pid_t *rpid); void task_server_set_title(struct task_server *task, const char *title); void task_server_terminate(struct task_server *task, const char *reason); diff --git a/server/service_task.c b/server/service_task.c index 
c2f4e4ac..364aa6f8 100644 --- a/server/service_task.c +++ b/server/service_task.c @@ -68,7 +68,8 @@ static void task_server_callback(struct event_context *event_ctx, void *private) */ int task_server_startup(struct event_context *event_ctx, const char *service_name, - void (*task_init)(struct task_server *)) + void (*task_init)(struct task_server *), + pid_t *rpid) { struct task_state *state; @@ -77,7 +78,7 @@ int task_server_startup(struct event_context *event_ctx, state->task_init = task_init; - return process_new_task(event_ctx, service_name, task_server_callback, state); + return process_new_task(event_ctx, service_name, task_server_callback, state, rpid); } /* -- cgit