From 0d72f05cc87f42a8c2856c96501c64d69541be00 Mon Sep 17 00:00:00 2001 From: Jakub Hrozek Date: Fri, 16 Apr 2010 17:58:28 +0200 Subject: Support SRV servers in failover Adds a new failover API call fo_add_srv_server that allows the caller to specify a server that is later resolved into a list of specific servers using SRV requests. Also adds a new failover option that specifies how long the servers resolved from an SRV query are considered valid before they need to be refreshed. The "real" servers to connect to are returned to the user as usual, using the fo_resolve_service_{send,recv} calls. Make SRV resolution work with c-ares 1.6 --- src/providers/fail_over.c | 560 +++++++++++++++++++++++++++++++++++++++++----- src/providers/fail_over.h | 14 ++ 2 files changed, 523 insertions(+), 51 deletions(-) (limited to 'src/providers') diff --git a/src/providers/fail_over.c b/src/providers/fail_over.c index 02d913b9..231b9f08 100644 --- a/src/providers/fail_over.c +++ b/src/providers/fail_over.c @@ -5,8 +5,9 @@ Authors: Martin Nagy + Jakub Hrozek - Copyright (C) Red Hat, Inc 2009 + Copyright (C) Red Hat, Inc 2010 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -40,6 +41,14 @@ #define DEFAULT_PORT_STATUS PORT_NEUTRAL #define DEFAULT_SERVER_STATUS SERVER_NAME_NOT_RESOLVED +#define DEFAULT_SRV_STATUS SRV_NEUTRAL + +enum srv_lookup_status { + SRV_NEUTRAL, /* We didn't try this SRV lookup yet */ + SRV_RESOLVED, /* This SRV lookup is resolved */ + SRV_NOT_RESOLVED, /* Could not resolve this SRV lookup */ + SRV_EXPIRED /* Need to refresh the SRV query */ +}; struct fo_ctx { struct fo_service *service_list; @@ -66,6 +75,7 @@ struct fo_server { void *user_data; int port; int port_status; + struct srv_data *srv_data; struct fo_service *service; struct timeval last_status_change; struct server_common *common; @@ -86,6 +96,17 @@ struct server_common { struct timeval last_status_change; }; +struct 
srv_data { + char *domain; + char *proto; + char *srv; + + struct fo_server *meta; + + int srv_lookup_status; + struct timeval last_status_change; +}; + struct resolve_service_request { struct resolve_service_request *prev; struct resolve_service_request *next; @@ -115,6 +136,7 @@ fo_context_init(TALLOC_CTX *mem_ctx, struct fo_options *opts) return NULL; } + ctx->opts->srv_retry_timeout = opts->srv_retry_timeout; ctx->opts->retry_timeout = opts->retry_timeout; ctx->opts->family_order = opts->family_order; @@ -138,6 +160,23 @@ str_port_status(enum port_status status) return "unknown port status"; } +static const char * +str_srv_data_status(enum srv_lookup_status status) +{ + switch (status) { + case SRV_NEUTRAL: + return "neutral"; + case SRV_RESOLVED: + return "resolved"; + case SRV_NOT_RESOLVED: + return "not resolved"; + case SRV_EXPIRED: + return "expired"; + } + + return "unknown SRV lookup status"; +} + static const char * str_server_status(enum server_status status) { @@ -157,6 +196,105 @@ str_server_status(enum server_status status) return "unknown server status"; } +int fo_is_srv_lookup(struct fo_server *s) +{ + return s && s->srv_data; +} + +static char * +get_srv_query(TALLOC_CTX *mem_ctx, struct fo_server *server) +{ + char *query; + + if (!fo_is_srv_lookup(server)) { + return NULL; + } + + query = talloc_asprintf(mem_ctx, "_%s._%s.%s", server->srv_data->srv, + server->srv_data->proto, + server->srv_data->domain); + return query; +} + +static struct fo_server * +collapse_srv_lookup(struct fo_server *server) +{ + struct fo_server *tmp, *meta; + + meta = server->srv_data->meta; + DEBUG(4, ("Need to refresh SRV lookup for domain %s\n", meta->srv_data->domain)) + + if (server != meta) { + while (server->prev && server->prev->srv_data == meta->srv_data) { + tmp = server->prev; + DLIST_REMOVE(server->service->server_list, tmp); + talloc_zfree(tmp); + } + while (server->next && server->next->srv_data == meta->srv_data) { + tmp = server->next; + 
DLIST_REMOVE(server->service->server_list, tmp); + talloc_zfree(tmp); + } + + if (server == server->service->active_server) { + server->service->active_server = NULL; + } + if (server == server->service->last_tried_server) { + server->service->last_tried_server = meta; + } + + /* add back the meta server to denote SRV lookup */ + DLIST_ADD_AFTER(server->service->server_list, meta, server); + DLIST_REMOVE(server->service->server_list, server); + talloc_zfree(server); + } + + meta->srv_data->srv_lookup_status = SRV_NEUTRAL; + meta->srv_data->last_status_change.tv_sec = 0; + + return meta; +} + +static enum srv_lookup_status +get_srv_data_status(struct srv_data *data) +{ + struct timeval tv; + time_t timeout; + + timeout = data->meta->service->ctx->opts->srv_retry_timeout; + gettimeofday(&tv, NULL); + + if (timeout && STATUS_DIFF(data, tv) > timeout) { + switch(data->srv_lookup_status) { + case SRV_EXPIRED: + case SRV_NEUTRAL: + break; + case SRV_RESOLVED: + data->srv_lookup_status = SRV_EXPIRED; + data->last_status_change.tv_sec = 0; + break; + case SRV_NOT_RESOLVED: + data->srv_lookup_status = SRV_NEUTRAL; + data->last_status_change.tv_sec = 0; + break; + default: + DEBUG(1, ("Unknown state for SRV server!\n")); + } + } + + return data->srv_lookup_status; +} + +static void +set_srv_data_status(struct srv_data *data, enum srv_lookup_status status) +{ + DEBUG(4, ("Marking SRV lookup of service '%s' as '%s'\n", + data->meta->service->name, str_srv_data_status(status))); + + gettimeofday(&data->last_status_change, NULL); + data->srv_lookup_status = status; +} + /* * This function will return the status of the server. If the status was * last updated a long time ago, we will first reset the status. 
@@ -359,22 +497,23 @@ create_server_common(TALLOC_CTX *mem_ctx, struct fo_ctx *ctx, const char *name) } int -fo_add_server(struct fo_service *service, const char *name, int port, - void *user_data) +fo_add_srv_server(struct fo_service *service, const char *srv, + const char *domain, const char *proto, void *user_data) { struct fo_server *server; - int ret; - DEBUG(3, ("Adding new server '%s', to service '%s'\n", - name ? name : "(no name)", service->name)); + DEBUG(3, ("Adding new SRV server in domain '%s', to service '%s'\n", + domain, service->name)); + DLIST_FOR_EACH(server, service->server_list) { - if (server->port != port || server->user_data != user_data) + if (server->user_data != user_data) continue; - if (name == NULL && server->common == NULL) { - return EEXIST; - } else if (name != NULL && server->common != NULL) { - if (!strcasecmp(name, server->common->name)) + + if (fo_is_srv_lookup(server)) { + if (strcasecmp(server->srv_data->domain, domain) == 0 && + strcasecmp(server->srv_data->proto, proto) == 0) { return EEXIST; + } } } @@ -382,6 +521,42 @@ fo_add_server(struct fo_service *service, const char *name, int port, if (server == NULL) return ENOMEM; + server->user_data = user_data; + server->service = service; + server->port_status = DEFAULT_PORT_STATUS; + + /* add the SRV-specific data */ + server->srv_data = talloc_zero(service, struct srv_data); + if (server->srv_data == NULL) + return ENOMEM; + + server->srv_data->domain = talloc_strdup(server->srv_data, domain); + server->srv_data->proto = talloc_strdup(server->srv_data, proto); + server->srv_data->srv = talloc_strdup(server->srv_data, srv); + if (server->srv_data->domain == NULL || + server->srv_data->proto == NULL || + server->srv_data->srv == NULL) + return ENOMEM; + + server->srv_data->meta = server; + server->srv_data->srv_lookup_status = DEFAULT_SRV_STATUS; + server->srv_data->last_status_change.tv_sec = 0; + + DLIST_ADD_END(service->server_list, server, struct fo_server *); + return EOK; 
+} + +static struct fo_server * +create_fo_server(struct fo_service *service, const char *name, + int port, void *user_data) +{ + struct fo_server *server; + int ret; + + server = talloc_zero(service, struct fo_server); + if (server == NULL) + return NULL; + server->port = port; server->user_data = user_data; server->service = service; @@ -393,14 +568,41 @@ fo_add_server(struct fo_service *service, const char *name, int port, server->common = create_server_common(server, service->ctx, name); if (server->common == NULL) { talloc_free(server); - return ENOMEM; + return NULL; } } else if (ret != EOK) { talloc_free(server); - return ret; + return NULL; } } + return server; +} + +int +fo_add_server(struct fo_service *service, const char *name, int port, + void *user_data) +{ + struct fo_server *server; + + DEBUG(3, ("Adding new server '%s', to service '%s'\n", + name ? name : "(no name)", service->name)); + DLIST_FOR_EACH(server, service->server_list) { + if (server->port != port || server->user_data != user_data) + continue; + if (name == NULL && server->common == NULL) { + return EEXIST; + } else if (name != NULL && server->common != NULL) { + if (!strcasecmp(name, server->common->name)) + return EEXIST; + } + } + + server = create_fo_server(service, name, port, user_data); + if (!server) { + return ENOMEM; + } + DLIST_ADD_END(service->server_list, server, struct fo_server *); return EOK; @@ -489,9 +691,24 @@ set_lookup_hook(struct fo_server *server, struct tevent_req *req) struct resolve_service_state { struct fo_server *server; + + struct resolv_ctx *resolv; + struct tevent_context *ev; + struct fo_ctx *fo_ctx; }; + +static void fo_resolve_service_cont(struct tevent_req *subreq); static void fo_resolve_service_done(struct tevent_req *subreq); +static bool fo_resolve_service_server(struct tevent_req *req); + +/* Forward declarations for SRV resolving */ +static struct tevent_req * +resolve_srv_send(TALLOC_CTX *mem_ctx, struct tevent_context *ev, + struct resolv_ctx 
*resolv, struct fo_ctx *ctx, + struct fo_server *server); +static int +resolve_srv_recv(struct tevent_req *req, struct fo_server **server); struct tevent_req * fo_resolve_service_send(TALLOC_CTX *mem_ctx, struct tevent_context *ev, @@ -509,83 +726,139 @@ fo_resolve_service_send(TALLOC_CTX *mem_ctx, struct tevent_context *ev, if (req == NULL) return NULL; + state->resolv = resolv; + state->ev = ev; + state->fo_ctx = ctx; + ret = get_first_server_entity(service, &server); if (ret != EOK) { DEBUG(1, ("No available servers for service '%s'\n", service->name)); goto done; } + if (fo_is_srv_lookup(server)) { + /* Don't know the server yet, must do a SRV lookup */ + subreq = resolve_srv_send(state, ev, resolv, + ctx, server); + if (subreq == NULL) { + ret = ENOMEM; + goto done; + } + + tevent_req_set_callback(subreq, + fo_resolve_service_cont, + req); + return req; + } + + /* This is a regular server, just do hostname lookup */ state->server = server; + if (fo_resolve_service_server(req)) { + tevent_req_post(req, ev); + } - if (server->common == NULL) { - /* This server doesn't have a name, we don't do name resolution. 
*/ - tevent_req_done(req); + ret = EOK; +done: + if (ret != EOK) { + tevent_req_error(req, ret); tevent_req_post(req, ev); - return req; } + return req; +} - switch (get_server_status(server)) { +static void set_server_common_status(struct server_common *common, + enum server_status status); + +/* SRV resolving finished, see if we got server to work with */ +static void +fo_resolve_service_cont(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data(subreq, + struct tevent_req); + struct resolve_service_state *state = tevent_req_data(req, + struct resolve_service_state); + int ret; + + ret = resolve_srv_recv(subreq, &state->server); + talloc_zfree(subreq); + + if (ret) { + tevent_req_error(req, ret); + return; + } + + fo_resolve_service_server(req); +} + +static bool +fo_resolve_service_server(struct tevent_req *req) +{ + struct resolve_service_state *state = tevent_req_data(req, + struct resolve_service_state); + struct tevent_req *subreq; + int ret; + + switch (get_server_status(state->server)) { case SERVER_NAME_NOT_RESOLVED: /* Request name resolution. */ - subreq = resolv_gethostbyname_send(server->common, ev, resolv, - server->common->name, - ctx->opts->family_order); + subreq = resolv_gethostbyname_send(state->server->common, + state->ev, state->resolv, + state->server->common->name, + state->fo_ctx->opts->family_order); if (subreq == NULL) { - ret = ENOMEM; - goto done; + tevent_req_error(req, ENOMEM); + return true; } - tevent_req_set_callback(subreq, fo_resolve_service_done, server->common); - fo_set_server_status(server, SERVER_RESOLVING_NAME); + tevent_req_set_callback(subreq, fo_resolve_service_done, req); + fo_set_server_status(state->server, SERVER_RESOLVING_NAME); /* FALLTHROUGH */ case SERVER_RESOLVING_NAME: /* Name resolution is already under way. Just add ourselves into the * waiting queue so we get notified after the operation is finished. 
*/ - ret = set_lookup_hook(server, req); - if (ret != EOK) - goto done; + ret = set_lookup_hook(state->server, req); + if (ret != EOK) { + tevent_req_error(req, ret); + return true; + } break; default: /* The name is already resolved. Return immediately. */ tevent_req_done(req); - tevent_req_post(req, ev); - break; + return true; } -done: - if (ret != EOK) { - tevent_req_error(req, ret); - tevent_req_post(req, ev); - } - return req; + return false; } -static void set_server_common_status(struct server_common *common, - enum server_status status); - static void fo_resolve_service_done(struct tevent_req *subreq) { + struct tevent_req *req = tevent_req_callback_data(subreq, + struct tevent_req); + struct resolve_service_state *state = tevent_req_data(req, + struct resolve_service_state); + struct server_common *common; int resolv_status; struct resolve_service_request *request; - struct server_common *common; int ret; - common = tevent_req_callback_data(subreq, struct server_common); - - if (common->hostent != NULL) { - talloc_zfree(common->hostent); + if (state->server->common->hostent != NULL) { + talloc_zfree(state->server->common->hostent); } - ret = resolv_gethostbyname_recv(subreq, common, - &resolv_status, NULL, &common->hostent); - talloc_free(subreq); + ret = resolv_gethostbyname_recv(subreq, state->server->common, + &resolv_status, NULL, + &state->server->common->hostent); + talloc_zfree(subreq); if (ret != EOK) { - DEBUG(1, ("Failed to resolve server '%s': %s\n", common->name, - resolv_strerror(resolv_status))); - set_server_common_status(common, SERVER_NOT_WORKING); + DEBUG(1, ("Failed to resolve server '%s': %s\n", + state->server->common->name, + resolv_strerror(resolv_status))); + set_server_common_status(state->server->common, SERVER_NOT_WORKING); } else { - set_server_common_status(common, SERVER_NAME_RESOLVED); + set_server_common_status(state->server->common, SERVER_NAME_RESOLVED); } /* Take care of all requests for this server. 
*/ + common = state->server->common; /* state can disappear now */ while ((request = common->request_list) != NULL) { DLIST_REMOVE(common->request_list, request); if (resolv_status) { @@ -617,6 +890,187 @@ fo_resolve_service_recv(struct tevent_req *req, struct fo_server **server) return EOK; } +/******************************************************************* + * Resolve the server to connect to using a SRV query. * + *******************************************************************/ + +static void resolve_srv_done(struct tevent_req *subreq); + +struct resolve_srv_state { + struct fo_server *meta; + struct fo_service *service; + + struct fo_server *out; + + struct resolv_ctx *resolv; + struct tevent_context *ev; + struct fo_ctx *fo_ctx; +}; + +static struct tevent_req * +resolve_srv_send(TALLOC_CTX *mem_ctx, struct tevent_context *ev, + struct resolv_ctx *resolv, struct fo_ctx *ctx, + struct fo_server *server) +{ + int ret; + char *query; + struct tevent_req *req; + struct tevent_req *subreq; + struct resolve_srv_state *state; + int status; + + req = tevent_req_create(mem_ctx, &state, struct resolve_srv_state); + if (req == NULL) + return NULL; + + state->service = server->service; + state->ev = ev; + state->resolv = resolv; + state->fo_ctx = ctx; + state->meta = server; + + status = get_srv_data_status(server->srv_data); + DEBUG(6, ("The status of SRV lookup is %s\n", + str_srv_data_status(status))); + switch(status) { + case SRV_EXPIRED: /* Need a refresh */ + state->meta = collapse_srv_lookup(server); + /* FALLTHROUGH */ + case SRV_NEUTRAL: /* Request SRV lookup */ + query = get_srv_query(state, state->meta); + if (!query) { + ret = ENOMEM; + goto done; + } + DEBUG(4, ("Searching for servers via SRV query '%s'\n", query)); + + subreq = resolv_getsrv_send(state, ev, resolv, query); + if (subreq == NULL) { + ret = ENOMEM; + goto done; + } + tevent_req_set_callback(subreq, resolve_srv_done, req); + break; + case SRV_NOT_RESOLVED: /* query could not be resolved 
but don't retry yet */ + ret = EIO; + goto done; + case SRV_RESOLVED: /* The query is resolved and valid. Return. */ + state->out = server; + tevent_req_done(req); + tevent_req_post(req, state->ev); + return req; + default: + DEBUG(1, ("Unexpected status %d for a SRV server\n", status)); + ret = EIO; + break; + } + +done: + if (ret != EOK) { + tevent_req_error(req, ret); + tevent_req_post(req, ev); + } + return req; +} + +static void +resolve_srv_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data(subreq, + struct tevent_req); + struct resolve_srv_state *state = tevent_req_data(req, + struct resolve_srv_state); + struct ares_srv_reply *reply_list; + struct ares_srv_reply *reply; + struct fo_server *server = NULL; + struct fo_server *srv_list = NULL; + int ret; + int resolv_status; + + ret = resolv_getsrv_recv(state, subreq, + &resolv_status, NULL, &reply_list); + talloc_free(subreq); + if (ret != EOK) { + DEBUG(1, ("SRV query failed %s\n", + resolv_strerror(resolv_status))); + fo_set_port_status(state->meta, PORT_NOT_WORKING); + goto fail; + } + + ret = resolv_sort_srv_reply(state, &reply_list); + if (ret != EOK) { + DEBUG(1, ("Could not sort the answers from DNS [%d]: %s\n", + ret, strerror(ret))); + fo_set_port_status(state->meta, PORT_NOT_WORKING); + goto fail; + } + + for (reply = reply_list; reply; reply = reply->next) { + ret = EOK; + DLIST_FOR_EACH(server, state->service->server_list) { + if (server->port == reply->port) { + ret = EEXIST; + break; + } + } + if (ret == EEXIST) continue; + + server = create_fo_server(state->service, reply->host, + reply->port, state->meta->user_data); + if (!server) { + ret = ENOMEM; + goto fail; + } + server->srv_data = state->meta->srv_data; + + DLIST_ADD_END(srv_list, server, struct fo_server *); + DEBUG(6, ("Inserted server '%s:%d' for service %s\n", + server->common->name, + server->port, + state->service->name)); + } + + if (srv_list) { + 
DLIST_ADD_LIST_AFTER(state->service->server_list, state->meta, + srv_list, struct fo_server *); + + DLIST_REMOVE(state->service->server_list, state->meta); + if (state->service->last_tried_server == state->meta) { + state->service->last_tried_server = srv_list; + } + + state->out = srv_list; + set_srv_data_status(state->meta->srv_data, SRV_RESOLVED); + tevent_req_done(req); + return; + } else { + ret = EIO; + goto fail; + } + +fail: + state->out = state->meta; + set_srv_data_status(state->meta->srv_data, SRV_NOT_RESOLVED); + tevent_req_error(req, ret); +} + +static int +resolve_srv_recv(struct tevent_req *req, struct fo_server **server) +{ + struct resolve_srv_state *state = tevent_req_data(req, + struct resolve_srv_state); + + /* always return the server if asked for, otherwise the caller + * cannot mark it as faulty in case we return an error */ + if (server) { + *server = state->out; + } + + TEVENT_REQ_RETURN_ON_ERROR(req); + + return EOK; +} + static void set_server_common_status(struct server_common *common, enum server_status status) @@ -667,6 +1121,10 @@ fo_get_server_port(struct fo_server *server) const char *fo_get_server_name(struct fo_server *server) { + if (!server->common && fo_is_srv_lookup(server)) { + return "SRV lookup meta-server"; + } + return server->common->name; } diff --git a/src/providers/fail_over.h b/src/providers/fail_over.h index ffcd0687..70e694fe 100644 --- a/src/providers/fail_over.h +++ b/src/providers/fail_over.h @@ -30,6 +30,9 @@ #include "resolv/async_resolv.h" +#define FO_PROTO_TCP "tcp" +#define FO_PROTO_UDP "udp" + /* Some forward declarations that don't have to do anything with fail over. */ struct hostent; struct tevent_context; @@ -60,10 +63,14 @@ struct fo_server; * duration in seconds of how long a server or port will be considered * non-working after being marked as such. * + * The 'srv_retry_timeout' member specifies how long a SRV lookup + * is considered valid until we ask the server again. 
+ * * The family_order member specifies the order of address families to * try when looking up the service. */ struct fo_options { + time_t srv_retry_timeout; time_t retry_timeout; enum restrict_family family_order; }; @@ -101,6 +108,13 @@ int fo_add_server(struct fo_service *service, int port, void *user_data); + +int fo_add_srv_server(struct fo_service *service, + const char *srv, + const char *domain, + const char *proto, + void *user_data); + /* * Request the first server from the service's list of servers. It is only * considered if it is not marked as not working (or the retry interval already -- cgit