From 6504900f1f52927adab3489b8d04b6644ceaee7d Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Tue, 10 Jul 2007 08:06:51 +0000 Subject: r23806: update Samba4 with the latest ctdb code. This doesn't get the ctdb code fully working in Samba4, it just gets it building and not breaking non-clustered use of Samba. It will take a bit longer to update some of the calling ctdb_cluster.c code to make it work correctly in Samba4. Note also that Samba4 now only links to the client portion of ctdb. For the moment I am leaving the ctdbd as a separate daemon, which you install separately from http://ctdb.samba.org/. (This used to be commit b196077cbb55cbecad87065133c2d67198e31066) --- source4/cluster/ctdb/ib/README.txt | 9 ++--- source4/cluster/ctdb/ib/ibw_ctdb.c | 20 +++++++++- source4/cluster/ctdb/ib/ibw_ctdb.h | 5 +++ source4/cluster/ctdb/ib/ibw_ctdb_init.c | 68 +++++++++++++++++++++----------- source4/cluster/ctdb/ib/ibwrapper.c | 64 +++++++++++++++++++----------- source4/cluster/ctdb/ib/ibwrapper_test.c | 31 ++++++++++++--- 6 files changed, 140 insertions(+), 57 deletions(-) (limited to 'source4/cluster/ctdb/ib') diff --git a/source4/cluster/ctdb/ib/README.txt b/source4/cluster/ctdb/ib/README.txt index 74fc129c35..40419829ca 100644 --- a/source4/cluster/ctdb/ib/README.txt +++ b/source4/cluster/ctdb/ib/README.txt @@ -11,10 +11,9 @@ After then: Example for testing =================== -bin/ctdb_test --transport ib --nlist ../2nodes_rm.txt --listen 10.0.0.1:9001 -bin/ctdb_test --transport ib --nlist ../2nodes_rm.txt --listen 10.0.0.2:9001 +bin/ctdb_test --transport ib --nlist ../2nodes_rm.txt --listen 10.0.0.1 +bin/ctdb_test --transport ib --nlist ../2nodes_rm.txt --listen 10.0.0.2 where 2nodes_rm.txt: -10.0.0.1:9001 -10.0.0.2:9001 - +10.0.0.1 +10.0.0.2 diff --git a/source4/cluster/ctdb/ib/ibw_ctdb.c b/source4/cluster/ctdb/ib/ibw_ctdb.c index c3d1dc368c..5822ffb186 100644 --- a/source4/cluster/ctdb/ib/ibw_ctdb.c +++ b/source4/cluster/ctdb/ib/ibw_ctdb.c @@ -28,6 +28,21 @@ 
#include "ibwrapper.h" #include "ibw_ctdb.h" +int ctdb_ibw_get_address(struct ctdb_context *ctdb, + const char *address, struct in_addr *addr) +{ + if (inet_pton(AF_INET, address, addr) <= 0) { + struct hostent *he = gethostbyname(address); + if (he == NULL || he->h_length > sizeof(*addr)) { + ctdb_set_error(ctdb, "invalid nework address '%s'\n", + address); + return -1; + } + memcpy(addr, he->h_addr, he->h_length); + } + return 0; +} + int ctdb_ibw_node_connect(struct ctdb_node *node) { struct ctdb_ibw_node *cn = talloc_get_type(node->private_data, struct ctdb_ibw_node); @@ -38,9 +53,12 @@ int ctdb_ibw_node_connect(struct ctdb_node *node) struct sockaddr_in sock_out; memset(&sock_out, 0, sizeof(struct sockaddr_in)); - inet_pton(AF_INET, node->address.address, &sock_out.sin_addr); sock_out.sin_port = htons(node->address.port); sock_out.sin_family = PF_INET; + if (ctdb_ibw_get_address(node->ctdb, node->address.address, &sock_out.sin_addr)) { + DEBUG(0, ("ctdb_ibw_node_connect failed\n")); + return -1; + } rc = ibw_connect(cn->conn, &sock_out, node); if (rc) { diff --git a/source4/cluster/ctdb/ib/ibw_ctdb.h b/source4/cluster/ctdb/ib/ibw_ctdb.h index 461a884c17..98ea102eac 100644 --- a/source4/cluster/ctdb/ib/ibw_ctdb.h +++ b/source4/cluster/ctdb/ib/ibw_ctdb.h @@ -35,6 +35,9 @@ struct ctdb_ibw_node { int qcnt; }; +int ctdb_ibw_get_address(struct ctdb_context *ctdb, + const char *address, struct in_addr *addr); + int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn); int ctdb_ibw_receive_handler(struct ibw_conn *conn, void *buf, int n); @@ -43,3 +46,5 @@ void ctdb_ibw_node_connect_event(struct event_context *ev, struct timed_event *t struct timeval t, void *private_data); int ctdb_flush_cn_queue(struct ctdb_ibw_node *cn); + +int ctdb_ibw_init(struct ctdb_context *ctdb); diff --git a/source4/cluster/ctdb/ib/ibw_ctdb_init.c b/source4/cluster/ctdb/ib/ibw_ctdb_init.c index 523a02c4df..8dbb9c241c 100644 --- a/source4/cluster/ctdb/ib/ibw_ctdb_init.c +++ 
b/source4/cluster/ctdb/ib/ibw_ctdb_init.c @@ -37,7 +37,8 @@ static int ctdb_ibw_listen(struct ctdb_context *ctdb, int backlog) memset(&my_addr, 0, sizeof(struct sockaddr_in)); my_addr.sin_port = htons(ctdb->address.port); my_addr.sin_family = PF_INET; - inet_pton(AF_INET, ctdb->address.address, &my_addr.sin_addr); + if (ctdb_ibw_get_address(ctdb, ctdb->address.address, &my_addr.sin_addr)) + return -1; if (ibw_bind(ictx, &my_addr)) { DEBUG(0, ("ctdb_ibw_listen: ibw_bind failed\n")); @@ -53,41 +54,63 @@ static int ctdb_ibw_listen(struct ctdb_context *ctdb, int backlog) } /* - * Start infiniband + * initialise ibw portion of a ctdb node */ -static int ctdb_ibw_start(struct ctdb_context *ctdb) +static int ctdb_ibw_add_node(struct ctdb_node *node) +{ + struct ibw_ctx *ictx = talloc_get_type(node->ctdb->private_data, struct ibw_ctx); + struct ctdb_ibw_node *cn = talloc_zero(node, struct ctdb_ibw_node); + + assert(cn!=NULL); + cn->conn = ibw_conn_new(ictx, node); + node->private_data = (void *)cn; + + return (cn->conn!=NULL ? 
0 : -1); +} + +/* + * initialise infiniband + */ +static int ctdb_ibw_initialise(struct ctdb_context *ctdb) { - int i; + int i, ret; + + ret = ctdb_ibw_init(ctdb); + if (ret != 0) { + return ret; + } + + for (i=0; i<ctdb->num_nodes; i++) { + if (ctdb_ibw_add_node(ctdb->nodes[i]) != 0) { + DEBUG(0, ("methods->add_node failed at %d\n", i)); + return -1; + } + } /* listen on our own address */ if (ctdb_ibw_listen(ctdb, 10)) /* TODO: backlog as param */ return -1; - /* everything async here */ - for (i=0;i<ctdb->num_nodes;i++) { - struct ctdb_node *node = ctdb->nodes[i]; - if (!(ctdb->flags & CTDB_FLAG_SELF_CONNECT) && - ctdb_same_address(&ctdb->address, &node->address)) - continue; - ctdb_ibw_node_connect(node); - } - return 0; } + /* - * initialise ibw portion of a ctdb node + * Start infiniband */ -static int ctdb_ibw_add_node(struct ctdb_node *node) +static int ctdb_ibw_start(struct ctdb_context *ctdb) { - struct ibw_ctx *ictx = talloc_get_type(node->ctdb->private_data, struct ibw_ctx); - struct ctdb_ibw_node *cn = talloc_zero(node, struct ctdb_ibw_node); + int i, ret; - assert(cn!=NULL); - cn->conn = ibw_conn_new(ictx, node); - node->private_data = (void *)cn; + /* everything async here */ + for (i=0;i<ctdb->num_nodes;i++) { + struct ctdb_node *node = ctdb->nodes[i]; + if (!ctdb_same_address(&ctdb->address, &node->address)) { + ctdb_ibw_node_connect(node); + } + } - return (cn->conn!=NULL ? 
0 : -1); + return 0; } static int ctdb_ibw_send_pkt(struct ibw_conn *conn, uint8_t *data, uint32_t length) @@ -176,9 +199,10 @@ static int ctdb_ibw_stop(struct ctdb_context *cctx) #endif /* __NOTDEF__ */ static const struct ctdb_methods ctdb_ibw_methods = { + .initialise= ctdb_ibw_initialise, .start = ctdb_ibw_start, - .add_node = ctdb_ibw_add_node, .queue_pkt = ctdb_ibw_queue_pkt, + .add_node = ctdb_ibw_add_node, .allocate_pkt = ctdb_ibw_allocate_pkt, // .stop = ctdb_ibw_stop diff --git a/source4/cluster/ctdb/ib/ibwrapper.c b/source4/cluster/ctdb/ib/ibwrapper.c index 8ef798c88b..31acbc4a2d 100644 --- a/source4/cluster/ctdb/ib/ibwrapper.c +++ b/source4/cluster/ctdb/ib/ibwrapper.c @@ -37,8 +37,9 @@ #include "lib/events/events.h" #include "ibwrapper.h" +#include +#include #include -#include "infiniband/sa-kern-abi.h" #include "ibwrapper_internal.h" #include "lib/util/dlinklist.h" @@ -82,7 +83,7 @@ static void *ibw_alloc_mr(struct ibw_ctx_priv *pctx, struct ibw_conn_priv *pconn static void ibw_free_mr(char **ppbuf, struct ibv_mr **ppmr) { - DEBUG(10, ("ibw_free_mr(%u %u)\n", (uint32_t)*ppbuf, (uint32_t)*ppmr)); + DEBUG(10, ("ibw_free_mr(%p %p)\n", *ppbuf, *ppmr)); if (*ppmr!=NULL) { ibv_dereg_mr(*ppmr); *ppmr = NULL; @@ -132,7 +133,7 @@ static int ibw_init_memory(struct ibw_conn *conn) static int ibw_ctx_priv_destruct(struct ibw_ctx_priv *pctx) { - DEBUG(10, ("ibw_ctx_priv_destruct(%u)\n", (uint32_t)pctx)); + DEBUG(10, ("ibw_ctx_priv_destruct(%p)\n", pctx)); /* destroy cm */ if (pctx->cm_channel) { @@ -154,7 +155,7 @@ static int ibw_ctx_priv_destruct(struct ibw_ctx_priv *pctx) static int ibw_ctx_destruct(struct ibw_ctx *ctx) { - DEBUG(10, ("ibw_ctx_destruct(%u)\n", (uint32_t)ctx)); + DEBUG(10, ("ibw_ctx_destruct(%p)\n", ctx)); return 0; } @@ -216,7 +217,7 @@ static int ibw_wr_destruct(struct ibw_wr *wr) static int ibw_conn_destruct(struct ibw_conn *conn) { - DEBUG(10, ("ibw_conn_destruct(%u)\n", (uint32_t)conn)); + DEBUG(10, ("ibw_conn_destruct(%p)\n", conn)); /* 
important here: ctx is a talloc _parent_ */ DLIST_REMOVE(conn->ctx->conn_list, conn); @@ -427,6 +428,7 @@ static void ibw_event_handler_cm(struct event_context *ev, rc = rdma_get_cm_event(pctx->cm_channel, &event); if (rc) { ctx->state = IBWS_ERROR; + event = NULL; sprintf(ibw_lasterr, "rdma_get_cm_event error %d\n", rc); goto error; } @@ -492,7 +494,7 @@ static void ibw_event_handler_cm(struct event_context *ev, case RDMA_CM_EVENT_ESTABLISHED: /* expected after ibw_accept and ibw_connect[not directly] */ - DEBUG(0, ("ESTABLISHED (conn: %p)\n", cma_id->context)); + DEBUG(1, ("ESTABLISHED (conn: %p)\n", cma_id->context)); conn = talloc_get_type(cma_id->context, struct ibw_conn); assert(conn!=NULL); /* important assumption */ @@ -513,17 +515,21 @@ static void ibw_event_handler_cm(struct event_context *ev, sprintf(ibw_lasterr, "RDMA_CM_EVENT_CONNECT_ERROR, error %d\n", event->status); case RDMA_CM_EVENT_UNREACHABLE: sprintf(ibw_lasterr, "RDMA_CM_EVENT_UNREACHABLE, error %d\n", event->status); + goto error; case RDMA_CM_EVENT_REJECTED: sprintf(ibw_lasterr, "RDMA_CM_EVENT_REJECTED, error %d\n", event->status); + DEBUG(1, ("cm event handler: %s", ibw_lasterr)); conn = talloc_get_type(cma_id->context, struct ibw_conn); if (conn) { + /* must be done BEFORE connstate */ if ((rc=rdma_ack_cm_event(event))) DEBUG(0, ("reject/rdma_ack_cm_event failed with %d\n", rc)); - event = NULL; - pconn = talloc_get_type(conn->internal, struct ibw_conn_priv); - ibw_conn_priv_destruct(pconn); + event = NULL; /* not to touch cma_id or conn */ + conn->state = IBWC_ERROR; + /* it should free the conn */ + pctx->connstate_func(NULL, conn); } - goto error; + break; /* this is not strictly an error */ case RDMA_CM_EVENT_DISCONNECTED: DEBUG(11, ("RDMA_CM_EVENT_DISCONNECTED\n")); @@ -555,22 +561,26 @@ static void ibw_event_handler_cm(struct event_context *ev, return; error: - if (event!=NULL && (rc=rdma_ack_cm_event(event))) { - sprintf(ibw_lasterr, "rdma_ack_cm_event failed with %d\n", rc); - goto 
error; - } - DEBUG(0, ("cm event handler: %s", ibw_lasterr)); - if (cma_id!=pctx->cm_id) { - conn = talloc_get_type(cma_id->context, struct ibw_conn); - if (conn) - conn->state = IBWC_ERROR; - pctx->connstate_func(NULL, conn); - } else { - ctx->state = IBWS_ERROR; - pctx->connstate_func(ctx, NULL); + if (event!=NULL) { + if (cma_id!=NULL && cma_id!=pctx->cm_id) { + conn = talloc_get_type(cma_id->context, struct ibw_conn); + if (conn) { + conn->state = IBWC_ERROR; + pctx->connstate_func(NULL, conn); + } + } else { + ctx->state = IBWS_ERROR; + pctx->connstate_func(ctx, NULL); + } + + if ((rc=rdma_ack_cm_event(event))!=0) { + DEBUG(0, ("rdma_ack_cm_event failed with %d\n", rc)); + } } + + return; } static void ibw_event_handler_verbs(struct event_context *ev, @@ -966,7 +976,11 @@ struct ibw_ctx *ibw_init(struct ibw_initattr *attr, int nattr, pctx->cm_channel_event = event_add_fd(pctx->ectx, pctx, pctx->cm_channel->fd, EVENT_FD_READ, ibw_event_handler_cm, ctx); +#if RDMA_USER_CM_MAX_ABI_VERSION >= 2 rc = rdma_create_id(pctx->cm_channel, &pctx->cm_id, ctx, RDMA_PS_TCP); +#else + rc = rdma_create_id(pctx->cm_channel, &pctx->cm_id, ctx); +#endif if (rc) { rc = errno; sprintf(ibw_lasterr, "rdma_create_id error %d\n", rc); @@ -1087,7 +1101,11 @@ int ibw_connect(struct ibw_conn *conn, struct sockaddr_in *serv_addr, void *conn } /* init cm */ +#if RDMA_USER_CM_MAX_ABI_VERSION >= 2 rc = rdma_create_id(pctx->cm_channel, &pconn->cm_id, conn, RDMA_PS_TCP); +#else + rc = rdma_create_id(pctx->cm_channel, &pconn->cm_id, conn); +#endif if (rc) { rc = errno; sprintf(ibw_lasterr, "ibw_connect/rdma_create_id error %d\n", rc); diff --git a/source4/cluster/ctdb/ib/ibwrapper_test.c b/source4/cluster/ctdb/ib/ibwrapper_test.c index 2942d26ed6..07c78e7709 100644 --- a/source4/cluster/ctdb/ib/ibwrapper_test.c +++ b/source4/cluster/ctdb/ib/ibwrapper_test.c @@ -289,7 +289,7 @@ int ibwtest_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn) talloc_free(conn); break; case IBWC_ERROR: - 
DEBUG(10, ("test IBWC_ERROR\n")); + DEBUG(10, ("test IBWC_ERROR %s\n", ibw_getLastError())); break; default: assert(0); @@ -335,7 +335,7 @@ int ibwtest_receive_handler(struct ibw_conn *conn, void *buf, int n) ibw_stop(tcx->ibwctx); goto error; } - } else { + } else if (op!=TESTOP_SEND_ID) { char *buf2; void *key2; @@ -465,6 +465,19 @@ int ibwtest_parse_attrs(struct ibwtest_ctx *tcx, char *optext, return 0; } +static int ibwtest_get_address(const char *address, struct in_addr *addr) +{ + if (inet_pton(AF_INET, address, addr) <= 0) { + struct hostent *he = gethostbyname(address); + if (he == NULL || he->h_length > sizeof(*addr)) { + DEBUG(0, ("invalid nework address '%s'\n", address)); + return -1; + } + memcpy(addr, he->h_addr, he->h_length); + } + return 0; +} + int ibwtest_getdests(struct ibwtest_ctx *tcx, char op) { int i; @@ -482,7 +495,8 @@ int ibwtest_getdests(struct ibwtest_ctx *tcx, char op) for(i=0; i<tcx->naddrs; i++) { p = tcx->addrs + i; p->sin_family = AF_INET; - p->sin_addr.s_addr = inet_addr(attrs[i].name); + if (ibwtest_get_address(attrs[i].name, &p->sin_addr)) + return -1; p->sin_port = htons(atoi(attrs[i].value)); } @@ -516,13 +530,14 @@ void ibwtest_usage(struct ibwtest_ctx *tcx, char *name) printf("\t%s -i -o {name:value} -d {addr:port} -t nsec -s\n", name); printf("\t-i is a free text, acting as a server id, max 23 chars [mandatory]\n"); printf("\t-o name1:value1,name2:value2,... is a list of (name, value) pairs\n"); - printf("\t-d addr1:port1,addr2:port2,... is a list of destination ip addresses\n"); + printf("\t-a addr1:port1,addr2:port2,... 
is a list of destination ip addresses\n"); printf("\t-t nsec delta time between sends in nanosec [default %d]\n", tcx->nsec); printf("\t\t send message periodically and endless when nsec is non-zero\n"); printf("\t-s server mode (you have to give exactly one -d address:port in this case)\n"); printf("\t-n number of messages to send [default %d]\n", tcx->nmsg); printf("\t-l usec time to sleep in the main loop [default %d]\n", tcx->sleep_usec); printf("\t-v max variable msg size in bytes [default %d], 0=don't send var. size\n", tcx->maxsize); + printf("\t-d LogLevel [default %d]\n", LogLevel); printf("Press ctrl+C to stop the program.\n"); } @@ -538,13 +553,14 @@ int main(int argc, char *argv[]) memset(tcx, 0, sizeof(struct ibwtest_ctx)); tcx->nsec = 0; tcx->nmsg = 1000; + LogLevel = 0; /* here is the only case we can't avoid using global... */ testctx = tcx; signal(SIGINT, ibwtest_sigint_handler); srand((unsigned)time(NULL)); - while ((op=getopt(argc, argv, "i:o:d:m:st:n:l:v:")) != -1) { + while ((op=getopt(argc, argv, "i:o:d:m:st:n:l:v:a:")) != -1) { switch (op) { case 'i': tcx->id = talloc_strdup(tcx, optarg); @@ -555,7 +571,7 @@ int main(int argc, char *argv[]) &tcx->nattrs, op)) goto cleanup; break; - case 'd': + case 'a': if (ibwtest_getdests(tcx, op)) goto cleanup; break; @@ -574,6 +590,9 @@ int main(int argc, char *argv[]) case 'v': tcx->maxsize = (unsigned int)atoi(optarg); break; + case 'd': + LogLevel = atoi(optarg); + break; default: fprintf(stderr, "ERROR: unknown option -%c\n", (char)op); ibwtest_usage(tcx, argv[0]); -- cgit