summary | refs | log | tree | commit | diff
path: root/source4/cluster/ctdb/ib
diff options
context:
space:
mode:
author Andrew Tridgell <tridge@samba.org> 2007-07-10 08:06:51 +0000
committer Gerald (Jerry) Carter <jerry@samba.org> 2007-10-10 14:59:20 -0500
commit 6504900f1f52927adab3489b8d04b6644ceaee7d (patch)
tree 893cadba231a529f320046429767cedf8b0eec30 /source4/cluster/ctdb/ib
parent d4e2d157cbce5a1cdc75efb5ced75d9608a0f7df (diff)
download samba-6504900f1f52927adab3489b8d04b6644ceaee7d.tar.gz
samba-6504900f1f52927adab3489b8d04b6644ceaee7d.tar.bz2
samba-6504900f1f52927adab3489b8d04b6644ceaee7d.zip
r23806: update Samba4 with the latest ctdb code.
This doesn't get the ctdb code fully working in Samba4; it just gets it building and not breaking non-clustered use of Samba. It will take a bit longer to update some of the calling ctdb_cluster.c code to make it work correctly in Samba4.

Note also that Samba4 now only links to the client portion of ctdb. For the moment I am leaving the ctdbd as a separate daemon, which you install separately from http://ctdb.samba.org/.

(This used to be commit b196077cbb55cbecad87065133c2d67198e31066)
Diffstat (limited to 'source4/cluster/ctdb/ib')
-rw-r--r-- source4/cluster/ctdb/ib/README.txt 9
-rw-r--r-- source4/cluster/ctdb/ib/ibw_ctdb.c 20
-rw-r--r-- source4/cluster/ctdb/ib/ibw_ctdb.h 5
-rw-r--r-- source4/cluster/ctdb/ib/ibw_ctdb_init.c 68
-rw-r--r-- source4/cluster/ctdb/ib/ibwrapper.c 64
-rw-r--r-- source4/cluster/ctdb/ib/ibwrapper_test.c 31
6 files changed, 140 insertions, 57 deletions
diff --git a/source4/cluster/ctdb/ib/README.txt b/source4/cluster/ctdb/ib/README.txt
index 74fc129c35..40419829ca 100644
--- a/source4/cluster/ctdb/ib/README.txt
+++ b/source4/cluster/ctdb/ib/README.txt
@@ -11,10 +11,9 @@ After then:
Example for testing
===================
-bin/ctdb_test --transport ib --nlist ../2nodes_rm.txt --listen 10.0.0.1:9001
-bin/ctdb_test --transport ib --nlist ../2nodes_rm.txt --listen 10.0.0.2:9001
+bin/ctdb_test --transport ib --nlist ../2nodes_rm.txt --listen 10.0.0.1
+bin/ctdb_test --transport ib --nlist ../2nodes_rm.txt --listen 10.0.0.2
where 2nodes_rm.txt:
-10.0.0.1:9001
-10.0.0.2:9001
-
+10.0.0.1
+10.0.0.2
diff --git a/source4/cluster/ctdb/ib/ibw_ctdb.c b/source4/cluster/ctdb/ib/ibw_ctdb.c
index c3d1dc368c..5822ffb186 100644
--- a/source4/cluster/ctdb/ib/ibw_ctdb.c
+++ b/source4/cluster/ctdb/ib/ibw_ctdb.c
@@ -28,6 +28,21 @@
#include "ibwrapper.h"
#include "ibw_ctdb.h"
+int ctdb_ibw_get_address(struct ctdb_context *ctdb,
+ const char *address, struct in_addr *addr)
+{
+ if (inet_pton(AF_INET, address, addr) <= 0) {
+ struct hostent *he = gethostbyname(address);
+ if (he == NULL || he->h_length > sizeof(*addr)) {
+ ctdb_set_error(ctdb, "invalid nework address '%s'\n",
+ address);
+ return -1;
+ }
+ memcpy(addr, he->h_addr, he->h_length);
+ }
+ return 0;
+}
+
int ctdb_ibw_node_connect(struct ctdb_node *node)
{
struct ctdb_ibw_node *cn = talloc_get_type(node->private_data, struct ctdb_ibw_node);
@@ -38,9 +53,12 @@ int ctdb_ibw_node_connect(struct ctdb_node *node)
struct sockaddr_in sock_out;
memset(&sock_out, 0, sizeof(struct sockaddr_in));
- inet_pton(AF_INET, node->address.address, &sock_out.sin_addr);
sock_out.sin_port = htons(node->address.port);
sock_out.sin_family = PF_INET;
+ if (ctdb_ibw_get_address(node->ctdb, node->address.address, &sock_out.sin_addr)) {
+ DEBUG(0, ("ctdb_ibw_node_connect failed\n"));
+ return -1;
+ }
rc = ibw_connect(cn->conn, &sock_out, node);
if (rc) {
diff --git a/source4/cluster/ctdb/ib/ibw_ctdb.h b/source4/cluster/ctdb/ib/ibw_ctdb.h
index 461a884c17..98ea102eac 100644
--- a/source4/cluster/ctdb/ib/ibw_ctdb.h
+++ b/source4/cluster/ctdb/ib/ibw_ctdb.h
@@ -35,6 +35,9 @@ struct ctdb_ibw_node {
int qcnt;
};
+int ctdb_ibw_get_address(struct ctdb_context *ctdb,
+ const char *address, struct in_addr *addr);
+
int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn);
int ctdb_ibw_receive_handler(struct ibw_conn *conn, void *buf, int n);
@@ -43,3 +46,5 @@ void ctdb_ibw_node_connect_event(struct event_context *ev, struct timed_event *t
struct timeval t, void *private_data);
int ctdb_flush_cn_queue(struct ctdb_ibw_node *cn);
+
+int ctdb_ibw_init(struct ctdb_context *ctdb);
diff --git a/source4/cluster/ctdb/ib/ibw_ctdb_init.c b/source4/cluster/ctdb/ib/ibw_ctdb_init.c
index 523a02c4df..8dbb9c241c 100644
--- a/source4/cluster/ctdb/ib/ibw_ctdb_init.c
+++ b/source4/cluster/ctdb/ib/ibw_ctdb_init.c
@@ -37,7 +37,8 @@ static int ctdb_ibw_listen(struct ctdb_context *ctdb, int backlog)
memset(&my_addr, 0, sizeof(struct sockaddr_in));
my_addr.sin_port = htons(ctdb->address.port);
my_addr.sin_family = PF_INET;
- inet_pton(AF_INET, ctdb->address.address, &my_addr.sin_addr);
+ if (ctdb_ibw_get_address(ctdb, ctdb->address.address, &my_addr.sin_addr))
+ return -1;
if (ibw_bind(ictx, &my_addr)) {
DEBUG(0, ("ctdb_ibw_listen: ibw_bind failed\n"));
@@ -53,41 +54,63 @@ static int ctdb_ibw_listen(struct ctdb_context *ctdb, int backlog)
}
/*
- * Start infiniband
+ * initialise ibw portion of a ctdb node
*/
-static int ctdb_ibw_start(struct ctdb_context *ctdb)
+static int ctdb_ibw_add_node(struct ctdb_node *node)
+{
+ struct ibw_ctx *ictx = talloc_get_type(node->ctdb->private_data, struct ibw_ctx);
+ struct ctdb_ibw_node *cn = talloc_zero(node, struct ctdb_ibw_node);
+
+ assert(cn!=NULL);
+ cn->conn = ibw_conn_new(ictx, node);
+ node->private_data = (void *)cn;
+
+ return (cn->conn!=NULL ? 0 : -1);
+}
+
+/*
+ * initialise infiniband
+ */
+static int ctdb_ibw_initialise(struct ctdb_context *ctdb)
{
- int i;
+ int i, ret;
+
+ ret = ctdb_ibw_init(ctdb);
+ if (ret != 0) {
+ return ret;
+ }
+
+ for (i=0; i<ctdb->num_nodes; i++) {
+ if (ctdb_ibw_add_node(ctdb->nodes[i]) != 0) {
+ DEBUG(0, ("methods->add_node failed at %d\n", i));
+ return -1;
+ }
+ }
/* listen on our own address */
if (ctdb_ibw_listen(ctdb, 10)) /* TODO: backlog as param */
return -1;
- /* everything async here */
- for (i=0;i<ctdb->num_nodes;i++) {
- struct ctdb_node *node = ctdb->nodes[i];
- if (!(ctdb->flags & CTDB_FLAG_SELF_CONNECT) &&
- ctdb_same_address(&ctdb->address, &node->address))
- continue;
- ctdb_ibw_node_connect(node);
- }
-
return 0;
}
+
/*
- * initialise ibw portion of a ctdb node
+ * Start infiniband
*/
-static int ctdb_ibw_add_node(struct ctdb_node *node)
+static int ctdb_ibw_start(struct ctdb_context *ctdb)
{
- struct ibw_ctx *ictx = talloc_get_type(node->ctdb->private_data, struct ibw_ctx);
- struct ctdb_ibw_node *cn = talloc_zero(node, struct ctdb_ibw_node);
+ int i, ret;
- assert(cn!=NULL);
- cn->conn = ibw_conn_new(ictx, node);
- node->private_data = (void *)cn;
+ /* everything async here */
+ for (i=0;i<ctdb->num_nodes;i++) {
+ struct ctdb_node *node = ctdb->nodes[i];
+ if (!ctdb_same_address(&ctdb->address, &node->address)) {
+ ctdb_ibw_node_connect(node);
+ }
+ }
- return (cn->conn!=NULL ? 0 : -1);
+ return 0;
}
static int ctdb_ibw_send_pkt(struct ibw_conn *conn, uint8_t *data, uint32_t length)
@@ -176,9 +199,10 @@ static int ctdb_ibw_stop(struct ctdb_context *cctx)
#endif /* __NOTDEF__ */
static const struct ctdb_methods ctdb_ibw_methods = {
+ .initialise= ctdb_ibw_initialise,
.start = ctdb_ibw_start,
- .add_node = ctdb_ibw_add_node,
.queue_pkt = ctdb_ibw_queue_pkt,
+ .add_node = ctdb_ibw_add_node,
.allocate_pkt = ctdb_ibw_allocate_pkt,
// .stop = ctdb_ibw_stop
diff --git a/source4/cluster/ctdb/ib/ibwrapper.c b/source4/cluster/ctdb/ib/ibwrapper.c
index 8ef798c88b..31acbc4a2d 100644
--- a/source4/cluster/ctdb/ib/ibwrapper.c
+++ b/source4/cluster/ctdb/ib/ibwrapper.c
@@ -37,8 +37,9 @@
#include "lib/events/events.h"
#include "ibwrapper.h"
+#include <infiniband/kern-abi.h>
+#include <rdma/rdma_cma_abi.h>
#include <rdma/rdma_cma.h>
-#include "infiniband/sa-kern-abi.h"
#include "ibwrapper_internal.h"
#include "lib/util/dlinklist.h"
@@ -82,7 +83,7 @@ static void *ibw_alloc_mr(struct ibw_ctx_priv *pctx, struct ibw_conn_priv *pconn
static void ibw_free_mr(char **ppbuf, struct ibv_mr **ppmr)
{
- DEBUG(10, ("ibw_free_mr(%u %u)\n", (uint32_t)*ppbuf, (uint32_t)*ppmr));
+ DEBUG(10, ("ibw_free_mr(%p %p)\n", *ppbuf, *ppmr));
if (*ppmr!=NULL) {
ibv_dereg_mr(*ppmr);
*ppmr = NULL;
@@ -132,7 +133,7 @@ static int ibw_init_memory(struct ibw_conn *conn)
static int ibw_ctx_priv_destruct(struct ibw_ctx_priv *pctx)
{
- DEBUG(10, ("ibw_ctx_priv_destruct(%u)\n", (uint32_t)pctx));
+ DEBUG(10, ("ibw_ctx_priv_destruct(%p)\n", pctx));
/* destroy cm */
if (pctx->cm_channel) {
@@ -154,7 +155,7 @@ static int ibw_ctx_priv_destruct(struct ibw_ctx_priv *pctx)
static int ibw_ctx_destruct(struct ibw_ctx *ctx)
{
- DEBUG(10, ("ibw_ctx_destruct(%u)\n", (uint32_t)ctx));
+ DEBUG(10, ("ibw_ctx_destruct(%p)\n", ctx));
return 0;
}
@@ -216,7 +217,7 @@ static int ibw_wr_destruct(struct ibw_wr *wr)
static int ibw_conn_destruct(struct ibw_conn *conn)
{
- DEBUG(10, ("ibw_conn_destruct(%u)\n", (uint32_t)conn));
+ DEBUG(10, ("ibw_conn_destruct(%p)\n", conn));
/* important here: ctx is a talloc _parent_ */
DLIST_REMOVE(conn->ctx->conn_list, conn);
@@ -427,6 +428,7 @@ static void ibw_event_handler_cm(struct event_context *ev,
rc = rdma_get_cm_event(pctx->cm_channel, &event);
if (rc) {
ctx->state = IBWS_ERROR;
+ event = NULL;
sprintf(ibw_lasterr, "rdma_get_cm_event error %d\n", rc);
goto error;
}
@@ -492,7 +494,7 @@ static void ibw_event_handler_cm(struct event_context *ev,
case RDMA_CM_EVENT_ESTABLISHED:
/* expected after ibw_accept and ibw_connect[not directly] */
- DEBUG(0, ("ESTABLISHED (conn: %p)\n", cma_id->context));
+ DEBUG(1, ("ESTABLISHED (conn: %p)\n", cma_id->context));
conn = talloc_get_type(cma_id->context, struct ibw_conn);
assert(conn!=NULL); /* important assumption */
@@ -513,17 +515,21 @@ static void ibw_event_handler_cm(struct event_context *ev,
sprintf(ibw_lasterr, "RDMA_CM_EVENT_CONNECT_ERROR, error %d\n", event->status);
case RDMA_CM_EVENT_UNREACHABLE:
sprintf(ibw_lasterr, "RDMA_CM_EVENT_UNREACHABLE, error %d\n", event->status);
+ goto error;
case RDMA_CM_EVENT_REJECTED:
sprintf(ibw_lasterr, "RDMA_CM_EVENT_REJECTED, error %d\n", event->status);
+ DEBUG(1, ("cm event handler: %s", ibw_lasterr));
conn = talloc_get_type(cma_id->context, struct ibw_conn);
if (conn) {
+ /* must be done BEFORE connstate */
if ((rc=rdma_ack_cm_event(event)))
DEBUG(0, ("reject/rdma_ack_cm_event failed with %d\n", rc));
- event = NULL;
- pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
- ibw_conn_priv_destruct(pconn);
+ event = NULL; /* not to touch cma_id or conn */
+ conn->state = IBWC_ERROR;
+ /* it should free the conn */
+ pctx->connstate_func(NULL, conn);
}
- goto error;
+ break; /* this is not strictly an error */
case RDMA_CM_EVENT_DISCONNECTED:
DEBUG(11, ("RDMA_CM_EVENT_DISCONNECTED\n"));
@@ -555,22 +561,26 @@ static void ibw_event_handler_cm(struct event_context *ev,
return;
error:
- if (event!=NULL && (rc=rdma_ack_cm_event(event))) {
- sprintf(ibw_lasterr, "rdma_ack_cm_event failed with %d\n", rc);
- goto error;
- }
-
DEBUG(0, ("cm event handler: %s", ibw_lasterr));
- if (cma_id!=pctx->cm_id) {
- conn = talloc_get_type(cma_id->context, struct ibw_conn);
- if (conn)
- conn->state = IBWC_ERROR;
- pctx->connstate_func(NULL, conn);
- } else {
- ctx->state = IBWS_ERROR;
- pctx->connstate_func(ctx, NULL);
+ if (event!=NULL) {
+ if (cma_id!=NULL && cma_id!=pctx->cm_id) {
+ conn = talloc_get_type(cma_id->context, struct ibw_conn);
+ if (conn) {
+ conn->state = IBWC_ERROR;
+ pctx->connstate_func(NULL, conn);
+ }
+ } else {
+ ctx->state = IBWS_ERROR;
+ pctx->connstate_func(ctx, NULL);
+ }
+
+ if ((rc=rdma_ack_cm_event(event))!=0) {
+ DEBUG(0, ("rdma_ack_cm_event failed with %d\n", rc));
+ }
}
+
+ return;
}
static void ibw_event_handler_verbs(struct event_context *ev,
@@ -966,7 +976,11 @@ struct ibw_ctx *ibw_init(struct ibw_initattr *attr, int nattr,
pctx->cm_channel_event = event_add_fd(pctx->ectx, pctx,
pctx->cm_channel->fd, EVENT_FD_READ, ibw_event_handler_cm, ctx);
+#if RDMA_USER_CM_MAX_ABI_VERSION >= 2
rc = rdma_create_id(pctx->cm_channel, &pctx->cm_id, ctx, RDMA_PS_TCP);
+#else
+ rc = rdma_create_id(pctx->cm_channel, &pctx->cm_id, ctx);
+#endif
if (rc) {
rc = errno;
sprintf(ibw_lasterr, "rdma_create_id error %d\n", rc);
@@ -1087,7 +1101,11 @@ int ibw_connect(struct ibw_conn *conn, struct sockaddr_in *serv_addr, void *conn
}
/* init cm */
+#if RDMA_USER_CM_MAX_ABI_VERSION >= 2
rc = rdma_create_id(pctx->cm_channel, &pconn->cm_id, conn, RDMA_PS_TCP);
+#else
+ rc = rdma_create_id(pctx->cm_channel, &pconn->cm_id, conn);
+#endif
if (rc) {
rc = errno;
sprintf(ibw_lasterr, "ibw_connect/rdma_create_id error %d\n", rc);
diff --git a/source4/cluster/ctdb/ib/ibwrapper_test.c b/source4/cluster/ctdb/ib/ibwrapper_test.c
index 2942d26ed6..07c78e7709 100644
--- a/source4/cluster/ctdb/ib/ibwrapper_test.c
+++ b/source4/cluster/ctdb/ib/ibwrapper_test.c
@@ -289,7 +289,7 @@ int ibwtest_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn)
talloc_free(conn);
break;
case IBWC_ERROR:
- DEBUG(10, ("test IBWC_ERROR\n"));
+ DEBUG(10, ("test IBWC_ERROR %s\n", ibw_getLastError()));
break;
default:
assert(0);
@@ -335,7 +335,7 @@ int ibwtest_receive_handler(struct ibw_conn *conn, void *buf, int n)
ibw_stop(tcx->ibwctx);
goto error;
}
- } else {
+ } else if (op!=TESTOP_SEND_ID) {
char *buf2;
void *key2;
@@ -465,6 +465,19 @@ int ibwtest_parse_attrs(struct ibwtest_ctx *tcx, char *optext,
return 0;
}
+static int ibwtest_get_address(const char *address, struct in_addr *addr)
+{
+ if (inet_pton(AF_INET, address, addr) <= 0) {
+ struct hostent *he = gethostbyname(address);
+ if (he == NULL || he->h_length > sizeof(*addr)) {
+ DEBUG(0, ("invalid nework address '%s'\n", address));
+ return -1;
+ }
+ memcpy(addr, he->h_addr, he->h_length);
+ }
+ return 0;
+}
+
int ibwtest_getdests(struct ibwtest_ctx *tcx, char op)
{
int i;
@@ -482,7 +495,8 @@ int ibwtest_getdests(struct ibwtest_ctx *tcx, char op)
for(i=0; i<tcx->naddrs; i++) {
p = tcx->addrs + i;
p->sin_family = AF_INET;
- p->sin_addr.s_addr = inet_addr(attrs[i].name);
+ if (ibwtest_get_address(attrs[i].name, &p->sin_addr))
+ return -1;
p->sin_port = htons(atoi(attrs[i].value));
}
@@ -516,13 +530,14 @@ void ibwtest_usage(struct ibwtest_ctx *tcx, char *name)
printf("\t%s -i <id> -o {name:value} -d {addr:port} -t nsec -s\n", name);
printf("\t-i <id> is a free text, acting as a server id, max 23 chars [mandatory]\n");
printf("\t-o name1:value1,name2:value2,... is a list of (name, value) pairs\n");
- printf("\t-d addr1:port1,addr2:port2,... is a list of destination ip addresses\n");
+ printf("\t-a addr1:port1,addr2:port2,... is a list of destination ip addresses\n");
printf("\t-t nsec delta time between sends in nanosec [default %d]\n", tcx->nsec);
printf("\t\t send message periodically and endless when nsec is non-zero\n");
printf("\t-s server mode (you have to give exactly one -d address:port in this case)\n");
printf("\t-n number of messages to send [default %d]\n", tcx->nmsg);
printf("\t-l usec time to sleep in the main loop [default %d]\n", tcx->sleep_usec);
printf("\t-v max variable msg size in bytes [default %d], 0=don't send var. size\n", tcx->maxsize);
+ printf("\t-d LogLevel [default %d]\n", LogLevel);
printf("Press ctrl+C to stop the program.\n");
}
@@ -538,13 +553,14 @@ int main(int argc, char *argv[])
memset(tcx, 0, sizeof(struct ibwtest_ctx));
tcx->nsec = 0;
tcx->nmsg = 1000;
+ LogLevel = 0;
/* here is the only case we can't avoid using global... */
testctx = tcx;
signal(SIGINT, ibwtest_sigint_handler);
srand((unsigned)time(NULL));
- while ((op=getopt(argc, argv, "i:o:d:m:st:n:l:v:")) != -1) {
+ while ((op=getopt(argc, argv, "i:o:d:m:st:n:l:v:a:")) != -1) {
switch (op) {
case 'i':
tcx->id = talloc_strdup(tcx, optarg);
@@ -555,7 +571,7 @@ int main(int argc, char *argv[])
&tcx->nattrs, op))
goto cleanup;
break;
- case 'd':
+ case 'a':
if (ibwtest_getdests(tcx, op))
goto cleanup;
break;
@@ -574,6 +590,9 @@ int main(int argc, char *argv[])
case 'v':
tcx->maxsize = (unsigned int)atoi(optarg);
break;
+ case 'd':
+ LogLevel = atoi(optarg);
+ break;
default:
fprintf(stderr, "ERROR: unknown option -%c\n", (char)op);
ibwtest_usage(tcx, argv[0]);