From 5cb78383fafa15c2ff7a4ccd194cccd5cf5cd263 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 19 Jan 2007 03:54:48 +0000 Subject: r20889: import ctdb cluster backend from bzr it will be interesting to see how the build farm handles this (This used to be commit 53be449630bd67d649a9e70cc7e25a9799c0616b) --- source4/cluster/ctdb/tcp/tcp_connect.c | 191 +++++++++++++++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 source4/cluster/ctdb/tcp/tcp_connect.c (limited to 'source4/cluster/ctdb/tcp/tcp_connect.c') diff --git a/source4/cluster/ctdb/tcp/tcp_connect.c b/source4/cluster/ctdb/tcp/tcp_connect.c new file mode 100644 index 0000000000..2404144ac1 --- /dev/null +++ b/source4/cluster/ctdb/tcp/tcp_connect.c @@ -0,0 +1,191 @@ +/* + ctdb over TCP + + Copyright (C) Andrew Tridgell 2006 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include "includes.h" +#include "lib/events/events.h" +#include "lib/tdb/include/tdb.h" +#include "system/network.h" +#include "system/filesys.h" +#include "cluster/ctdb/include/ctdb_private.h" +#include "ctdb_tcp.h" + +static void set_nonblocking(int fd) +{ + unsigned v; + v = fcntl(fd, F_GETFL, 0); + fcntl(fd, F_SETFL, v | O_NONBLOCK); +} + + +/* + called when socket becomes writeable on connect +*/ +static void ctdb_node_connect_write(struct event_context *ev, struct fd_event *fde, + uint16_t flags, void *private) +{ + struct ctdb_node *node = talloc_get_type(private, struct ctdb_node); + struct ctdb_tcp_node *tnode = talloc_get_type(node->private, + struct ctdb_tcp_node); + struct ctdb_context *ctdb = node->ctdb; + int error = 0; + socklen_t len = sizeof(error); + + if (getsockopt(tnode->fd, SOL_SOCKET, SO_ERROR, &error, &len) != 0 || + error != 0) { + talloc_free(fde); + close(tnode->fd); + tnode->fd = -1; + event_add_timed(ctdb->ev, node, timeval_current_ofs(1, 0), + ctdb_tcp_node_connect, node); + return; + } + + talloc_free(fde); + tnode->fde = event_add_fd(node->ctdb->ev, node, tnode->fd, EVENT_FD_READ, + ctdb_tcp_node_write, node); + + /* tell the ctdb layer we are connected */ + node->ctdb->upcalls->node_connected(node); + + if (tnode->queue) { + EVENT_FD_WRITEABLE(tnode->fde); + } +} + +/* + called when we should try and establish a tcp connection to a node +*/ +void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te, + struct timeval t, void *private) +{ + struct ctdb_node *node = talloc_get_type(private, struct ctdb_node); + struct ctdb_tcp_node *tnode = talloc_get_type(node->private, + struct ctdb_tcp_node); + struct ctdb_context *ctdb = node->ctdb; + struct sockaddr_in sock_out; + + tnode->fd = socket(PF_INET, 
SOCK_STREAM, IPPROTO_TCP); + + set_nonblocking(tnode->fd); + + inet_pton(AF_INET, node->address.address, &sock_out.sin_addr); + sock_out.sin_port = htons(node->address.port); + sock_out.sin_family = PF_INET; + + if (connect(tnode->fd, (struct sockaddr *)&sock_out, sizeof(sock_out)) != 0 && + errno != EINPROGRESS) { + /* try again once a second */ + close(tnode->fd); + event_add_timed(ctdb->ev, node, timeval_current_ofs(1, 0), + ctdb_tcp_node_connect, node); + return; + } + + /* non-blocking connect - wait for write event */ + event_add_fd(node->ctdb->ev, node, tnode->fd, EVENT_FD_WRITE|EVENT_FD_READ, + ctdb_node_connect_write, node); +} + +/* + destroy a ctdb_incoming structure +*/ +static int ctdb_incoming_destructor(struct ctdb_incoming *in) +{ + close(in->fd); + in->fd = -1; + return 0; +} + +/* + called when we get contacted by another node + currently makes no attempt to check if the connection is really from a ctdb + node in our cluster +*/ +static void ctdb_listen_event(struct event_context *ev, struct fd_event *fde, + uint16_t flags, void *private) +{ + struct ctdb_context *ctdb; + struct ctdb_tcp *ctcp; + struct sockaddr_in addr; + socklen_t len; + int fd; + struct ctdb_incoming *in; + + ctdb = talloc_get_type(private, struct ctdb_context); + ctcp = talloc_get_type(ctdb->private, struct ctdb_tcp); + memset(&addr, 0, sizeof(addr)); + len = sizeof(addr); + fd = accept(ctcp->listen_fd, (struct sockaddr *)&addr, &len); + if (fd == -1) return; + + in = talloc_zero(ctdb, struct ctdb_incoming); + in->fd = fd; + in->ctdb = ctdb; + + set_nonblocking(in->fd); + + event_add_fd(ctdb->ev, in, in->fd, EVENT_FD_READ, + ctdb_tcp_incoming_read, in); + + talloc_set_destructor(in, ctdb_incoming_destructor); +} + + +/* + listen on our own address +*/ +int ctdb_tcp_listen(struct ctdb_context *ctdb) +{ + struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private, struct ctdb_tcp); + struct sockaddr_in sock; + int one = 1; + + sock.sin_port = htons(ctdb->address.port); + 
sock.sin_family = PF_INET; + inet_pton(AF_INET, ctdb->address.address, &sock.sin_addr); + + ctcp->listen_fd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + if (ctcp->listen_fd == -1) { + ctdb_set_error(ctdb, "socket failed\n"); + return -1; + } + + setsockopt(ctcp->listen_fd,SOL_SOCKET,SO_REUSEADDR,(char *)&one,sizeof(one)); + + if (bind(ctcp->listen_fd, (struct sockaddr * )&sock, sizeof(sock)) != 0) { + ctdb_set_error(ctdb, "bind failed\n"); + close(ctcp->listen_fd); + ctcp->listen_fd = -1; + return -1; + } + + if (listen(ctcp->listen_fd, 10) == -1) { + ctdb_set_error(ctdb, "listen failed\n"); + close(ctcp->listen_fd); + ctcp->listen_fd = -1; + return -1; + } + + event_add_fd(ctdb->ev, ctdb, ctcp->listen_fd, EVENT_FD_READ, + ctdb_listen_event, ctdb); + + return 0; +} + -- cgit From e7d0d22806e249e315c0cb6ebed4caa93b80e8e5 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 24 Jan 2007 04:34:19 +0000 Subject: r20991: use relative includes for ctdb headers. This works with both standalone and built-in ctdb (This used to be commit 3e5f29bddfd83914eeec706367b2b1bd30aba31e) --- source4/cluster/ctdb/tcp/tcp_connect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'source4/cluster/ctdb/tcp/tcp_connect.c') diff --git a/source4/cluster/ctdb/tcp/tcp_connect.c b/source4/cluster/ctdb/tcp/tcp_connect.c index 2404144ac1..e828bb7cbb 100644 --- a/source4/cluster/ctdb/tcp/tcp_connect.c +++ b/source4/cluster/ctdb/tcp/tcp_connect.c @@ -23,7 +23,7 @@ #include "lib/tdb/include/tdb.h" #include "system/network.h" #include "system/filesys.h" -#include "cluster/ctdb/include/ctdb_private.h" +#include "../include/ctdb_private.h" #include "ctdb_tcp.h" static void set_nonblocking(int fd) -- cgit From a78be2150ba06738f4c7e85ca2980200d4a3c533 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 4 Apr 2007 04:57:30 +0000 Subject: r22070: merge in the changes from the bzr ctdb tree, and convert the brlock ctdb backend to use the updated multi-database API (This used to 
be commit 44dcac9e4d81bfc078512248967b6240db9d1bd8) --- source4/cluster/ctdb/tcp/tcp_connect.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'source4/cluster/ctdb/tcp/tcp_connect.c') diff --git a/source4/cluster/ctdb/tcp/tcp_connect.c b/source4/cluster/ctdb/tcp/tcp_connect.c index e828bb7cbb..fe0fc210ba 100644 --- a/source4/cluster/ctdb/tcp/tcp_connect.c +++ b/source4/cluster/ctdb/tcp/tcp_connect.c @@ -46,6 +46,7 @@ static void ctdb_node_connect_write(struct event_context *ev, struct fd_event *f struct ctdb_context *ctdb = node->ctdb; int error = 0; socklen_t len = sizeof(error); + int one = 1; if (getsockopt(tnode->fd, SOL_SOCKET, SO_ERROR, &error, &len) != 0 || error != 0) { @@ -64,11 +65,29 @@ static void ctdb_node_connect_write(struct event_context *ev, struct fd_event *f /* tell the ctdb layer we are connected */ node->ctdb->upcalls->node_connected(node); + setsockopt(tnode->fd,IPPROTO_TCP,TCP_NODELAY,(char *)&one,sizeof(one)); + if (tnode->queue) { EVENT_FD_WRITEABLE(tnode->fde); } } + +static int ctdb_tcp_get_address(struct ctdb_context *ctdb, + const char *address, struct in_addr *addr) +{ + if (inet_pton(AF_INET, address, addr) <= 0) { + struct hostent *he = gethostbyname(address); + if (he == NULL || he->h_length > sizeof(*addr)) { + ctdb_set_error(ctdb, "invalid nework address '%s'\n", + address); + return -1; + } + memcpy(addr, he->h_addr, he->h_length); + } + return 0; +} + /* called when we should try and establish a tcp connection to a node */ @@ -85,7 +104,9 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te, set_nonblocking(tnode->fd); - inet_pton(AF_INET, node->address.address, &sock_out.sin_addr); + if (ctdb_tcp_get_address(ctdb, node->address.address, &sock_out.sin_addr) != 0) { + return; + } sock_out.sin_port = htons(node->address.port); sock_out.sin_family = PF_INET; @@ -159,7 +180,9 @@ int ctdb_tcp_listen(struct ctdb_context *ctdb) sock.sin_port = 
htons(ctdb->address.port); sock.sin_family = PF_INET; - inet_pton(AF_INET, ctdb->address.address, &sock.sin_addr); + if (ctdb_tcp_get_address(ctdb, ctdb->address.address, &sock.sin_addr) != 0) { + return -1; + } ctcp->listen_fd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); if (ctcp->listen_fd == -1) { -- cgit From a9044f92c9f6eb74c5f7cb56f43a6f8ea25ccb2a Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 6 Apr 2007 04:43:54 +0000 Subject: r22098: merged from bzr tree (This used to be commit a57af1ff8234ab590ebf28b3316953bec880005c) --- source4/cluster/ctdb/tcp/tcp_connect.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'source4/cluster/ctdb/tcp/tcp_connect.c') diff --git a/source4/cluster/ctdb/tcp/tcp_connect.c b/source4/cluster/ctdb/tcp/tcp_connect.c index fe0fc210ba..85fffc2f70 100644 --- a/source4/cluster/ctdb/tcp/tcp_connect.c +++ b/source4/cluster/ctdb/tcp/tcp_connect.c @@ -98,6 +98,7 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te, struct ctdb_tcp_node *tnode = talloc_get_type(node->private, struct ctdb_tcp_node); struct ctdb_context *ctdb = node->ctdb; + struct sockaddr_in sock_in; struct sockaddr_in sock_out; tnode->fd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); @@ -109,7 +110,21 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te, } sock_out.sin_port = htons(node->address.port); sock_out.sin_family = PF_INET; - + + + /* Bind our side of the socketpair to the same address we use to listen + * on incoming CTDB traffic. + * We must specify this address to make sure that the address we expose to + * the remote side is actually routable in case CTDB traffic will run on + * a dedicated non-routeable network. 
+ */ + if (ctdb_tcp_get_address(ctdb, ctdb->address.address, &sock_in.sin_addr) != 0) { + return; + } + sock_in.sin_port = htons(0); /* INPORT_ANY is not always available */ + sock_in.sin_family = PF_INET; + bind(tnode->fd, (struct sockaddr *)&sock_in, sizeof(sock_in)); + if (connect(tnode->fd, (struct sockaddr *)&sock_out, sizeof(sock_out)) != 0 && errno != EINPROGRESS) { /* try again once a second */ -- cgit From c9f04d8648cfdd573d45d47467bc964ef01f754d Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Mon, 16 Apr 2007 00:18:54 +0000 Subject: r22231: merge from bzr ctdb tree (This used to be commit 807b959082d3b9a929c9f6597714e636638a940e) --- source4/cluster/ctdb/tcp/tcp_connect.c | 57 ++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 20 deletions(-) (limited to 'source4/cluster/ctdb/tcp/tcp_connect.c') diff --git a/source4/cluster/ctdb/tcp/tcp_connect.c b/source4/cluster/ctdb/tcp/tcp_connect.c index 85fffc2f70..a1f2d331cf 100644 --- a/source4/cluster/ctdb/tcp/tcp_connect.c +++ b/source4/cluster/ctdb/tcp/tcp_connect.c @@ -34,14 +34,33 @@ static void set_nonblocking(int fd) } +/* + called when a complete packet has come in - should not happen on this socket + */ +void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data) +{ + struct ctdb_node *node = talloc_get_type(private_data, struct ctdb_node); + struct ctdb_tcp_node *tnode = talloc_get_type( + node->private_data, struct ctdb_tcp_node); + + /* start a new connect cycle to try to re-establish the + link */ + close(tnode->fd); + ctdb_queue_set_fd(tnode->queue, -1); + tnode->fd = -1; + event_add_timed(node->ctdb->ev, node, timeval_zero(), + ctdb_tcp_node_connect, node); +} + /* called when socket becomes writeable on connect */ static void ctdb_node_connect_write(struct event_context *ev, struct fd_event *fde, - uint16_t flags, void *private) + uint16_t flags, void *private_data) { - struct ctdb_node *node = talloc_get_type(private, struct ctdb_node); - struct ctdb_tcp_node 
*tnode = talloc_get_type(node->private, + struct ctdb_node *node = talloc_get_type(private_data, + struct ctdb_node); + struct ctdb_tcp_node *tnode = talloc_get_type(node->private_data, struct ctdb_tcp_node); struct ctdb_context *ctdb = node->ctdb; int error = 0; @@ -59,17 +78,13 @@ static void ctdb_node_connect_write(struct event_context *ev, struct fd_event *f } talloc_free(fde); - tnode->fde = event_add_fd(node->ctdb->ev, node, tnode->fd, EVENT_FD_READ, - ctdb_tcp_node_write, node); + + setsockopt(tnode->fd,IPPROTO_TCP,TCP_NODELAY,(char *)&one,sizeof(one)); + + ctdb_queue_set_fd(tnode->queue, tnode->fd); /* tell the ctdb layer we are connected */ node->ctdb->upcalls->node_connected(node); - - setsockopt(tnode->fd,IPPROTO_TCP,TCP_NODELAY,(char *)&one,sizeof(one)); - - if (tnode->queue) { - EVENT_FD_WRITEABLE(tnode->fde); - } } @@ -92,10 +107,11 @@ static int ctdb_tcp_get_address(struct ctdb_context *ctdb, called when we should try and establish a tcp connection to a node */ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te, - struct timeval t, void *private) + struct timeval t, void *private_data) { - struct ctdb_node *node = talloc_get_type(private, struct ctdb_node); - struct ctdb_tcp_node *tnode = talloc_get_type(node->private, + struct ctdb_node *node = talloc_get_type(private_data, + struct ctdb_node); + struct ctdb_tcp_node *tnode = talloc_get_type(node->private_data, struct ctdb_tcp_node); struct ctdb_context *ctdb = node->ctdb; struct sockaddr_in sock_in; @@ -155,7 +171,7 @@ static int ctdb_incoming_destructor(struct ctdb_incoming *in) node in our cluster */ static void ctdb_listen_event(struct event_context *ev, struct fd_event *fde, - uint16_t flags, void *private) + uint16_t flags, void *private_data) { struct ctdb_context *ctdb; struct ctdb_tcp *ctcp; @@ -164,8 +180,8 @@ static void ctdb_listen_event(struct event_context *ev, struct fd_event *fde, int fd; struct ctdb_incoming *in; - ctdb = talloc_get_type(private, struct 
ctdb_context); - ctcp = talloc_get_type(ctdb->private, struct ctdb_tcp); + ctdb = talloc_get_type(private_data, struct ctdb_context); + ctcp = talloc_get_type(ctdb->private_data, struct ctdb_tcp); memset(&addr, 0, sizeof(addr)); len = sizeof(addr); fd = accept(ctcp->listen_fd, (struct sockaddr *)&addr, &len); @@ -177,8 +193,8 @@ static void ctdb_listen_event(struct event_context *ev, struct fd_event *fde, set_nonblocking(in->fd); - event_add_fd(ctdb->ev, in, in->fd, EVENT_FD_READ, - ctdb_tcp_incoming_read, in); + in->queue = ctdb_queue_setup(ctdb, in, in->fd, CTDB_TCP_ALIGNMENT, + ctdb_tcp_read_cb, in); talloc_set_destructor(in, ctdb_incoming_destructor); } @@ -189,7 +205,8 @@ static void ctdb_listen_event(struct event_context *ev, struct fd_event *fde, */ int ctdb_tcp_listen(struct ctdb_context *ctdb) { - struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private, struct ctdb_tcp); + struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data, + struct ctdb_tcp); struct sockaddr_in sock; int one = 1; -- cgit From 650d81b252cc669ef848448afad7e9bb79c4f20e Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Sat, 21 Apr 2007 07:23:42 +0000 Subject: r22421: merged in latest ctdb changes from bzr (This used to be commit 3633f862b966866819c9a0a6ad0238a858e15e62) --- source4/cluster/ctdb/tcp/tcp_connect.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'source4/cluster/ctdb/tcp/tcp_connect.c') diff --git a/source4/cluster/ctdb/tcp/tcp_connect.c b/source4/cluster/ctdb/tcp/tcp_connect.c index a1f2d331cf..4d9d8e8386 100644 --- a/source4/cluster/ctdb/tcp/tcp_connect.c +++ b/source4/cluster/ctdb/tcp/tcp_connect.c @@ -43,6 +43,10 @@ void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data) struct ctdb_tcp_node *tnode = talloc_get_type( node->private_data, struct ctdb_tcp_node); + if (data == NULL) { + node->ctdb->upcalls->node_dead(node); + } + /* start a new connect cycle to try to re-establish the link */ close(tnode->fd); -- cgit From 
b8d69a7ea2505b706ff7c74d7c97bc89d82dfa07 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Tue, 10 Jul 2007 02:46:15 +0000 Subject: r23795: more v2->v3 conversion (This used to be commit 84b468b2f8f2dffda89593f816e8bc6a8b6d42ac) --- source4/cluster/ctdb/tcp/tcp_connect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'source4/cluster/ctdb/tcp/tcp_connect.c') diff --git a/source4/cluster/ctdb/tcp/tcp_connect.c b/source4/cluster/ctdb/tcp/tcp_connect.c index 4d9d8e8386..4ab1dae8fa 100644 --- a/source4/cluster/ctdb/tcp/tcp_connect.c +++ b/source4/cluster/ctdb/tcp/tcp_connect.c @@ -6,7 +6,7 @@ This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. + version 3 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of -- cgit From 6c973f4e8ccbcb6c9275f8a54e26abb19df7e15a Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Tue, 10 Jul 2007 03:42:26 +0000 Subject: r23798: updated old Temple Place FSF addresses to new URL (This used to be commit 40c0919aaa9c1b14bbaebb95ecce53eb0380fdbb) --- source4/cluster/ctdb/tcp/tcp_connect.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'source4/cluster/ctdb/tcp/tcp_connect.c') diff --git a/source4/cluster/ctdb/tcp/tcp_connect.c b/source4/cluster/ctdb/tcp/tcp_connect.c index 4ab1dae8fa..c0a0da5d47 100644 --- a/source4/cluster/ctdb/tcp/tcp_connect.c +++ b/source4/cluster/ctdb/tcp/tcp_connect.c @@ -14,8 +14,7 @@ Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + License along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include "includes.h" -- cgit From 6504900f1f52927adab3489b8d04b6644ceaee7d Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Tue, 10 Jul 2007 08:06:51 +0000 Subject: r23806: update Samba4 with the latest ctdb code. This doesn't get the ctdb code fully working in Samba4, it just gets it building and not breaking non-clustered use of Samba. It will take a bit longer to update some of the calling ctdb_cluster.c code to make it work correctly in Samba4. Note also that Samba4 now only links to the client portion of ctdb. For the moment I am leaving the ctdbd as a separate daemon, which you install separately from http://ctdb.samba.org/. (This used to be commit b196077cbb55cbecad87065133c2d67198e31066) --- source4/cluster/ctdb/tcp/tcp_connect.c | 247 ++++++++++++++++++++++++--------- 1 file changed, 184 insertions(+), 63 deletions(-) (limited to 'source4/cluster/ctdb/tcp/tcp_connect.c') diff --git a/source4/cluster/ctdb/tcp/tcp_connect.c b/source4/cluster/ctdb/tcp/tcp_connect.c index c0a0da5d47..2f828e5717 100644 --- a/source4/cluster/ctdb/tcp/tcp_connect.c +++ b/source4/cluster/ctdb/tcp/tcp_connect.c @@ -3,18 +3,18 @@ Copyright (C) Andrew Tridgell 2006 - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 3 of the License, or (at your option) any later version. 
- - This library is distributed in the hope that it will be useful, + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, see <http://www.gnu.org/licenses/>. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "includes.h" @@ -25,14 +25,6 @@ #include "../include/ctdb_private.h" #include "ctdb_tcp.h" -static void set_nonblocking(int fd) -{ - unsigned v; - v = fcntl(fd, F_GETFL, 0); - fcntl(fd, F_SETFL, v | O_NONBLOCK); -} - - /* called when a complete packet has come in - should not happen on this socket */ @@ -48,10 +40,9 @@ void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data) /* start a new connect cycle to try to re-establish the link */ - close(tnode->fd); - ctdb_queue_set_fd(tnode->queue, -1); + ctdb_queue_set_fd(tnode->out_queue, -1); tnode->fd = -1; - event_add_timed(node->ctdb->ev, node, timeval_zero(), + event_add_timed(node->ctdb->ev, tnode, timeval_zero(), ctdb_tcp_node_connect, node); } @@ -70,12 +61,15 @@ static void ctdb_node_connect_write(struct event_context *ev, struct fd_event *f socklen_t len = sizeof(error); int one = 1; + talloc_free(tnode->connect_te); + tnode->connect_te = NULL; + if (getsockopt(tnode->fd, SOL_SOCKET, SO_ERROR, &error, &len) != 0 || error != 0) { talloc_free(fde); close(tnode->fd); tnode->fd = -1; - 
event_add_timed(ctdb->ev, node, timeval_current_ofs(1, 0), + event_add_timed(ctdb->ev, tnode, timeval_current_ofs(1, 0), ctdb_tcp_node_connect, node); return; } @@ -83,8 +77,9 @@ static void ctdb_node_connect_write(struct event_context *ev, struct fd_event *f talloc_free(fde); setsockopt(tnode->fd,IPPROTO_TCP,TCP_NODELAY,(char *)&one,sizeof(one)); + setsockopt(tnode->fd,SOL_SOCKET,SO_KEEPALIVE,(char *)&one,sizeof(one)); - ctdb_queue_set_fd(tnode->queue, tnode->fd); + ctdb_queue_set_fd(tnode->out_queue, tnode->fd); /* tell the ctdb layer we are connected */ node->ctdb->upcalls->node_connected(node); @@ -120,10 +115,22 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te, struct sockaddr_in sock_in; struct sockaddr_in sock_out; + if (tnode->fd != -1) { + talloc_free(tnode->connect_fde); + tnode->connect_fde = NULL; + close(tnode->fd); + tnode->fd = -1; + } + tnode->fd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); set_nonblocking(tnode->fd); + set_close_on_exec(tnode->fd); + ZERO_STRUCT(sock_out); +#ifdef HAVE_SOCK_SIN_LEN + sock_out.sin_len = sizeof(sock_out); +#endif if (ctdb_tcp_get_address(ctdb, node->address.address, &sock_out.sin_addr) != 0) { return; } @@ -137,6 +144,10 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te, * the remote side is actually routable in case CTDB traffic will run on * a dedicated non-routeable network. 
*/ + ZERO_STRUCT(sock_in); +#ifdef HAVE_SOCK_SIN_LEN + sock_in.sin_len = sizeof(sock_in); +#endif if (ctdb_tcp_get_address(ctdb, ctdb->address.address, &sock_in.sin_addr) != 0) { return; } @@ -148,24 +159,22 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te, errno != EINPROGRESS) { /* try again once a second */ close(tnode->fd); - event_add_timed(ctdb->ev, node, timeval_current_ofs(1, 0), + tnode->fd = -1; + event_add_timed(ctdb->ev, tnode, timeval_current_ofs(1, 0), ctdb_tcp_node_connect, node); return; } /* non-blocking connect - wait for write event */ - event_add_fd(node->ctdb->ev, node, tnode->fd, EVENT_FD_WRITE|EVENT_FD_READ, - ctdb_node_connect_write, node); -} - -/* - destroy a ctdb_incoming structure -*/ -static int ctdb_incoming_destructor(struct ctdb_incoming *in) -{ - close(in->fd); - in->fd = -1; - return 0; + tnode->connect_fde = event_add_fd(node->ctdb->ev, tnode, tnode->fd, + EVENT_FD_WRITE|EVENT_FD_READ, + ctdb_node_connect_write, node); + + /* don't give it long to connect - retry in one second. 
This ensures + that we find a node is up quickly (tcp normally backs off a syn reply + delay by quite a lot) */ + tnode->connect_te = event_add_timed(ctdb->ev, tnode, timeval_current_ofs(1, 0), + ctdb_tcp_node_connect, node); } /* @@ -176,30 +185,129 @@ static int ctdb_incoming_destructor(struct ctdb_incoming *in) static void ctdb_listen_event(struct event_context *ev, struct fd_event *fde, uint16_t flags, void *private_data) { - struct ctdb_context *ctdb; - struct ctdb_tcp *ctcp; + struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); + struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data, struct ctdb_tcp); struct sockaddr_in addr; socklen_t len; - int fd; + int fd, nodeid; struct ctdb_incoming *in; + int one = 1; + const char *incoming_node; - ctdb = talloc_get_type(private_data, struct ctdb_context); - ctcp = talloc_get_type(ctdb->private_data, struct ctdb_tcp); memset(&addr, 0, sizeof(addr)); len = sizeof(addr); fd = accept(ctcp->listen_fd, (struct sockaddr *)&addr, &len); if (fd == -1) return; - in = talloc_zero(ctdb, struct ctdb_incoming); + incoming_node = inet_ntoa(addr.sin_addr); + for (nodeid=0;nodeid<ctdb->num_nodes;nodeid++) { + if (!strcmp(incoming_node, ctdb->nodes[nodeid]->address.address)) { + DEBUG(0, ("Incoming connection from node:%d %s\n",nodeid,incoming_node)); + break; + } + } + if (nodeid>=ctdb->num_nodes) { + DEBUG(0, ("Refused connection from unknown node %s\n", incoming_node)); + close(fd); + return; + } + + in = talloc_zero(ctcp, struct ctdb_incoming); in->fd = fd; in->ctdb = ctdb; set_nonblocking(in->fd); + set_close_on_exec(in->fd); + + setsockopt(in->fd,SOL_SOCKET,SO_KEEPALIVE,(char *)&one,sizeof(one)); in->queue = ctdb_queue_setup(ctdb, in, in->fd, CTDB_TCP_ALIGNMENT, ctdb_tcp_read_cb, in); +} + + +/* + automatically find which address to listen on +*/ +static int ctdb_tcp_listen_automatic(struct ctdb_context *ctdb) +{ + struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data, + struct ctdb_tcp); + struct 
sockaddr_in sock; + int lock_fd, i; + const char *lock_path = "/tmp/.ctdb_socket_lock"; + struct flock lock; + + /* in order to ensure that we don't get two nodes with the + same adddress, we must make the bind() and listen() calls + atomic. The SO_REUSEADDR setsockopt only prevents double + binds if the first socket is in LISTEN state */ + lock_fd = open(lock_path, O_RDWR|O_CREAT, 0666); + if (lock_fd == -1) { + DEBUG(0,("Unable to open %s\n", lock_path)); + return -1; + } + + lock.l_type = F_WRLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 1; + lock.l_pid = 0; + + if (fcntl(lock_fd, F_SETLKW, &lock) != 0) { + DEBUG(0,("Unable to lock %s\n", lock_path)); + close(lock_fd); + return -1; + } + + for (i=0;i<ctdb->num_nodes;i++) { + ZERO_STRUCT(sock); +#ifdef HAVE_SOCK_SIN_LEN + sock.sin_len = sizeof(sock); +#endif + sock.sin_port = htons(ctdb->nodes[i]->address.port); + sock.sin_family = PF_INET; + if (ctdb_tcp_get_address(ctdb, ctdb->nodes[i]->address.address, + &sock.sin_addr) != 0) { + continue; + } + + if (bind(ctcp->listen_fd, (struct sockaddr * )&sock, + sizeof(sock)) == 0) { + break; + } + } + + if (i == ctdb->num_nodes) { + DEBUG(0,("Unable to bind to any of the node addresses - giving up\n")); + goto failed; + } + ctdb->address = ctdb->nodes[i]->address; + ctdb->name = talloc_asprintf(ctdb, "%s:%u", + ctdb->address.address, + ctdb->address.port); + ctdb->vnn = ctdb->nodes[i]->vnn; + ctdb->nodes[i]->flags &= ~NODE_FLAGS_DISCONNECTED; + DEBUG(1,("ctdb chose network address %s:%u vnn %u\n", + ctdb->address.address, + ctdb->address.port, + ctdb->vnn)); + + if (listen(ctcp->listen_fd, 10) == -1) { + goto failed; + } + + event_add_fd(ctdb->ev, ctcp, ctcp->listen_fd, EVENT_FD_READ|EVENT_FD_AUTOCLOSE, + ctdb_listen_event, ctdb); - talloc_set_destructor(in, ctdb_incoming_destructor); + close(lock_fd); + return 0; + +failed: + close(lock_fd); + close(ctcp->listen_fd); + ctcp->listen_fd = -1; + return -1; } @@ -213,37 +321,50 @@ int ctdb_tcp_listen(struct 
ctdb_context *ctdb) struct sockaddr_in sock; int one = 1; - sock.sin_port = htons(ctdb->address.port); - sock.sin_family = PF_INET; - if (ctdb_tcp_get_address(ctdb, ctdb->address.address, &sock.sin_addr) != 0) { + ctcp->listen_fd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + if (ctcp->listen_fd == -1) { + ctdb_set_error(ctdb, "socket failed\n"); return -1; } - ctcp->listen_fd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); - if (ctcp->listen_fd == -1) { - ctdb_set_error(ctdb, "socket failed\n"); - return -1; - } + set_close_on_exec(ctcp->listen_fd); setsockopt(ctcp->listen_fd,SOL_SOCKET,SO_REUSEADDR,(char *)&one,sizeof(one)); - if (bind(ctcp->listen_fd, (struct sockaddr * )&sock, sizeof(sock)) != 0) { - ctdb_set_error(ctdb, "bind failed\n"); - close(ctcp->listen_fd); - ctcp->listen_fd = -1; - return -1; - } + /* we can either auto-bind to the first available address, or we can + use a specified address */ + if (!ctdb->address.address) { + return ctdb_tcp_listen_automatic(ctdb); + } + + ZERO_STRUCT(sock); +#ifdef HAVE_SOCK_SIN_LEN + sock.sin_len = sizeof(sock); +#endif + sock.sin_port = htons(ctdb->address.port); + sock.sin_family = PF_INET; + + if (ctdb_tcp_get_address(ctdb, ctdb->address.address, + &sock.sin_addr) != 0) { + goto failed; + } + + if (bind(ctcp->listen_fd, (struct sockaddr * )&sock, sizeof(sock)) != 0) { + goto failed; + } if (listen(ctcp->listen_fd, 10) == -1) { - ctdb_set_error(ctdb, "listen failed\n"); - close(ctcp->listen_fd); - ctcp->listen_fd = -1; - return -1; + goto failed; } - event_add_fd(ctdb->ev, ctdb, ctcp->listen_fd, EVENT_FD_READ, + event_add_fd(ctdb->ev, ctcp, ctcp->listen_fd, EVENT_FD_READ|EVENT_FD_AUTOCLOSE, ctdb_listen_event, ctdb); return 0; + +failed: + close(ctcp->listen_fd); + ctcp->listen_fd = -1; + return -1; } -- cgit