diff options
author | Andrew Tridgell <tridge@samba.org> | 2007-07-10 08:06:51 +0000 |
---|---|---|
committer | Gerald (Jerry) Carter <jerry@samba.org> | 2007-10-10 14:59:20 -0500 |
commit | 6504900f1f52927adab3489b8d04b6644ceaee7d (patch) | |
tree | 893cadba231a529f320046429767cedf8b0eec30 /source4/cluster/ctdb/takeover | |
parent | d4e2d157cbce5a1cdc75efb5ced75d9608a0f7df (diff) | |
download | samba-6504900f1f52927adab3489b8d04b6644ceaee7d.tar.gz samba-6504900f1f52927adab3489b8d04b6644ceaee7d.tar.bz2 samba-6504900f1f52927adab3489b8d04b6644ceaee7d.zip |
r23806: update Samba4 with the latest ctdb code.
This doesn't get the ctdb code fully working in Samba4, it just gets
it building and not breaking non-clustered use of Samba. It will take
a bit longer to update some of the calling ctdb_cluster.c code to make
it work correctly in Samba4.
Note also that Samba4 now only links to the client portion of
ctdb. For the moment I am leaving the ctdbd as a separate daemon,
which you install separately from http://ctdb.samba.org/.
(This used to be commit b196077cbb55cbecad87065133c2d67198e31066)
Diffstat (limited to 'source4/cluster/ctdb/takeover')
-rw-r--r-- | source4/cluster/ctdb/takeover/ctdb_takeover.c | 583 | ||||
-rw-r--r-- | source4/cluster/ctdb/takeover/system.c | 381 |
2 files changed, 964 insertions, 0 deletions
diff --git a/source4/cluster/ctdb/takeover/ctdb_takeover.c b/source4/cluster/ctdb/takeover/ctdb_takeover.c new file mode 100644 index 0000000000..862382d90a --- /dev/null +++ b/source4/cluster/ctdb/takeover/ctdb_takeover.c @@ -0,0 +1,583 @@ +/* + ctdb recovery code + + Copyright (C) Ronnie Sahlberg 2007 + Copyright (C) Andrew Tridgell 2007 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ +#include "includes.h" +#include "lib/events/events.h" +#include "lib/tdb/include/tdb.h" +#include "system/network.h" +#include "system/filesys.h" +#include "system/wait.h" +#include "../include/ctdb_private.h" + + +#define TAKEOVER_TIMEOUT() timeval_current_ofs(5,0) + +#define CTDB_ARP_INTERVAL 1 +#define CTDB_ARP_REPEAT 3 + +struct ctdb_takeover_arp { + struct ctdb_context *ctdb; + uint32_t count; + struct sockaddr_in sin; + struct ctdb_tcp_list *tcp_list; +}; + +/* + lists of tcp endpoints + */ +struct ctdb_tcp_list { + struct ctdb_tcp_list *prev, *next; + uint32_t vnn; + struct sockaddr_in saddr; + struct sockaddr_in daddr; +}; + + + +/* + send a gratuitous arp + */ +static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, + struct timeval t, void *private_data) +{ + struct ctdb_takeover_arp *arp = talloc_get_type(private_data, + struct ctdb_takeover_arp); + int ret; + struct ctdb_tcp_list *tcp; + + ret = ctdb_sys_send_arp(&arp->sin, arp->ctdb->takeover.interface); + if (ret != 0) { + DEBUG(0,(__location__ "sending of arp failed (%s)\n", strerror(errno))); + } + + for (tcp=arp->tcp_list;tcp;tcp=tcp->next) { + DEBUG(2,("sending tcp tickle ack for %u->%s:%u\n", + (unsigned)ntohs(tcp->daddr.sin_port), + inet_ntoa(tcp->saddr.sin_addr), + (unsigned)ntohs(tcp->saddr.sin_port))); + ret = ctdb_sys_send_ack(&tcp->saddr, &tcp->daddr); + if (ret != 0) { + DEBUG(0,(__location__ " Failed to send tcp tickle ack for %s\n", + inet_ntoa(tcp->saddr.sin_addr))); + } + } + + arp->count++; + + if (arp->count == CTDB_ARP_REPEAT) { + talloc_free(arp); + return; + } + + event_add_timed(arp->ctdb->ev, arp->ctdb->takeover.last_ctx, + timeval_current_ofs(CTDB_ARP_INTERVAL, 0), + ctdb_control_send_arp, arp); +} + + +/* + take over an ip address + */ +int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, TDB_DATA indata) +{ + int ret; + struct sockaddr_in *sin = (struct sockaddr_in *)indata.dptr; + struct ctdb_takeover_arp *arp; + char *ip = inet_ntoa(sin->sin_addr); + struct ctdb_tcp_list *tcp; + + if (ctdb_sys_have_ip(ip)) { + return 0; + } + + DEBUG(0,("Takover of IP %s/%u on interface %s\n", + ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits, + ctdb->takeover.interface)); + ret = ctdb_event_script(ctdb, "takeip %s %s %u", + ctdb->takeover.interface, + ip, + ctdb->nodes[ctdb->vnn]->public_netmask_bits); + if (ret != 0) { + DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n", + ip, ctdb->takeover.interface)); + return -1; + } + + if (!ctdb->takeover.last_ctx) { + ctdb->takeover.last_ctx = talloc_new(ctdb); + CTDB_NO_MEMORY(ctdb, ctdb->takeover.last_ctx); + } + + arp = talloc_zero(ctdb->takeover.last_ctx, struct ctdb_takeover_arp); + CTDB_NO_MEMORY(ctdb, arp); + + arp->ctdb = ctdb; + arp->sin = *sin; + + /* add all of the known tcp connections for this IP to the + list of tcp connections to send tickle acks for */ + for (tcp=ctdb->tcp_list;tcp;tcp=tcp->next) { + if (sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) { + struct ctdb_tcp_list *t2 = talloc(arp, struct ctdb_tcp_list); + CTDB_NO_MEMORY(ctdb, t2); + *t2 = *tcp; + DLIST_ADD(arp->tcp_list, t2); + } + } + + event_add_timed(arp->ctdb->ev, arp->ctdb->takeover.last_ctx, + timeval_zero(), ctdb_control_send_arp, arp); + + return ret; +} + +/* + release an ip address + */ +int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, TDB_DATA indata) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)indata.dptr; + TDB_DATA data; + char *ip = inet_ntoa(sin->sin_addr); + int ret; + struct ctdb_tcp_list *tcp; + + if (!ctdb_sys_have_ip(ip)) { + return 0; + } + + DEBUG(0,("Release of IP %s/%u on interface %s\n", + ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits, + ctdb->takeover.interface)); + + /* stop any previous arps */ + talloc_free(ctdb->takeover.last_ctx); + ctdb->takeover.last_ctx = NULL; + + ret = ctdb_event_script(ctdb, "releaseip %s %s %u", + ctdb->takeover.interface, + ip, + ctdb->nodes[ctdb->vnn]->public_netmask_bits); + if (ret != 0) { + DEBUG(0,(__location__ " Failed to release IP %s on interface %s\n", + ip, ctdb->takeover.interface)); + return -1; + } + + /* send a message to all clients of this node telling them + that the cluster has been reconfigured and they should + release any sockets on this IP */ + data.dptr = (uint8_t *)ip; + data.dsize = strlen(ip)+1; + + ctdb_daemon_send_message(ctdb, ctdb->vnn, CTDB_SRVID_RELEASE_IP, data); + + /* tell other nodes about any tcp connections we were holding with this IP */ + for (tcp=ctdb->tcp_list;tcp;tcp=tcp->next) { + if (tcp->vnn == ctdb->vnn && + sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) { + struct ctdb_control_tcp_vnn t; + + t.vnn = ctdb->vnn; + t.src = tcp->saddr; + t.dest = tcp->daddr; + + data.dptr = (uint8_t *)&t; + data.dsize = sizeof(t); + + ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_VNNMAP, 0, + CTDB_CONTROL_TCP_ADD, + 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL); + } + } + + + return 0; +} + + +/* + setup the event script +*/ +int ctdb_set_event_script(struct ctdb_context *ctdb, const char *script) +{ + ctdb->takeover.event_script = talloc_strdup(ctdb, script); + CTDB_NO_MEMORY(ctdb, ctdb->takeover.event_script); + return 0; +} + +/* + setup the public address list from a file +*/ +int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist) +{ + char **lines; + int nlines; + int i; + + lines = file_lines_load(alist, &nlines, ctdb); + if (lines == NULL) { + ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist); + return -1; + } + while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) { + nlines--; + } + + if (nlines != ctdb->num_nodes) { + DEBUG(0,("Number of lines in %s does not match number of nodes!\n", alist)); + talloc_free(lines); + return -1; + } + + for (i=0;i<nlines;i++) { + char *p; + struct in_addr in; + + ctdb->nodes[i]->public_address = talloc_strdup(ctdb->nodes[i], lines[i]); + CTDB_NO_MEMORY(ctdb, ctdb->nodes[i]->public_address); + ctdb->nodes[i]->takeover_vnn = -1; + + /* see if they supplied a netmask length */ + p = strchr(ctdb->nodes[i]->public_address, '/'); + if (!p) { + DEBUG(0,("You must supply a netmask for public address %s\n", + ctdb->nodes[i]->public_address)); + return -1; + } + *p = 0; + ctdb->nodes[i]->public_netmask_bits = atoi(p+1); + + if (ctdb->nodes[i]->public_netmask_bits > 32) { + DEBUG(0, ("Illegal netmask for IP %s\n", ctdb->nodes[i]->public_address)); + return -1; + } + + if (inet_aton(ctdb->nodes[i]->public_address, &in) == 0) { + DEBUG(0,("Badly formed IP '%s' in public address list\n", ctdb->nodes[i]->public_address)); + return -1; + } + } + + talloc_free(lines); + return 0; +} + +/* + see if two IPs are on the same subnet + */ +static bool ctdb_same_subnet(const char *ip1, const char *ip2, uint8_t netmask_bits) +{ + struct in_addr in1, in2; + uint32_t mask; + + inet_aton(ip1, &in1); + inet_aton(ip2, &in2); + + mask = ~((1LL<<(32-netmask_bits))-1); + + if ((ntohl(in1.s_addr) & mask) != (ntohl(in2.s_addr) & mask)) { + return false; + } + + return true; +} + +/* + make any IP alias changes for public addresses that are necessary + */ +int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap) +{ + int i, j; + int ret; + + /* work out which node will look after each public IP */ + for (i=0;i<nodemap->num;i++) { + if (nodemap->nodes[i].flags & NODE_FLAGS_CONNECTED) { + ctdb->nodes[i]->takeover_vnn = nodemap->nodes[i].vnn; + } else { + /* assign this dead nodes IP to the next higher node */ + for (j=(i+1)%nodemap->num; + j != i; + j=(j+1)%nodemap->num) { + if ((nodemap->nodes[j].flags & NODE_FLAGS_CONNECTED) && + ctdb_same_subnet(ctdb->nodes[j]->public_address, + ctdb->nodes[i]->public_address, + ctdb->nodes[j]->public_netmask_bits)) { + ctdb->nodes[i]->takeover_vnn = nodemap->nodes[j].vnn; + break; + } + } + if (j == i) { + DEBUG(0,(__location__ " No node available on same network to take %s\n", + ctdb->nodes[i]->public_address)); + ctdb->nodes[i]->takeover_vnn = -1; + } + } + } + + /* at this point ctdb->nodes[i]->takeover_vnn is the vnn which will own each IP */ + + + /* now tell all nodes to delete any alias that they should not + have. This will be a NOOP on nodes that don't currently + hold the given alias */ + for (i=0;i<nodemap->num;i++) { + /* don't talk to unconnected nodes */ + if (!(nodemap->nodes[i].flags & NODE_FLAGS_CONNECTED)) continue; + + /* tell this node to delete all of the aliases that it should not have */ + for (j=0;j<nodemap->num;j++) { + if (ctdb->nodes[j]->takeover_vnn != nodemap->nodes[i].vnn) { + ret = ctdb_ctrl_release_ip(ctdb, TAKEOVER_TIMEOUT(), + nodemap->nodes[i].vnn, + ctdb->nodes[j]->public_address); + if (ret != 0) { + DEBUG(0,("Failed to tell vnn %u to release IP %s\n", + nodemap->nodes[i].vnn, + ctdb->nodes[j]->public_address)); + return -1; + } + } + } + } + + /* tell all nodes to get their own IPs */ + for (i=0;i<nodemap->num;i++) { + ret = ctdb_ctrl_takeover_ip(ctdb, TAKEOVER_TIMEOUT(), + ctdb->nodes[i]->takeover_vnn, + ctdb->nodes[i]->public_address); + if (ret != 0) { + DEBUG(0,("Failed asking vnn %u to take over IP %s\n", + ctdb->nodes[i]->takeover_vnn, + ctdb->nodes[i]->public_address)); + return -1; + } + } + + return 0; +} + + +/* + called by a client to inform us of a TCP connection that it is managing + that should tickled with an ACK when IP takeover is done + */ +int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, uint32_t vnn, + TDB_DATA indata) +{ + struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client); + struct ctdb_control_tcp *p = (struct ctdb_control_tcp *)indata.dptr; + struct ctdb_tcp_list *tcp; + struct ctdb_control_tcp_vnn t; + int ret; + TDB_DATA data; + + tcp = talloc(client, struct ctdb_tcp_list); + CTDB_NO_MEMORY(ctdb, tcp); + + tcp->vnn = vnn; + tcp->saddr = p->src; + tcp->daddr = p->dest; + + DLIST_ADD(client->tcp_list, tcp); + + t.vnn = vnn; + t.src = p->src; + t.dest = p->dest; + + data.dptr = (uint8_t *)&t; + data.dsize = sizeof(t); + + /* tell all nodes about this tcp connection */ + ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_VNNMAP, 0, + CTDB_CONTROL_TCP_ADD, + 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL); + if (ret != 0) { + DEBUG(0,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n")); + return -1; + } + + return 0; +} + +/* + see if two sockaddr_in are the same + */ +static bool same_sockaddr_in(struct sockaddr_in *in1, struct sockaddr_in *in2) +{ + return in1->sin_family == in2->sin_family && + in1->sin_port == in2->sin_port && + in1->sin_addr.s_addr == in2->sin_addr.s_addr; +} + +/* + find a tcp address on a list + */ +static struct ctdb_tcp_list *ctdb_tcp_find(struct ctdb_tcp_list *list, + struct ctdb_tcp_list *tcp) +{ + while (list) { + if (same_sockaddr_in(&list->saddr, &tcp->saddr) && + same_sockaddr_in(&list->daddr, &tcp->daddr)) { + return list; + } + list = list->next; + } + return NULL; +} + +/* + called by a daemon to inform us of a TCP connection that one of its + clients managing that should tickled with an ACK when IP takeover is + done + */ +int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata) +{ + struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr; + struct ctdb_tcp_list *tcp; + + tcp = talloc(ctdb, struct ctdb_tcp_list); + CTDB_NO_MEMORY(ctdb, tcp); + + tcp->vnn = p->vnn; + tcp->saddr = p->src; + tcp->daddr = p->dest; + + if (NULL == ctdb_tcp_find(ctdb->tcp_list, tcp)) { + DLIST_ADD(ctdb->tcp_list, tcp); + DEBUG(2,("Added tickle info for %s:%u from vnn %u\n", + inet_ntoa(tcp->daddr.sin_addr), ntohs(tcp->daddr.sin_port), + tcp->vnn)); + } else { + DEBUG(4,("Already had tickle info for %s:%u from vnn %u\n", + inet_ntoa(tcp->daddr.sin_addr), ntohs(tcp->daddr.sin_port), + tcp->vnn)); + } + + return 0; +} + +/* + called by a daemon to inform us of a TCP connection that one of its + clients managing that should tickled with an ACK when IP takeover is + done + */ +int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata) +{ + struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr; + struct ctdb_tcp_list t, *tcp; + + t.vnn = p->vnn; + t.saddr = p->src; + t.daddr = p->dest; + + tcp = ctdb_tcp_find(ctdb->tcp_list, &t); + if (tcp) { + DEBUG(2,("Removed tickle info for %s:%u from vnn %u\n", + inet_ntoa(tcp->daddr.sin_addr), ntohs(tcp->daddr.sin_port), + tcp->vnn)); + DLIST_REMOVE(ctdb->tcp_list, tcp); + talloc_free(tcp); + } + + return 0; +} + + +/* + called when a daemon restarts - wipes all tcp entries from that vnn + */ +int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn) +{ + struct ctdb_tcp_list *tcp, *next; + for (tcp=ctdb->tcp_list;tcp;tcp=next) { + next = tcp->next; + if (tcp->vnn == vnn) { + DLIST_REMOVE(ctdb->tcp_list, tcp); + talloc_free(tcp); + } + + /* and tell the new guy about any that he should have + from us */ + if (tcp->vnn == ctdb->vnn) { + struct ctdb_control_tcp_vnn t; + TDB_DATA data; + + t.vnn = tcp->vnn; + t.src = tcp->saddr; + t.dest = tcp->daddr; + + data.dptr = (uint8_t *)&t; + data.dsize = sizeof(t); + + ctdb_daemon_send_control(ctdb, vnn, 0, + CTDB_CONTROL_TCP_ADD, + 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL); + } + } + return 0; +} + + +/* + called when a client structure goes away - hook to remove + elements from the tcp_list in all daemons + */ +void ctdb_takeover_client_destructor_hook(struct ctdb_client *client) +{ + while (client->tcp_list) { + TDB_DATA data; + struct ctdb_control_tcp_vnn p; + struct ctdb_tcp_list *tcp = client->tcp_list; + DLIST_REMOVE(client->tcp_list, tcp); + p.vnn = tcp->vnn; + p.src = tcp->saddr; + p.dest = tcp->daddr; + data.dptr = (uint8_t *)&p; + data.dsize = sizeof(p); + ctdb_daemon_send_control(client->ctdb, CTDB_BROADCAST_VNNMAP, 0, + CTDB_CONTROL_TCP_REMOVE, + 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL); + talloc_free(tcp); + } +} + + +/* + release all IPs on shutdown + */ +void ctdb_release_all_ips(struct ctdb_context *ctdb) +{ + int i; + + if (!ctdb->takeover.enabled) { + return; + } + + for (i=0;i<ctdb->num_nodes;i++) { + struct ctdb_node *node = ctdb->nodes[i]; + if (ctdb_sys_have_ip(node->public_address)) { + ctdb_event_script(ctdb, "releaseip %s %s %u", + ctdb->takeover.interface, + node->public_address, + node->public_netmask_bits); + } + } +} diff --git a/source4/cluster/ctdb/takeover/system.c b/source4/cluster/ctdb/takeover/system.c new file mode 100644 index 0000000000..13f3d1501b --- /dev/null +++ b/source4/cluster/ctdb/takeover/system.c @@ -0,0 +1,381 @@ +/* + ctdb recovery code + + Copyright (C) Ronnie Sahlberg 2007 + Copyright (C) Andrew Tridgell 2007 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "includes.h" +#include "system/network.h" +#include "system/filesys.h" +#include "system/wait.h" +#include "../include/ctdb_private.h" +#include "lib/events/events.h" +#include <net/ethernet.h> +#include <net/if_arp.h> + + + +/* + send gratuitous arp reply after we have taken over an ip address + + saddr is the address we are trying to claim + iface is the interface name we will be using to claim the address + */ +int ctdb_sys_send_arp(const struct sockaddr_in *saddr, const char *iface) +{ + int s, ret; + struct sockaddr sa; + struct ether_header *eh; + struct arphdr *ah; + struct ifreq if_hwaddr; + unsigned char buffer[64]; /*minimum eth frame size */ + char *ptr; + + /* for now, we only handle AF_INET addresses */ + if (saddr->sin_family != AF_INET) { + DEBUG(0,(__location__ " not an ipv4 address\n")); + return -1; + } + + s = socket(AF_INET, SOCK_PACKET, htons(ETHERTYPE_ARP)); + if (s == -1){ + DEBUG(0,(__location__ " failed to open raw socket\n")); + return -1; + } + + /* get the mac address */ + strcpy(if_hwaddr.ifr_name, iface); + ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr); + if ( ret < 0 ) { + close(s); + DEBUG(0,(__location__ " ioctl failed\n")); + return -1; + } + if (if_hwaddr.ifr_hwaddr.sa_family != AF_LOCAL) { + close(s); + DEBUG(0,(__location__ " not an ethernet address\n")); + return -1; + } + + + memset(buffer, 0 , 64); + eh = (struct ether_header *)buffer; + memset(eh->ether_dhost, 0xff, ETH_ALEN); + memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN); + eh->ether_type = htons(ETHERTYPE_ARP); + + ah = (struct arphdr *)&buffer[sizeof(struct ether_header)]; + ah->ar_hrd = htons(ARPHRD_ETHER); + ah->ar_pro = htons(ETH_P_IP); + ah->ar_hln = ETH_ALEN; + ah->ar_pln = 4; + + /* send a gratious arp */ + ah->ar_op = htons(ARPOP_REQUEST); + ptr = (char *)&ah[1]; + memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN); + ptr+=ETH_ALEN; + memcpy(ptr, &saddr->sin_addr, 4); + ptr+=4; + memset(ptr, 0, ETH_ALEN); + ptr+=ETH_ALEN; + memcpy(ptr, &saddr->sin_addr, 4); + ptr+=4; + + strncpy(sa.sa_data, iface, sizeof(sa.sa_data)); + ret = sendto(s, buffer, 64, 0, &sa, sizeof(sa)); + if (ret < 0 ){ + close(s); + DEBUG(0,(__location__ " failed sendto\n")); + return -1; + } + + /* send unsolicited arp reply broadcast */ + ah->ar_op = htons(ARPOP_REPLY); + ptr = (char *)&ah[1]; + memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN); + ptr+=ETH_ALEN; + memcpy(ptr, &saddr->sin_addr, 4); + ptr+=4; + memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN); + ptr+=ETH_ALEN; + memcpy(ptr, &saddr->sin_addr, 4); + ptr+=4; + + strncpy(sa.sa_data, iface, sizeof(sa.sa_data)); + ret = sendto(s, buffer, 64, 0, &sa, sizeof(sa)); + if (ret < 0 ){ + DEBUG(0,(__location__ " failed sendto\n")); + return -1; + } + + close(s); + return 0; +} + + +/* + uint16 checksum for n bytes + */ +static uint32_t uint16_checksum(uint16_t *data, size_t n) +{ + uint32_t sum=0; + while (n>=2) { + sum += (uint32_t)ntohs(*data); + data++; + n -= 2; + } + if (n == 1) { + sum += (uint32_t)ntohs(*(uint8_t *)data); + } + return sum; +} + +/* + simple TCP checksum - assumes data is multiple of 2 bytes long + */ +static uint16_t tcp_checksum(uint16_t *data, size_t n, struct iphdr *ip) +{ + uint32_t sum = uint16_checksum(data, n); + uint16_t sum2; + sum += uint16_checksum((uint16_t *)&ip->saddr, sizeof(ip->saddr)); + sum += uint16_checksum((uint16_t *)&ip->daddr, sizeof(ip->daddr)); + sum += ip->protocol + n; + sum = (sum & 0xFFFF) + (sum >> 16); + sum = (sum & 0xFFFF) + (sum >> 16); + sum2 = htons(sum); + sum2 = ~sum2; + if (sum2 == 0) { + return 0xFFFF; + } + return sum2; +} + +/* + send tcp ack packet from the specified IP/port to the specified + destination IP/port. + + This is used to trigger the receiving host into sending its own ACK, + which should trigger early detection of TCP reset by the client + after IP takeover + */ +int ctdb_sys_send_ack(const struct sockaddr_in *dest, + const struct sockaddr_in *src) +{ + int s, ret; + uint32_t one = 1; + struct { + struct iphdr ip; + struct tcphdr tcp; + } pkt; + + /* for now, we only handle AF_INET addresses */ + if (src->sin_family != AF_INET || dest->sin_family != AF_INET) { + DEBUG(0,(__location__ " not an ipv4 address\n")); + return -1; + } + + s = socket(AF_INET, SOCK_RAW, htons(IPPROTO_RAW)); + if (s == -1) { + DEBUG(0,(__location__ " failed to open raw socket (%s)\n", + strerror(errno))); + return -1; + } + + ret = setsockopt(s, SOL_IP, IP_HDRINCL, &one, sizeof(one)); + if (ret != 0) { + DEBUG(0,(__location__ " failed to setup IP headers (%s)\n", + strerror(errno))); + close(s); + return -1; + } + + ZERO_STRUCT(pkt); + pkt.ip.version = 4; + pkt.ip.ihl = sizeof(pkt.ip)/4; + pkt.ip.tot_len = htons(sizeof(pkt)); + pkt.ip.ttl = 255; + pkt.ip.protocol = IPPROTO_TCP; + pkt.ip.saddr = src->sin_addr.s_addr; + pkt.ip.daddr = dest->sin_addr.s_addr; + pkt.ip.check = 0; + + pkt.tcp.source = src->sin_port; + pkt.tcp.dest = dest->sin_port; + pkt.tcp.ack = 1; + pkt.tcp.doff = sizeof(pkt.tcp)/4; + pkt.tcp.window = htons(1234); + pkt.tcp.check = tcp_checksum((uint16_t *)&pkt.tcp, sizeof(pkt.tcp), &pkt.ip); + + ret = sendto(s, &pkt, sizeof(pkt), 0, dest, sizeof(*dest)); + if (ret != sizeof(pkt)) { + DEBUG(0,(__location__ " failed sendto (%s)\n", strerror(errno))); + close(s); + return -1; + } + + close(s); + return 0; +} + + +/* + see if we currently have an interface with the given IP + + we try to bind to it, and if that fails then we don't have that IP + on an interface + */ +bool ctdb_sys_have_ip(const char *ip) +{ + struct sockaddr_in sin; + int s; + int ret; + + sin.sin_port = 0; + inet_aton(ip, &sin.sin_addr); + sin.sin_family = AF_INET; + s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + if (s == -1) { + return false; + } + ret = bind(s, (struct sockaddr *)&sin, sizeof(sin)); + close(s); + return ret == 0; +} + +/* + run the event script - varargs version + */ +static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *fmt, va_list ap) +{ + char *options, *cmdstr; + int ret; + va_list ap2; + struct stat st; + + if (stat(ctdb->takeover.event_script, &st) != 0 && + errno == ENOENT) { + DEBUG(0,("No event script found at '%s'\n", ctdb->takeover.event_script)); + return 0; + } + + va_copy(ap2, ap); + options = talloc_vasprintf(ctdb, fmt, ap2); + va_end(ap2); + CTDB_NO_MEMORY(ctdb, options); + + cmdstr = talloc_asprintf(ctdb, "%s %s", ctdb->takeover.event_script, options); + CTDB_NO_MEMORY(ctdb, cmdstr); + + ret = system(cmdstr); + if (ret != -1) { + ret = WEXITSTATUS(ret); + } + + talloc_free(cmdstr); + talloc_free(options); + + return ret; +} + +/* + run the event script + */ +int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = ctdb_event_script_v(ctdb, fmt, ap); + va_end(ap); + + return ret; +} + + +struct ctdb_event_script_state { + struct ctdb_context *ctdb; + pid_t child; + void (*callback)(struct ctdb_context *, int); + int fd[2]; +}; + +/* called when child is finished */ +static void ctdb_event_script_handler(struct event_context *ev, struct fd_event *fde, + uint16_t flags, void *p) +{ + struct ctdb_event_script_state *state = + talloc_get_type(p, struct ctdb_event_script_state); + int status = -1; + waitpid(state->child, &status, 0); + if (status != -1) { + status = WEXITSTATUS(status); + } + state->callback(state->ctdb, status); + talloc_free(state); +} + + +/* + run the event script in the background, calling the callback when + finished + */ +int ctdb_event_script_callback(struct ctdb_context *ctdb, + void (*callback)(struct ctdb_context *, int), + const char *fmt, ...) +{ + struct ctdb_event_script_state *state; + va_list ap; + int ret; + + state = talloc(ctdb, struct ctdb_event_script_state); + CTDB_NO_MEMORY(ctdb, state); + + state->ctdb = ctdb; + state->callback = callback; + + ret = pipe(state->fd); + if (ret != 0) { + talloc_free(state); + return -1; + } + + state->child = fork(); + + if (state->child == (pid_t)-1) { + close(state->fd[0]); + close(state->fd[1]); + talloc_free(state); + return -1; + } + + if (state->child == 0) { + close(state->fd[0]); + va_start(ap, fmt); + ret = ctdb_event_script_v(ctdb, fmt, ap); + va_end(ap); + _exit(ret); + } + + close(state->fd[1]); + + event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE, + ctdb_event_script_handler, state); + + return 0; +} |