summaryrefslogtreecommitdiff
path: root/source3/smbd
diff options
context:
space:
mode:
authorVolker Lendecke <vl@samba.org>2010-03-02 17:02:01 +0100
committerVolker Lendecke <vl@samba.org>2010-03-10 16:07:10 +0100
commit89c785c47aa0fceba148297116e863f46941bd42 (patch)
treef1c4812e6467614bd542abda6f8bab198af1889d /source3/smbd
parent184afeb57d0daeacfacdb8c976032697efe00214 (diff)
downloadsamba-89c785c47aa0fceba148297116e863f46941bd42.tar.gz
samba-89c785c47aa0fceba148297116e863f46941bd42.tar.bz2
samba-89c785c47aa0fceba148297116e863f46941bd42.zip
s3: Fix a long-standing problem with recycled PIDs
When a samba server process dies hard, it has no chance to clean up its entries in locking.tdb, brlock.tdb, connections.tdb and sessionid.tdb. For locking.tdb and brlock.tdb Samba is robust by checking every time we read an entry from the database if the corresponding process still exists. If it does not exist anymore, the entry is deleted. This is not 100% failsafe though: On systems with a limited PID space there is a non-zero chance that between the smbd's death and the fresh access, the PID is recycled by another long-running process. This renders all files that had been locked by the killed smbd potentially unusable until the new process also dies. This patch is supposed to fix the problem the following way: Every process ID in every database is augmented by a random 64-bit number that is stored in a serverid.tdb. Whenever we need to check if a process still exists we know its PID and the 64-bit number. We look up the PID in serverid.tdb and compare the 64-bit number. If it's the same, the process still is a valid smbd holding the lock. If it is different, a new smbd has taken over. I believe this is safe against an smbd that has died hard and the PID has been taken over by a non-samba process. This process would not have registered itself with a fresh 64-bit number in serverid.tdb, so the old one still exists in serverid.tdb. We protect against this case by the parent smbd taking care of deregistering PIDs from serverid.tdb and the fact that serverid.tdb is CLEAR_IF_FIRST. CLEAR_IF_FIRST does not work in a cluster, so the automatic cleanup does not work when all smbds are restarted. For this, "net serverid wipe" has to be run before smbd starts up. As a convenience, "net serverid wipedbs" also cleans up sessionid.tdb and connections.tdb. While there, this also cleans up overloading connections.tdb with all the process entries just for messaging_send_all(). Volker
Diffstat (limited to 'source3/smbd')
-rw-r--r--source3/smbd/negprot.c4
-rw-r--r--source3/smbd/server.c39
2 files changed, 37 insertions, 6 deletions
diff --git a/source3/smbd/negprot.c b/source3/smbd/negprot.c
index 81d29d90f9..32714fd828 100644
--- a/source3/smbd/negprot.c
+++ b/source3/smbd/negprot.c
@@ -667,8 +667,8 @@ void reply_negprot(struct smb_request *req)
when the client connects to port 445. Of course there is a small
window where we are listening to messages -- jerry */
- claim_connection(
- NULL,"",FLAG_MSG_GENERAL|FLAG_MSG_SMBD|FLAG_MSG_PRINT_GENERAL);
+ serverid_register_self(FLAG_MSG_GENERAL|FLAG_MSG_SMBD
+ |FLAG_MSG_PRINT_GENERAL);
/* Check for protocols, most desirable first */
for (protocol = 0; supported_protocols[protocol].proto_name; protocol++) {
diff --git a/source3/smbd/server.c b/source3/smbd/server.c
index f685385043..5347bb9c03 100644
--- a/source3/smbd/server.c
+++ b/source3/smbd/server.c
@@ -249,6 +249,7 @@ static void remove_child_pid(pid_t pid, bool unclean_shutdown)
{
struct child_pid *child;
static struct timed_event *cleanup_te;
+ struct server_id child_id;
if (unclean_shutdown) {
/* a child terminated uncleanly so tickle all
@@ -268,6 +269,14 @@ static void remove_child_pid(pid_t pid, bool unclean_shutdown)
}
}
+ child_id = procid_self(); /* Just initialize pid and potentially vnn */
+ child_id.pid = pid;
+
+ if (!serverid_deregister(&child_id)) {
+ DEBUG(1, ("Could not remove pid %d from serverid.tdb\n",
+ (int)pid));
+ }
+
for (child = children; child != NULL; child = child->next) {
if (child->pid == pid) {
struct child_pid *tmp = child;
@@ -374,6 +383,7 @@ static void smbd_accept_connection(struct tevent_context *ev,
struct sockaddr_storage addr;
socklen_t in_addrlen = sizeof(addr);
pid_t pid = 0;
+ uint64_t unique_id;
smbd_set_server_fd(accept(s->fd,(struct sockaddr *)&addr,&in_addrlen));
@@ -398,12 +408,21 @@ static void smbd_accept_connection(struct tevent_context *ev,
return;
}
+ /*
+ * Generate a unique id in the parent process so that we use
+ * the global random state in the parent.
+ */
+ generate_random_buffer((uint8_t *)&unique_id, sizeof(unique_id));
+
pid = sys_fork();
if (pid == 0) {
NTSTATUS status = NT_STATUS_OK;
+
/* Child code ... */
am_parent = 0;
+ set_my_unique_id(unique_id);
+
/* Stop zombies, the parent explicitly handles
* them, counting worker smbds. */
CatchChild();
@@ -435,6 +454,13 @@ static void smbd_accept_connection(struct tevent_context *ev,
smbd_setup_sig_term_handler();
smbd_setup_sig_hup_handler();
+ if (!serverid_register_self(FLAG_MSG_GENERAL|FLAG_MSG_SMBD
+ |FLAG_MSG_DBWRAP
+ |FLAG_MSG_PRINT_GENERAL)) {
+ exit_server_cleanly("Could not register myself in "
+ "serverid.tdb");
+ }
+
smbd_process();
exit:
exit_server_cleanly("end of child");
@@ -665,8 +691,13 @@ static bool open_sockets_smbd(struct smbd_parent_context *parent,
clustered mode, ctdb won't allow us to start doing database
operations until it has gone thru a full startup, which
includes checking to see that smbd is listening. */
- claim_connection(NULL,"",
- FLAG_MSG_GENERAL|FLAG_MSG_SMBD|FLAG_MSG_DBWRAP);
+
+ if (!serverid_register_self(FLAG_MSG_GENERAL|FLAG_MSG_SMBD
+ |FLAG_MSG_DBWRAP)) {
+ DEBUG(0, ("open_sockets_smbd: Failed to register "
+ "myself in serverid.tdb\n"));
+ return false;
+ }
/* Listen to messages */
@@ -852,8 +883,8 @@ static void exit_server_common(enum server_exit_reason how,
/* 3 second timeout. */
print_notify_send_messages(smbd_messaging_context(), 3);
- /* delete our entry in the connections database. */
- yield_connection(NULL,"");
+ /* delete our entry in the serverid database. */
+ serverid_deregister_self();
#ifdef WITH_DFS
if (dcelogin_atmost_once) {