diff options
author | James Peach <jpeach@samba.org> | 2006-04-04 00:27:50 +0000 |
---|---|---|
committer | Gerald (Jerry) Carter <jerry@samba.org> | 2007-10-10 11:15:53 -0500 |
commit | 4fa555980070d78b39711ef21d77628d26055bc2 (patch) | |
tree | 38dd03bac5a57b45404f620ddf4eda2dd6ea06b0 /source3/lib | |
parent | f5e7376bca7ab02ef48110ef6c0fb394851a606c (diff) | |
download | samba-4fa555980070d78b39711ef21d77628d26055bc2.tar.gz samba-4fa555980070d78b39711ef21d77628d26055bc2.tar.bz2 samba-4fa555980070d78b39711ef21d77628d26055bc2.zip |
r14898: This change is an attempt to improve the quality of the information that
is produced when a process exits abnormally.
First, we coalesce the core dumping code so that we greatly improve our
odds of being able to produce a core file, even in the case of a memory
fault. I've removed duplicates of dump_core() and split it in two to
reduce the amount of work needed to actually do the dump.
Second, we refactor the exit_server code path to always log an explanation
and a stack trace. My goal is to always produce enough log information
for us to be able to explain any server exit, though there is a risk
that this could produce too much log information on a flaky network.
Finally, smbcontrol has gained an smbd fault injection operation to test
the changes above. This is only enabled for developer builds.
(This used to be commit 56bc02d64498eb3faf89f0c5452b9299daea8e95)
Diffstat (limited to 'source3/lib')
-rw-r--r-- | source3/lib/fault.c | 97 | ||||
-rw-r--r-- | source3/lib/util.c | 65 |
2 files changed, 125 insertions, 37 deletions
diff --git a/source3/lib/fault.c b/source3/lib/fault.c index 3cb6684639..8ae45f2435 100644 --- a/source3/lib/fault.c +++ b/source3/lib/fault.c @@ -20,7 +20,12 @@ #include "includes.h" +#ifdef HAVE_SYS_PRCTL_H +#include <sys/prctl.h> +#endif + static void (*cont_fn)(void *); +static pstring corepath; /******************************************************************* report a fault @@ -33,11 +38,11 @@ static void fault_report(int sig) counter++; - DEBUG(0,("===============================================================\n")); + DEBUGSEP(0); DEBUG(0,("INTERNAL ERROR: Signal %d in pid %d (%s)",sig,(int)sys_getpid(),SAMBA_VERSION_STRING)); DEBUG(0,("\nPlease read the Trouble-Shooting section of the Samba3-HOWTO\n")); DEBUG(0,("\nFrom: http://www.samba.org/samba/docs/Samba3-HOWTO.pdf\n")); - DEBUG(0,("===============================================================\n")); + DEBUGSEP(0); smb_panic("internal error"); @@ -82,3 +87,91 @@ void fault_setup(void (*fn)(void *)) CatchSignal(SIGABRT,SIGNAL_CAST sig_fault); #endif } + +/******************************************************************* +make all the preparations to safely dump a core file +********************************************************************/ + +void dump_core_setup(const char *progname) +{ + pstring logbase; + char * end; + + if (lp_logfile() && *lp_logfile()) { + snprintf(logbase, sizeof(logbase), "%s", lp_logfile()); + if ((end = strrchr_m(logbase, '/'))) { + *end = '\0'; + } + } else { + /* We will end up here is the log file is given on the command + * line by the -l option but the "log file" option is not set + * in smb.conf. 
+ */ + snprintf(logbase, sizeof(logbase), "%s", dyn_LOGFILEBASE); + } + + SMB_ASSERT(progname != NULL); + + snprintf(corepath, sizeof(corepath), "%s/cores", logbase); + mkdir(corepath,0700); + + snprintf(corepath, sizeof(corepath), "%s/cores/%s", + logbase, progname); + mkdir(corepath,0700); + + sys_chown(corepath,getuid(),getgid()); + chmod(corepath,0700); + +#ifdef HAVE_GETRLIMIT +#ifdef RLIMIT_CORE + { + struct rlimit rlp; + getrlimit(RLIMIT_CORE, &rlp); + rlp.rlim_cur = MAX(16*1024*1024,rlp.rlim_cur); + setrlimit(RLIMIT_CORE, &rlp); + getrlimit(RLIMIT_CORE, &rlp); + DEBUG(3,("Maximum core file size limits now %d(soft) %d(hard)\n", + (int)rlp.rlim_cur,(int)rlp.rlim_max)); + } +#endif +#endif + +#if defined(HAVE_PRCTL) && defined(PR_SET_DUMPABLE) + /* On Linux we lose the ability to dump core when we change our user + * ID. We know how to dump core safely, so let's make sure we have our + * dumpable flag set. + */ + prctl(PR_SET_DUMPABLE, 1); +#endif + + /* FIXME: if we have a core-plus-pid facility, configurably set + * this up here. + */ +} + + void dump_core(void) +{ + if (*corepath != '\0') { + /* The chdir might fail if we dump core before we finish + * processing the config file. + */ + if (chdir(corepath) != 0) { + DEBUG(0, ("unable to change to %s", corepath)); + DEBUGADD(0, ("refusing to dump core\n")); + exit(1); + } + + DEBUG(0,("dumping core in %s\n", corepath)); + } + + umask(~(0700)); + dbgflush(); + + /* Ensure we don't have a signal handler for abort. */ +#ifdef SIGABRT + CatchSignal(SIGABRT,SIGNAL_CAST SIG_DFL); +#endif + + abort(); +} + diff --git a/source3/lib/util.c b/source3/lib/util.c index 0b831ea335..0fbe4a13d3 100644 --- a/source3/lib/util.c +++ b/source3/lib/util.c @@ -1545,19 +1545,10 @@ gid_t nametogid(const char *name) Something really nasty happened - panic ! 
********************************************************************/ -#ifdef HAVE_LIBEXC_H -#include <libexc.h> -#endif - -static void smb_panic2(const char *why, BOOL decrement_pid_count ) +void smb_panic(const char *const why) { char *cmd; int result; -#ifdef HAVE_BACKTRACE_SYMBOLS - void *backtrace_stack[BACKTRACE_STACK_SIZE]; - size_t backtrace_size; - char **backtrace_strings; -#endif #ifdef DEVELOPER { @@ -1570,9 +1561,12 @@ static void smb_panic2(const char *why, BOOL decrement_pid_count ) } #endif + DEBUG(0,("PANIC (pid %llu): %s\n", + (unsigned long long)sys_getpid(), why)); + log_stack_trace(); + /* only smbd needs to decrement the smbd counter in connections.tdb */ - if ( decrement_pid_count ) - decrement_smbd_process_count(); + decrement_smbd_process_count(); cmd = lp_panic_action(); if (cmd && *cmd) { @@ -1586,9 +1580,27 @@ static void smb_panic2(const char *why, BOOL decrement_pid_count ) DEBUG(0, ("smb_panic(): action returned status %d\n", WEXITSTATUS(result))); } - DEBUG(0,("PANIC: %s\n", why)); + dump_core(); +} + +/******************************************************************* + Print a backtrace of the stack to the debug log. This function + DELIBERATELY LEAKS MEMORY. The expectation is that you should + exit shortly after calling it. +********************************************************************/ + +#ifdef HAVE_LIBEXC_H +#include <libexc.h> +#endif + +void log_stack_trace(void) +{ #ifdef HAVE_BACKTRACE_SYMBOLS + void *backtrace_stack[BACKTRACE_STACK_SIZE]; + size_t backtrace_size; + char **backtrace_strings; + /* get the backtrace (stack frames) */ backtrace_size = backtrace(backtrace_stack,BACKTRACE_STACK_SIZE); backtrace_strings = backtrace_symbols(backtrace_stack, backtrace_size); @@ -1607,16 +1619,14 @@ static void smb_panic2(const char *why, BOOL decrement_pid_count ) #elif HAVE_LIBEXC -#define NAMESIZE 32 /* Arbitrary */ - /* The IRIX libexc library provides an API for unwinding the stack. See * libexc(3) for details. 
Apparantly trace_back_stack leaks memory, but * since we are about to abort anyway, it hardly matters. - * - * Note that if we paniced due to a SIGSEGV or SIGBUS (or similar) this - * will fail with a nasty message upon failing to open the /proc entry. */ { + +#define NAMESIZE 32 /* Arbitrary */ + __uint64_t addrs[BACKTRACE_STACK_SIZE]; char * names[BACKTRACE_STACK_SIZE]; char namebuf[BACKTRACE_STACK_SIZE * NAMESIZE]; @@ -1646,24 +1656,9 @@ static void smb_panic2(const char *why, BOOL decrement_pid_count ) } } #undef NAMESIZE +#else + DEBUG(0, ("unable to produce a stack trace on this platform\n")); #endif - - dbgflush(); -#ifdef SIGABRT - CatchSignal(SIGABRT,SIGNAL_CAST SIG_DFL); -#endif - abort(); -} - -/******************************************************************* - wrapper for smb_panic2() -********************************************************************/ - - void smb_panic( const char *why ) -{ - smb_panic2( why, True ); - /* Notreached. */ - abort(); } /******************************************************************* |