diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2010-02-24 10:53:05 +1030 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2010-02-24 10:53:05 +1030 |
commit | ececeffd85db1b27c07cdf91a921fd203006daf6 (patch) | |
tree | 6eed31503cfe5af4c348bb00d4db8cc92cf9b7e7 /lib | |
parent | 8c3fda4318adc71899bc41486d5616da3a91a688 (diff) | |
download | samba-ececeffd85db1b27c07cdf91a921fd203006daf6.tar.gz samba-ececeffd85db1b27c07cdf91a921fd203006daf6.tar.bz2 samba-ececeffd85db1b27c07cdf91a921fd203006daf6.zip |
tdb: add -k option to tdbtorture
To test the case of death of a process during transaction commit, add
a -k (kill random) option to tdbtorture. The easiest way to do this
is to make every worker a child (unless there's only one worker and -k is not given), which
is why this patch is bigger than you might expect.
When using -k without -t (always transactions), corruption is expected, though
it doesn't happen every time. With -t, we currently get corruption too, but
the next patch fixes that.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/tdb/tools/tdbtorture.c | 199 |
1 files changed, 142 insertions, 57 deletions
diff --git a/lib/tdb/tools/tdbtorture.c b/lib/tdb/tools/tdbtorture.c index b0221a2503..5ee6991825 100644 --- a/lib/tdb/tools/tdbtorture.c +++ b/lib/tdb/tools/tdbtorture.c @@ -30,6 +30,10 @@ static struct tdb_context *db; static int in_transaction; static int error_count; static int always_transaction = 0; +static int hash_size = 2; +static int loopnum; +static int count_pipe; +static struct tdb_logging_context log_ctx; #ifdef PRINTF_ATTRIBUTE static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const char *format, ...) PRINTF_ATTRIBUTE(3,4); @@ -48,8 +52,9 @@ static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const c va_end(ap); fflush(stdout); #if 0 - { + if (level != TDB_DEBUG_TRACE) { char *ptr; + signal(SIGUSR1, SIG_IGN); asprintf(&ptr,"xterm -e gdb /proc/%d/exe %d", getpid(), getpid()); system(ptr); free(ptr); @@ -211,24 +216,74 @@ static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, static void usage(void) { - printf("Usage: tdbtorture [-t] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]\n"); + printf("Usage: tdbtorture [-t] [-k] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]\n"); exit(0); } - int main(int argc, char * const *argv) +static void send_count_and_suicide(int sig) +{ + /* This ensures our successor can continue where we left off. */ + write(count_pipe, &loopnum, sizeof(loopnum)); + /* This gives a unique signature. */ + kill(getpid(), SIGUSR2); +} + +static int run_child(int i, int seed, unsigned num_loops, unsigned start) +{ + db = tdb_open_ex("torture.tdb", hash_size, TDB_DEFAULT, + O_RDWR | O_CREAT, 0600, &log_ctx, NULL); + if (!db) { + fatal("db open failed"); + } + + srand(seed + i); + srandom(seed + i); + + /* Set global, then we're ready to handle being killed. 
*/ + loopnum = start; + signal(SIGUSR1, send_count_and_suicide); + + for (;loopnum<num_loops && error_count == 0;loopnum++) { + addrec_db(); + } + + if (error_count == 0) { + tdb_traverse_read(db, NULL, NULL); + if (always_transaction) { + while (in_transaction) { + tdb_transaction_cancel(db); + in_transaction--; + } + if (tdb_transaction_start(db) != 0) + fatal("tdb_transaction_start failed"); + } + tdb_traverse(db, traverse_fn, NULL); + tdb_traverse(db, traverse_fn, NULL); + if (always_transaction) { + if (tdb_transaction_commit(db) != 0) + fatal("tdb_transaction_commit failed"); + } + } + + tdb_close(db); + + return (error_count < 100 ? error_count : 100); +} + +int main(int argc, char * const *argv) { int i, seed = -1; - int num_procs = 3; int num_loops = 5000; - int hash_size = 2; - int c; + int num_procs = 3; + int c, pfds[2]; extern char *optarg; pid_t *pids; + int kill_random = 0; + int *done; - struct tdb_logging_context log_ctx; log_ctx.log_fn = tdb_log; - while ((c = getopt(argc, argv, "n:l:s:H:th")) != -1) { + while ((c = getopt(argc, argv, "n:l:s:H:thk")) != -1) { switch (c) { case 'n': num_procs = strtol(optarg, NULL, 0); @@ -245,6 +300,9 @@ static void usage(void) case 't': always_transaction = 1; break; + case 'k': + kill_random = 1; + break; default: usage(); } @@ -252,93 +310,120 @@ static void usage(void) unlink("torture.tdb"); - pids = (pid_t *)calloc(sizeof(pid_t), num_procs); - pids[0] = getpid(); - - for (i=0;i<num_procs-1;i++) { - if ((pids[i+1]=fork()) == 0) break; - } - - db = tdb_open_ex("torture.tdb", hash_size, TDB_CLEAR_IF_FIRST, - O_RDWR | O_CREAT, 0600, &log_ctx, NULL); - if (!db) { - fatal("db open failed"); - } - if (seed == -1) { seed = (getpid() + time(NULL)) & 0x7FFFFFFF; } - if (i == 0) { - printf("testing with %d processes, %d loops, %d hash_size, seed=%d%s\n", - num_procs, num_loops, hash_size, seed, always_transaction ? 
" (all within transactions)" : ""); + if (num_procs == 1 && !kill_random) { + /* Don't fork for this case, makes debugging easier. */ + error_count = run_child(0, seed, num_loops, 0); + goto done; } - srand(seed + i); - srandom(seed + i); + pids = (pid_t *)calloc(sizeof(pid_t), num_procs); + done = (int *)calloc(sizeof(int), num_procs); - for (i=0;i<num_loops && error_count == 0;i++) { - addrec_db(); + if (pipe(pfds) != 0) { + perror("Creating pipe"); + exit(1); } - - if (error_count == 0) { - tdb_traverse_read(db, NULL, NULL); - if (always_transaction) { - while (in_transaction) { - tdb_transaction_cancel(db); - in_transaction--; + count_pipe = pfds[1]; + + for (i=0;i<num_procs;i++) { + if ((pids[i]=fork()) == 0) { + close(pfds[0]); + if (i == 0) { + printf("testing with %d processes, %d loops, %d hash_size, seed=%d%s\n", + num_procs, num_loops, hash_size, seed, always_transaction ? " (all within transactions)" : ""); } - if (tdb_transaction_start(db) != 0) - fatal("tdb_transaction_start failed"); - } - tdb_traverse(db, traverse_fn, NULL); - tdb_traverse(db, traverse_fn, NULL); - if (always_transaction) { - if (tdb_transaction_commit(db) != 0) - fatal("tdb_transaction_commit failed"); + exit(run_child(i, seed, num_loops, 0)); } } - tdb_close(db); - - if (getpid() != pids[0]) { - return error_count; - } - - for (i=1;i<num_procs;i++) { + while (num_procs) { int status, j; pid_t pid; + if (error_count != 0) { /* try and stop the test on any failure */ - for (j=1;j<num_procs;j++) { + for (j=0;j<num_procs;j++) { if (pids[j] != 0) { kill(pids[j], SIGTERM); } } } - pid = waitpid(-1, &status, 0); + + pid = waitpid(-1, &status, kill_random ? WNOHANG : 0); + if (pid == 0) { + struct timespec ts; + + /* Sleep for 1/10 second. */ + ts.tv_sec = 0; + ts.tv_nsec = 100000000; + nanosleep(&ts, NULL); + + /* Kill someone. 
*/ + kill(pids[random() % num_procs], SIGUSR1); + continue; + } + if (pid == -1) { perror("failed to wait for child\n"); exit(1); } - for (j=1;j<num_procs;j++) { + + for (j=0;j<num_procs;j++) { if (pids[j] == pid) break; } if (j == num_procs) { printf("unknown child %d exited!?\n", (int)pid); exit(1); } - if (WEXITSTATUS(status) != 0) { - printf("child %d exited with status %d\n", - (int)pid, WEXITSTATUS(status)); + if (WIFSIGNALED(status)) { + if (WTERMSIG(status) == SIGUSR2 + || WTERMSIG(status) == SIGUSR1) { + /* SIGUSR2 means they wrote to pipe. */ + if (WTERMSIG(status) == SIGUSR2) { + read(pfds[0], &done[j], + sizeof(done[j])); + } + pids[j] = fork(); + if (pids[j] == 0) + exit(run_child(j, seed, num_loops, + done[j])); + printf("Restarting child %i for %u-%u\n", + j, done[j], num_loops); + continue; + } + printf("child %d exited with signal %d\n", + (int)pid, WTERMSIG(status)); error_count++; + } else { + if (WEXITSTATUS(status) != 0) { + printf("child %d exited with status %d\n", + (int)pid, WEXITSTATUS(status)); + error_count++; + } } - pids[j] = 0; + memmove(&pids[j], &pids[j+1], + (num_procs - j - 1)*sizeof(pids[0])); + num_procs--; } free(pids); +done: if (error_count == 0) { + db = tdb_open_ex("torture.tdb", hash_size, TDB_DEFAULT, + O_RDWR, 0, &log_ctx, NULL); + if (!db) { + fatal("db open failed"); + } + if (tdb_check(db, NULL, NULL) == -1) { + printf("db check failed"); + exit(1); + } + tdb_close(db); printf("OK\n"); } |