/*
* The main pounder process controller and scheduler program.
* Author: Darrick Wong <djwong@us.ibm.com>
*/
/*
* Copyright (C) 2003-2006 IBM
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
* 02111-1307, USA.
*/
#include <errno.h>
#include <signal.h>
#include <sys/wait.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <stdarg.h>
#include <sys/time.h>
#include <time.h>
#include <stdio.h>
#include <dirent.h>
#include <sys/stat.h>
#include "proclist.h"
#include "debug.h"
// List of subprocesses to wait upon
struct proclist_t wait_ons = { NULL };
// List of daemon subprocesses; main() signals and reaps these before exiting
struct proclist_t daemons = { NULL };
// Set to 1 by main() when the second argument is "--leader"
static int is_leader = 0;
// Path of the PID file; assigned by record_pid() and unlinked by the leader on exit
static char *pidfile = "";
/* Forward declarations of the helpers defined further down in this file. */
static inline int is_executable(const char *fname);
static inline int is_directory(const char *fname);
static inline int test_filter(const struct dirent *p);
static inline int test_sort(const struct dirent **a, const struct dirent **b);
static int wait_for_pids(void);
static void wait_for_daemons(void);
static void note_process(pid_t pid, char *name);
static void note_daemon(pid_t pid, char *name);
static void kill_tests(void);
static void kill_daemons(void);
static int process_dir(const char *fname);
static pid_t spawn_test(char *fname);
static void note_child(pid_t pid, char *fname, char type);
static int child_finished(const char *name, int stat);
/* argv[0] as received by main(); process_dir() uses it to re-exec this program. */
static char *progname;
/* Size, in bytes, of the buffers used to build test and log path names. */
#define TEST_PATH_LEN 512
/* Value handed to usleep() by the parent right after a test is forked. */
#define TEST_FORK_WAIT 100
/**
 * Signal handler installed by main() for SIGHUP, SIGINT and SIGTERM.
 *
 * Logs the abort, asks every tracked test and daemon to terminate,
 * removes the PID file when this instance is the leader, and exits
 * with status 0.
 *
 * signum: the signal that was delivered (not inspected).
 */
static void jump_out(int signum)
{
	(void)signum;

	pounder_fprintf(stdout, "Control-C received; aborting!\n");

	/* Tests first, then daemons. */
	kill_tests();
	kill_daemons();

	/* Only the leader created a PID file, so only the leader removes it. */
	if (is_leader != 0) {
		unlink(pidfile);
	}

	exit(0);
}
/**
 * Sends SIGTERM to every test recorded in the wait_ons list.
 *
 * The signal is sent to the negated pid, i.e. to the process group
 * whose id equals the recorded pid.
 */
static void kill_tests(void)
{
	struct proclist_item_t *item;

	for (item = wait_ons.head; item != NULL; item = item->next) {
		kill(-item->pid, SIGTERM);
	}
}
/**
 * Sends SIGTERM to every daemon recorded in the daemons list.
 *
 * The signal is sent to the negated pid, i.e. to the process group
 * whose id equals the recorded pid.
 */
static void kill_daemons(void)
{
	struct proclist_item_t *item;

	for (item = daemons.head; item != NULL; item = item->next) {
		kill(-item->pid, SIGTERM);
	}
}
/**
 * Record the pounder leader's PID in a file.
 *
 * The file name is taken from the POUNDER_PIDFILE environment variable
 * and defaults to "pounder.pid" when that variable is unset.  The chosen
 * name is stored in the file-scope 'pidfile' so it can be unlinked later.
 *
 * Failures are reported with perror() and are not fatal: if the file
 * cannot be opened or written, the program carries on without it.
 */
static void record_pid(void)
{
	FILE *fp;

	pidfile = getenv("POUNDER_PIDFILE");
	if (pidfile == NULL) {
		pidfile = "pounder.pid";
	}

	fp = fopen(pidfile, "w");
	if (fp == NULL) {
		perror(pidfile);
		/* Nothing to write to; using fp past this point would crash. */
		return;
	}

	if (fprintf(fp, "%d", (int)getpid()) < 0) {
		perror(pidfile);
	}
	/* fclose() flushes, so a write error may only show up here. */
	if (fclose(fp) != 0) {
		perror(pidfile);
	}
}
/**
 * Main program.
 *
 * Usage: argv[0] test_prog [--leader]
 *
 * argv[1] names either a directory of tests, which is handed to
 * process_dir(), or a single executable test, which is spawned and
 * waited for here.  When argv[2] is "--leader" this instance also
 * records its PID in a file and logs the overall verdict.
 *
 * Exit status:
 *   1        too few arguments, or the single test failed
 *   0        everything that was run passed
 *   -1       argv[1] could not be examined or started
 *   other    the value returned by process_dir() for a directory
 */
int main(int argc, char *argv[])
{
	int retcode;
	int executable;
	struct sigaction zig;
	pid_t pid;
	const char *logdir;

	/* Check parameters */
	if (argc < 2) {
		fprintf(stderr, "Usage: %s test_prog\n", argv[0]);
		return 1;
	}
	if (argc > 2 && strcmp(argv[2], "--leader") == 0) {
		/* getenv() may return NULL, which must not reach a %s conversion. */
		logdir = getenv("POUNDER_LOGDIR");
		pounder_fprintf(stdout,
				"Logging this test output to %s/POUNDERLOG.\n",
				logdir != NULL ? logdir : "(null)");
		is_leader = 1;
		record_pid();
	}
	progname = argv[0];

	/* Set up signals */
	memset(&zig, 0x00, sizeof(zig));
	zig.sa_handler = jump_out;
	sigaction(SIGHUP, &zig, NULL);
	sigaction(SIGINT, &zig, NULL);
	sigaction(SIGTERM, &zig, NULL);

	if (is_directory(argv[1])) {
		retcode = process_dir(argv[1]);
		goto out;
	}

	/*
	 * is_executable() reports -1 when the file cannot be examined; that
	 * value is nonzero, so it has to be told apart from "yes" explicitly.
	 */
	executable = is_executable(argv[1]);
	if (executable < 0) {
		perror(argv[1]);
		retcode = -1;
		goto out;
	}
	if (executable == 0) {
		pounder_fprintf(stderr,
				"%s: Not a directory or a test.\n", argv[1]);
		retcode = -1;
		goto out;
	}

	// Start the test
	pid = spawn_test(argv[1]);
	if (pid < 0) {
		perror("fork");
		retcode = -1;
		goto out;
	}

	// Track the test; wait_for_pids() returns 1 on success, 0 on failure
	note_process(pid, argv[1]);
	retcode = (wait_for_pids() == 0) ? 1 : 0;

out:
	kill_daemons();
	wait_for_daemons();
	if (is_leader) {
		if (retcode == 0) {
			pounder_fprintf(stdout, "%s: %s.\n", argv[1], pass_msg);
		} else if (retcode < 0 || retcode == 255) {
			pounder_fprintf(stdout, "%s: %s with code %d.\n",
					argv[1], abort_msg, retcode);
		} else {
			pounder_fprintf(stdout, "%s: %s with code %d.\n",
					argv[1], fail_msg, retcode);
		}
		unlink(pidfile);
	}
	exit(retcode);
}
/**
 * Helper function to determine if a file is executable.
 *
 * The decision is made from the file's mode bits and the caller's
 * effective uid/gid: uid 0 is always allowed, otherwise the owner,
 * group or "other" execute bit is consulted, in that order.
 * Supplementary groups are not taken into account.
 *
 * fname: path of the file to examine.
 *
 * Returns 1 if yes, 0 if no and -1 if stat() fails (errno is then set
 * by stat()).
 */
static inline int is_executable(const char *fname)
{
	struct stat tmp;
	mode_t exec_bit;

	if (stat(fname, &tmp) < 0) {
		return -1;
	}
	if (geteuid() == 0) {
		return 1;
	}

	if (geteuid() == tmp.st_uid) {
		exec_bit = S_IXUSR;
	} else if (getegid() == tmp.st_gid) {
		exec_bit = S_IXGRP;
	} else {
		exec_bit = S_IXOTH;
	}

	/* Collapse the raw mode bit to the documented 1/0 result. */
	return (tmp.st_mode & exec_bit) != 0;
}
/**
 * Helper function to determine if a file is a directory.
 *
 * fname: path to examine.
 *
 * Returns the value of S_ISDIR() for the file (nonzero for a directory),
 * and 0 when it is not a directory or when stat() fails.
 */
static inline int is_directory(const char *fname)
{
	struct stat info;

	return (stat(fname, &info) >= 0) ? S_ISDIR(info.st_mode) : 0;
}
/**
 * scandir() filter: returns 1 if the directory entry's filename fits the
 * test name pattern, i.e. it starts with 'T' or 'D' followed by two
 * decimal digits; returns 0 otherwise.
 *
 * p: directory entry to check.
 */
static inline int test_filter(const struct dirent *p)
{
	const char *name = p->d_name;

	if (name[0] != 'T' && name[0] != 'D') {
		return 0;
	}

	/*
	 * isdigit() is only defined for values representable as unsigned
	 * char (or EOF), so bytes >= 0x80 must not be passed as plain char.
	 * The && stops at the terminator of a short name before name[2] is
	 * read.
	 */
	return isdigit((unsigned char)name[1])
	    && isdigit((unsigned char)name[2]);
}
/**
 * scandir() comparison routine for test names.
 *
 * The leading type letter is skipped and the remainders (number, then
 * name) are compared with strcmp(), with the operands swapped: the
 * result is positive when *a names the lower number/name pair.  The
 * resulting list is therefore in descending order; process_dir() walks
 * it from the last element to the first.
 */
static inline int test_sort(const struct dirent **a, const struct dirent **b)
{
	const char *key_a = (*a)->d_name + 1;
	const char *key_b = (*b)->d_name + 1;

	return strcmp(key_b, key_a);
}
/**
 * Takes the wait() status integer of a finished child and prints a log
 * message describing how it ended.
 *
 * name: label used in the log message.
 * stat: status value as filled in by wait().
 *
 * Returns 0 if the child exited with code 0, and 1 if it was killed by a
 * signal or exited with any other code.
 */
static int child_finished(const char *name, int stat)
{
	int code;

	/* Death by signal is always reported as a failure. */
	if (WIFSIGNALED(stat)) {
		pounder_fprintf(stdout, "%s: %s on signal %d.\n",
				name, fail_msg, WTERMSIG(stat));
		return 1;
	}

	code = WEXITSTATUS(stat);
	if (code == 0) {
		pounder_fprintf(stdout, "%s: %s.\n", name, pass_msg);
		return 0;
	}

	/*
	 * A negative or 255 exit code is logged as an abort, anything else
	 * as an ordinary failure.
	 */
	// FIXME: add aborted test to blacklist
	pounder_fprintf(stdout, "%s: %s with code %d.\n", name,
			(code < 0 || code == 255) ? abort_msg : fail_msg,
			code);
	return 1;
}
/**
 * Wait until every process currently on the wait_ons list has been
 * reaped.
 *
 * Each reaped test is logged through child_finished(), removed from
 * wait_ons and freed.  A daemon that happens to exit in the meantime is
 * logged and removed from the daemons list as well, but neither counts
 * towards the number of tests waited for nor affects the result.
 *
 * Returns 1 if every test finished successfully, 0 if any of them
 * failed or if wait() itself returned an error.
 */
static int wait_for_pids(void)
{
	struct proclist_item_t *item;
	int status;
	int all_ok = 1;
	int remaining = 0;
	pid_t pid;

	/* Count the tests we still have to collect. */
	for (item = wait_ons.head; item != NULL; item = item->next) {
		remaining++;
	}

	while (remaining > 0) {
		pid = wait(&status);
		if (pid < 0) {
			perror("wait");
			return 0;
		}

		/* Was it one of the tests we are observing? */
		for (item = wait_ons.head; item != NULL; item = item->next) {
			if (item->pid != pid) {
				continue;
			}
			if (child_finished(item->name, status)) {
				all_ok = 0;
			}
			remaining--;
			remove_from_proclist(&wait_ons, item);
			free(item->name);
			free(item);
			/* item is gone; leave before the loop touches it. */
			break;
		}

		/* Or a daemon that ended on its own? */
		for (item = daemons.head; item != NULL; item = item->next) {
			if (item->pid != pid) {
				continue;
			}
			child_finished(item->name, status);
			remove_from_proclist(&daemons, item);
			free(item->name);
			free(item);
			break;
		}
	}

	return all_ok;
}
/**
 * Wait for daemons to finish. This function does NOT wait for wait_ons.
 *
 * Every daemon on the daemons list at the time of the call is waited
 * for; each one reaped is logged through child_finished(), removed from
 * the list and freed.  Children that are not on the daemons list are
 * reaped and ignored.
 *
 * If wait() fails with ECHILD there is nothing left to collect and the
 * function returns early; any other wait() failure is reported and the
 * wait is retried.
 */
static void wait_for_daemons(void)
{
	struct proclist_item_t *curr;
	int stat, nprocs, reaped, saved_errno;
	pid_t pid;

	// figure out how many times we have to wait...
	nprocs = 0;
	for (curr = daemons.head; curr != NULL; curr = curr->next) {
		nprocs++;
	}

	// now wait for daemons.
	for (reaped = 0; reaped < nprocs;) {
		pid = wait(&stat);
		if (pid < 0) {
			/* Save errno first: perror() itself may modify it. */
			saved_errno = errno;
			perror("wait");
			if (saved_errno == ECHILD) {
				return;
			}
			/* No child was reaped, so there is nothing to look up. */
			continue;
		}

		for (curr = daemons.head; curr != NULL; curr = curr->next) {
			if (curr->pid == pid) {
				child_finished(curr->name, stat);
				reaped++;
				remove_from_proclist(&daemons, curr);
				free(curr->name);
				free(curr);
				break;
			}
		}
	}
}
/**
 * Creates a record of a process that we want to watch for and appends
 * it to the wait_ons list.
 *
 * pid:  process id of the child.
 * name: label for the child; a private copy is stored in the record.
 *
 * On allocation failure the problem is reported with perror() and the
 * process is simply not recorded; nothing is leaked.
 */
static void note_process(pid_t pid, char *name)
{
	struct proclist_item_t *it;
	size_t name_size = strlen(name) + 1;

	it = calloc(1, sizeof(*it));
	if (it == NULL) {
		perror("malloc proclist_item_t");
		// XXX: Maybe we should just waitpid?
		return;
	}

	it->name = malloc(name_size);
	if (it->name == NULL) {
		perror("malloc procitem name");
		/* Release the half-built record instead of leaking it. */
		free(it);
		// XXX: Maybe we should just waitpid?
		return;
	}
	memcpy(it->name, name, name_size);
	it->pid = pid;

	add_to_proclist(&wait_ons, it);
}
/**
 * Creates a record of a daemon that should be killed on exit and
 * appends it to the daemons list.
 *
 * pid:  process id of the daemon.
 * name: label for the daemon; a private copy is stored in the record.
 *
 * On allocation failure the problem is reported with perror() and the
 * daemon is simply not recorded; nothing is leaked.
 */
static void note_daemon(pid_t pid, char *name)
{
	struct proclist_item_t *it;
	size_t name_size = strlen(name) + 1;

	it = calloc(1, sizeof(*it));
	if (it == NULL) {
		perror("malloc proclist_item_t");
		// XXX: what do we do here?
		return;
	}

	it->name = malloc(name_size);
	if (it->name == NULL) {
		perror("malloc procitem name");
		/* Release the half-built record instead of leaking it. */
		free(it);
		// XXX: what do we do here?
		return;
	}
	memcpy(it->name, name, name_size);
	it->pid = pid;

	add_to_proclist(&daemons, it);
}
/**
 * Starts a test, with the stdin/out/err fd's redirected to logs.
 * The 'fname' parameter should be a relative path from $POUNDER_HOME.
 *
 * In the child:
 *   - a new process group is created;
 *   - stdin is replaced by /dev/null;
 *   - the original stdout is duplicated onto fd 3 so the test can still
 *     reach the console;
 *   - stdout and stderr are redirected to $POUNDER_LOGDIR/<name>, where
 *     <name> is 'fname' with every '/' replaced by '-';
 *   - the working directory is changed to the directory holding the
 *     test, which is then executed as "./<basename>".
 * Any failure in the child makes it exit with status -1.
 *
 * In the parent: sleeps for TEST_FORK_WAIT microseconds and returns the
 * child's pid, or the negative value returned by fork() with errno left
 * as fork() set it.
 */
static pid_t spawn_test(char *fname)
{
	pid_t pid;
	int fd, len, saved_errno;
	size_t i;
	char buf[TEST_PATH_LEN], buf2[TEST_PATH_LEN];
	char *last_slash;
	const char *logdir;

	pid = fork();
	if (pid == 0) {
		if (setpgrp() < 0) {
			perror("setpgrp");
		}
		pounder_fprintf(stdout, "%s: %s test.\n", fname, start_msg);

		// reroute stdin
		fd = open("/dev/null", O_RDWR);
		if (fd < 0) {
			perror("/dev/null");
			exit(-1);
		}
		/*
		 * dup2() closes the target itself.  If stdin was already
		 * closed, open() returned fd 0 and there is nothing to do.
		 */
		if (fd != STDIN_FILENO) {
			if (dup2(fd, STDIN_FILENO) < 0) {
				perror("dup(/dev/null)");
				exit(-1);
			}
			close(fd);
		}

		// generate log name-- '/' -> '-' in the test name only.
		logdir = getenv("POUNDER_LOGDIR");
		if (logdir == NULL) {
			fprintf(stderr, "%s: POUNDER_LOGDIR is not set.\n",
				fname);
			exit(-1);
		}
		len = snprintf(buf2, sizeof(buf2), "%s/%s", logdir, fname);
		if (len < 0 || (size_t)len >= sizeof(buf2)) {
			fprintf(stderr, "%s: log file name is too long.\n",
				fname);
			exit(-1);
		}
		/* Skip the directory and its separator; flatten the rest. */
		for (i = strlen(logdir) + 1; buf2[i] != '\0'; i++) {
			if (buf2[i] == '/') {
				buf2[i] = '-';
			}
		}

		// make it so that we have a way to get back to the
		// original console.
		if (dup2(STDOUT_FILENO, 3) < 0) {
			perror("dup(stdout, 3)");
			exit(-1);
		}

		// reroute stdout/stderr
		fd = open(buf2, O_RDWR | O_CREAT | O_TRUNC | O_SYNC,
			  S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH);
		if (fd < 0) {
			perror(buf2);
			exit(-1);
		}
		if (fd != STDOUT_FILENO && dup2(fd, STDOUT_FILENO) < 0) {
			perror("dup(log, 1)");
			exit(-1);
		}
		if (fd != STDERR_FILENO && dup2(fd, STDERR_FILENO) < 0) {
			perror("dup(log, 2)");
			exit(-1);
		}
		/* Only drop the extra descriptor, never stdout/stderr. */
		if (fd > STDERR_FILENO) {
			close(fd);
		}

		// let us construct the absolute pathname of the test.
		// first find the current directory
		if (getcwd(buf, sizeof(buf)) == NULL) {
			perror("getcwd");
			exit(-1);
		}
		// then splice cwd + fname
		len = snprintf(buf2, sizeof(buf2), "%s/%s", buf, fname);
		if (len < 0 || (size_t)len >= sizeof(buf2)) {
			fprintf(stderr, "%s: test path name is too long.\n",
				fname);
			exit(-1);
		}
		// find the location of the last slash
		last_slash = strrchr(buf2, '/');
		if (last_slash != NULL) {
			// copy the filename part into a new buffer
			snprintf(buf, sizeof(buf), "./%s", last_slash + 1);
			// truncate at the last slash
			*last_slash = '\0';
			// and chdir
			if (chdir(buf2) != 0) {
				perror(buf2);
				exit(-1);
			}
			// reassign variables
			fname = buf;
		}

		// spawn the process; the list must end in a char pointer
		execlp(fname, fname, (char *)NULL);
		// If we get here, we can't run the test.
		perror(fname);
		exit(-1);
	}

	/* Keep fork()'s errno intact across the sleep for the caller. */
	saved_errno = errno;
	/* yield for a short while, so that the test has
	 * a little bit of time to run.
	 */
	usleep(TEST_FORK_WAIT);
	errno = saved_errno;
	return pid;
}
/**
 * Files a freshly started child under the right list according to its
 * type letter: 'T' goes to the running-test list, 'D' to the
 * running-daemon list.  Any other type is logged and not recorded.
 *
 * pid:   process id of the child.
 * fname: name of the child, passed on to the list record.
 * type:  type letter of the child.
 */
static void note_child(pid_t pid, char *fname, char type)
{
	switch (type) {
	case 'T':
		note_process(pid, fname);
		break;
	case 'D':
		note_daemon(pid, fname);
		break;
	default:
		pounder_fprintf(stdout,
				"Don't know what to do with child `%s' of type %c.\n",
				fname, type);
		break;
	}
}
/**
 * Process a directory: every entry accepted by test_filter() is either
 * executed as a test or, when it is itself a directory, handed to a new
 * copy of this program.  Execution follows these rules:
 *
 * - Entries that share the same two-digit number ('00foo' and '00bar')
 *   are started without waiting in between, so they run simultaneously.
 * - Entries are started in order of number and then name; before moving
 *   on to a different number, all running tests are waited for.
 *
 * Returns -1 if the directory cannot be scanned.  Otherwise the result
 * is a bit mask: value 1 is set if a fork failed, value 2 is set if any
 * program ran but failed.
 */
static int process_dir(const char *fname)
{
	struct dirent **namelist;
	struct dirent *entry;
	char buf[TEST_PATH_LEN];
	int count, idx;
	int level;
	int curr_level = -1;
	int result = 0;
	int children_ok = 1;
	pid_t pid;

	pounder_fprintf(stdout, "%s: Entering directory.\n", fname);

	count = scandir(fname, &namelist, test_filter,
			(int (*)(const void *, const void *))test_sort);
	if (count < 0) {
		perror(fname);
		return -1;
	}

	/* The list is sorted in reverse, so consume it back to front. */
	for (idx = count - 1; idx >= 0; idx--) {
		entry = namelist[idx];

		/* determine level number from the two digits in the name */
		level = (entry->d_name[1] - '0') * 10
		      + (entry->d_name[2] - '0');

		/* A new level starts: drain the tests of the previous one. */
		if (curr_level != -1 && curr_level != level) {
			children_ok &= wait_for_pids();
		}
		curr_level = level;

		snprintf(buf, TEST_PATH_LEN, "%s/%s", fname, entry->d_name);

		if (!is_directory(buf)) {
			pid = spawn_test(buf);
		} else {
			pid = fork();
			if (pid == 0) {
				if (setpgrp() < 0) {
					perror("setpgid");
				}
				// spawn a new copy of ourself.
				execl(progname, progname, buf, NULL);
				perror(progname);
				exit(-1);
			}
		}

		if (pid >= 0) {
			note_child(pid, buf, entry->d_name[0]);
		} else {
			perror("fork");
			result |= 1;
		}
		free(entry);
	}
	free(namelist);

	/* wait for remaining runners */
	children_ok &= wait_for_pids();
	if (children_ok == 0) {
		result |= 2;
	}

	pounder_fprintf(stdout, "%s: Leaving directory.\n", fname);
	return result;
}