/* * The main pounder process controller and scheduler program. * Author: Darrick Wong <djwong@us.ibm.com> */ /* * Copyright (C) 2003-2006 IBM * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */ #include <errno.h> #include <signal.h> #include <sys/wait.h> #include <unistd.h> #include <fcntl.h> #include <string.h> #include <stdlib.h> #include <ctype.h> #include <stdarg.h> #include <sys/time.h> #include <time.h> #include <stdio.h> #include <dirent.h> #include <sys/stat.h> #include "proclist.h" #include "debug.h" // List of subprocesses to wait upon struct proclist_t wait_ons = { NULL }; struct proclist_t daemons = { NULL }; static int is_leader = 0; static char *pidfile = ""; static inline int is_executable(const char *fname); static inline int is_directory(const char *fname); static inline int test_filter(const struct dirent *p); static inline int test_sort(const struct dirent **a, const struct dirent **b); static int wait_for_pids(void); static void wait_for_daemons(void); static void note_process(pid_t pid, char *name); static void note_daemon(pid_t pid, char *name); static void kill_tests(void); static void kill_daemons(void); static int process_dir(const char *fname); static pid_t spawn_test(char *fname); static void note_child(pid_t pid, char *fname, char type); static int child_finished(const char *name, int stat); 
static char *progname; #define TEST_PATH_LEN 512 #define TEST_FORK_WAIT 100 /** * Kill everything upon ^C. */ static void jump_out(int signum) { pounder_fprintf(stdout, "Control-C received; aborting!\n"); //unlink("pounder_pgrp"); kill_tests(); kill_daemons(); if (is_leader) { unlink(pidfile); } exit(0); } /** * Kills tests launched from within. */ static void kill_tests(void) { struct proclist_item_t *curr; curr = wait_ons.head; while (curr != NULL) { kill(-curr->pid, SIGTERM); curr = curr->next; } } /** * Kills daemons launched from within. */ static void kill_daemons(void) { struct proclist_item_t *curr; curr = daemons.head; while (curr != NULL) { kill(-curr->pid, SIGTERM); curr = curr->next; } } /** * Record the pounder leader's PID in a file. */ static void record_pid(void) { FILE *fp; pidfile = getenv("POUNDER_PIDFILE"); if (pidfile == NULL) { pidfile = "pounder.pid"; } fp = fopen(pidfile, "w"); if (fp == NULL) { perror(pidfile); } fprintf(fp, "%d", getpid()); fclose(fp); } /** * Main program. Returns 1 if all programs run successfully, 0 if * something failed and -1 if there was an error running programs. 
*/ int main(int argc, char *argv[]) { int retcode; struct sigaction zig; pid_t pid; char *c; /* Check parameters */ if (argc < 2) { fprintf(stderr, "Usage: %s test_prog\n", argv[0]); return 1; } if (argc > 2 && strcmp(argv[2], "--leader") == 0) { pounder_fprintf(stdout, "Logging this test output to %s/POUNDERLOG.\n", getenv("POUNDER_LOGDIR")); is_leader = 1; record_pid(); } progname = argv[0]; /* Set up signals */ memset(&zig, 0x00, sizeof(zig)); zig.sa_handler = jump_out; sigaction(SIGHUP, &zig, NULL); sigaction(SIGINT, &zig, NULL); sigaction(SIGTERM, &zig, NULL); if (is_directory(argv[1])) { retcode = process_dir(argv[1]); } else { if (is_executable(argv[1])) { c = rindex(argv[1], '/'); c++; // Start the test pid = spawn_test(argv[1]); if (pid < 0) { perror("fork"); retcode = -1; goto out; } // Track the test note_process(pid, argv[1]); if (wait_for_pids() == 0) { retcode = 1; } else { retcode = 0; } } else { pounder_fprintf(stderr, "%s: Not a directory or a test.\n", argv[1]); retcode = -1; } } out: kill_daemons(); wait_for_daemons(); if (is_leader) { if (retcode == 0) { pounder_fprintf(stdout, "%s: %s.\n", argv[1], pass_msg); } else if (retcode < 0 || retcode == 255) { pounder_fprintf(stdout, "%s: %s with code %d.\n", argv[1], abort_msg, retcode); } else { pounder_fprintf(stdout, "%s: %s with code %d.\n", argv[1], fail_msg, retcode); } unlink(pidfile); } exit(retcode); } /** * Helper function to determine if a file is executable. * Returns 1 if yes, 0 if no and -1 if error. */ static inline int is_executable(const char *fname) { struct stat tmp; if (stat(fname, &tmp) < 0) { return -1; } if (geteuid() == 0) { return 1; } else if (geteuid() == tmp.st_uid) { return tmp.st_mode & S_IXUSR; } else if (getegid() == tmp.st_gid) { return tmp.st_mode & S_IXGRP; } else { return tmp.st_mode & S_IXOTH; } } /** * Helper function to determine if a file is a directory. * Returns 1 if yes, 0 if no and -1 if error. 
*/ static inline int is_directory(const char *fname) { struct stat tmp; if (stat(fname, &tmp) < 0) { return 0; } return S_ISDIR(tmp.st_mode); } /** * Returns 1 if the directory entry's filename fits the test name pattern. */ static inline int test_filter(const struct dirent *p) { return ((p->d_name[0] == 'T' || p->d_name[0] == 'D') && isdigit(p->d_name[1]) && isdigit(p->d_name[2])); } /** * Simple routine to compare two tests names such that lower number/name pairs * are considered "lesser" values. */ //static inline int test_sort(const struct dirent **a, const struct dirent **b) { static inline int test_sort(const struct dirent **a, const struct dirent **b) { return strcmp(&(*b)->d_name[1], &(*a)->d_name[1]); } /** * Takes the wait() status integer and prints a log message. * Returns 1 if there was a failure. */ static int child_finished(const char *name, int stat) { int x; // did we sig-exit? if (WIFSIGNALED(stat)) { pounder_fprintf(stdout, "%s: %s on signal %d.\n", name, fail_msg, WTERMSIG(stat)); return 1; } else { x = WEXITSTATUS(stat); if (x == 0) { pounder_fprintf(stdout, "%s: %s.\n", name, pass_msg); return 0; } else if (x < 0 || x == 255) { pounder_fprintf(stdout, "%s: %s with code %d.\n", name, abort_msg, x); return 1; // FIXME: add test to blacklist } else { pounder_fprintf(stdout, "%s: %s with code %d.\n", name, fail_msg, x); return 1; } } } /** * Wait for some number of PIDs. If any of them return nonzero, we * assume that there was some kind of failure and return 0. Otherwise, * we return 1 to indicate success. */ static int wait_for_pids(void) { struct proclist_item_t *curr; int i, stat, res, nprocs; pid_t pid; res = 1; // figure out how many times we have to wait... curr = wait_ons.head; nprocs = 0; while (curr != NULL) { nprocs++; curr = curr->next; } // now wait for children. 
for (i = 0; i < nprocs;) { pid = wait(&stat); if (pid < 0) { perror("wait"); return 0; } // go find the child curr = wait_ons.head; while (curr != NULL) { if (curr->pid == pid) { res = (child_finished(curr->name, stat) ? 0 : res); // one less pid to wait for i++; // stop observing remove_from_proclist(&wait_ons, curr); free(curr->name); free(curr); break; } curr = curr->next; } curr = daemons.head; while (curr != NULL) { if (curr->pid == pid) { child_finished(curr->name, stat); remove_from_proclist(&daemons, curr); free(curr->name); free(curr); break; } curr = curr->next; } } return res; } /** * Wait for daemons to finish. This function does NOT wait for wait_ons. */ static void wait_for_daemons(void) { struct proclist_item_t *curr; int i, stat, res, nprocs; pid_t pid; res = 1; // figure out how many times we have to wait... curr = daemons.head; nprocs = 0; while (curr != NULL) { nprocs++; curr = curr->next; } // now wait for daemons. for (i = 0; i < nprocs;) { pid = wait(&stat); if (pid < 0) { perror("wait"); if (errno == ECHILD) { return; } } curr = daemons.head; while (curr != NULL) { if (curr->pid == pid) { child_finished(curr->name, stat); i++; remove_from_proclist(&daemons, curr); free(curr->name); free(curr); break; } curr = curr->next; } } } /** * Creates a record of processes that we want to watch for. */ static void note_process(pid_t pid, char *name) { struct proclist_item_t *it; it = calloc(1, sizeof(struct proclist_item_t)); if (it == NULL) { perror("malloc proclist_item_t"); // XXX: Maybe we should just waitpid? return; } it->pid = pid; it->name = calloc(strlen(name) + 1, sizeof(char)); if (it->name == NULL) { perror("malloc procitem name"); // XXX: Maybe we should just waitpid? return; } strcpy(it->name, name); add_to_proclist(&wait_ons, it); } /** * Creates a record of daemons that should be killed on exit. 
*/ static void note_daemon(pid_t pid, char *name) { struct proclist_item_t *it; it = calloc(1, sizeof(struct proclist_item_t)); if (it == NULL) { perror("malloc proclist_item_t"); // XXX: what do we do here? return; } it->pid = pid; it->name = calloc(strlen(name) + 1, sizeof(char)); if (it->name == NULL) { perror("malloc procitem name"); // XXX: what do we do here? return; } strcpy(it->name, name); add_to_proclist(&daemons, it); } /** * Starts a test, with the stdin/out/err fd's redirected to logs. * The 'fname' parameter should be a relative path from $POUNDER_HOME. */ static pid_t spawn_test(char *fname) { pid_t pid; int fd, tmp; char buf[TEST_PATH_LEN], buf2[TEST_PATH_LEN]; char *last_slash; pid = fork(); if (pid == 0) { if (setpgrp() < 0) { perror("setpgid"); } pounder_fprintf(stdout, "%s: %s test.\n", fname, start_msg); // reroute stdin fd = open("/dev/null", O_RDWR); if (fd < 0) { perror("/dev/null"); exit(-1); } close(0); tmp = dup2(fd, 0); if (tmp < 0) { perror("dup(/dev/null)"); exit(-1); } close(fd); // generate log name-- '/' -> '-'. snprintf(buf2, TEST_PATH_LEN, "%s|%s", getenv("POUNDER_LOGDIR"), fname); fd = strlen(buf2); for (tmp = (index(buf2, '|') - buf2); tmp < fd; tmp++) { if (buf2[tmp] == '/') { buf2[tmp] = '-'; } else if (buf2[tmp] == '|') { buf2[tmp] = '/'; } } // make it so that we have a way to get back to the // original console. tmp = dup2(1, 3); if (tmp < 0) { perror("dup(stdout, 3)"); exit(-1); } // reroute stdout/stderr fd = open(buf2, O_RDWR | O_CREAT | O_TRUNC | O_SYNC, S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH); if (fd < 0) { perror(buf2); exit(-1); } close(1); tmp = dup2(fd, 1); if (tmp < 0) { perror("dup(log, 1)"); exit(-1); } close(2); tmp = dup2(fd, 2); if (tmp < 0) { perror("dup(log, 2)"); exit(-1); } close(fd); // let us construct the absolute pathname of the test. 
// first find the current directory if (getcwd(buf, TEST_PATH_LEN) == NULL) { perror("getcwd"); exit(-1); } // then splice cwd + fname snprintf(buf2, TEST_PATH_LEN, "%s/%s", buf, fname); // find the location of the last slash last_slash = rindex(buf2, '/'); if (last_slash != NULL) { // copy the filename part into a new buffer snprintf(buf, TEST_PATH_LEN, "./%s", last_slash + 1); // truncate at the last slash *last_slash = 0; // and chdir if (chdir(buf2) != 0) { perror(buf2); exit(-1); } // reassign variables fname = buf; } // spawn the process execlp(fname, fname, NULL); // If we get here, we can't run the test. perror(fname); exit(-1); } tmp = errno; /* yield for a short while, so that the test has * a little bit of time to run. */ usleep(TEST_FORK_WAIT); errno = tmp; return pid; } /** * Adds a child process to either the running-test or running-daemon * list. */ static void note_child(pid_t pid, char *fname, char type) { if (type == 'T') { note_process(pid, fname); } else if (type == 'D') { note_daemon(pid, fname); } else { pounder_fprintf(stdout, "Don't know what to do with child `%s' of type %c.\n", fname, type); } } /** * Process a directory--for each entry in a directory, execute files or spawn * a new copy of ourself on the new directory. Process execution is subject to * these rules: * * - Test files that start with the same number '00foo' and '00bar' are allowed * to run simultaneously. * - Test files are run in order of number and then name. * * If a the fork fails, bit 1 of the return code is set. If a * program runs but fails, bit 2 is set. 
*/
static int process_dir(const char *fname)
{
	struct dirent **namelist;
	int i, len, result = 0;
	char buf[TEST_PATH_LEN];
	int curr_level_num = -1;
	int test_level_num;
	pid_t pid;
	int children_ok = 1;

	pounder_fprintf(stdout, "%s: Entering directory.\n", fname);

	/*
	 * test_sort() already has the comparator type that scandir()
	 * expects, so it is passed without a cast.
	 */
	i = scandir(fname, &namelist, test_filter, test_sort);
	if (i < 0) {
		perror(fname);
		return -1;
	}

	/*
	 * test_sort() orders the entries in descending order, so walking
	 * the list backwards visits the lowest number first.
	 */
	while (i--) {
		/* determine level number */
		test_level_num = ((namelist[i]->d_name[1] - '0') * 10)
		    + (namelist[i]->d_name[2] - '0');

		/* A new level may only start once the previous one is done. */
		if (curr_level_num != -1 && curr_level_num != test_level_num) {
			children_ok &= wait_for_pids();
		}
		curr_level_num = test_level_num;

		len = snprintf(buf, TEST_PATH_LEN, "%s/%s", fname,
			       namelist[i]->d_name);
		if (len < 0 || len >= TEST_PATH_LEN) {
			/* A truncated path would name the wrong file. */
			pounder_fprintf(stderr, "%s/%s: Path is too long.\n",
					fname, namelist[i]->d_name);
			result |= 1;
			free(namelist[i]);
			continue;
		}

		if (is_directory(buf)) {
			pid = fork();
			if (pid == 0) {
				if (setpgrp() < 0) {
					perror("setpgid");
				}
				// spawn a new copy of ourself.
				/* The argument list must end with a null *pointer*. */
				execl(progname, progname, buf, (char *)NULL);

				perror(progname);
				exit(-1);
			}
		} else {
			pid = spawn_test(buf);
		}

		if (pid < 0) {
			perror("fork");
			result |= 1;
			free(namelist[i]);
			continue;
		}

		/* The first letter of the name ('T' or 'D') selects the list. */
		note_child(pid, buf, namelist[i]->d_name[0]);

		free(namelist[i]);
	}
	free(namelist);

	/* wait for remaining runners */
	children_ok &= wait_for_pids();
	if (children_ok == 0) {
		result |= 2;
	}

	pounder_fprintf(stdout, "%s: Leaving directory.\n", fname);

	return result;
}