#define _XOPEN_SOURCE 500 /* pwrite */ #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> #include <libaio.h> #include <errno.h> #include <time.h> #include <sys/types.h> #include <sys/wait.h> /* * DIO invalidates the read cache after it writes. At one point it tried to * return EIO if this failed. When called from AIO, though, this EIO return * would clobber EIOCBQUEUED and cause fs/aio.c and fs/direct-io.c to complete * an iocb twice. This typically references freed memory from an interrupt * handler and oopses. * * This test hits the race after at most two minutes on a single spindle. It * spins performing large dio writes. It also spins racing buffered writes. * It assumes it's on ext3 using ordered writes. The ordered write bhs can be * pinned by jbd as a transaction commits. If invalidate_inode_pages2_range() * hits pages backed by those buffers ->releasepage will fail and it'll try to * return -EIO. */ #ifndef O_DIRECT #define O_DIRECT 040000 /* direct disk access hint */ #endif #define GINORMOUS (32 * 1024 * 1024) /* This test never survived to 180 seconds on a single spindle */ #define SECONDS 200 static unsigned char buf[GINORMOUS] __attribute((aligned (512))); #define fail(fmt , args...) do {\ printf(fmt , ##args); \ exit(1); \ } while (0) void spin_dio(int fd) { io_context_t ctx; struct iocb iocb; struct iocb *iocbs[1] = { &iocb }; struct io_event event; int ret; io_prep_pwrite(&iocb, fd, buf, GINORMOUS, 0); ret = io_queue_init(1, &ctx); if (ret) fail("io_queue_init returned %d", ret); while (1) { ret = io_submit(ctx, 1, iocbs); if (ret != 1) fail("io_submit returned %d instead of 1", ret); ret = io_getevents(ctx, 1, 1, &event, NULL); if (ret != 1) fail("io_getevents returned %d instead of 1", ret); if (event.res == -EIO) { printf("invalidation returned -EIO, OK\n"); exit(0); } if (event.res != GINORMOUS) fail("event res %ld\n", event.res); } } void spin_buffered(int fd) { int ret; while (1) { ret = pwrite(fd, buf, GINORMOUS, 0); if (ret != GINORMOUS) fail("buffered write returned %d", ret); } } static void alarm_handler(int signum) { } int main(int argc, char **argv) { pid_t buffered_pid; pid_t dio_pid; pid_t pid; int fd; int fd2; int status; if (argc != 2) fail("only arg should be file name"); fd = open(argv[1], O_DIRECT|O_CREAT|O_RDWR, 0644); if (fd < 0) fail("open dio failed: %d\n", errno); fd2 = open(argv[1], O_RDWR, 0644); if (fd < 0) fail("open failed: %d\n", errno); buffered_pid = fork(); if (buffered_pid < 0) fail("fork failed: %d\n", errno); if (buffered_pid == 0) { spin_buffered(fd2); exit(0); } dio_pid = fork(); if (dio_pid < 0) { kill(buffered_pid, SIGKILL); fail("fork failed: %d\n", errno); } if (dio_pid == 0) { spin_dio(fd); exit(0); } signal(SIGALRM, alarm_handler); alarm(SECONDS); pid = wait(&status); if (pid < 0 && errno == EINTR) { /* if we timed out then we're done */ kill(buffered_pid, SIGKILL); kill(dio_pid, SIGKILL); printf("ran for %d seconds without error, passing\n", SECONDS); exit(0); } if (pid == dio_pid) kill(buffered_pid, SIGKILL); else kill(dio_pid, SIGKILL); /* * pass on the child's pass/fail return code or fail if the child * didn't exit cleanly. */ exit(WIFEXITED(status) ? WEXITSTATUS(status) : 1); }