/******************************************************************************/ /* */ /* Copyright (s) Ying Han <yinghan@google.com>, 2009 */ /* */ /* This program is free software; you can redistribute it and/or modify */ /* it under the terms of the GNU General Public License as published by */ /* the Free Software Foundation; either version 2 of the License, or */ /* (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See */ /* the GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /* */ /******************************************************************************/ /* ftruncate-mmap: pages are lost after writing to mmaped file, We triggered the failure during some internal experiment with ftruncate/mmap/write/read sequence. And we found that some pages are "lost" after writing to the mmaped file. which in the following test cases (count >= 0). First we deployed the test cases into group of machines and see about >20% failure rate on average. Then, I did couple of experiment to try to reproduce it on a single machine. what i found is that: 1. add a fsync after write the file, i can not reproduce this issue. 2. add memory pressure(mmap/mlock) while run the test in infinite loop, the failure is reproduced quickly. ( background flushing ? ) The "bad pages" count differs each time from one digit to 4,5 digit for 128M ftruncated file. and what i also found that the bad page number are contiguous for each segment which total bad pages container several segments. ext "1-4, 9-20, 48-50" ( batch flushing ? ) (The failure is reproduced based on 2.6.29-rc8, also happened on 2.6.18 kernel. . Here is the simple test case to reproduce it with memory pressure. ) */ #include <sys/mman.h> #include <sys/types.h> #include <fcntl.h> #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <signal.h> #include "test.h" /* Extern Global Variables */ extern int tst_count; /* Global Variables */ char *TCID = "mmap-corruption01"; /* test program identifier. */ int TST_TOTAL = 1; /* total number of tests in this file. */ long kMemSize = 128 << 20; int kPageSize = 4096; char *usage = "-h hours -m minutes -s secs\n"; int anyfail(void) { tst_brkm(TFAIL, tst_rmdir, "Test failed\n"); } int main(int argc, char **argv) { char *progname; int count = 0; int i, c; char *fname = "test.mmap-corruption"; char *mem; unsigned long alarmtime = 0; struct sigaction sa; void finish(int sig); progname = *argv; while ((c = getopt(argc, argv, ":h:m:s:")) != -1) { switch (c) { case 'h': alarmtime += atoi(optarg) * 60 * 60; break; case 'm': alarmtime += atoi(optarg) * 60; break; case 's': alarmtime += atoi(optarg); break; default: (void)fprintf(stderr, "usage: %s %s\n", progname, usage); anyfail(); } } /* * Plan for death by signal. User may have specified * a time limit, in which case set an alarm and catch SIGALRM. * Also catch and cleanup with SIGINT, SIGQUIT, and SIGTERM. */ sa.sa_handler = finish; sa.sa_flags = 0; if (sigemptyset(&sa.sa_mask)) { perror("sigempty error"); exit(1); } if (sigaction(SIGINT, &sa, 0) == -1) { perror("sigaction error SIGINT"); exit(1); } if (alarmtime) { if (sigaction(SIGALRM, &sa, 0) == -1) { perror("sigaction error"); exit(1); } (void)alarm(alarmtime); printf("mmap-corruption will run for=> %ld, seconds\n", alarmtime); } else { //Run for 5 secs only if (sigaction(SIGALRM, &sa, 0) == -1) { perror("sigaction error"); exit(1); } (void)alarm(5); printf("mmap-corruption will run for=> 5, seconds\n"); } /* If we get a SIGQUIT or SIGTERM, clean up and exit immediately. */ sa.sa_handler = finish; if (sigaction(SIGQUIT, &sa, 0) == -1) { perror("sigaction error SIGQUIT"); exit(1); } if (sigaction(SIGTERM, &sa, 0) == -1) { perror("sigaction error SIGTERM"); exit(1); } tst_tmpdir(); while (1) { unlink(fname); int fd = open(fname, O_CREAT | O_EXCL | O_RDWR, 0600); ftruncate(fd, kMemSize); mem = mmap(0, kMemSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); // Fill the memory with 1s. memset(mem, 1, kMemSize); for (i = 0; i < kMemSize; i++) { int byte_good = mem[i] != 0; if (!byte_good && ((i % kPageSize) == 0)) { //printf("%d ", i / kPageSize); count++; } } munmap(mem, kMemSize); close(fd); unlink(fname); if (count > 0) { printf("Running %d bad page\n", count); return 1; } count = 0; } return 0; } void finish(int sig) { printf("mmap-corruption PASSED\n"); exit(0); }