/* * Copyright (c) 2016 Red Hat, Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* * DESCRIPTION * * Page fault occurs in spite that madvise(WILLNEED) system call is called * to prefetch the page. This issue is reproduced by running a program * which sequentially accesses to a shared memory and calls madvise(WILLNEED) * to the next page on a page fault. * * This bug is present in all RHEL7 versions. It looks like this was fixed in * mainline kernel > v3.15 by the following patch: * * commit 55231e5c898c5c03c14194001e349f40f59bd300 * Author: Johannes Weiner <hannes@cmpxchg.org> * Date: Thu May 22 11:54:17 2014 -0700 * * mm: madvise: fix MADV_WILLNEED on shmem swapouts */ #include <errno.h> #include <stdio.h> #include <sys/mount.h> #include <sys/sysinfo.h> #include "tst_test.h" #define CHUNK_SZ (400*1024*1024L) #define CHUNK_PAGES (CHUNK_SZ / pg_sz) #define PASS_THRESHOLD (CHUNK_SZ / 4) #define MNT_NAME "memory" #define GROUP_NAME "madvise06" static const char drop_caches_fname[] = "/proc/sys/vm/drop_caches"; static int pg_sz; static void check_path(const char *path) { if (access(path, R_OK | W_OK)) tst_brk(TCONF, "file needed: %s\n", path); } static void setup(void) { struct sysinfo sys_buf_start; pg_sz = getpagesize(); check_path(drop_caches_fname); tst_res(TINFO, "dropping caches"); sync(); SAFE_FILE_PRINTF(drop_caches_fname, "3"); sysinfo(&sys_buf_start); if (sys_buf_start.freeram < 2 * CHUNK_SZ) { tst_brk(TCONF, "System RAM is too small (%li bytes needed)", 2 * CHUNK_SZ); } if (sys_buf_start.freeswap < 2 * CHUNK_SZ) { tst_brk(TCONF, "System swap is too small (%li bytes needed)", 2 * CHUNK_SZ); } SAFE_MKDIR(MNT_NAME, 0700); if (mount("memory", MNT_NAME, "cgroup", 0, "memory") == -1) { if (errno == ENODEV || errno == ENOENT) tst_brk(TCONF, "memory cgroup needed"); } SAFE_MKDIR(MNT_NAME"/"GROUP_NAME, 0700); check_path("/proc/self/oom_score_adj"); check_path(MNT_NAME"/"GROUP_NAME"/memory.limit_in_bytes"); check_path(MNT_NAME"/"GROUP_NAME"/memory.swappiness"); check_path(MNT_NAME"/"GROUP_NAME"/tasks"); SAFE_FILE_PRINTF("/proc/self/oom_score_adj", "%d", -1000); SAFE_FILE_PRINTF(MNT_NAME"/"GROUP_NAME"/memory.limit_in_bytes", "%ld\n", PASS_THRESHOLD); SAFE_FILE_PRINTF(MNT_NAME"/"GROUP_NAME"/memory.swappiness", "60"); SAFE_FILE_PRINTF(MNT_NAME"/"GROUP_NAME"/tasks", "%d\n", getpid()); } static void cleanup(void) { if (!access(MNT_NAME"/tasks", F_OK)) { SAFE_FILE_PRINTF(MNT_NAME"/tasks", "%d\n", getpid()); SAFE_RMDIR(MNT_NAME"/"GROUP_NAME); SAFE_UMOUNT(MNT_NAME); } } static void dirty_pages(char *ptr, long size) { long i; long pages = size / pg_sz; for (i = 0; i < pages; i++) ptr[i * pg_sz] = 'x'; } static int get_page_fault_num(void) { int pg; SAFE_FILE_SCANF("/proc/self/stat", "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %d", &pg); return pg; } static void test_advice_willneed(void) { int loops = 50; char *target; long swapcached_start, swapcached; int page_fault_num_1, page_fault_num_2; target = SAFE_MMAP(NULL, CHUNK_SZ, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); dirty_pages(target, CHUNK_SZ); SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld", &swapcached_start); tst_res(TINFO, "SwapCached (before madvise): %ld", swapcached_start); TEST(madvise(target, CHUNK_SZ, MADV_WILLNEED)); if (TEST_RETURN == -1) tst_brk(TBROK | TERRNO, "madvise failed"); do { loops--; usleep(100000); SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld", &swapcached); } while (swapcached < swapcached_start + PASS_THRESHOLD / 1024 && loops > 0); tst_res(TINFO, "SwapCached (after madvise): %ld", swapcached); if (swapcached > swapcached_start + PASS_THRESHOLD / 1024) { tst_res(TPASS, "Regression test pass"); SAFE_MUNMAP(target, CHUNK_SZ); return; } /* * We may have hit a bug or we just have slow I/O, * try accessing first page. */ page_fault_num_1 = get_page_fault_num(); tst_res(TINFO, "PageFault(madvice / no mem access): %d", page_fault_num_1); target[0] = 'a'; page_fault_num_2 = get_page_fault_num(); tst_res(TINFO, "PageFault(madvice / mem access): %d", page_fault_num_2); if (page_fault_num_1 != page_fault_num_2) tst_res(TFAIL, "Bug has been reproduced"); else tst_res(TPASS, "Regression test pass"); SAFE_MUNMAP(target, CHUNK_SZ); } static struct tst_test test = { .test_all = test_advice_willneed, .setup = setup, .cleanup = cleanup, .min_kver = "3.10.0", .needs_tmpdir = 1, .needs_root = 1, };