summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/include/linux/atomic.h22
-rw-r--r--tools/testing/radix-tree/maple.c10
-rw-r--r--tools/testing/selftests/damon/Makefile2
-rw-r--r--tools/testing/selftests/damon/access_memory_even.c1
-rwxr-xr-xtools/testing/selftests/damon/sysfs_no_op_commit_break.py72
-rw-r--r--tools/testing/selftests/kho/init.c13
-rwxr-xr-xtools/testing/selftests/kho/vmtest.sh28
-rw-r--r--tools/testing/selftests/kselftest.h4
-rw-r--r--tools/testing/selftests/landlock/audit.h6
-rw-r--r--tools/testing/selftests/landlock/common.h4
-rw-r--r--tools/testing/selftests/mm/.gitignore2
-rw-r--r--tools/testing/selftests/mm/Makefile4
-rw-r--r--tools/testing/selftests/mm/cow.c13
-rw-r--r--tools/testing/selftests/mm/hugepage-mremap.c16
-rw-r--r--tools/testing/selftests/mm/ksm_functional_tests.c156
-rw-r--r--tools/testing/selftests/mm/mremap_test.c6
-rw-r--r--tools/testing/selftests/mm/pagemap_ioctl.c24
-rw-r--r--tools/testing/selftests/mm/pfnmap.c48
-rw-r--r--tools/testing/selftests/mm/pkey-helpers.h3
-rw-r--r--tools/testing/selftests/mm/prctl_thp_disable.c291
-rw-r--r--tools/testing/selftests/mm/rmap.c433
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh14
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c353
-rwxr-xr-xtools/testing/selftests/mm/test_vmalloc.sh6
-rw-r--r--tools/testing/selftests/mm/thp_settings.c9
-rw-r--r--tools/testing/selftests/mm/thp_settings.h1
-rw-r--r--tools/testing/selftests/mm/thuge-gen.c11
-rw-r--r--tools/testing/selftests/mm/uffd-stress.c19
-rw-r--r--tools/testing/selftests/mm/uffd-wp-mremap.c9
-rw-r--r--tools/testing/selftests/mm/virtual_address_range.c13
-rw-r--r--tools/testing/selftests/mm/vm_util.c150
-rw-r--r--tools/testing/selftests/mm/vm_util.h17
-rw-r--r--tools/testing/selftests/net/psock_lib.h4
-rw-r--r--tools/testing/selftests/perf_events/watermark_signal.c2
-rw-r--r--tools/testing/selftests/proc/proc-maps-race.c65
-rw-r--r--tools/testing/selftests/ublk/utils.h2
-rw-r--r--tools/testing/shared/linux/maple_tree.h6
-rw-r--r--tools/testing/shared/shared.mk6
-rw-r--r--tools/testing/vma/linux/atomic.h17
-rw-r--r--tools/testing/vma/vma_internal.h49
40 files changed, 1585 insertions, 326 deletions
diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h
index 01907b33537e..50c66ba9ada5 100644
--- a/tools/include/linux/atomic.h
+++ b/tools/include/linux/atomic.h
@@ -12,4 +12,26 @@ void atomic_long_set(atomic_long_t *v, long i);
#define atomic_cmpxchg_release atomic_cmpxchg
#endif /* atomic_cmpxchg_relaxed */
+/*
+ * Attempt to replace the value of *ptr with 'new', expecting it to hold *oldp.
+ *
+ * Returns true when atomic_cmpxchg() hands back the expected value.
+ * Otherwise the value atomic_cmpxchg() returned is written back through
+ * 'oldp' and false is returned, so the caller can retry with it.
+ */
+static inline bool atomic_try_cmpxchg(atomic_t *ptr, int *oldp, int new)
+{
+	const int expected = *oldp;
+	const int seen = atomic_cmpxchg(ptr, expected, new);
+
+	if (seen == expected)
+		return true;
+
+	*oldp = seen;
+	return false;
+}
+
+/*
+ * Increment *v by one unless the value read from it is negative.
+ *
+ * Returns false as soon as a negative value is observed, true once the
+ * incremented value has been installed with atomic_try_cmpxchg().
+ */
+static inline bool atomic_inc_unless_negative(atomic_t *v)
+{
+	int cur = atomic_read(v);
+
+	for (;;) {
+		if (unlikely(cur < 0))
+			return false;
+
+		/* A failed attempt refreshes 'cur', so simply go around again. */
+		if (atomic_try_cmpxchg(v, &cur, cur + 1))
+			return true;
+	}
+}
+
#endif /* __TOOLS_LINUX_ATOMIC_H */
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index 172700fb7784..05714c22994e 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -36327,13 +36327,18 @@ extern void test_kmem_cache_bulk(void);
static inline void check_spanning_store_height(struct maple_tree *mt)
{
int index = 0;
+ int last = 140;
MA_STATE(mas, mt, 0, 0);
mas_lock(&mas);
while (mt_height(mt) != 3) {
mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL);
mas_set(&mas, ++index);
}
- mas_set_range(&mas, 90, 140);
+
+ if (MAPLE_32BIT)
+ last = 155; /* 32 bit higher branching factor. */
+
+ mas_set_range(&mas, 90, last);
mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL);
MT_BUG_ON(mt, mas_mt_height(&mas) != 2);
mas_unlock(&mas);
@@ -36428,6 +36433,7 @@ static void check_nomem_writer_race(struct maple_tree *mt)
*/
static inline int check_vma_modification(struct maple_tree *mt)
{
+#if defined(CONFIG_64BIT)
MA_STATE(mas, mt, 0, 0);
mtree_lock(mt);
@@ -36451,6 +36457,8 @@ static inline int check_vma_modification(struct maple_tree *mt)
mas_destroy(&mas);
mtree_unlock(mt);
+#endif
+
return 0;
}
diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index 9a3499827d4b..2180c328a825 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -5,6 +5,7 @@ TEST_GEN_FILES += access_memory access_memory_even
TEST_FILES = _damon_sysfs.py
TEST_FILES += drgn_dump_damon_status.py
+TEST_FILES += _common.sh
# functionality tests
TEST_PROGS += sysfs.sh
@@ -18,6 +19,7 @@ TEST_PROGS += reclaim.sh lru_sort.sh
TEST_PROGS += sysfs_update_removed_scheme_dir.sh
TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py
TEST_PROGS += sysfs_memcg_path_leak.sh
+TEST_PROGS += sysfs_no_op_commit_break.py
EXTRA_CLEAN = __pycache__
diff --git a/tools/testing/selftests/damon/access_memory_even.c b/tools/testing/selftests/damon/access_memory_even.c
index a9f4e9aaf3a9..93f3a71bcfd4 100644
--- a/tools/testing/selftests/damon/access_memory_even.c
+++ b/tools/testing/selftests/damon/access_memory_even.c
@@ -9,7 +9,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <time.h>
int main(int argc, char *argv[])
{
diff --git a/tools/testing/selftests/damon/sysfs_no_op_commit_break.py b/tools/testing/selftests/damon/sysfs_no_op_commit_break.py
new file mode 100755
index 000000000000..2c65cffe6b54
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs_no_op_commit_break.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import json
+import os
+import subprocess
+import sys
+
+import _damon_sysfs
+
+def dump_damon_status_dict(pid):
+ try:
+ subprocess.check_output(['which', 'drgn'], stderr=subprocess.DEVNULL)
+ except:
+ return None, 'drgn not found'
+ file_dir = os.path.dirname(os.path.abspath(__file__))
+ dump_script = os.path.join(file_dir, 'drgn_dump_damon_status.py')
+ rc = subprocess.call(['drgn', dump_script, pid, 'damon_dump_output'],
+ stderr=subprocess.DEVNULL)
+
+ if rc != 0:
+ return None, f'drgn fail: return code({rc})'
+ try:
+ with open('damon_dump_output', 'r') as f:
+ return json.load(f), None
+ except Exception as e:
+ return None, 'json.load fail (%s)' % e
+
+def main():
+ kdamonds = _damon_sysfs.Kdamonds(
+ [_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ schemes=[_damon_sysfs.Damos(
+ ops_filters=[
+ _damon_sysfs.DamosFilter(
+ type_='anon',
+ matching=True,
+ allow=True,
+ )
+ ]
+ )],
+ )])]
+ )
+
+ err = kdamonds.start()
+ if err is not None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ before_commit_status, err = \
+ dump_damon_status_dict(kdamonds.kdamonds[0].pid)
+ if err is not None:
+ print('before-commit status dump failed: %s' % err)
+ exit(1)
+
+ kdamonds.kdamonds[0].commit()
+
+ after_commit_status, err = \
+ dump_damon_status_dict(kdamonds.kdamonds[0].pid)
+ if err is not None:
+ print('after-commit status dump failed: %s' % err)
+ exit(1)
+
+ if before_commit_status != after_commit_status:
+ print(f'before: {json.dumps(before_commit_status, indent=2)}')
+ print(f'after: {json.dumps(after_commit_status, indent=2)}')
+ exit(1)
+
+ kdamonds.stop()
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/kho/init.c b/tools/testing/selftests/kho/init.c
index 8034e24c6bf6..6d9e91d55d68 100644
--- a/tools/testing/selftests/kho/init.c
+++ b/tools/testing/selftests/kho/init.c
@@ -1,22 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
-#ifndef NOLIBC
-#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
-#include <syscall.h>
+#include <sys/syscall.h>
#include <sys/mount.h>
#include <sys/reboot.h>
-#endif
+#include <linux/kexec.h>
/* from arch/x86/include/asm/setup.h */
#define COMMAND_LINE_SIZE 2048
-/* from include/linux/kexex.h */
-#define KEXEC_FILE_NO_INITRAMFS 0x00000004
-
-#define KHO_FINILIZE "/debugfs/kho/out/finalize"
+#define KHO_FINALIZE "/debugfs/kho/out/finalize"
#define KERNEL_IMAGE "/kernel"
static int mount_filesystems(void)
@@ -32,7 +27,7 @@ static int kho_enable(void)
const char enable[] = "1";
int fd;
- fd = open(KHO_FINILIZE, O_RDWR);
+ fd = open(KHO_FINALIZE, O_RDWR);
if (fd < 0)
return -1;
diff --git a/tools/testing/selftests/kho/vmtest.sh b/tools/testing/selftests/kho/vmtest.sh
index ec70a17bd476..3f6c17166846 100755
--- a/tools/testing/selftests/kho/vmtest.sh
+++ b/tools/testing/selftests/kho/vmtest.sh
@@ -10,7 +10,6 @@ kernel_dir=$(realpath "$test_dir/../../../..")
tmp_dir=$(mktemp -d /tmp/kho-test.XXXXXXXX)
headers_dir="$tmp_dir/usr"
-initrd_dir="$tmp_dir/initrd"
initrd="$tmp_dir/initrd.cpio"
source "$test_dir/../kselftest/ktap_helpers.sh"
@@ -81,19 +80,22 @@ EOF
function mkinitrd() {
local kernel=$1
- mkdir -p "$initrd_dir"/{dev,debugfs,proc}
- sudo mknod "$initrd_dir/dev/console" c 5 1
-
- "$CROSS_COMPILE"gcc -s -static -Os -nostdinc -I"$headers_dir/include" \
- -fno-asynchronous-unwind-tables -fno-ident -nostdlib \
- -include "$test_dir/../../../include/nolibc/nolibc.h" \
- -o "$initrd_dir/init" "$test_dir/init.c" \
-
- cp "$kernel" "$initrd_dir/kernel"
+ "$CROSS_COMPILE"gcc -s -static -Os -nostdinc -nostdlib \
+ -fno-asynchronous-unwind-tables -fno-ident \
+ -I "$headers_dir/include" \
+ -I "$kernel_dir/tools/include/nolibc" \
+ -o "$tmp_dir/init" "$test_dir/init.c"
+
+ cat > "$tmp_dir/cpio_list" <<EOF
+dir /dev 0755 0 0
+dir /proc 0755 0 0
+dir /debugfs 0755 0 0
+nod /dev/console 0600 0 0 c 5 1
+file /init $tmp_dir/init 0755 0 0
+file /kernel $kernel 0644 0 0
+EOF
- pushd "$initrd_dir" &>/dev/null
- find . | cpio -H newc --create > "$initrd" 2>/dev/null
- popd &>/dev/null
+ "$build_dir/usr/gen_init_cpio" "$tmp_dir/cpio_list" > "$initrd"
}
function run_qemu() {
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index c3b6d2604b1e..661d31c4b558 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -92,6 +92,10 @@
#endif
#define __printf(a, b) __attribute__((format(printf, a, b)))
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((__unused__))
+#endif
+
/* counters */
struct ksft_count {
unsigned int ksft_pass;
diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h
index b16986aa6442..02fd1393947a 100644
--- a/tools/testing/selftests/landlock/audit.h
+++ b/tools/testing/selftests/landlock/audit.h
@@ -20,14 +20,12 @@
#include <sys/time.h>
#include <unistd.h>
+#include "../kselftest.h"
+
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
-#ifndef __maybe_unused
-#define __maybe_unused __attribute__((__unused__))
-#endif
-
#define REGEX_LANDLOCK_PREFIX "^audit([0-9.:]\\+): domain=\\([0-9a-f]\\+\\)"
struct audit_filter {
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
index 88a3c78f5d98..9acecae36f51 100644
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -22,10 +22,6 @@
#define TMP_DIR "tmp"
-#ifndef __maybe_unused
-#define __maybe_unused __attribute__((__unused__))
-#endif
-
/* TEST_F_FORK() should not be used for new tests. */
#define TEST_F_FORK(fixture_name, test_name) TEST_F(fixture_name, test_name)
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index e7b23a8a05fe..c2a8586e51a1 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -58,3 +58,5 @@ pkey_sighandler_tests_32
pkey_sighandler_tests_64
guard-regions
merge
+prctl_thp_disable
+rmap
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index d13b3cef2a2b..5a1dee50b898 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -86,6 +86,7 @@ TEST_GEN_FILES += on-fault-limit
TEST_GEN_FILES += pagemap_ioctl
TEST_GEN_FILES += pfnmap
TEST_GEN_FILES += process_madv
+TEST_GEN_FILES += prctl_thp_disable
TEST_GEN_FILES += thuge-gen
TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += uffd-stress
@@ -101,6 +102,7 @@ TEST_GEN_FILES += hugetlb_dio
TEST_GEN_FILES += droppable
TEST_GEN_FILES += guard-regions
TEST_GEN_FILES += merge
+TEST_GEN_FILES += rmap
ifneq ($(ARCH),arm64)
TEST_GEN_FILES += soft-dirty
@@ -228,6 +230,8 @@ $(OUTPUT)/ksm_tests: LDLIBS += -lnuma
$(OUTPUT)/migration: LDLIBS += -lnuma
+$(OUTPUT)/rmap: LDLIBS += -lnuma
+
local_config.mk local_config.h: check_config.sh
/bin/sh ./check_config.sh $(CC)
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index c744c603d688..6560c26f47d1 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -41,11 +41,6 @@ static size_t hugetlbsizes[10];
static int gup_fd;
static bool has_huge_zeropage;
-static int sz2ord(size_t size)
-{
- return __builtin_ctzll(size / pagesize);
-}
-
static int detect_thp_sizes(size_t sizes[], int max)
{
int count = 0;
@@ -57,7 +52,7 @@ static int detect_thp_sizes(size_t sizes[], int max)
if (!pmdsize)
return 0;
- orders = 1UL << sz2ord(pmdsize);
+ orders = 1UL << sz2ord(pmdsize, pagesize);
orders |= thp_supported_orders();
for (i = 0; orders && count < max; i++) {
@@ -1216,8 +1211,8 @@ static void run_anon_test_case(struct test_case const *test_case)
size_t size = thpsizes[i];
struct thp_settings settings = *thp_current_settings();
- settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER;
- settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS;
+ settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_NEVER;
+ settings.hugepages[sz2ord(size, pagesize)].enabled = THP_ALWAYS;
thp_push_settings(&settings);
if (size == pmdsize) {
@@ -1868,7 +1863,7 @@ int main(int argc, char **argv)
if (pmdsize) {
/* Only if THP is supported. */
thp_read_settings(&default_settings);
- default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT;
+ default_settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_INHERIT;
thp_save_settings();
thp_push_settings(&default_settings);
diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c
index c463d1c09c9b..2bd1dac75c3f 100644
--- a/tools/testing/selftests/mm/hugepage-mremap.c
+++ b/tools/testing/selftests/mm/hugepage-mremap.c
@@ -65,10 +65,20 @@ static void register_region_with_uffd(char *addr, size_t len)
struct uffdio_api uffdio_api;
/* Create and enable userfaultfd object. */
-
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
- if (uffd == -1)
- ksft_exit_fail_msg("userfaultfd: %s\n", strerror(errno));
+ if (uffd == -1) {
+ switch (errno) {
+ case EPERM:
+ ksft_exit_skip("Insufficient permissions, try running as root.\n");
+ break;
+ case ENOSYS:
+ ksft_exit_skip("userfaultfd is not supported/not enabled.\n");
+ break;
+ default:
+ ksft_exit_fail_msg("userfaultfd failed with %s\n", strerror(errno));
+ break;
+ }
+ }
uffdio_api.api = UFFD_API;
uffdio_api.features = 0;
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
index d8bd1911dfc0..712f43c87736 100644
--- a/tools/testing/selftests/mm/ksm_functional_tests.c
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -38,14 +38,11 @@ enum ksm_merge_mode {
};
static int mem_fd;
-static int ksm_fd;
-static int ksm_full_scans_fd;
-static int proc_self_ksm_stat_fd;
-static int proc_self_ksm_merging_pages_fd;
-static int ksm_use_zero_pages_fd;
static int pagemap_fd;
static size_t pagesize;
+static void init_global_file_handles(void);
+
static bool range_maps_duplicates(char *addr, unsigned long size)
{
unsigned long offs_a, offs_b, pfn_a, pfn_b;
@@ -73,88 +70,6 @@ static bool range_maps_duplicates(char *addr, unsigned long size)
return false;
}
-static long get_my_ksm_zero_pages(void)
-{
- char buf[200];
- char *substr_ksm_zero;
- size_t value_pos;
- ssize_t read_size;
- unsigned long my_ksm_zero_pages;
-
- if (!proc_self_ksm_stat_fd)
- return 0;
-
- read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0);
- if (read_size < 0)
- return -errno;
-
- buf[read_size] = 0;
-
- substr_ksm_zero = strstr(buf, "ksm_zero_pages");
- if (!substr_ksm_zero)
- return 0;
-
- value_pos = strcspn(substr_ksm_zero, "0123456789");
- my_ksm_zero_pages = strtol(substr_ksm_zero + value_pos, NULL, 10);
-
- return my_ksm_zero_pages;
-}
-
-static long get_my_merging_pages(void)
-{
- char buf[10];
- ssize_t ret;
-
- if (proc_self_ksm_merging_pages_fd < 0)
- return proc_self_ksm_merging_pages_fd;
-
- ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0);
- if (ret <= 0)
- return -errno;
- buf[ret] = 0;
-
- return strtol(buf, NULL, 10);
-}
-
-static long ksm_get_full_scans(void)
-{
- char buf[10];
- ssize_t ret;
-
- ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0);
- if (ret <= 0)
- return -errno;
- buf[ret] = 0;
-
- return strtol(buf, NULL, 10);
-}
-
-static int ksm_merge(void)
-{
- long start_scans, end_scans;
-
- /* Wait for two full scans such that any possible merging happened. */
- start_scans = ksm_get_full_scans();
- if (start_scans < 0)
- return start_scans;
- if (write(ksm_fd, "1", 1) != 1)
- return -errno;
- do {
- end_scans = ksm_get_full_scans();
- if (end_scans < 0)
- return end_scans;
- } while (end_scans < start_scans + 2);
-
- return 0;
-}
-
-static int ksm_unmerge(void)
-{
- if (write(ksm_fd, "2", 1) != 1)
- return -errno;
- return 0;
-}
-
static char *__mmap_and_merge_range(char val, unsigned long size, int prot,
enum ksm_merge_mode mode)
{
@@ -163,12 +78,12 @@ static char *__mmap_and_merge_range(char val, unsigned long size, int prot,
int ret;
/* Stabilize accounting by disabling KSM completely. */
- if (ksm_unmerge()) {
+ if (ksm_stop() < 0) {
ksft_print_msg("Disabling (unmerging) KSM failed\n");
return err_map;
}
- if (get_my_merging_pages() > 0) {
+ if (ksm_get_self_merging_pages() > 0) {
ksft_print_msg("Still pages merged\n");
return err_map;
}
@@ -218,7 +133,7 @@ static char *__mmap_and_merge_range(char val, unsigned long size, int prot,
}
/* Run KSM to trigger merging and wait. */
- if (ksm_merge()) {
+ if (ksm_start() < 0) {
ksft_print_msg("Running KSM failed\n");
goto unmap;
}
@@ -227,7 +142,7 @@ static char *__mmap_and_merge_range(char val, unsigned long size, int prot,
* Check if anything was merged at all. Ignore the zero page that is
* accounted differently (depending on kernel support).
*/
- if (val && !get_my_merging_pages()) {
+ if (val && !ksm_get_self_merging_pages()) {
ksft_print_msg("No pages got merged\n");
goto unmap;
}
@@ -274,6 +189,7 @@ static void test_unmerge(void)
ksft_test_result(!range_maps_duplicates(map, size),
"Pages were unmerged\n");
unmap:
+ ksm_stop();
munmap(map, size);
}
@@ -286,15 +202,12 @@ static void test_unmerge_zero_pages(void)
ksft_print_msg("[RUN] %s\n", __func__);
- if (proc_self_ksm_stat_fd < 0) {
- ksft_test_result_skip("open(\"/proc/self/ksm_stat\") failed\n");
- return;
- }
- if (ksm_use_zero_pages_fd < 0) {
- ksft_test_result_skip("open \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n");
+ if (ksm_get_self_zero_pages() < 0) {
+ ksft_test_result_skip("accessing \"/proc/self/ksm_stat\" failed\n");
return;
}
- if (write(ksm_use_zero_pages_fd, "1", 1) != 1) {
+
+ if (ksm_use_zero_pages() < 0) {
ksft_test_result_skip("write \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n");
return;
}
@@ -306,7 +219,7 @@ static void test_unmerge_zero_pages(void)
/* Check if ksm_zero_pages is updated correctly after KSM merging */
pages_expected = size / pagesize;
- if (pages_expected != get_my_ksm_zero_pages()) {
+ if (pages_expected != ksm_get_self_zero_pages()) {
ksft_test_result_fail("'ksm_zero_pages' updated after merging\n");
goto unmap;
}
@@ -319,7 +232,7 @@ static void test_unmerge_zero_pages(void)
/* Check if ksm_zero_pages is updated correctly after unmerging */
pages_expected /= 2;
- if (pages_expected != get_my_ksm_zero_pages()) {
+ if (pages_expected != ksm_get_self_zero_pages()) {
ksft_test_result_fail("'ksm_zero_pages' updated after unmerging\n");
goto unmap;
}
@@ -329,7 +242,7 @@ static void test_unmerge_zero_pages(void)
*((unsigned int *)&map[offs]) = offs;
/* Now we should have no zeropages remaining. */
- if (get_my_ksm_zero_pages()) {
+ if (ksm_get_self_zero_pages()) {
ksft_test_result_fail("'ksm_zero_pages' updated after write fault\n");
goto unmap;
}
@@ -338,6 +251,7 @@ static void test_unmerge_zero_pages(void)
ksft_test_result(!range_maps_duplicates(map, size),
"KSM zero pages were unmerged\n");
unmap:
+ ksm_stop();
munmap(map, size);
}
@@ -366,6 +280,7 @@ static void test_unmerge_discarded(void)
ksft_test_result(!range_maps_duplicates(map, size),
"Pages were unmerged\n");
unmap:
+ ksm_stop();
munmap(map, size);
}
@@ -452,6 +367,7 @@ static void test_unmerge_uffd_wp(void)
close_uffd:
close(uffd);
unmap:
+ ksm_stop();
munmap(map, size);
}
#endif
@@ -506,27 +422,30 @@ static int test_child_ksm(void)
/* Test if KSM is enabled for the process. */
if (prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) != 1)
- return -1;
+ return 1;
/* Test if merge could really happen. */
map = __mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_NONE);
if (map == MAP_MERGE_FAIL)
- return -2;
+ return 2;
else if (map == MAP_MERGE_SKIP)
- return -3;
+ return 3;
+ ksm_stop();
munmap(map, size);
return 0;
}
static void test_child_ksm_err(int status)
{
- if (status == -1)
+ if (status == 1)
ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n");
- else if (status == -2)
+ else if (status == 2)
ksft_test_result_fail("Merge in child failed\n");
- else if (status == -3)
+ else if (status == 3)
ksft_test_result_skip("Merge in child skipped\n");
+ else if (status == 4)
+ ksft_test_result_fail("Binary not found\n");
}
/* Verify that prctl ksm flag is inherited. */
@@ -548,6 +467,7 @@ static void test_prctl_fork(void)
child_pid = fork();
if (!child_pid) {
+ init_global_file_handles();
exit(test_child_ksm());
} else if (child_pid < 0) {
ksft_test_result_fail("fork() failed\n");
@@ -595,10 +515,10 @@ static void test_prctl_fork_exec(void)
return;
} else if (child_pid == 0) {
char *prg_name = "./ksm_functional_tests";
- char *argv_for_program[] = { prg_name, FORK_EXEC_CHILD_PRG_NAME };
+ char *argv_for_program[] = { prg_name, FORK_EXEC_CHILD_PRG_NAME, NULL };
execv(prg_name, argv_for_program);
- return;
+ exit(4);
}
if (waitpid(child_pid, &status, 0) > 0) {
@@ -644,6 +564,7 @@ static void test_prctl_unmerge(void)
ksft_test_result(!range_maps_duplicates(map, size),
"Pages were unmerged\n");
unmap:
+ ksm_stop();
munmap(map, size);
}
@@ -677,6 +598,7 @@ static void test_prot_none(void)
ksft_test_result(!range_maps_duplicates(map, size),
"Pages were unmerged\n");
unmap:
+ ksm_stop();
munmap(map, size);
}
@@ -685,19 +607,15 @@ static void init_global_file_handles(void)
mem_fd = open("/proc/self/mem", O_RDWR);
if (mem_fd < 0)
ksft_exit_fail_msg("opening /proc/self/mem failed\n");
- ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
- if (ksm_fd < 0)
- ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n");
- ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY);
- if (ksm_full_scans_fd < 0)
- ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n");
+ if (ksm_stop() < 0)
+ ksft_exit_skip("accessing \"/sys/kernel/mm/ksm/run\") failed\n");
+ if (ksm_get_full_scans() < 0)
+ ksft_exit_skip("accessing \"/sys/kernel/mm/ksm/full_scans\") failed\n");
pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
if (pagemap_fd < 0)
ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n");
- proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY);
- proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages",
- O_RDONLY);
- ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR);
+ if (ksm_get_self_merging_pages() < 0)
+ ksft_exit_skip("accessing \"/proc/self/ksm_merging_pages\") failed\n");
}
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c
index 5bd52a951cbd..bf2863b102e3 100644
--- a/tools/testing/selftests/mm/mremap_test.c
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -846,7 +846,7 @@ static void mremap_move_multi_invalid_vmas(FILE *maps_fp,
}
if (err != EFAULT) {
errno = err;
- perror("mrmeap() unexpected error");
+ perror("mremap() unexpected error");
success = false;
goto out_unmap;
}
@@ -899,7 +899,7 @@ static void mremap_move_multi_invalid_vmas(FILE *maps_fp,
}
if (err != EFAULT) {
errno = err;
- perror("mrmeap() unexpected error");
+ perror("mremap() unexpected error");
success = false;
goto out_unmap;
}
@@ -948,7 +948,7 @@ static void mremap_move_multi_invalid_vmas(FILE *maps_fp,
}
if (err != EFAULT) {
errno = err;
- perror("mrmeap() unexpected error");
+ perror("mremap() unexpected error");
success = false;
goto out_unmap;
}
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
index e6face7c0166..4fc8e578ec7c 100644
--- a/tools/testing/selftests/mm/pagemap_ioctl.c
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -209,7 +209,7 @@ int userfaultfd_tests(void)
wp_addr_range(mem, mem_size);
vec_size = mem_size/page_size;
- vec = malloc(sizeof(struct page_region) * vec_size);
+ vec = calloc(vec_size, sizeof(struct page_region));
written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
@@ -247,11 +247,11 @@ int sanity_tests_sd(void)
vec_size = num_pages/2;
mem_size = num_pages * page_size;
- vec = malloc(sizeof(struct page_region) * vec_size);
+ vec = calloc(vec_size, sizeof(struct page_region));
if (!vec)
ksft_exit_fail_msg("error nomem\n");
- vec2 = malloc(sizeof(struct page_region) * vec_size);
+ vec2 = calloc(vec_size, sizeof(struct page_region));
if (!vec2)
ksft_exit_fail_msg("error nomem\n");
@@ -436,7 +436,7 @@ int sanity_tests_sd(void)
mem_size = 1050 * page_size;
vec_size = mem_size/(page_size*2);
- vec = malloc(sizeof(struct page_region) * vec_size);
+ vec = calloc(vec_size, sizeof(struct page_region));
if (!vec)
ksft_exit_fail_msg("error nomem\n");
@@ -491,7 +491,7 @@ int sanity_tests_sd(void)
mem_size = 10000 * page_size;
vec_size = 50;
- vec = malloc(sizeof(struct page_region) * vec_size);
+ vec = calloc(vec_size, sizeof(struct page_region));
if (!vec)
ksft_exit_fail_msg("error nomem\n");
@@ -541,7 +541,7 @@ int sanity_tests_sd(void)
vec_size = 1000;
mem_size = vec_size * page_size;
- vec = malloc(sizeof(struct page_region) * vec_size);
+ vec = calloc(vec_size, sizeof(struct page_region));
if (!vec)
ksft_exit_fail_msg("error nomem\n");
@@ -695,8 +695,8 @@ int base_tests(char *prefix, char *mem, unsigned long long mem_size, int skip)
}
vec_size = mem_size/page_size;
- vec = malloc(sizeof(struct page_region) * vec_size);
- vec2 = malloc(sizeof(struct page_region) * vec_size);
+ vec = calloc(vec_size, sizeof(struct page_region));
+ vec2 = calloc(vec_size, sizeof(struct page_region));
/* 1. all new pages must be not be written (dirty) */
written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
@@ -807,8 +807,8 @@ int hpage_unit_tests(void)
unsigned long long vec_size = map_size/page_size;
struct page_region *vec, *vec2;
- vec = malloc(sizeof(struct page_region) * vec_size);
- vec2 = malloc(sizeof(struct page_region) * vec_size);
+ vec = calloc(vec_size, sizeof(struct page_region));
+ vec2 = calloc(vec_size, sizeof(struct page_region));
if (!vec || !vec2)
ksft_exit_fail_msg("malloc failed\n");
@@ -997,7 +997,7 @@ int unmapped_region_tests(void)
void *start = (void *)0x10000000;
int written, len = 0x00040000;
long vec_size = len / page_size;
- struct page_region *vec = malloc(sizeof(struct page_region) * vec_size);
+ struct page_region *vec = calloc(vec_size, sizeof(struct page_region));
/* 1. Get written pages */
written = pagemap_ioctl(start, len, vec, vec_size, 0, 0,
@@ -1062,7 +1062,7 @@ int sanity_tests(void)
mem_size = 10 * page_size;
vec_size = mem_size / page_size;
- vec = malloc(sizeof(struct page_region) * vec_size);
+ vec = calloc(vec_size, sizeof(struct page_region));
mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
if (mem == MAP_FAILED || vec == MAP_FAILED)
ksft_exit_fail_msg("error nomem\n");
diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c
index 866ac023baf5..88659f0a90ea 100644
--- a/tools/testing/selftests/mm/pfnmap.c
+++ b/tools/testing/selftests/mm/pfnmap.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Basic VM_PFNMAP tests relying on mmap() of '/dev/mem'
+ * Basic VM_PFNMAP tests relying on mmap() of input file provided.
+ * Use '/dev/mem' as default.
*
* Copyright 2025, Red Hat, Inc.
*
@@ -25,6 +26,7 @@
#include "vm_util.h"
static sigjmp_buf sigjmp_buf_env;
+static char *file = "/dev/mem";
static void signal_handler(int sig)
{
@@ -51,7 +53,7 @@ static int test_read_access(char *addr, size_t size, size_t pagesize)
return ret;
}
-static int find_ram_target(off_t *phys_addr,
+static int find_ram_target(off_t *offset,
unsigned long long pagesize)
{
unsigned long long start, end;
@@ -91,7 +93,7 @@ static int find_ram_target(off_t *phys_addr,
/* We need two pages. */
if (end > start + 2 * pagesize) {
fclose(file);
- *phys_addr = start;
+ *offset = start;
return 0;
}
}
@@ -100,7 +102,7 @@ static int find_ram_target(off_t *phys_addr,
FIXTURE(pfnmap)
{
- off_t phys_addr;
+ off_t offset;
size_t pagesize;
int dev_mem_fd;
char *addr1;
@@ -113,23 +115,31 @@ FIXTURE_SETUP(pfnmap)
{
self->pagesize = getpagesize();
- /* We'll require two physical pages throughout our tests ... */
- if (find_ram_target(&self->phys_addr, self->pagesize))
- SKIP(return, "Cannot find ram target in '/proc/iomem'\n");
+ if (strncmp(file, "/dev/mem", strlen("/dev/mem")) == 0) {
+ /* We'll require two physical pages throughout our tests ... */
+ if (find_ram_target(&self->offset, self->pagesize))
+ SKIP(return,
+ "Cannot find ram target in '/proc/iomem'\n");
+ } else {
+ self->offset = 0;
+ }
- self->dev_mem_fd = open("/dev/mem", O_RDONLY);
+ self->dev_mem_fd = open(file, O_RDONLY);
if (self->dev_mem_fd < 0)
- SKIP(return, "Cannot open '/dev/mem'\n");
+ SKIP(return, "Cannot open '%s'\n", file);
self->size1 = self->pagesize * 2;
self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED,
- self->dev_mem_fd, self->phys_addr);
+ self->dev_mem_fd, self->offset);
if (self->addr1 == MAP_FAILED)
- SKIP(return, "Cannot mmap '/dev/mem'\n");
+ SKIP(return, "Cannot mmap '%s'\n", file);
+
+ if (!check_vmflag_pfnmap(self->addr1))
+ SKIP(return, "Invalid file: '%s'. Not pfnmap'ed\n", file);
/* ... and want to be able to read from them. */
if (test_read_access(self->addr1, self->size1, self->pagesize))
- SKIP(return, "Cannot read-access mmap'ed '/dev/mem'\n");
+ SKIP(return, "Cannot read-access mmap'ed '%s'\n", file);
self->size2 = 0;
self->addr2 = MAP_FAILED;
@@ -182,7 +192,7 @@ TEST_F(pfnmap, munmap_split)
*/
self->size2 = self->pagesize;
self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED,
- self->dev_mem_fd, self->phys_addr);
+ self->dev_mem_fd, self->offset);
ASSERT_NE(self->addr2, MAP_FAILED);
}
@@ -246,4 +256,14 @@ TEST_F(pfnmap, fork)
ASSERT_EQ(ret, 0);
}
-TEST_HARNESS_MAIN
+/*
+ * Entry point.  An optional "-- <file>" suffix on the command line selects
+ * the file to test instead of the default; only the arguments in front of
+ * "--" are handed to the test harness.
+ */
+int main(int argc, char **argv)
+{
+	int sep = 1;
+
+	/* Find the first "--" separator, if there is one. */
+	while (sep < argc && strcmp(argv[sep], "--") != 0)
+		sep++;
+
+	if (sep >= argc)
+		return test_harness_run(argc, argv);
+
+	/* A non-empty argument right after "--" replaces the default file. */
+	if (sep + 1 < argc && argv[sep + 1][0] != '\0')
+		file = argv[sep + 1];
+
+	return test_harness_run(sep, argv);
+}
diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h
index ea404f80e6cb..fa15f006fa68 100644
--- a/tools/testing/selftests/mm/pkey-helpers.h
+++ b/tools/testing/selftests/mm/pkey-helpers.h
@@ -84,9 +84,6 @@ extern void abort_hooks(void);
#ifndef noinline
# define noinline __attribute__((noinline))
#endif
-#ifndef __maybe_unused
-# define __maybe_unused __attribute__((__unused__))
-#endif
int sys_pkey_alloc(unsigned long flags, unsigned long init_val);
int sys_pkey_free(unsigned long pkey);
diff --git a/tools/testing/selftests/mm/prctl_thp_disable.c b/tools/testing/selftests/mm/prctl_thp_disable.c
new file mode 100644
index 000000000000..84b4a4b345af
--- /dev/null
+++ b/tools/testing/selftests/mm/prctl_thp_disable.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Basic tests for PR_GET/SET_THP_DISABLE prctl calls
+ *
+ * Author(s): Usama Arif <usamaarif642@gmail.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <linux/mman.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#include "../kselftest_harness.h"
+#include "thp_settings.h"
+#include "vm_util.h"
+
+#ifndef PR_THP_DISABLE_EXCEPT_ADVISED
+#define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1)
+#endif
+
+enum thp_collapse_type {
+	THP_COLLAPSE_NONE,		/* no madvise() call at all */
+	THP_COLLAPSE_MADV_NOHUGEPAGE,	/* MADV_NOHUGEPAGE before access */
+	THP_COLLAPSE_MADV_HUGEPAGE,	/* MADV_HUGEPAGE before access */
+	THP_COLLAPSE_MADV_COLLAPSE,	/* MADV_COLLAPSE after access */
+};
+
+/*
+ * Map an anonymous buffer, optionally madvise() it, fault it in and report
+ * whether the PMD-aligned part of it ended up backed by a huge page.
+ *
+ * MADV_HUGEPAGE / MADV_NOHUGEPAGE are applied before the memory is touched,
+ * MADV_COLLAPSE after it has been touched.
+ *
+ * Returns:
+ * 0 if test doesn't give hugepage
+ * 1 if test gives a hugepage
+ * -errno if mmap fails
+ */
+static int test_mmap_thp(enum thp_collapse_type madvise_buf, size_t pmdsize)
+{
+	/* Twice the THP size, so a THP-aligned window always fits inside. */
+	const size_t area_len = 2 * pmdsize;
+	char *area, *aligned;
+	int is_huge;
+
+	area = (char *)mmap(NULL, area_len, PROT_READ | PROT_WRITE,
+			    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (area == MAP_FAILED)
+		return -errno;
+
+	/* First THP-aligned address strictly above the start of the area. */
+	aligned = (char *)(((uintptr_t)area + pmdsize) & ~(pmdsize - 1));
+
+	/* Advice that has to be in place before the first access. */
+	switch (madvise_buf) {
+	case THP_COLLAPSE_MADV_HUGEPAGE:
+		madvise(aligned, pmdsize, MADV_HUGEPAGE);
+		break;
+	case THP_COLLAPSE_MADV_NOHUGEPAGE:
+		madvise(aligned, pmdsize, MADV_NOHUGEPAGE);
+		break;
+	default:
+		break;
+	}
+
+	/* Touch every byte so the memory really gets allocated. */
+	memset(aligned, 1, pmdsize);
+
+	/* Collapsing only makes sense once the pages exist. */
+	if (madvise_buf == THP_COLLAPSE_MADV_COLLAPSE)
+		madvise(aligned, pmdsize, MADV_COLLAPSE);
+
+	/* HACK: make sure we have a separate VMA that we can check reliably. */
+	mprotect(aligned, pmdsize, PROT_READ);
+
+	is_huge = check_huge_anon(aligned, 1, pmdsize);
+	munmap(area, area_len);
+	return is_huge;
+}
+
+/*
+ * Checks for a process that had THPs disabled completely via prctl: first
+ * while PR_GET_THP_DISABLE still reports 1, then again after the setting was
+ * cleared with PR_SET_THP_DISABLE(0), where @thp_policy decides which
+ * test_mmap_thp() variants are expected to yield a huge page.
+ */
+static void prctl_thp_disable_completely_test(struct __test_metadata *const _metadata,
+					      size_t pmdsize,
+					      enum thp_enabled thp_policy)
+{
+	ASSERT_EQ(prctl(PR_GET_THP_DISABLE, NULL, NULL, NULL, NULL), 1);
+
+	/* tests after prctl overrides global policy: no variant may give a THP */
+	ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), 0);
+
+	ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0);
+
+	ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize), 0);
+
+	ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 0);
+
+	/* Reset to global policy */
+	ASSERT_EQ(prctl(PR_SET_THP_DISABLE, 0, NULL, NULL, NULL), 0);
+
+	/* tests after prctl is cleared, and only global policy is effective */
+	ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize),
+		  thp_policy == THP_ALWAYS ? 1 : 0);
+
+	ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0);
+
+	ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize),
+		  thp_policy == THP_NEVER ? 0 : 1);
+
+	ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 1);
+}
+
+FIXTURE(prctl_thp_disable_completely)
+{
+	struct thp_settings settings;	/* read, modified and written back by the setup */
+	size_t pmdsize;			/* value returned by read_pmd_pagesize() */
+};
+
+FIXTURE_VARIANT(prctl_thp_disable_completely)
+{
+	enum thp_enabled thp_policy;	/* stored into settings.thp_enabled by the setup */
+};
+
+FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, never)
+{
+	.thp_policy = THP_NEVER,
+};
+
+FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, madvise)
+{
+	.thp_policy = THP_MADVISE,
+};
+
+FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, always)
+{
+	.thp_policy = THP_ALWAYS,
+};
+
+FIXTURE_SETUP(prctl_thp_disable_completely)
+{
+	/* Nothing to test without THP support or a known PMD size. */
+	if (!thp_available())
+		SKIP(return, "Transparent Hugepages not available\n");
+
+	self->pmdsize = read_pmd_pagesize();
+	if (self->pmdsize == 0)
+		SKIP(return, "Unable to read PMD size\n");
+
+	/* Disable THPs for this process; the tests check the effect. */
+	if (prctl(PR_SET_THP_DISABLE, 1, NULL, NULL, NULL) != 0)
+		SKIP(return, "Unable to disable THPs completely for the process\n");
+
+	/*
+	 * Save the current THP settings, then apply the variant's global
+	 * policy and let the PMD-sized order inherit it.
+	 */
+	thp_save_settings();
+	thp_read_settings(&self->settings);
+	self->settings.thp_enabled = variant->thp_policy;
+	self->settings.hugepages[sz2ord(self->pmdsize,
+					getpagesize())].enabled = THP_INHERIT;
+	thp_write_settings(&self->settings);
+}
+
+FIXTURE_TEARDOWN(prctl_thp_disable_completely)
+{
+	/* Counterpart of the thp_save_settings() call made in the setup. */
+	thp_restore_settings();
+}
+
+TEST_F(prctl_thp_disable_completely, nofork)
+{
+	/* Run the checks in the test process itself, without forking. */
+	prctl_thp_disable_completely_test(_metadata, self->pmdsize, variant->thp_policy);
+}
+
+TEST_F(prctl_thp_disable_completely, fork)
+{
+	int ret = 0;
+	pid_t pid;
+
+	/* Make sure prctl changes are carried across fork */
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (!pid) {
+		/* Child: run the same checks as the nofork test. */
+		prctl_thp_disable_completely_test(_metadata, self->pmdsize, variant->thp_policy);
+		return;
+	}
+
+	/*
+	 * Reap exactly the child forked above. A failed wait would otherwise
+	 * leave ret at 0 and be reported as a pass.
+	 */
+	ASSERT_EQ(waitpid(pid, &ret, 0), pid);
+
+	/* Anything other than a normal exit counts as a failure. */
+	if (WIFEXITED(ret))
+		ret = WEXITSTATUS(ret);
+	else
+		ret = -EINVAL;
+	ASSERT_EQ(ret, 0);
+}
+
+/*
+ * Checks for a process that disabled THPs with PR_THP_DISABLE_EXCEPT_ADVISED:
+ * first while PR_GET_THP_DISABLE still reports 3, then again after the setting
+ * was cleared with PR_SET_THP_DISABLE(0). In both phases MADV_HUGEPAGE is
+ * expected to give a THP unless @thp_policy is THP_NEVER, and MADV_COLLAPSE is
+ * always expected to give one.
+ */
+static void prctl_thp_disable_except_madvise_test(struct __test_metadata *const _metadata,
+						  size_t pmdsize,
+						  enum thp_enabled thp_policy)
+{
+	/* NOTE(review): 3 is presumably 1 | PR_THP_DISABLE_EXCEPT_ADVISED - confirm */
+	ASSERT_EQ(prctl(PR_GET_THP_DISABLE, NULL, NULL, NULL, NULL), 3);
+
+	/* tests after prctl overrides global policy */
+	ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), 0);
+
+	ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0);
+
+	ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize),
+		  thp_policy == THP_NEVER ? 0 : 1);
+
+	ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 1);
+
+	/* Reset to global policy */
+	ASSERT_EQ(prctl(PR_SET_THP_DISABLE, 0, NULL, NULL, NULL), 0);
+
+	/* tests after prctl is cleared, and only global policy is effective */
+	ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize),
+		  thp_policy == THP_ALWAYS ? 1 : 0);
+
+	ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0);
+
+	ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize),
+		  thp_policy == THP_NEVER ? 0 : 1);
+
+	ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 1);
+}
+
+FIXTURE(prctl_thp_disable_except_madvise)
+{
+	struct thp_settings settings;	/* read, modified and written back by the setup */
+	size_t pmdsize;			/* value returned by read_pmd_pagesize() */
+};
+
+FIXTURE_VARIANT(prctl_thp_disable_except_madvise)
+{
+	enum thp_enabled thp_policy;	/* stored into settings.thp_enabled by the setup */
+};
+
+FIXTURE_VARIANT_ADD(prctl_thp_disable_except_madvise, never)
+{
+	.thp_policy = THP_NEVER,
+};
+
+FIXTURE_VARIANT_ADD(prctl_thp_disable_except_madvise, madvise)
+{
+	.thp_policy = THP_MADVISE,
+};
+
+FIXTURE_VARIANT_ADD(prctl_thp_disable_except_madvise, always)
+{
+	.thp_policy = THP_ALWAYS,
+};
+
+FIXTURE_SETUP(prctl_thp_disable_except_madvise)
+{
+	/* Nothing to test without THP support or a known PMD size. */
+	if (!thp_available())
+		SKIP(return, "Transparent Hugepages not available\n");
+
+	self->pmdsize = read_pmd_pagesize();
+	if (self->pmdsize == 0)
+		SKIP(return, "Unable to read PMD size\n");
+
+	/* Disable THPs for this process, except where advised. */
+	if (prctl(PR_SET_THP_DISABLE, 1, PR_THP_DISABLE_EXCEPT_ADVISED, NULL, NULL) != 0)
+		SKIP(return, "Unable to set PR_THP_DISABLE_EXCEPT_ADVISED\n");
+
+	/*
+	 * Save the current THP settings, then apply the variant's global
+	 * policy and let the PMD-sized order inherit it.
+	 */
+	thp_save_settings();
+	thp_read_settings(&self->settings);
+	self->settings.thp_enabled = variant->thp_policy;
+	self->settings.hugepages[sz2ord(self->pmdsize,
+					getpagesize())].enabled = THP_INHERIT;
+	thp_write_settings(&self->settings);
+}
+
+FIXTURE_TEARDOWN(prctl_thp_disable_except_madvise)
+{
+	/* Counterpart of the thp_save_settings() call made in the setup. */
+	thp_restore_settings();
+}
+
+TEST_F(prctl_thp_disable_except_madvise, nofork)
+{
+	/* Run the checks in the test process itself, without forking. */
+	prctl_thp_disable_except_madvise_test(_metadata, self->pmdsize, variant->thp_policy);
+}
+
+TEST_F(prctl_thp_disable_except_madvise, fork)
+{
+	int ret = 0;
+	pid_t pid;
+
+	/* Make sure prctl changes are carried across fork */
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (!pid) {
+		/* Child: run the same checks as the nofork test. */
+		prctl_thp_disable_except_madvise_test(_metadata, self->pmdsize,
+						      variant->thp_policy);
+		return;
+	}
+
+	/*
+	 * Reap exactly the child forked above. A failed wait would otherwise
+	 * leave ret at 0 and be reported as a pass.
+	 */
+	ASSERT_EQ(waitpid(pid, &ret, 0), pid);
+
+	/* Anything other than a normal exit counts as a failure. */
+	if (WIFEXITED(ret))
+		ret = WEXITSTATUS(ret);
+	else
+		ret = -EINVAL;
+	ASSERT_EQ(ret, 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/rmap.c b/tools/testing/selftests/mm/rmap.c
new file mode 100644
index 000000000000..13f7bccfd0a9
--- /dev/null
+++ b/tools/testing/selftests/mm/rmap.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RMAP functional tests
+ *
+ * Author(s): Wei Yang <richard.weiyang@gmail.com>
+ */
+
+#include "../kselftest_harness.h"
+#include <strings.h>
+#include <pthread.h>
+#include <numa.h>
+#include <numaif.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <time.h>
+#include <sys/sem.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "vm_util.h"
+
+#define TOTAL_LEVEL 5
+#define MAX_CHILDREN 3
+
+#define FAIL_ON_CHECK (1 << 0)
+#define FAIL_ON_WORK (1 << 1)
+
+/* Semaphore 0, decrement by one: blocks while the value is 0. */
+struct sembuf sem_wait = {
+	.sem_num = 0,
+	.sem_op = -1,
+	.sem_flg = 0,
+};
+/* Semaphore 0, increment by one. */
+struct sembuf sem_signal = {
+	.sem_num = 0,
+	.sem_op = 1,
+	.sem_flg = 0,
+};
+
+enum backend_type {
+	ANON,		/* nothing to remove in the fixture teardown */
+	SHM,		/* teardown removes filename with shm_unlink() */
+	NORM_FILE,	/* teardown removes filename with unlink() */
+};
+
+#define PREFIX "kst_rmap"
+#define MAX_FILENAME_LEN 256
+/* File name suffix per backend, indexed by enum backend_type. */
+const char *suffixes[] = {
+	[ANON] = "",
+	[SHM] = "_shm",
+	[NORM_FILE] = "_file",
+};
+
+struct global_data;
+typedef int (*work_fn)(struct global_data *data);	/* called by the worker process only */
+typedef int (*check_fn)(struct global_data *data);	/* called by every non-worker process */
+typedef void (*prepare_fn)(struct global_data *data);	/* called by every process once it stops forking */
+
+struct global_data {
+	int worker_level;	/* tree level whose worker-path process does the work */
+
+	int semid;		/* SysV semaphore set the non-workers block on */
+	int pipefd[2];		/* pipe the worker reads until every write end is closed */
+
+	unsigned int mapsize;	/* length of the region mapping */
+	unsigned int rand_seed;	/* rand_r() state used while building the process tree */
+	char *region;		/* mapping under test */
+
+	prepare_fn do_prepare;
+	work_fn do_work;
+	check_fn do_check;
+
+	enum backend_type backend;	/* selects the cleanup done in the teardown */
+	char filename[MAX_FILENAME_LEN];	/* name of the shm object or file, if any */
+
+	unsigned long *expected_pfn;	/* MAP_SHARED slot: PFN stored by the worker, compared by do_check */
+};
+
+/*
+ * Create a process tree with TOTAL_LEVEL height and at most MAX_CHILDREN
+ * children for each.
+ *
+ * It will randomly select one process as 'worker' process which will
+ * 'do_work' once all processes are created. All other processes wait until
+ * the 'worker' finishes its work and then run 'do_check'.
+ *
+ * Every process except the root exit()s with the OR of its own result and
+ * the exit statuses of its children. The root turns the accumulated result
+ * into a SKIP (FAIL_ON_WORK set) or asserts that it is 0.
+ */
+void propagate_children(struct __test_metadata *_metadata, struct global_data *data)
+{
+	pid_t root_pid, pid;
+	unsigned int num_child;
+	int status;
+	int ret = 0;
+	int curr_child, worker_child;
+	int curr_level = 1;
+	bool is_worker = true;
+
+	root_pid = getpid();
+repeat:
+	/* 1..MAX_CHILDREN children; one of them continues the worker path */
+	num_child = rand_r(&data->rand_seed) % MAX_CHILDREN + 1;
+	worker_child = is_worker ? rand_r(&data->rand_seed) % num_child : -1;
+
+	for (curr_child = 0; curr_child < num_child; curr_child++) {
+		pid = fork();
+
+		if (pid < 0) {
+			/* perror() appends ": <error>\n" itself */
+			perror("Error: fork");
+		} else if (pid == 0) {
+			curr_level++;
+
+			if (curr_child != worker_child)
+				is_worker = false;
+
+			/* Leaf level reached: stop forking */
+			if (curr_level == TOTAL_LEVEL)
+				break;
+
+			/* Give each subtree its own random sequence */
+			data->rand_seed += curr_child;
+			goto repeat;
+		}
+	}
+
+	if (data->do_prepare)
+		data->do_prepare(data);
+
+	/* This process is done forking: drop its write end of the pipe */
+	close(data->pipefd[1]);
+
+	if (is_worker && curr_level == data->worker_level) {
+		/* This is the worker process, first wait last process created */
+		char buf;
+
+		/* Returns <= 0 once no process holds a write end any more */
+		while (read(data->pipefd[0], &buf, 1) > 0)
+			;
+
+		if (data->do_work)
+			ret = data->do_work(data);
+
+		/* Kick others: removing the set ends their semop() wait */
+		semctl(data->semid, 0, IPC_RMID);
+	} else {
+		/* Wait worker finish */
+		semop(data->semid, &sem_wait, 1);
+		if (data->do_check)
+			ret = data->do_check(data);
+	}
+
+	/* Wait all child to quit */
+	while (wait(&status) > 0) {
+		if (WIFEXITED(status))
+			ret |= WEXITSTATUS(status);
+	}
+
+	if (getpid() == root_pid) {
+		if (ret & FAIL_ON_WORK)
+			SKIP(return, "Failed in worker");
+
+		ASSERT_EQ(ret, 0);
+	} else {
+		exit(ret);
+	}
+}
+
+FIXTURE(migrate)
+{
+	struct global_data data;	/* state handed to propagate_children() by every test */
+};
+
+FIXTURE_SETUP(migrate)
+{
+	struct global_data *data = &self->data;
+
+	/*
+	 * The teardown and has_same_pfn() treat MAP_FAILED as "not mapped",
+	 * so start from that state explicitly.
+	 */
+	data->region = MAP_FAILED;
+	data->expected_pfn = MAP_FAILED;
+
+	if (numa_available() < 0)
+		SKIP(return, "NUMA not available");
+	if (numa_bitmask_weight(numa_all_nodes_ptr) <= 1)
+		SKIP(return, "Not enough NUMA nodes available");
+
+	data->mapsize = getpagesize();
+
+	/* Shared, so a PFN stored by one process is visible to the others */
+	data->expected_pfn = mmap(0, sizeof(unsigned long),
+				  PROT_READ | PROT_WRITE,
+				  MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(data->expected_pfn, MAP_FAILED);
+
+	/* Prepare semaphore */
+	data->semid = semget(IPC_PRIVATE, 1, 0666 | IPC_CREAT);
+	ASSERT_NE(data->semid, -1);
+	ASSERT_NE(semctl(data->semid, 0, SETVAL, 0), -1);
+
+	/* Prepare pipe */
+	ASSERT_NE(pipe(data->pipefd), -1);
+
+	data->rand_seed = time(NULL);
+	srand(data->rand_seed);
+
+	/* Level (1..TOTAL_LEVEL) of the process that will do the work */
+	data->worker_level = rand() % TOTAL_LEVEL + 1;
+
+	data->do_prepare = NULL;
+	data->do_work = NULL;
+	data->do_check = NULL;
+
+	data->backend = ANON;
+}
+
+FIXTURE_TEARDOWN(migrate)
+{
+	struct global_data *d = &self->data;
+
+	/* Unmap whatever is mapped and mark both pointers as unmapped. */
+	if (d->region != MAP_FAILED)
+		munmap(d->region, d->mapsize);
+	d->region = MAP_FAILED;
+
+	if (d->expected_pfn != MAP_FAILED)
+		munmap(d->expected_pfn, sizeof(unsigned long));
+	d->expected_pfn = MAP_FAILED;
+
+	/* Remove the semaphore set and forget its id. */
+	semctl(d->semid, 0, IPC_RMID);
+	d->semid = -1;
+
+	close(d->pipefd[0]);
+
+	/* Only the shm and file backends have a name to remove. */
+	if (d->backend == SHM)
+		shm_unlink(d->filename);
+	else if (d->backend == NORM_FILE)
+		unlink(d->filename);
+}
+
+/* prepare_fn used by the anon, shm and file tests: read the first byte of the region. */
+void access_region(struct global_data *data)
+{
+	/*
+	 * Force read "region" to make sure page fault in.
+	 */
+	FORCE_READ(*data->region);
+}
+
+/*
+ * Migrate the page at @region to a NUMA node other than the one it is
+ * currently on, retrying up to 100 times while move_pages() reports the page
+ * as not migrated.
+ *
+ * Returns 0 on success, FAIL_ON_WORK otherwise.
+ */
+int try_to_move_page(char *region)
+{
+	int ret;
+	int node;
+	int status = 0;
+	int failures = 0;
+
+	/* With a NULL nodes array nothing is moved; status gets the current node */
+	ret = move_pages(0, 1, (void **)&region, NULL, &status, MPOL_MF_MOVE_ALL);
+	if (ret != 0) {
+		ksft_perror("Failed to get original numa");
+		return FAIL_ON_WORK;
+	}
+
+	/* A negative status is an error for this page, not a node id */
+	if (status < 0) {
+		ksft_print_msg("Failed to get original numa: status %d\n", status);
+		return FAIL_ON_WORK;
+	}
+
+	/* Pick up a different target node */
+	for (node = 0; node <= numa_max_node(); node++) {
+		if (numa_bitmask_isbitset(numa_all_nodes_ptr, node) && node != status)
+			break;
+	}
+
+	if (node > numa_max_node()) {
+		ksft_print_msg("Couldn't find available numa node for testing\n");
+		return FAIL_ON_WORK;
+	}
+
+	while (1) {
+		ret = move_pages(0, 1, (void **)&region, &node, &status, MPOL_MF_MOVE_ALL);
+
+		/* migrate successfully */
+		if (!ret)
+			break;
+
+		/* error happened */
+		if (ret < 0) {
+			ksft_perror("Failed to move pages");
+			return FAIL_ON_WORK;
+		}
+
+		/* migration is best effort; try again */
+		if (++failures >= 100)
+			return FAIL_ON_WORK;
+	}
+
+	return 0;
+}
+
+/*
+ * work_fn: migrate the region's page to another node and publish its new PFN
+ * through data->expected_pfn.
+ *
+ * Returns 0 on success, FAIL_ON_WORK if the migration or opening the pagemap
+ * fails.
+ */
+int move_region(struct global_data *data)
+{
+	int ret;
+	int pagemap_fd;
+
+	ret = try_to_move_page(data->region);
+	if (ret != 0)
+		return ret;
+
+	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+	if (pagemap_fd == -1)
+		return FAIL_ON_WORK;
+	*data->expected_pfn = pagemap_get_pfn(pagemap_fd, data->region);
+	/* The descriptor is only needed for this one lookup */
+	close(pagemap_fd);
+
+	return 0;
+}
+
+/*
+ * check_fn: compare the PFN backing the region in this process with the one
+ * stored in data->expected_pfn.
+ *
+ * Returns 0 if they match or if this process has no region mapped,
+ * FAIL_ON_CHECK if they differ or the pagemap cannot be opened.
+ */
+int has_same_pfn(struct global_data *data)
+{
+	unsigned long pfn;
+	int pagemap_fd;
+
+	/* Nothing mapped here, so there is nothing to compare */
+	if (data->region == MAP_FAILED)
+		return 0;
+
+	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+	if (pagemap_fd == -1)
+		return FAIL_ON_CHECK;
+
+	pfn = pagemap_get_pfn(pagemap_fd, data->region);
+	/* Close before either return below so the descriptor never leaks */
+	close(pagemap_fd);
+	if (pfn != *data->expected_pfn)
+		return FAIL_ON_CHECK;
+
+	return 0;
+}
+
+TEST_F(migrate, anon)
+{
+	struct global_data *data = &self->data;
+
+	/* Map an area and fault in */
+	data->region = mmap(0, data->mapsize, PROT_READ | PROT_WRITE,
+			    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(data->region, MAP_FAILED);
+	memset(data->region, 0xcf, data->mapsize);
+
+	/* Every process reads the page, the worker migrates it, the rest compare PFNs */
+	data->do_prepare = access_region;
+	data->do_work = move_region;
+	data->do_check = has_same_pfn;
+
+	propagate_children(_metadata, data);
+}
+
+TEST_F(migrate, shm)
+{
+	int shm_fd;
+	struct global_data *data = &self->data;
+
+	snprintf(data->filename, MAX_FILENAME_LEN, "%s%s", PREFIX, suffixes[SHM]);
+	shm_fd = shm_open(data->filename, O_CREAT | O_RDWR, 0666);
+	ASSERT_NE(shm_fd, -1);
+	/* The object exists now: let the teardown unlink it even if a later step fails */
+	data->backend = SHM;
+	/* Without the resize the memset() below would touch pages beyond EOF */
+	ASSERT_EQ(ftruncate(shm_fd, data->mapsize), 0);
+
+	/* Map a shared area and fault in */
+	data->region = mmap(0, data->mapsize, PROT_READ | PROT_WRITE,
+			    MAP_SHARED, shm_fd, 0);
+	ASSERT_NE(data->region, MAP_FAILED);
+	memset(data->region, 0xcf, data->mapsize);
+	close(shm_fd);
+
+	/* Every process reads the page, the worker migrates it, the rest compare PFNs */
+	data->do_prepare = access_region;
+	data->do_work = move_region;
+	data->do_check = has_same_pfn;
+
+	propagate_children(_metadata, data);
+}
+
+TEST_F(migrate, file)
+{
+	int fd;
+	struct global_data *data = &self->data;
+
+	snprintf(data->filename, MAX_FILENAME_LEN, "%s%s", PREFIX, suffixes[NORM_FILE]);
+	fd = open(data->filename, O_CREAT | O_RDWR | O_EXCL, 0666);
+	ASSERT_NE(fd, -1);
+	/* The file exists now: let the teardown unlink it even if a later step fails */
+	data->backend = NORM_FILE;
+	/* Without the resize the memset() below would touch pages beyond EOF */
+	ASSERT_EQ(ftruncate(fd, data->mapsize), 0);
+
+	/* Map a shared area and fault in */
+	data->region = mmap(0, data->mapsize, PROT_READ | PROT_WRITE,
+			    MAP_SHARED, fd, 0);
+	ASSERT_NE(data->region, MAP_FAILED);
+	memset(data->region, 0xcf, data->mapsize);
+	close(fd);
+
+	/* Every process reads the page, the worker migrates it, the rest compare PFNs */
+	data->do_prepare = access_region;
+	data->do_work = move_region;
+	data->do_check = has_same_pfn;
+
+	propagate_children(_metadata, data);
+}
+
+/*
+ * prepare_fn: give the calling process its own private anonymous region and
+ * fill it with the same byte pattern in every process. On mmap() failure
+ * data->region is left as MAP_FAILED.
+ */
+void prepare_local_region(struct global_data *data)
+{
+	char *mem = mmap(NULL, data->mapsize, PROT_READ|PROT_WRITE,
+			 MAP_PRIVATE|MAP_ANON, -1, 0);
+
+	data->region = mem;
+	if (mem != MAP_FAILED)
+		memset(mem, 0xcf, data->mapsize);
+}
+
+/*
+ * work_fn for the ksm test: call ksm_start(), try to migrate the region's
+ * page and publish the PFN that backs it afterwards through
+ * data->expected_pfn.
+ *
+ * The PFN is recorded even when the migration failed. Returns the result of
+ * try_to_move_page(), or FAIL_ON_WORK if the region is not mapped, ksm_start()
+ * fails or the pagemap cannot be opened.
+ */
+int merge_and_migrate(struct global_data *data)
+{
+	int pagemap_fd;
+	int ret = 0;
+
+	if (data->region == MAP_FAILED)
+		return FAIL_ON_WORK;
+
+	if (ksm_start() < 0)
+		return FAIL_ON_WORK;
+
+	ret = try_to_move_page(data->region);
+
+	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+	if (pagemap_fd == -1)
+		return FAIL_ON_WORK;
+	*data->expected_pfn = pagemap_get_pfn(pagemap_fd, data->region);
+	/* The descriptor is only needed for this one lookup */
+	close(pagemap_fd);
+
+	return ret;
+}
+
+TEST_F(migrate, ksm)
+{
+	int ret;
+	struct global_data *data = &self->data;
+
+	/* Skip when the KSM helpers cannot be used */
+	if (ksm_stop() < 0)
+		SKIP(return, "accessing \"/sys/kernel/mm/ksm/run\" failed");
+	if (ksm_get_full_scans() < 0)
+		SKIP(return, "accessing \"/sys/kernel/mm/ksm/full_scans\" failed");
+
+	/* Set before propagate_children() forks the process tree */
+	ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
+	if (ret < 0 && errno == EINVAL)
+		SKIP(return, "PR_SET_MEMORY_MERGE not supported");
+	else if (ret)
+		ksft_exit_fail_perror("PR_SET_MEMORY_MERGE=1 failed");
+
+	/* Each process maps identical private data; the worker starts KSM and migrates */
+	data->do_prepare = prepare_local_region;
+	data->do_work = merge_and_migrate;
+	data->do_check = has_same_pfn;
+
+	propagate_children(_metadata, data);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 471e539d82b8..9e88cc25b9df 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -85,6 +85,8 @@ separated by spaces:
test handling of page fragment allocation and freeing
- vma_merge
test VMA merge cases behave as expected
+- rmap
+ test rmap behaves as expected
example: ./run_vmtests.sh -t "hmm mmap ksm"
EOF
@@ -322,11 +324,15 @@ CATEGORY="gup_test" run_test ./gup_longterm
CATEGORY="userfaultfd" run_test ./uffd-unit-tests
uffd_stress_bin=./uffd-stress
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16
-# Hugetlb tests require source and destination huge pages. Pass in half
-# the size of the free pages we have, which is used for *each*.
+# Hugetlb tests require source and destination huge pages. Pass in almost half
+# the size of the free pages we have, which is used for *each*. An adjustment
+# of (nr_parallel - 1) is done (see nr_parallel in uffd-stress.c) to have some
+# extra hugepages - this is done to prevent the test from failing by racily
+# reserving more hugepages than strictly required.
# uffd-stress expects a region expressed in MiB, so we adjust
# half_ufd_size_MB accordingly.
-half_ufd_size_MB=$(((freepgs * hpgsize_KB) / 1024 / 2))
+adjustment=$(( (31 < (nr_cpus - 1)) ? 31 : (nr_cpus - 1) ))
+half_ufd_size_MB=$((((freepgs - adjustment) * hpgsize_KB) / 1024 / 2))
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16
@@ -532,6 +538,8 @@ CATEGORY="page_frag" run_test ./test_page_frag.sh aligned
CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned
+CATEGORY="rmap" run_test ./rmap
+
echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" | tap_prefix
echo "1..${count_total}" | tap_output
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index 44a3f8a58806..10ae65ea032f 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -25,6 +25,8 @@
uint64_t pagesize;
unsigned int pageshift;
uint64_t pmd_pagesize;
+unsigned int pmd_order;
+int *expected_orders;
#define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
#define SMAP_PATH "/proc/self/smaps"
@@ -34,28 +36,226 @@ uint64_t pmd_pagesize;
#define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d"
#define PATH_FMT "%s,0x%lx,0x%lx,%d"
-#define PFN_MASK ((1UL<<55)-1)
-#define KPF_THP (1UL<<22)
+const char *pagemap_proc = "/proc/self/pagemap";
+const char *kpageflags_proc = "/proc/kpageflags";
+int pagemap_fd;
+int kpageflags_fd;
-int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file)
+static bool is_backed_by_folio(char *vaddr, int order, int pagemap_fd,
+ int kpageflags_fd)
{
- uint64_t paddr;
- uint64_t page_flags;
+ const unsigned long nr_pages = 1UL << order;
+ unsigned long pfn_head;
+ uint64_t pfn_flags;
+ unsigned long pfn;
+ unsigned long i;
- if (pagemap_file) {
- pread(pagemap_file, &paddr, sizeof(paddr),
- ((long)vaddr >> pageshift) * sizeof(paddr));
+ pfn = pagemap_get_pfn(pagemap_fd, vaddr);
- if (kpageflags_file) {
- pread(kpageflags_file, &page_flags, sizeof(page_flags),
- (paddr & PFN_MASK) * sizeof(page_flags));
+ /* non present page */
+ if (pfn == -1UL)
+ return false;
+
+ if (pageflags_get(pfn, kpageflags_fd, &pfn_flags))
+ goto fail;
+
+ /* check for order-0 pages */
+ if (!order) {
+ if (pfn_flags & (KPF_THP | KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))
+ return false;
+ return true;
+ }
+
+ /* non THP folio */
+ if (!(pfn_flags & KPF_THP))
+ return false;
+
+ pfn_head = pfn & ~(nr_pages - 1);
+
+ if (pageflags_get(pfn_head, kpageflags_fd, &pfn_flags))
+ goto fail;
+
+ /* head PFN has no compound_head flag set */
+ if (!(pfn_flags & (KPF_THP | KPF_COMPOUND_HEAD)))
+ return false;
+
+ /* check all tail PFN flags */
+ for (i = 1; i < nr_pages; i++) {
+ if (pageflags_get(pfn_head + i, kpageflags_fd, &pfn_flags))
+ goto fail;
+ if (!(pfn_flags & (KPF_THP | KPF_COMPOUND_TAIL)))
+ return false;
+ }
+
+ /*
+ * check the PFN after this folio, but if its flags cannot be obtained,
+ * assume this folio has the expected order
+ */
+ if (pageflags_get(pfn_head + nr_pages, kpageflags_fd, &pfn_flags))
+ return true;
+
+ /* this folio is bigger than the given order */
+ if (pfn_flags & (KPF_THP | KPF_COMPOUND_TAIL))
+ return false;
+
+ return true;
+fail:
+ ksft_exit_fail_msg("Failed to get folio info\n");
+ return false;
+}
+
+static int vaddr_pageflags_get(char *vaddr, int pagemap_fd, int kpageflags_fd,
+ uint64_t *flags)
+{
+ unsigned long pfn;
+
+ pfn = pagemap_get_pfn(pagemap_fd, vaddr);
+
+ /* non-present PFN */
+ if (pfn == -1UL)
+ return 1;
+
+ if (pageflags_get(pfn, kpageflags_fd, flags))
+ return -1;
- return !!(page_flags & KPF_THP);
+ return 0;
+}
+
+/*
+ * gather_after_split_folio_orders - scan through [vaddr_start, len) and record
+ * folio orders
+ *
+ * @vaddr_start: start vaddr
+ * @len: range length
+ * @pagemap_fd: file descriptor to /proc/<pid>/pagemap
+ * @kpageflags_fd: file descriptor to /proc/kpageflags
+ * @orders: output folio order array
+ * @nr_orders: folio order array size
+ *
+ * gather_after_split_folio_orders() scans through the len bytes starting at
+ * vaddr_start, checks all folios within that range and records their orders.
+ * All order-0 pages will be recorded. Non-present vaddr is skipped.
+ *
+ * NOTE: the function is used to check folio orders after a split is performed,
+ * so it assumes [vaddr_start, len) fully maps to after-split folios within that
+ * range.
+ *
+ * Return: 0 - no error, -1 - unhandled cases
+ */
+static int gather_after_split_folio_orders(char *vaddr_start, size_t len,
+ int pagemap_fd, int kpageflags_fd, int orders[], int nr_orders)
+{
+ uint64_t page_flags = 0;
+ int cur_order = -1;
+ char *vaddr;
+
+ if (pagemap_fd == -1 || kpageflags_fd == -1)
+ return -1;
+ if (!orders)
+ return -1;
+ if (nr_orders <= 0)
+ return -1;
+
+ for (vaddr = vaddr_start; vaddr < vaddr_start + len;) {
+ char *next_folio_vaddr;
+ int status;
+
+ status = vaddr_pageflags_get(vaddr, pagemap_fd, kpageflags_fd,
+ &page_flags);
+ if (status < 0)
+ return -1;
+
+ /* skip non present vaddr */
+ if (status == 1) {
+ vaddr += psize();
+ continue;
+ }
+
+		/* all order-0 pages with possible false positive (non folio) */
+ if (!(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
+ orders[0]++;
+ vaddr += psize();
+ continue;
+ }
+
+ /* skip non thp compound pages */
+ if (!(page_flags & KPF_THP)) {
+ vaddr += psize();
+ continue;
+ }
+
+ /* vpn points to part of a THP at this point */
+ if (page_flags & KPF_COMPOUND_HEAD)
+ cur_order = 1;
+ else {
+ vaddr += psize();
+ continue;
+ }
+
+ next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
+
+ if (next_folio_vaddr >= vaddr_start + len)
+ break;
+
+ while ((status = vaddr_pageflags_get(next_folio_vaddr,
+ pagemap_fd, kpageflags_fd,
+ &page_flags)) >= 0) {
+ /*
+ * non present vaddr, next compound head page, or
+ * order-0 page
+ */
+ if (status == 1 ||
+ (page_flags & KPF_COMPOUND_HEAD) ||
+ !(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) {
+ if (cur_order < nr_orders) {
+ orders[cur_order]++;
+ cur_order = -1;
+ vaddr = next_folio_vaddr;
+ }
+ break;
+ }
+
+ cur_order++;
+ next_folio_vaddr = vaddr + (1UL << (cur_order + pshift()));
}
+
+ if (status < 0)
+ return status;
}
+ if (cur_order > 0 && cur_order < nr_orders)
+ orders[cur_order]++;
return 0;
}
+/*
+ * Gather the folio orders found in the len bytes starting at vaddr_start and
+ * compare them, order by order, with the expected counts in @orders
+ * (@nr_orders entries). Every mismatch is printed.
+ *
+ * Returns 0 if all counts match, -1 otherwise. Exits the test if memory
+ * cannot be allocated or the folio information cannot be gathered.
+ */
+static int check_after_split_folio_orders(char *vaddr_start, size_t len,
+		int pagemap_fd, int kpageflags_fd, int orders[], int nr_orders)
+{
+	int *vaddr_orders;
+	int status;
+	int i;
+
+	/* Zeroed counters, one per order */
+	vaddr_orders = calloc(nr_orders, sizeof(*vaddr_orders));
+	if (!vaddr_orders)
+		ksft_exit_fail_msg("Cannot allocate memory for vaddr_orders\n");
+
+	status = gather_after_split_folio_orders(vaddr_start, len, pagemap_fd,
+				kpageflags_fd, vaddr_orders, nr_orders);
+	if (status)
+		ksft_exit_fail_msg("gather folio info failed\n");
+
+	/* Report every mismatching order, not just the first one */
+	for (i = 0; i < nr_orders; i++) {
+		if (vaddr_orders[i] != orders[i]) {
+			ksft_print_msg("order %d: expected: %d got %d\n", i,
+				       orders[i], vaddr_orders[i]);
+			status = -1;
+		}
+	}
+
+	free(vaddr_orders);
+	return status;
+}
+
static void write_file(const char *path, const char *buf, size_t buflen)
{
int fd;
@@ -111,7 +311,7 @@ static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hp
unsigned long rss_anon_before, rss_anon_after;
size_t i;
- if (!check_huge_anon(one_page, 4, pmd_pagesize))
+ if (!check_huge_anon(one_page, nr_hpages, pmd_pagesize))
ksft_exit_fail_msg("No THP is allocated\n");
rss_anon_before = rss_anon();
@@ -135,7 +335,7 @@ static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hp
rss_anon_before, rss_anon_after);
}
-void split_pmd_zero_pages(void)
+static void split_pmd_zero_pages(void)
{
char *one_page;
int nr_hpages = 4;
@@ -147,7 +347,7 @@ void split_pmd_zero_pages(void)
free(one_page);
}
-void split_pmd_thp_to_order(int order)
+static void split_pmd_thp_to_order(int order)
{
char *one_page;
size_t len = 4 * pmd_pagesize;
@@ -173,6 +373,13 @@ void split_pmd_thp_to_order(int order)
if (one_page[i] != (char)i)
ksft_exit_fail_msg("%ld byte corrupted\n", i);
+ memset(expected_orders, 0, sizeof(int) * (pmd_order + 1));
+ expected_orders[order] = 4 << (pmd_order - order);
+
+ if (check_after_split_folio_orders(one_page, len, pagemap_fd,
+ kpageflags_fd, expected_orders,
+ (pmd_order + 1)))
+ ksft_exit_fail_msg("Unexpected THP split\n");
if (!check_huge_anon(one_page, 0, pmd_pagesize))
ksft_exit_fail_msg("Still AnonHugePages not split\n");
@@ -181,28 +388,12 @@ void split_pmd_thp_to_order(int order)
free(one_page);
}
-void split_pte_mapped_thp(void)
+static void split_pte_mapped_thp(void)
{
char *one_page, *pte_mapped, *pte_mapped2;
size_t len = 4 * pmd_pagesize;
uint64_t thp_size;
size_t i;
- const char *pagemap_template = "/proc/%d/pagemap";
- const char *kpageflags_proc = "/proc/kpageflags";
- char pagemap_proc[255];
- int pagemap_fd;
- int kpageflags_fd;
-
- if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0)
- ksft_exit_fail_msg("get pagemap proc error: %s\n", strerror(errno));
-
- pagemap_fd = open(pagemap_proc, O_RDONLY);
- if (pagemap_fd == -1)
- ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno));
-
- kpageflags_fd = open(kpageflags_proc, O_RDONLY);
- if (kpageflags_fd == -1)
- ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno));
one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
@@ -234,7 +425,7 @@ void split_pte_mapped_thp(void)
thp_size = 0;
for (i = 0; i < pagesize * 4; i++)
if (i % pagesize == 0 &&
- is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd))
+ is_backed_by_folio(&pte_mapped[i], pmd_order, pagemap_fd, kpageflags_fd))
thp_size++;
if (thp_size != 4)
@@ -251,7 +442,7 @@ void split_pte_mapped_thp(void)
ksft_exit_fail_msg("%ld byte corrupted\n", i);
if (i % pagesize == 0 &&
- is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd))
+ !is_backed_by_folio(&pte_mapped[i], 0, pagemap_fd, kpageflags_fd))
thp_size++;
}
@@ -260,11 +451,9 @@ void split_pte_mapped_thp(void)
ksft_test_result_pass("Split PTE-mapped huge pages successful\n");
munmap(one_page, len);
- close(pagemap_fd);
- close(kpageflags_fd);
}
-void split_file_backed_thp(int order)
+static void split_file_backed_thp(int order)
{
int status;
int fd;
@@ -366,7 +555,7 @@ out:
ksft_exit_fail_msg("Error occurred\n");
}
-bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template,
+static bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template,
const char **thp_fs_loc)
{
if (xfs_path) {
@@ -382,7 +571,7 @@ bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template,
return true;
}
-void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp)
+static void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp)
{
int status;
@@ -395,8 +584,8 @@ void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp)
strerror(errno));
}
-int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
- char **addr)
+static int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size,
+ int *fd, char **addr)
{
size_t i;
unsigned char buf[1024];
@@ -462,10 +651,11 @@ err_out_unlink:
return -1;
}
-void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc,
- int order, int offset)
+static void split_thp_in_pagecache_to_order_at(size_t fd_size,
+ const char *fs_loc, int order, int offset)
{
int fd;
+ char *split_addr;
char *addr;
size_t i;
char testfile[INPUT_MAX];
@@ -479,14 +669,33 @@ void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc,
err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
if (err)
return;
+
err = 0;
- if (offset == -1)
- write_debugfs(PID_FMT, getpid(), (uint64_t)addr,
- (uint64_t)addr + fd_size, order);
- else
- write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)addr,
- (uint64_t)addr + fd_size, order, offset);
+ memset(expected_orders, 0, sizeof(int) * (pmd_order + 1));
+ /*
+ * use [split_addr, split_addr + pagesize) range to split THPs, since
+ * the debugfs function always split a range with pagesize step and
+ * providing a full [addr, addr + fd_size) range can trigger multiple
+ * splits, complicating after-split result checking.
+ */
+ if (offset == -1) {
+ for (split_addr = addr; split_addr < addr + fd_size; split_addr += pmd_pagesize)
+ write_debugfs(PID_FMT, getpid(), (uint64_t)split_addr,
+ (uint64_t)split_addr + pagesize, order);
+
+ expected_orders[order] = fd_size / (pagesize << order);
+ } else {
+ int times = fd_size / pmd_pagesize;
+
+ for (split_addr = addr; split_addr < addr + fd_size; split_addr += pmd_pagesize)
+ write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)split_addr,
+ (uint64_t)split_addr + pagesize, order, offset);
+
+ for (i = order + 1; i < pmd_order; i++)
+ expected_orders[i] = times;
+ expected_orders[order] = 2 * times;
+ }
for (i = 0; i < fd_size; i++)
if (*(addr + i) != (char)i) {
@@ -495,6 +704,14 @@ void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc,
goto out;
}
+ if (check_after_split_folio_orders(addr, fd_size, pagemap_fd,
+ kpageflags_fd, expected_orders,
+ (pmd_order + 1))) {
+ ksft_print_msg("Unexpected THP split\n");
+ err = 1;
+ goto out;
+ }
+
if (!check_huge_file(addr, 0, pmd_pagesize)) {
ksft_print_msg("Still FilePmdMapped not split\n");
err = EXIT_FAILURE;
@@ -525,6 +742,8 @@ int main(int argc, char **argv)
const char *fs_loc;
bool created_tmp;
int offset;
+ unsigned int nr_pages;
+ unsigned int tests;
ksft_print_header();
@@ -536,38 +755,58 @@ int main(int argc, char **argv)
if (argc > 1)
optional_xfs_path = argv[1];
- ksft_set_plan(1+8+1+9+9+8*4+2);
-
pagesize = getpagesize();
pageshift = ffs(pagesize) - 1;
pmd_pagesize = read_pmd_pagesize();
if (!pmd_pagesize)
ksft_exit_fail_msg("Reading PMD pagesize failed\n");
+ nr_pages = pmd_pagesize / pagesize;
+ pmd_order = sz2ord(pmd_pagesize, pagesize);
+
+ expected_orders = (int *)malloc(sizeof(int) * (pmd_order + 1));
+ if (!expected_orders)
+ ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno));
+
+ tests = 2 + (pmd_order - 1) + (2 * pmd_order) + (pmd_order - 1) * 4 + 2;
+ ksft_set_plan(tests);
+
+ pagemap_fd = open(pagemap_proc, O_RDONLY);
+ if (pagemap_fd == -1)
+ ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno));
+
+ kpageflags_fd = open(kpageflags_proc, O_RDONLY);
+ if (kpageflags_fd == -1)
+ ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno));
+
fd_size = 2 * pmd_pagesize;
split_pmd_zero_pages();
- for (i = 0; i < 9; i++)
+ for (i = 0; i < pmd_order; i++)
if (i != 1)
split_pmd_thp_to_order(i);
split_pte_mapped_thp();
- for (i = 0; i < 9; i++)
+ for (i = 0; i < pmd_order; i++)
split_file_backed_thp(i);
created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template,
&fs_loc);
- for (i = 8; i >= 0; i--)
+ for (i = pmd_order - 1; i >= 0; i--)
split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, -1);
- for (i = 0; i < 9; i++)
+ for (i = 0; i < pmd_order; i++)
for (offset = 0;
- offset < pmd_pagesize / pagesize;
- offset += MAX(pmd_pagesize / pagesize / 4, 1 << i))
+ offset < nr_pages;
+ offset += MAX(nr_pages / 4, 1 << i))
split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, offset);
cleanup_thp_fs(fs_loc, created_tmp);
+ close(pagemap_fd);
+ close(kpageflags_fd);
+ free(expected_orders);
+
ksft_finished();
return 0;
diff --git a/tools/testing/selftests/mm/test_vmalloc.sh b/tools/testing/selftests/mm/test_vmalloc.sh
index d73b846736f1..d39096723fca 100755
--- a/tools/testing/selftests/mm/test_vmalloc.sh
+++ b/tools/testing/selftests/mm/test_vmalloc.sh
@@ -47,14 +47,14 @@ check_test_requirements()
fi
}
-run_perfformance_check()
+run_performance_check()
{
echo "Run performance tests to evaluate how fast vmalloc allocation is."
echo "It runs all test cases on one single CPU with sequential order."
modprobe $DRIVER $PERF_PARAM > /dev/null 2>&1
echo "Done."
- echo "Ccheck the kernel message buffer to see the summary."
+ echo "Check the kernel message buffer to see the summary."
}
run_stability_check()
@@ -160,7 +160,7 @@ function run_test()
usage
else
if [[ "$1" = "performance" ]]; then
- run_perfformance_check
+ run_performance_check
elif [[ "$1" = "stress" ]]; then
run_stability_check
elif [[ "$1" = "smoke" ]]; then
diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c
index bad60ac52874..574bd0f8ae48 100644
--- a/tools/testing/selftests/mm/thp_settings.c
+++ b/tools/testing/selftests/mm/thp_settings.c
@@ -382,10 +382,17 @@ unsigned long thp_shmem_supported_orders(void)
return __thp_supported_orders(true);
}
-bool thp_is_enabled(void)
+bool thp_available(void)
{
if (access(THP_SYSFS, F_OK) != 0)
return false;
+ return true;
+}
+
+bool thp_is_enabled(void)
+{
+ if (!thp_available())
+ return false;
int mode = thp_read_string("enabled", thp_enabled_strings);
diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h
index 6c07f70beee9..76eeb712e5f1 100644
--- a/tools/testing/selftests/mm/thp_settings.h
+++ b/tools/testing/selftests/mm/thp_settings.h
@@ -84,6 +84,7 @@ void thp_set_read_ahead_path(char *path);
unsigned long thp_supported_orders(void);
unsigned long thp_shmem_supported_orders(void);
+bool thp_available(void);
bool thp_is_enabled(void);
#endif /* __THP_SETTINGS_H__ */
diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c
index 8e2b08dc5762..4f5e290ff1a6 100644
--- a/tools/testing/selftests/mm/thuge-gen.c
+++ b/tools/testing/selftests/mm/thuge-gen.c
@@ -177,13 +177,16 @@ void find_pagesizes(void)
globfree(&g);
read_sysfs("/proc/sys/kernel/shmmax", &shmmax_val);
- if (shmmax_val < NUM_PAGES * largest)
- ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax",
- largest * NUM_PAGES);
+ if (shmmax_val < NUM_PAGES * largest) {
+ ksft_print_msg("WARNING: shmmax is too small to run this test.\n");
+ ksft_print_msg("Please run the following command to increase shmmax:\n");
+ ksft_print_msg("echo %lu > /proc/sys/kernel/shmmax\n", largest * NUM_PAGES);
+ ksft_exit_skip("Test skipped due to insufficient shmmax value.\n");
+ }
#if defined(__x86_64__)
if (largest != 1U<<30) {
- ksft_exit_fail_msg("No GB pages available on x86-64\n"
+ ksft_exit_skip("No GB pages available on x86-64\n"
"Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES);
}
#endif
diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c
index 40af7f67c407..ecd016329935 100644
--- a/tools/testing/selftests/mm/uffd-stress.c
+++ b/tools/testing/selftests/mm/uffd-stress.c
@@ -51,7 +51,7 @@ static char *zeropage;
pthread_attr_t attr;
#define swap(a, b) \
- do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+ do { __auto_type __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
const char *examples =
"# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
@@ -448,12 +448,6 @@ int main(int argc, char **argv)
parse_test_type_arg(argv[1]);
bytes = atol(argv[2]) * 1024 * 1024;
- if (test_type == TEST_HUGETLB &&
- get_free_hugepages() < bytes / page_size) {
- printf("skip: Skipping userfaultfd... not enough hugepages\n");
- return KSFT_SKIP;
- }
-
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
if (nr_cpus > 32) {
/* Don't let calculation below go to zero. */
@@ -464,6 +458,17 @@ int main(int argc, char **argv)
nr_parallel = nr_cpus;
}
+ /*
+ * src and dst each require bytes / page_size number of hugepages.
+ * Ensure nr_parallel - 1 hugepages on top of that to account
+ * for racy extra reservation of hugepages.
+ */
+ if (test_type == TEST_HUGETLB &&
+ get_free_hugepages() < 2 * (bytes / page_size) + nr_parallel - 1) {
+ printf("skip: Skipping userfaultfd... not enough hugepages\n");
+ return KSFT_SKIP;
+ }
+
nr_pages_per_cpu = bytes / page_size / nr_parallel;
if (!nr_pages_per_cpu) {
_err("pages_per_cpu = 0, cannot test (%lu / %lu / %lu)",
diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c
index c2ba7d46c7b4..78038c40aaaf 100644
--- a/tools/testing/selftests/mm/uffd-wp-mremap.c
+++ b/tools/testing/selftests/mm/uffd-wp-mremap.c
@@ -19,11 +19,6 @@ static size_t thpsizes[20];
static int nr_hugetlbsizes;
static size_t hugetlbsizes[10];
-static int sz2ord(size_t size)
-{
- return __builtin_ctzll(size / pagesize);
-}
-
static int detect_thp_sizes(size_t sizes[], int max)
{
int count = 0;
@@ -87,9 +82,9 @@ static void *alloc_one_folio(size_t size, bool private, bool hugetlb)
struct thp_settings settings = *thp_current_settings();
if (private)
- settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS;
+ settings.hugepages[sz2ord(size, pagesize)].enabled = THP_ALWAYS;
else
- settings.shmem_hugepages[sz2ord(size)].enabled = SHMEM_ALWAYS;
+ settings.shmem_hugepages[sz2ord(size, pagesize)].enabled = SHMEM_ALWAYS;
thp_push_settings(&settings);
diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c
index 169dbd692bf5..81b33d8f78f4 100644
--- a/tools/testing/selftests/mm/virtual_address_range.c
+++ b/tools/testing/selftests/mm/virtual_address_range.c
@@ -44,12 +44,18 @@
* On Arm64 the address space is 256TB and support for
* high mappings up to 4PB virtual address space has
* been added.
+ *
+ * On PowerPC64, the address space up to 128TB can be
+ * mapped without a hint. Addresses beyond 128TB, up to
+ * 4PB, can be mapped with a hint.
+ *
*/
#define NR_CHUNKS_128TB ((128 * SZ_1TB) / MAP_CHUNK_SIZE) /* Number of chunks for 128TB */
#define NR_CHUNKS_256TB (NR_CHUNKS_128TB * 2UL)
#define NR_CHUNKS_384TB (NR_CHUNKS_128TB * 3UL)
#define NR_CHUNKS_3840TB (NR_CHUNKS_128TB * 30UL)
+#define NR_CHUNKS_3968TB (NR_CHUNKS_128TB * 31UL)
#define ADDR_MARK_128TB (1UL << 47) /* First address beyond 128TB */
#define ADDR_MARK_256TB (1UL << 48) /* First address beyond 256TB */
@@ -59,6 +65,11 @@
#define HIGH_ADDR_SHIFT 49
#define NR_CHUNKS_LOW NR_CHUNKS_256TB
#define NR_CHUNKS_HIGH NR_CHUNKS_3840TB
+#elif defined(__PPC64__)
+#define HIGH_ADDR_MARK ADDR_MARK_128TB
+#define HIGH_ADDR_SHIFT 48
+#define NR_CHUNKS_LOW NR_CHUNKS_128TB
+#define NR_CHUNKS_HIGH NR_CHUNKS_3968TB
#else
#define HIGH_ADDR_MARK ADDR_MARK_128TB
#define HIGH_ADDR_SHIFT 48
@@ -227,7 +238,7 @@ int main(int argc, char *argv[])
if (hptr[i] == MAP_FAILED)
break;
- mark_range(ptr[i], MAP_CHUNK_SIZE);
+ mark_range(hptr[i], MAP_CHUNK_SIZE);
validate_addr(hptr[i], 1);
}
hchunks = i;
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index 9dafa7669ef9..56e9bd541edd 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -338,6 +338,19 @@ int detect_hugetlb_page_sizes(size_t sizes[], int max)
return count;
}
+int pageflags_get(unsigned long pfn, int kpageflags_fd, uint64_t *flags)
+{
+ size_t count;
+
+ count = pread(kpageflags_fd, flags, sizeof(*flags),
+ pfn * sizeof(*flags));
+
+ if (count != sizeof(*flags))
+ return -1;
+
+ return 0;
+}
+
/* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */
int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
bool miss, bool wp, bool minor, uint64_t *ioctls)
@@ -402,7 +415,7 @@ unsigned long get_free_hugepages(void)
return fhp;
}
-bool check_vmflag_io(void *addr)
+static bool check_vmflag(void *addr, const char *flag)
{
char buffer[MAX_LINE_LENGTH];
const char *flags;
@@ -419,13 +432,23 @@ bool check_vmflag_io(void *addr)
if (!flaglen)
return false;
- if (flaglen == strlen("io") && !memcmp(flags, "io", flaglen))
+ if (flaglen == strlen(flag) && !memcmp(flags, flag, flaglen))
return true;
flags += flaglen;
}
}
+bool check_vmflag_io(void *addr)
+{
+ return check_vmflag(addr, "io");
+}
+
+bool check_vmflag_pfnmap(void *addr)
+{
+ return check_vmflag(addr, "pf");
+}
+
/*
* Open an fd at /proc/$pid/maps and configure procmap_out ready for
* PROCMAP_QUERY query. Returns 0 on success, or an error code otherwise.
@@ -555,3 +578,126 @@ bool detect_huge_zeropage(void)
close(fd);
return enabled;
}
+
+long ksm_get_self_zero_pages(void)
+{
+ int proc_self_ksm_stat_fd;
+ char buf[200];
+ char *substr_ksm_zero;
+ size_t value_pos;
+ ssize_t read_size;
+
+ proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY);
+ if (proc_self_ksm_stat_fd < 0)
+ return -errno;
+
+ read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0);
+ close(proc_self_ksm_stat_fd);
+ if (read_size < 0)
+ return -errno;
+
+ buf[read_size] = 0;
+
+ substr_ksm_zero = strstr(buf, "ksm_zero_pages");
+ if (!substr_ksm_zero)
+ return 0;
+
+ value_pos = strcspn(substr_ksm_zero, "0123456789");
+ return strtol(substr_ksm_zero + value_pos, NULL, 10);
+}
+
+long ksm_get_self_merging_pages(void)
+{
+ int proc_self_ksm_merging_pages_fd;
+ char buf[10];
+ ssize_t ret;
+
+ proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages",
+ O_RDONLY);
+ if (proc_self_ksm_merging_pages_fd < 0)
+ return -errno;
+
+ ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0);
+ close(proc_self_ksm_merging_pages_fd);
+ if (ret <= 0)
+ return -errno;
+ buf[ret] = 0;
+
+ return strtol(buf, NULL, 10);
+}
+
+long ksm_get_full_scans(void)
+{
+ int ksm_full_scans_fd;
+ char buf[10];
+ ssize_t ret;
+
+ ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY);
+ if (ksm_full_scans_fd < 0)
+ return -errno;
+
+ ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0);
+ close(ksm_full_scans_fd);
+ if (ret <= 0)
+ return -errno;
+ buf[ret] = 0;
+
+ return strtol(buf, NULL, 10);
+}
+
+int ksm_use_zero_pages(void)
+{
+ int ksm_use_zero_pages_fd;
+ ssize_t ret;
+
+ ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR);
+ if (ksm_use_zero_pages_fd < 0)
+ return -errno;
+
+ ret = write(ksm_use_zero_pages_fd, "1", 1);
+ close(ksm_use_zero_pages_fd);
+ return ret == 1 ? 0 : -errno;
+}
+
+int ksm_start(void)
+{
+ int ksm_fd;
+ ssize_t ret;
+ long start_scans, end_scans;
+
+ ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
+ if (ksm_fd < 0)
+ return -errno;
+
+ /* Wait for two full scans such that any possible merging happened. */
+ start_scans = ksm_get_full_scans();
+ if (start_scans < 0) {
+ close(ksm_fd);
+ return start_scans;
+ }
+ ret = write(ksm_fd, "1", 1);
+ close(ksm_fd);
+ if (ret != 1)
+ return -errno;
+ do {
+ end_scans = ksm_get_full_scans();
+ if (end_scans < 0)
+ return end_scans;
+ } while (end_scans < start_scans + 2);
+
+ return 0;
+}
+
+int ksm_stop(void)
+{
+ int ksm_fd;
+ ssize_t ret;
+
+ ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
+ if (ksm_fd < 0)
+ return -errno;
+
+ ret = write(ksm_fd, "2", 1);
+ close(ksm_fd);
+ return ret == 1 ? 0 : -errno;
+}
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index b55d1809debc..07c4acfd84b6 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -18,6 +18,9 @@
#define PM_SWAP BIT_ULL(62)
#define PM_PRESENT BIT_ULL(63)
+#define KPF_COMPOUND_HEAD BIT_ULL(15)
+#define KPF_COMPOUND_TAIL BIT_ULL(16)
+#define KPF_THP BIT_ULL(22)
/*
* Ignore the checkpatch warning, we must read from x but don't want to do
* anything with it in order to trigger a read page fault. We therefore must use
@@ -85,6 +88,7 @@ bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size);
int64_t allocate_transhuge(void *ptr, int pagemap_fd);
unsigned long default_huge_page_size(void);
int detect_hugetlb_page_sizes(size_t sizes[], int max);
+int pageflags_get(unsigned long pfn, int kpageflags_fd, uint64_t *flags);
int uffd_register(int uffd, void *addr, uint64_t len,
bool miss, bool wp, bool minor);
@@ -93,6 +97,7 @@ int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
bool miss, bool wp, bool minor, uint64_t *ioctls);
unsigned long get_free_hugepages(void);
bool check_vmflag_io(void *addr);
+bool check_vmflag_pfnmap(void *addr);
int open_procmap(pid_t pid, struct procmap_fd *procmap_out);
int query_procmap(struct procmap_fd *procmap);
bool find_vma_procmap(struct procmap_fd *procmap, void *address);
@@ -126,9 +131,21 @@ static inline void log_test_result(int result)
ksft_test_result_report(result, "%s\n", test_name);
}
+static inline int sz2ord(size_t size, size_t pagesize)
+{
+ return __builtin_ctzll(size / pagesize);
+}
+
void *sys_mremap(void *old_address, unsigned long old_size,
unsigned long new_size, int flags, void *new_address);
+long ksm_get_self_zero_pages(void);
+long ksm_get_self_merging_pages(void);
+long ksm_get_full_scans(void);
+int ksm_use_zero_pages(void);
+int ksm_start(void);
+int ksm_stop(void);
+
/*
* On ppc64 this will only work with radix 2M hugepage size
*/
diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h
index 6e4fef560873..067265b0a554 100644
--- a/tools/testing/selftests/net/psock_lib.h
+++ b/tools/testing/selftests/net/psock_lib.h
@@ -22,10 +22,6 @@
#define PORT_BASE 8000
-#ifndef __maybe_unused
-# define __maybe_unused __attribute__ ((__unused__))
-#endif
-
static __maybe_unused void pair_udp_setfilter(int fd)
{
/* the filter below checks for all of the following conditions that
diff --git a/tools/testing/selftests/perf_events/watermark_signal.c b/tools/testing/selftests/perf_events/watermark_signal.c
index e03fe1b9bba2..b3a72f0ac522 100644
--- a/tools/testing/selftests/perf_events/watermark_signal.c
+++ b/tools/testing/selftests/perf_events/watermark_signal.c
@@ -17,8 +17,6 @@
#include "../kselftest_harness.h"
-#define __maybe_unused __attribute__((__unused__))
-
static int sigio_count;
static void handle_sigio(int signum __maybe_unused,
diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c
index 94bba4553130..a546475db550 100644
--- a/tools/testing/selftests/proc/proc-maps-race.c
+++ b/tools/testing/selftests/proc/proc-maps-race.c
@@ -32,6 +32,8 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <linux/fs.h>
+#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
@@ -317,6 +319,25 @@ static bool capture_mod_pattern(FIXTURE_DATA(proc_maps_race) *self,
strcmp(restored_first_line->text, self->first_line.text) == 0;
}
+static bool query_addr_at(int maps_fd, void *addr,
+ unsigned long *vma_start, unsigned long *vma_end)
+{
+ struct procmap_query q;
+
+ memset(&q, 0, sizeof(q));
+ q.size = sizeof(q);
+ /* Find the VMA at the split address */
+ q.query_addr = (unsigned long long)addr;
+ q.query_flags = 0;
+ if (ioctl(maps_fd, PROCMAP_QUERY, &q))
+ return false;
+
+ *vma_start = q.vma_start;
+ *vma_end = q.vma_end;
+
+ return true;
+}
+
static inline bool split_vma(FIXTURE_DATA(proc_maps_race) *self)
{
return mmap(self->mod_info->addr, self->page_size, self->mod_info->prot | PROT_EXEC,
@@ -559,6 +580,8 @@ TEST_F(proc_maps_race, test_maps_tearing_from_split)
do {
bool last_line_changed;
bool first_line_changed;
+ unsigned long vma_start;
+ unsigned long vma_end;
ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line));
@@ -595,6 +618,19 @@ TEST_F(proc_maps_race, test_maps_tearing_from_split)
first_line_changed = strcmp(new_first_line.text, self->first_line.text) != 0;
ASSERT_EQ(last_line_changed, first_line_changed);
+ /* Check if PROCMAP_QUERY ioctl() finds the right VMA */
+ ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr + self->page_size,
+ &vma_start, &vma_end));
+ /*
+ * The vma at the split address can be either the same as
+ * original one (if read before the split) or the same as the
+ * first line in the second page (if read after the split).
+ */
+ ASSERT_TRUE((vma_start == self->last_line.start_addr &&
+ vma_end == self->last_line.end_addr) ||
+ (vma_start == split_first_line.start_addr &&
+ vma_end == split_first_line.end_addr));
+
clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts);
end_test_iteration(&end_ts, self->verbose);
} while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec);
@@ -636,6 +672,9 @@ TEST_F(proc_maps_race, test_maps_tearing_from_resize)
clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts);
start_test_loop(&start_ts, self->verbose);
do {
+ unsigned long vma_start;
+ unsigned long vma_end;
+
ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line));
/* Check if we read vmas after shrinking it */
@@ -662,6 +701,16 @@ TEST_F(proc_maps_race, test_maps_tearing_from_resize)
"Expand result invalid", self));
}
+ /* Check if PROCMAP_QUERY ioctl() finds the right VMA */
+ ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr, &vma_start, &vma_end));
+ /*
+ * The vma should stay at the same address and have either the
+ * original size of 3 pages or 1 page if read after shrinking.
+ */
+ ASSERT_TRUE(vma_start == self->last_line.start_addr &&
+ (vma_end - vma_start == self->page_size * 3 ||
+ vma_end - vma_start == self->page_size));
+
clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts);
end_test_iteration(&end_ts, self->verbose);
} while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec);
@@ -703,6 +752,9 @@ TEST_F(proc_maps_race, test_maps_tearing_from_remap)
clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts);
start_test_loop(&start_ts, self->verbose);
do {
+ unsigned long vma_start;
+ unsigned long vma_end;
+
ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line));
/* Check if we read vmas after remapping it */
@@ -729,6 +781,19 @@ TEST_F(proc_maps_race, test_maps_tearing_from_remap)
"Remap restore result invalid", self));
}
+ /* Check if PROCMAP_QUERY ioctl() finds the right VMA */
+ ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr + self->page_size,
+ &vma_start, &vma_end));
+ /*
+ * The vma should either stay at the same address and have the
+ * original size of 3 pages or we should find the remapped vma
+ * at the remap destination address with size of 1 page.
+ */
+ ASSERT_TRUE((vma_start == self->last_line.start_addr &&
+ vma_end - vma_start == self->page_size * 3) ||
+ (vma_start == self->last_line.start_addr + self->page_size &&
+ vma_end - vma_start == self->page_size));
+
clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts);
end_test_iteration(&end_ts, self->verbose);
} while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec);
diff --git a/tools/testing/selftests/ublk/utils.h b/tools/testing/selftests/ublk/utils.h
index 36545d1567f1..a852e0b7153e 100644
--- a/tools/testing/selftests/ublk/utils.h
+++ b/tools/testing/selftests/ublk/utils.h
@@ -2,8 +2,6 @@
#ifndef KUBLK_UTILS_H
#define KUBLK_UTILS_H
-#define __maybe_unused __attribute__((unused))
-
#ifndef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif
diff --git a/tools/testing/shared/linux/maple_tree.h b/tools/testing/shared/linux/maple_tree.h
index f67d47d32857..7d0fadef0f11 100644
--- a/tools/testing/shared/linux/maple_tree.h
+++ b/tools/testing/shared/linux/maple_tree.h
@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0+ */
-#define atomic_t int32_t
-#define atomic_inc(x) uatomic_inc(x)
-#define atomic_read(x) uatomic_read(x)
-#define atomic_set(x, y) uatomic_set(x, y)
+#include <linux/atomic.h>
+
#define U8_MAX UCHAR_MAX
#include "../../../../include/linux/maple_tree.h"
diff --git a/tools/testing/shared/shared.mk b/tools/testing/shared/shared.mk
index 923ee2492256..5bcdf26c8a9d 100644
--- a/tools/testing/shared/shared.mk
+++ b/tools/testing/shared/shared.mk
@@ -1,7 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
+include ../../scripts/Makefile.arch
-CFLAGS += -I../shared -I. -I../../include -I../../../lib -g -Og -Wall \
+CFLAGS += -I../shared -I. -I../../include -I../../arch/$(SRCARCH)/include \
+ -I../../../lib -g -Og -Wall \
-D_LGPL_SOURCE -fsanitize=address -fsanitize=undefined
+CFLAGS += $(EXTRA_CFLAGS)
LDFLAGS += -fsanitize=address -fsanitize=undefined
LDLIBS += -lpthread -lurcu
LIBS := slab.o find_bit.o bitmap.o hweight.o vsprintf.o
@@ -11,6 +14,7 @@ SHARED_DEPS = Makefile ../shared/shared.mk ../shared/*.h generated/map-shift.h \
generated/bit-length.h generated/autoconf.h \
../../include/linux/*.h \
../../include/asm/*.h \
+ ../../arch/$(SRCARCH)/include/asm/*.h \
../../../include/linux/xarray.h \
../../../include/linux/maple_tree.h \
../../../include/linux/radix-tree.h \
diff --git a/tools/testing/vma/linux/atomic.h b/tools/testing/vma/linux/atomic.h
deleted file mode 100644
index 788c597c4fde..000000000000
--- a/tools/testing/vma/linux/atomic.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-
-#ifndef _LINUX_ATOMIC_H
-#define _LINUX_ATOMIC_H
-
-#define atomic_t int32_t
-#define atomic_inc(x) uatomic_inc(x)
-#define atomic_read(x) uatomic_read(x)
-#define atomic_set(x, y) uatomic_set(x, y)
-#define U8_MAX UCHAR_MAX
-
-#ifndef atomic_cmpxchg_relaxed
-#define atomic_cmpxchg_relaxed uatomic_cmpxchg
-#define atomic_cmpxchg_release uatomic_cmpxchg
-#endif /* atomic_cmpxchg_relaxed */
-
-#endif /* _LINUX_ATOMIC_H */
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 3639aa8dd2b0..437d2a1013be 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -21,6 +21,7 @@
#include <stdlib.h>
+#include <linux/atomic.h>
#include <linux/list.h>
#include <linux/maple_tree.h>
#include <linux/mm.h>
@@ -249,6 +250,14 @@ struct mutex {};
#define DEFINE_MUTEX(mutexname) \
struct mutex mutexname = {}
+#define DECLARE_BITMAP(name, bits) \
+ unsigned long name[BITS_TO_LONGS(bits)]
+
+#define NUM_MM_FLAG_BITS (64)
+typedef struct {
+ __private DECLARE_BITMAP(__mm_flags, NUM_MM_FLAG_BITS);
+} mm_flags_t;
+
struct mm_struct {
struct maple_tree mm_mt;
int map_count; /* number of VMAs */
@@ -260,7 +269,7 @@ struct mm_struct {
unsigned long def_flags;
- unsigned long flags; /* Must use atomic bitops to access */
+ mm_flags_t flags; /* Must use mm_flags_* helpers to access */
};
struct vm_area_struct;
@@ -467,13 +476,21 @@ struct vm_operations_struct {
struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
unsigned long addr, pgoff_t *ilx);
#endif
+#ifdef CONFIG_FIND_NORMAL_PAGE
/*
- * Called by vm_normal_page() for special PTEs to find the
- * page for @addr. This is useful if the default behavior
- * (using pte_page()) would not find the correct page.
+ * Called by vm_normal_page() for special PTEs in @vma at @addr. This
+ * allows for returning a "normal" page from vm_normal_page() even
+ * though the PTE indicates that the "struct page" either does not exist
+ * or should not be touched: "special".
+ *
+ * Do not add new users: this really only works when a "normal" page
+ * was mapped, but then the PTE got changed to something weird (+
+ * marked special) that would not make pte_pfn() identify the originally
+ * inserted page.
*/
- struct page *(*find_special_page)(struct vm_area_struct *vma,
- unsigned long addr);
+ struct page *(*find_normal_page)(struct vm_area_struct *vma,
+ unsigned long addr);
+#endif /* CONFIG_FIND_NORMAL_PAGE */
};
struct vm_unmapped_area_info {
@@ -1325,6 +1342,13 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
{
}
+# define ACCESS_PRIVATE(p, member) ((p)->member)
+
+static inline bool mm_flags_test(int flag, const struct mm_struct *mm)
+{
+ return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags));
+}
+
/*
* Denies creating a writable executable mapping or gaining executable permissions.
*
@@ -1355,7 +1379,7 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
{
/* If MDWE is disabled, we have nothing to deny. */
- if (!test_bit(MMF_HAS_MDWE, &current->mm->flags))
+ if (!mm_flags_test(MMF_HAS_MDWE, current->mm))
return false;
/* If the new VMA is not executable, we have nothing to deny. */
@@ -1375,15 +1399,8 @@ static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
static inline int mapping_map_writable(struct address_space *mapping)
{
- int c = atomic_read(&mapping->i_mmap_writable);
-
- /* Derived from the raw_atomic_inc_unless_negative() implementation. */
- do {
- if (c < 0)
- return -EPERM;
- } while (!__sync_bool_compare_and_swap(&mapping->i_mmap_writable, c, c+1));
-
- return 0;
+ return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
+ 0 : -EPERM;
}
static inline unsigned long move_page_tables(struct pagetable_move_control *pmc)