diff options
Diffstat (limited to 'tools/testing')
59 files changed, 3910 insertions, 746 deletions
diff --git a/tools/testing/memblock/tests/alloc_api.c b/tools/testing/memblock/tests/alloc_api.c index 68f1a75cd72c..c55f67dd367d 100644 --- a/tools/testing/memblock/tests/alloc_api.c +++ b/tools/testing/memblock/tests/alloc_api.c @@ -134,7 +134,7 @@ static int alloc_top_down_before_check(void) PREFIX_PUSH(); setup_memblock(); - memblock_reserve(memblock_end_of_DRAM() - total_size, r1_size); + memblock_reserve_kern(memblock_end_of_DRAM() - total_size, r1_size); allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES); @@ -182,7 +182,7 @@ static int alloc_top_down_after_check(void) total_size = r1.size + r2_size; - memblock_reserve(r1.base, r1.size); + memblock_reserve_kern(r1.base, r1.size); allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES); @@ -231,8 +231,8 @@ static int alloc_top_down_second_fit_check(void) total_size = r1.size + r2.size + r3_size; - memblock_reserve(r1.base, r1.size); - memblock_reserve(r2.base, r2.size); + memblock_reserve_kern(r1.base, r1.size); + memblock_reserve_kern(r2.base, r2.size); allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES); @@ -285,8 +285,8 @@ static int alloc_in_between_generic_check(void) total_size = r1.size + r2.size + r3_size; - memblock_reserve(r1.base, r1.size); - memblock_reserve(r2.base, r2.size); + memblock_reserve_kern(r1.base, r1.size); + memblock_reserve_kern(r2.base, r2.size); allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES); @@ -422,7 +422,7 @@ static int alloc_limited_space_generic_check(void) setup_memblock(); /* Simulate almost-full memory */ - memblock_reserve(memblock_start_of_DRAM(), reserved_size); + memblock_reserve_kern(memblock_start_of_DRAM(), reserved_size); allocated_ptr = run_memblock_alloc(available_size, SMP_CACHE_BYTES); @@ -608,7 +608,7 @@ static int alloc_bottom_up_before_check(void) PREFIX_PUSH(); setup_memblock(); - memblock_reserve(memblock_start_of_DRAM() + r1_size, r2_size); + memblock_reserve_kern(memblock_start_of_DRAM() + r1_size, r2_size); allocated_ptr = run_memblock_alloc(r1_size, SMP_CACHE_BYTES); @@ -655,7 +655,7 @@ static int alloc_bottom_up_after_check(void) total_size = r1.size + r2_size; - memblock_reserve(r1.base, r1.size); + memblock_reserve_kern(r1.base, r1.size); allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES); @@ -705,8 +705,8 @@ static int alloc_bottom_up_second_fit_check(void) total_size = r1.size + r2.size + r3_size; - memblock_reserve(r1.base, r1.size); - memblock_reserve(r2.base, r2.size); + memblock_reserve_kern(r1.base, r1.size); + memblock_reserve_kern(r2.base, r2.size); allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES); diff --git a/tools/testing/memblock/tests/alloc_helpers_api.c b/tools/testing/memblock/tests/alloc_helpers_api.c index 3ef9486da8a0..e5362cfd2ff3 100644 --- a/tools/testing/memblock/tests/alloc_helpers_api.c +++ b/tools/testing/memblock/tests/alloc_helpers_api.c @@ -163,7 +163,7 @@ static int alloc_from_top_down_no_space_above_check(void) min_addr = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2; /* No space above this address */ - memblock_reserve(min_addr, r2_size); + memblock_reserve_kern(min_addr, r2_size); allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr); @@ -199,7 +199,7 @@ static int alloc_from_top_down_min_addr_cap_check(void) start_addr = (phys_addr_t)memblock_start_of_DRAM(); min_addr = start_addr - SMP_CACHE_BYTES * 3; - memblock_reserve(start_addr + r1_size, MEM_SIZE - r1_size); + memblock_reserve_kern(start_addr + r1_size, MEM_SIZE - r1_size); allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr); diff --git a/tools/testing/memblock/tests/alloc_nid_api.c b/tools/testing/memblock/tests/alloc_nid_api.c index 49bb416d34ff..562e4701b0e0 100644 --- a/tools/testing/memblock/tests/alloc_nid_api.c +++ b/tools/testing/memblock/tests/alloc_nid_api.c @@ -324,7 +324,7 @@ static int alloc_nid_min_reserved_generic_check(void) min_addr = max_addr - r2_size; reserved_base = min_addr - r1_size; - memblock_reserve(reserved_base, r1_size); + memblock_reserve_kern(reserved_base, r1_size); allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES, min_addr, max_addr, @@ -374,7 +374,7 @@ static int alloc_nid_max_reserved_generic_check(void) max_addr = memblock_end_of_DRAM() - r1_size; min_addr = max_addr - r2_size; - memblock_reserve(max_addr, r1_size); + memblock_reserve_kern(max_addr, r1_size); allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES, min_addr, max_addr, @@ -436,8 +436,8 @@ static int alloc_nid_top_down_reserved_with_space_check(void) min_addr = r2.base + r2.size; max_addr = r1.base; - memblock_reserve(r1.base, r1.size); - memblock_reserve(r2.base, r2.size); + memblock_reserve_kern(r1.base, r1.size); + memblock_reserve_kern(r2.base, r2.size); allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, min_addr, max_addr, @@ -499,8 +499,8 @@ static int alloc_nid_reserved_full_merge_generic_check(void) min_addr = r2.base + r2.size; max_addr = r1.base; - memblock_reserve(r1.base, r1.size); - memblock_reserve(r2.base, r2.size); + memblock_reserve_kern(r1.base, r1.size); + memblock_reserve_kern(r2.base, r2.size); allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, min_addr, max_addr, @@ -563,8 +563,8 @@ static int alloc_nid_top_down_reserved_no_space_check(void) min_addr = r2.base + r2.size; max_addr = r1.base; - memblock_reserve(r1.base, r1.size); - memblock_reserve(r2.base, r2.size); + memblock_reserve_kern(r1.base, r1.size); + memblock_reserve_kern(r2.base, r2.size); allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, min_addr, max_addr, @@ -909,8 +909,8 @@ static int alloc_nid_bottom_up_reserved_with_space_check(void) min_addr = r2.base + r2.size; max_addr = r1.base; - memblock_reserve(r1.base, r1.size); - memblock_reserve(r2.base, r2.size); + memblock_reserve_kern(r1.base, r1.size); + memblock_reserve_kern(r2.base, r2.size); allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES, min_addr, max_addr, diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index bc30050227fd..2c0b38301253 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -35475,15 +35475,65 @@ static void check_dfs_preorder(struct maple_tree *mt) } /* End of depth first search tests */ +/* get height of the lowest non-leaf node with free space */ +static unsigned char get_vacant_height(struct ma_wr_state *wr_mas, void *entry) +{ + struct ma_state *mas = wr_mas->mas; + char vacant_height = 0; + enum maple_type type; + unsigned long *pivots; + unsigned long min = 0; + unsigned long max = ULONG_MAX; + unsigned char offset; + + /* start traversal */ + mas_reset(mas); + mas_start(mas); + if (!xa_is_node(mas_root(mas))) + return 0; + + type = mte_node_type(mas->node); + wr_mas->type = type; + while (!ma_is_leaf(type)) { + mas_node_walk(mas, mte_to_node(mas->node), type, &min, &max); + offset = mas->offset; + mas->end = mas_data_end(mas); + pivots = ma_pivots(mte_to_node(mas->node), type); + + if (pivots) { + if (offset) + min = pivots[mas->offset - 1]; + if (offset < mas->end) + max = pivots[mas->offset]; + } + wr_mas->r_max = offset < mas->end ? pivots[offset] : mas->max; + + /* detect spanning write */ + if (mas_is_span_wr(wr_mas)) + break; + + if (mas->end < mt_slot_count(mas->node) - 1) + vacant_height = mas->depth + 1; + + mas_descend(mas); + type = mte_node_type(mas->node); + mas->depth++; + } + + return vacant_height; +} + /* Preallocation testing */ static noinline void __init check_prealloc(struct maple_tree *mt) { unsigned long i, max = 100; unsigned long allocated; unsigned char height; + unsigned char vacant_height; struct maple_node *mn; void *ptr = check_prealloc; MA_STATE(mas, mt, 10, 20); + MA_WR_STATE(wr_mas, &mas, ptr); mt_set_non_kernel(1000); for (i = 0; i <= max; i++) @@ -35494,8 +35544,9 @@ static noinline void __init check_prealloc(struct maple_tree *mt) MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); + vacant_height = get_vacant_height(&wr_mas, ptr); MT_BUG_ON(mt, allocated == 0); - MT_BUG_ON(mt, allocated != 1 + height * 3); + MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3); mas_destroy(&mas); allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); @@ -35503,8 +35554,9 @@ static noinline void __init check_prealloc(struct maple_tree *mt) MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); + vacant_height = get_vacant_height(&wr_mas, ptr); MT_BUG_ON(mt, allocated == 0); - MT_BUG_ON(mt, allocated != 1 + height * 3); + MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3); MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); mas_destroy(&mas); allocated = mas_allocated(&mas); @@ -35514,7 +35566,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt) MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated != 1 + height * 3); + vacant_height = get_vacant_height(&wr_mas, ptr); + MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3); mn = mas_pop_node(&mas); MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); mn->parent = ma_parent_ptr(mn); @@ -35527,7 +35580,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt) MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated != 1 + height * 3); + vacant_height = get_vacant_height(&wr_mas, ptr); + MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3); mn = mas_pop_node(&mas); MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); @@ -35540,7 +35594,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt) MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated != 1 + height * 3); + vacant_height = get_vacant_height(&wr_mas, ptr); + MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3); mn = mas_pop_node(&mas); MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); mas_push_node(&mas, mn); @@ -35553,7 +35608,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt) MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated != 1 + height * 3); + vacant_height = get_vacant_height(&wr_mas, ptr); + MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3); mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); @@ -35578,7 +35634,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt) MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated != 1 + height * 2); + vacant_height = get_vacant_height(&wr_mas, ptr); + MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 2); mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); mt_set_non_kernel(1); @@ -35595,8 +35652,14 @@ static noinline void __init check_prealloc(struct maple_tree *mt) MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); + vacant_height = get_vacant_height(&wr_mas, ptr); MT_BUG_ON(mt, allocated == 0); - MT_BUG_ON(mt, allocated != 1 + height * 3); + /* + * vacant height cannot be used to compute the number of nodes needed + * as the root contains two entries which means it is on the verge of + * insufficiency. The worst case full height of the tree is needed. + */ + MT_BUG_ON(mt, allocated != height * 3 + 1); mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); mas_set_range(&mas, 0, 200); @@ -36248,6 +36311,45 @@ static noinline void __init check_mtree_dup(struct maple_tree *mt) extern void test_kmem_cache_bulk(void); +static inline void check_spanning_store_height(struct maple_tree *mt) +{ + int index = 0; + MA_STATE(mas, mt, 0, 0); + mas_lock(&mas); + while (mt_height(mt) != 3) { + mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL); + mas_set(&mas, ++index); + } + mas_set_range(&mas, 90, 140); + mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL); + MT_BUG_ON(mt, mas_mt_height(&mas) != 2); + mas_unlock(&mas); +} + +/* + * Test to check the path of a spanning rebalance which results in + * a collapse where the rebalancing of the child node leads to + * insufficieny in the parent node. + */ +static void check_collapsing_rebalance(struct maple_tree *mt) +{ + int i = 0; + MA_STATE(mas, mt, ULONG_MAX, ULONG_MAX); + + /* create a height 6 tree */ + while (mt_height(mt) < 6) { + mtree_store_range(mt, i, i + 10, xa_mk_value(i), GFP_KERNEL); + i += 9; + } + + /* delete all entries one at a time, starting from the right */ + do { + mas_erase(&mas); + } while (mas_prev(&mas, 0) != NULL); + + mtree_unlock(mt); +} + /* callback function used for check_nomem_writer_race() */ static void writer2(void *maple_tree) { @@ -36415,6 +36517,14 @@ void farmer_tests(void) mtree_destroy(&tree); mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_spanning_store_height(&tree); + mtree_destroy(&tree); + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_collapsing_rebalance(&tree); + mtree_destroy(&tree); + + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); check_null_expand(&tree); mtree_destroy(&tree); diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 1b897152bab6..e01584c2189a 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -21,14 +21,15 @@ TEST_GEN_PROGS += test_zswap LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h include ../lib.mk +include lib/libcgroup.mk -$(OUTPUT)/test_core: cgroup_util.c -$(OUTPUT)/test_cpu: cgroup_util.c -$(OUTPUT)/test_cpuset: cgroup_util.c -$(OUTPUT)/test_freezer: cgroup_util.c -$(OUTPUT)/test_hugetlb_memcg: cgroup_util.c -$(OUTPUT)/test_kill: cgroup_util.c -$(OUTPUT)/test_kmem: cgroup_util.c -$(OUTPUT)/test_memcontrol: cgroup_util.c -$(OUTPUT)/test_pids: cgroup_util.c -$(OUTPUT)/test_zswap: cgroup_util.c +$(OUTPUT)/test_core: $(LIBCGROUP_O) +$(OUTPUT)/test_cpu: $(LIBCGROUP_O) +$(OUTPUT)/test_cpuset: $(LIBCGROUP_O) +$(OUTPUT)/test_freezer: $(LIBCGROUP_O) +$(OUTPUT)/test_hugetlb_memcg: $(LIBCGROUP_O) +$(OUTPUT)/test_kill: $(LIBCGROUP_O) +$(OUTPUT)/test_kmem: $(LIBCGROUP_O) +$(OUTPUT)/test_memcontrol: $(LIBCGROUP_O) +$(OUTPUT)/test_pids: $(LIBCGROUP_O) +$(OUTPUT)/test_zswap: $(LIBCGROUP_O) diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c index 1e2d46636a0c..8832f3d1cb61 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c @@ -17,10 +17,10 @@ #include <unistd.h> #include "cgroup_util.h" -#include "../clone3/clone3_selftests.h" +#include "../../clone3/clone3_selftests.h" /* Returns read len on success, or -errno on failure. */ -static ssize_t read_text(const char *path, char *buf, size_t max_len) +ssize_t read_text(const char *path, char *buf, size_t max_len) { ssize_t len; int fd; @@ -39,7 +39,7 @@ static ssize_t read_text(const char *path, char *buf, size_t max_len) } /* Returns written len on success, or -errno on failure. */ -static ssize_t write_text(const char *path, char *buf, ssize_t len) +ssize_t write_text(const char *path, char *buf, ssize_t len) { int fd; @@ -217,7 +217,8 @@ int cg_write_numeric(const char *cgroup, const char *control, long value) return cg_write(cgroup, control, buf); } -int cg_find_unified_root(char *root, size_t len, bool *nsdelegate) +static int cg_find_root(char *root, size_t len, const char *controller, + bool *nsdelegate) { char buf[10 * PAGE_SIZE]; char *fs, *mount, *type, *options; @@ -236,18 +237,37 @@ int cg_find_unified_root(char *root, size_t len, bool *nsdelegate) options = strtok(NULL, delim); strtok(NULL, delim); strtok(NULL, delim); - - if (strcmp(type, "cgroup2") == 0) { - strncpy(root, mount, len); - if (nsdelegate) - *nsdelegate = !!strstr(options, "nsdelegate"); - return 0; + if (strcmp(type, "cgroup") == 0) { + if (!controller || !strstr(options, controller)) + continue; + } else if (strcmp(type, "cgroup2") == 0) { + if (controller && + cg_read_strstr(mount, "cgroup.controllers", controller)) + continue; + } else { + continue; } + strncpy(root, mount, len); + + if (nsdelegate) + *nsdelegate = !!strstr(options, "nsdelegate"); + return 0; + } return -1; } +int cg_find_controller_root(char *root, size_t len, const char *controller) +{ + return cg_find_root(root, len, controller, NULL); +} + +int cg_find_unified_root(char *root, size_t len, bool *nsdelegate) +{ + return cg_find_root(root, len, NULL, nsdelegate); +} + int cg_create(const char *cgroup) { return mkdir(cgroup, 0755); @@ -488,84 +508,6 @@ int cg_run_nowait(const char *cgroup, return pid; } -int get_temp_fd(void) -{ - return open(".", O_TMPFILE | O_RDWR | O_EXCL); -} - -int alloc_pagecache(int fd, size_t size) -{ - char buf[PAGE_SIZE]; - struct stat st; - int i; - - if (fstat(fd, &st)) - goto cleanup; - - size += st.st_size; - - if (ftruncate(fd, size)) - goto cleanup; - - for (i = 0; i < size; i += sizeof(buf)) - read(fd, buf, sizeof(buf)); - - return 0; - -cleanup: - return -1; -} - -int alloc_anon(const char *cgroup, void *arg) -{ - size_t size = (unsigned long)arg; - char *buf, *ptr; - - buf = malloc(size); - for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) - *ptr = 0; - - free(buf); - return 0; -} - -int is_swap_enabled(void) -{ - char buf[PAGE_SIZE]; - const char delim[] = "\n"; - int cnt = 0; - char *line; - - if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0) - return -1; - - for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) - cnt++; - - return cnt > 1; -} - -int set_oom_adj_score(int pid, int score) -{ - char path[PATH_MAX]; - int fd, len; - - sprintf(path, "/proc/%d/oom_score_adj", pid); - - fd = open(path, O_WRONLY | O_APPEND); - if (fd < 0) - return fd; - - len = dprintf(fd, "%d", score); - if (len < 0) { - close(fd); - return len; - } - - close(fd); - return 0; -} - int proc_mount_contains(const char *option) { char buf[4 * PAGE_SIZE]; diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h index 19b131ee7707..adb2bc193183 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h @@ -2,9 +2,9 @@ #include <stdbool.h> #include <stdlib.h> -#include "../kselftest.h" - +#ifndef PAGE_SIZE #define PAGE_SIZE 4096 +#endif #define MB(x) (x << 20) @@ -21,6 +21,10 @@ static inline int values_close(long a, long b, int err) return labs(a - b) <= (a + b) / 100 * err; } +extern ssize_t read_text(const char *path, char *buf, size_t max_len); +extern ssize_t write_text(const char *path, char *buf, ssize_t len); + +extern int cg_find_controller_root(char *root, size_t len, const char *controller); extern int cg_find_unified_root(char *root, size_t len, bool *nsdelegate); extern char *cg_name(const char *root, const char *name); extern char *cg_name_indexed(const char *root, const char *name, int index); @@ -49,11 +53,6 @@ extern int cg_enter_current_thread(const char *cgroup); extern int cg_run_nowait(const char *cgroup, int (*fn)(const char *cgroup, void *arg), void *arg); -extern int get_temp_fd(void); -extern int alloc_pagecache(int fd, size_t size); -extern int alloc_anon(const char *cgroup, void *arg); -extern int is_swap_enabled(void); -extern int set_oom_adj_score(int pid, int score); extern int cg_wait_for_proc_count(const char *cgroup, int count); extern int cg_killall(const char *cgroup); int proc_mount_contains(const char *option); diff --git a/tools/testing/selftests/cgroup/lib/libcgroup.mk b/tools/testing/selftests/cgroup/lib/libcgroup.mk new file mode 100644 index 000000000000..7a73007204c3 --- /dev/null +++ b/tools/testing/selftests/cgroup/lib/libcgroup.mk @@ -0,0 +1,19 @@ +CGROUP_DIR := $(selfdir)/cgroup + +LIBCGROUP_C := lib/cgroup_util.c + +LIBCGROUP_O := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBCGROUP_C)) + +LIBCGROUP_O_DIRS := $(shell dirname $(LIBCGROUP_O) | uniq) + +CFLAGS += -I$(CGROUP_DIR)/lib/include + +EXTRA_HDRS := $(selfdir)/clone3/clone3_selftests.h + +$(LIBCGROUP_O_DIRS): + mkdir -p $@ + +$(LIBCGROUP_O): $(OUTPUT)/%.o : $(CGROUP_DIR)/%.c $(EXTRA_HDRS) $(LIBCGROUP_O_DIRS) + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +EXTRA_CLEAN += $(LIBCGROUP_O) diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 16f5d74ae762..a680f773f2d5 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -24,6 +24,84 @@ static bool has_localevents; static bool has_recursiveprot; +int get_temp_fd(void) +{ + return open(".", O_TMPFILE | O_RDWR | O_EXCL); +} + +int alloc_pagecache(int fd, size_t size) +{ + char buf[PAGE_SIZE]; + struct stat st; + int i; + + if (fstat(fd, &st)) + goto cleanup; + + size += st.st_size; + + if (ftruncate(fd, size)) + goto cleanup; + + for (i = 0; i < size; i += sizeof(buf)) + read(fd, buf, sizeof(buf)); + + return 0; + +cleanup: + return -1; +} + +int alloc_anon(const char *cgroup, void *arg) +{ + size_t size = (unsigned long)arg; + char *buf, *ptr; + + buf = malloc(size); + for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) + *ptr = 0; + + free(buf); + return 0; +} + +int is_swap_enabled(void) +{ + char buf[PAGE_SIZE]; + const char delim[] = "\n"; + int cnt = 0; + char *line; + + if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0) + return -1; + + for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) + cnt++; + + return cnt > 1; +} + +int set_oom_adj_score(int pid, int score) +{ + char path[PATH_MAX]; + int fd, len; + + sprintf(path, "/proc/%d/oom_score_adj", pid); + + fd = open(path, O_WRONLY | O_APPEND); + if (fd < 0) + return fd; + + len = dprintf(fd, "%d", score); + if (len < 0) { + close(fd); + return len; + } + + close(fd); + return 0; +} + /* * This test creates two nested cgroups with and without enabling * the memory controller. @@ -380,10 +458,11 @@ static bool reclaim_until(const char *memcg, long goal); * * Then it checks actual memory usages and expects that: * A/B memory.current ~= 50M - * A/B/C memory.current ~= 29M - * A/B/D memory.current ~= 21M - * A/B/E memory.current ~= 0 - * A/B/F memory.current = 0 + * A/B/C memory.current ~= 29M [memory.events:low > 0] + * A/B/D memory.current ~= 21M [memory.events:low > 0] + * A/B/E memory.current ~= 0 [memory.events:low == 0 if !memory_recursiveprot, + * undefined otherwise] + * A/B/F memory.current = 0 [memory.events:low == 0] * (for origin of the numbers, see model in memcg_protection.m.) * * After that it tries to allocate more than there is @@ -495,10 +574,10 @@ static int test_memcg_protection(const char *root, bool min) for (i = 0; i < ARRAY_SIZE(children); i++) c[i] = cg_read_long(children[i], "memory.current"); - if (!values_close(c[0], MB(29), 10)) + if (!values_close(c[0], MB(29), 15)) goto cleanup; - if (!values_close(c[1], MB(21), 10)) + if (!values_close(c[1], MB(21), 20)) goto cleanup; if (c[3] != 0) @@ -525,7 +604,14 @@ static int test_memcg_protection(const char *root, bool min) goto cleanup; } + /* + * Child 2 has memory.low=0, but some low protection may still be + * distributed down from its parent with memory.low=50M if cgroup2 + * memory_recursiveprot mount option is enabled. Ignore the low + * event count in this case. + */ for (i = 0; i < ARRAY_SIZE(children); i++) { + int ignore_low_events_index = has_recursiveprot ? 2 : -1; int no_low_events_index = 1; long low, oom; @@ -534,6 +620,8 @@ static int test_memcg_protection(const char *root, bool min) if (oom) goto cleanup; + if (i == ignore_low_events_index) + continue; if (i <= no_low_events_index && low <= 0) goto cleanup; if (i > no_low_events_index && low) diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index ecbf07afc6dd..ff21524be458 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -3,7 +3,7 @@ TEST_GEN_FILES += access_memory access_memory_even -TEST_FILES = _chk_dependency.sh _damon_sysfs.py +TEST_FILES = _damon_sysfs.py # functionality tests TEST_PROGS += sysfs.sh diff --git a/tools/testing/selftests/damon/_chk_dependency.sh b/tools/testing/selftests/damon/_chk_dependency.sh deleted file mode 100644 index dda3a87dc00a..000000000000 --- a/tools/testing/selftests/damon/_chk_dependency.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - -DBGFS=$(grep debugfs /proc/mounts --max-count 1 | awk '{print $2}') -if [ "$DBGFS" = "" ] -then - echo "debugfs not mounted" - exit $ksft_skip -fi - -DBGFS+="/damon" - -if [ $EUID -ne 0 ]; -then - echo "Run as root" - exit $ksft_skip -fi - -if [ ! -d "$DBGFS" ] -then - echo "$DBGFS not found" - exit $ksft_skip -fi - -if [ -f "$DBGFS/monitor_on_DEPRECATED" ] -then - monitor_on_file="monitor_on_DEPRECATED" -else - monitor_on_file="monitor_on" -fi - -for f in attrs target_ids "$monitor_on_file" -do - if [ ! -f "$DBGFS/$f" ] - then - echo "$f not found" - exit 1 - fi -done - -permission_error="Operation not permitted" -for f in attrs target_ids "$monitor_on_file" -do - status=$( cat "$DBGFS/$f" 2>&1 ) - if [ "${status#*$permission_error}" != "$status" ]; then - echo "Permission for reading $DBGFS/$f denied; maybe secureboot enabled?" - exit $ksft_skip - fi -done diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index 6e136dc3df19..5b1cb6b3ce4e 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -15,6 +15,10 @@ if sysfs_root is None: print('Seems sysfs not mounted?') exit(ksft_skip) +if not os.path.exists(sysfs_root): + print('Seems DAMON disabled?') + exit(ksft_skip) + def write_file(path, string): "Returns error string if failed, or None otherwise" string = '%s' % string @@ -420,11 +424,16 @@ class Kdamond: tried_regions = [] tried_regions_dir = os.path.join( scheme.sysfs_dir(), 'tried_regions') + region_indices = [] for filename in os.listdir( os.path.join(scheme.sysfs_dir(), 'tried_regions')): tried_region_dir = os.path.join(tried_regions_dir, filename) if not os.path.isdir(tried_region_dir): continue + region_indices.append(int(filename)) + for region_idx in sorted(region_indices): + tried_region_dir = os.path.join(tried_regions_dir, + '%d' % region_idx) region_values = [] for f in ['start', 'end', 'nr_accesses', 'age']: content, err = read_file( diff --git a/tools/testing/selftests/damon/_debugfs_common.sh b/tools/testing/selftests/damon/_debugfs_common.sh deleted file mode 100644 index 54d45791b0d9..000000000000 --- a/tools/testing/selftests/damon/_debugfs_common.sh +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -test_write_result() { - file=$1 - content=$2 - orig_content=$3 - expect_reason=$4 - expected=$5 - - if [ "$expected" = "0" ] - then - echo "$content" > "$file" - else - echo "$content" > "$file" 2> /dev/null - fi - if [ $? -ne "$expected" ] - then - echo "writing $content to $file doesn't return $expected" - echo "expected because: $expect_reason" - echo "$orig_content" > "$file" - exit 1 - fi -} - -test_write_succ() { - test_write_result "$1" "$2" "$3" "$4" 0 -} - -test_write_fail() { - test_write_result "$1" "$2" "$3" "$4" 1 -} - -test_content() { - file=$1 - orig_content=$2 - expected=$3 - expect_reason=$4 - - content=$(cat "$file") - if [ "$content" != "$expected" ] - then - echo "reading $file expected $expected but $content" - echo "expected because: $expect_reason" - echo "$orig_content" > "$file" - exit 1 - fi -} - -source ./_chk_dependency.sh - -damon_onoff="$DBGFS/monitor_on" -if [ -f "$DBGFS/monitor_on_DEPRECATED" ] -then - damon_onoff="$DBGFS/monitor_on_DEPRECATED" -else - damon_onoff="$DBGFS/monitor_on" -fi - -if [ $(cat "$damon_onoff") = "on" ] -then - echo "monitoring is on" - exit $ksft_skip -fi diff --git a/tools/testing/selftests/filesystems/anon_inode_test.c b/tools/testing/selftests/filesystems/anon_inode_test.c index e8e0ef1460d2..73e0a4d4fb2f 100644 --- a/tools/testing/selftests/filesystems/anon_inode_test.c +++ b/tools/testing/selftests/filesystems/anon_inode_test.c @@ -7,7 +7,7 @@ #include <sys/stat.h> #include "../kselftest_harness.h" -#include "overlayfs/wrappers.h" +#include "wrappers.h" TEST(anon_inode_no_chown) { diff --git a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c index 85acb4e3ef00..72d51ad0ee0e 100644 --- a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c +++ b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c @@ -50,7 +50,7 @@ TEST(eventfd_check_flag_rdwr) ASSERT_GE(fd, 0); flags = fcntl(fd, F_GETFL); - // since the kernel automatically added O_RDWR. + // The kernel automatically adds the O_RDWR flag. EXPECT_EQ(flags, O_RDWR); close(fd); @@ -85,7 +85,7 @@ TEST(eventfd_check_flag_nonblock) close(fd); } -TEST(eventfd_chek_flag_cloexec_and_nonblock) +TEST(eventfd_check_flag_cloexec_and_nonblock) { int fd, flags; @@ -178,8 +178,7 @@ TEST(eventfd_check_flag_semaphore) // The semaphore could only be obtained from fdinfo. ret = verify_fdinfo(fd, &err, "eventfd-semaphore: ", 19, "1\n"); if (ret != 0) - ksft_print_msg("eventfd-semaphore check failed, msg: %s\n", - err.msg); + ksft_print_msg("eventfd semaphore flag check failed: %s\n", err.msg); EXPECT_EQ(ret, 0); close(fd); diff --git a/tools/testing/selftests/filesystems/file_stressor.c b/tools/testing/selftests/filesystems/file_stressor.c index 1136f93a9977..01dd89f8e52f 100644 --- a/tools/testing/selftests/filesystems/file_stressor.c +++ b/tools/testing/selftests/filesystems/file_stressor.c @@ -156,7 +156,7 @@ TEST_F_TIMEOUT(file_stressor, slab_typesafe_by_rcu, 900 * 2) ssize_t nr_read; /* - * Concurrently read /proc/<pid>/fd/ which rougly does: + * Concurrently read /proc/<pid>/fd/ which roughly does: * * f = fget_task_next(p, &fd); * if (!f) diff --git a/tools/testing/selftests/kmod/config b/tools/testing/selftests/kmod/config index 259f4fd6b5e2..1f1e63494af9 100644 --- a/tools/testing/selftests/kmod/config +++ b/tools/testing/selftests/kmod/config @@ -1,7 +1,2 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_LKM=m -CONFIG_XFS_FS=m - -# For the module parameter force_init_test is used -CONFIG_TUN=m -CONFIG_BTRFS_FS=m diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 3e786080473d..38b95998e1e6 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -8,6 +8,7 @@ LIBKVM += lib/elf.c LIBKVM += lib/guest_modes.c LIBKVM += lib/io.c LIBKVM += lib/kvm_util.c +LIBKVM += lib/lru_gen_util.c LIBKVM += lib/memstress.c LIBKVM += lib/guest_sprintf.c LIBKVM += lib/rbtree.c @@ -70,6 +71,7 @@ TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test TEST_GEN_PROGS_x86 += x86/feature_msrs_test TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test +TEST_GEN_PROGS_x86 += x86/fastops_test TEST_GEN_PROGS_x86 += x86/fix_hypercall_test TEST_GEN_PROGS_x86 += x86/hwcr_msr_test TEST_GEN_PROGS_x86 += x86/hyperv_clock @@ -82,6 +84,7 @@ TEST_GEN_PROGS_x86 += x86/hyperv_svm_test TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush TEST_GEN_PROGS_x86 += x86/kvm_clock_test TEST_GEN_PROGS_x86 += x86/kvm_pv_test +TEST_GEN_PROGS_x86 += x86/kvm_buslock_test TEST_GEN_PROGS_x86 += x86/monitor_mwait_test TEST_GEN_PROGS_x86 += x86/nested_emulation_test TEST_GEN_PROGS_x86 += x86/nested_exceptions_test @@ -222,6 +225,7 @@ OVERRIDE_TARGETS = 1 # importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`, # which causes the environment variable to override the makefile). include ../lib.mk +include ../cgroup/lib/libcgroup.mk INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ @@ -275,7 +279,7 @@ LIBKVM_S := $(filter %.S,$(LIBKVM)) LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING)) -LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) +LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) $(LIBCGROUP_O) SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS)) SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH)/%.o, $(SPLIT_TESTS)) diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 447e619cf856..da7196fd1b23 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -7,9 +7,11 @@ * This test measures the performance effects of KVM's access tracking. * Access tracking is driven by the MMU notifiers test_young, clear_young, and * clear_flush_young. These notifiers do not have a direct userspace API, - * however the clear_young notifier can be triggered by marking a pages as idle - * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to - * enable access tracking on guest memory. + * however the clear_young notifier can be triggered either by + * 1. marking a pages as idle in /sys/kernel/mm/page_idle/bitmap OR + * 2. adding a new MGLRU generation using the lru_gen debugfs file. + * This test leverages page_idle to enable access tracking on guest memory + * unless MGLRU is enabled, in which case MGLRU is used. * * To measure performance this test runs a VM with a configurable number of * vCPUs that each touch every page in disjoint regions of memory. Performance @@ -17,10 +19,11 @@ * predefined region. * * Note that a deterministic correctness test of access tracking is not possible - * by using page_idle as it exists today. This is for a few reasons: + * by using page_idle or MGLRU aging as it exists today. This is for a few + * reasons: * - * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This - * means subsequent guest accesses are not guaranteed to see page table + * 1. page_idle and MGLRU only issue clear_young notifiers, which lack a TLB flush. + * This means subsequent guest accesses are not guaranteed to see page table * updates made by KVM until some time in the future. * * 2. page_idle only operates on LRU pages. Newly allocated pages are not @@ -48,9 +51,17 @@ #include "guest_modes.h" #include "processor.h" +#include "cgroup_util.h" +#include "lru_gen_util.h" + +static const char *TEST_MEMCG_NAME = "access_tracking_perf_test"; + /* Global variable used to synchronize all of the vCPU threads. */ static int iteration; +/* The cgroup memory controller root. Needed for lru_gen-based aging. */ +char cgroup_root[PATH_MAX]; + /* Defines what vCPU threads should do during a given iteration. */ static enum { /* Run the vCPU to access all its memory. */ @@ -65,6 +76,25 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; /* Whether to overlap the regions of memory vCPUs access. */ static bool overlap_memory_access; +/* + * If the test should only warn if there are too many idle pages (i.e., it is + * expected). + * -1: Not yet set. + * 0: We do not expect too many idle pages, so FAIL if too many idle pages. + * 1: Having too many idle pages is expected, so merely print a warning if + * too many idle pages are found. + */ +static int idle_pages_warn_only = -1; + +/* Whether or not to use MGLRU instead of page_idle for access tracking */ +static bool use_lru_gen; + +/* Total number of pages to expect in the memcg after touching everything */ +static long test_pages; + +/* Last generation we found the pages in */ +static int lru_gen_last_gen = -1; + struct test_params { /* The backing source for the region of memory. */ enum vm_mem_backing_src_type backing_src; @@ -123,8 +153,24 @@ static void mark_page_idle(int page_idle_fd, uint64_t pfn) "Set page_idle bits for PFN 0x%" PRIx64, pfn); } -static void mark_vcpu_memory_idle(struct kvm_vm *vm, - struct memstress_vcpu_args *vcpu_args) +static void too_many_idle_pages(long idle_pages, long total_pages, int vcpu_idx) +{ + char prefix[18] = {}; + + if (vcpu_idx >= 0) + snprintf(prefix, 18, "vCPU%d: ", vcpu_idx); + + TEST_ASSERT(idle_pages_warn_only, + "%sToo many pages still idle (%lu out of %lu)", + prefix, idle_pages, total_pages); + + printf("WARNING: %sToo many pages still idle (%lu out of %lu), " + "this will affect performance results.\n", + prefix, idle_pages, total_pages); +} + +static void pageidle_mark_vcpu_memory_idle(struct kvm_vm *vm, + struct memstress_vcpu_args *vcpu_args) { int vcpu_idx = vcpu_args->vcpu_idx; uint64_t base_gva = vcpu_args->gva; @@ -177,27 +223,79 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, * arbitrary; high enough that we ensure most memory access went through * access tracking but low enough as to not make the test too brittle * over time and across architectures. - * - * When running the guest as a nested VM, "warn" instead of asserting - * as the TLB size is effectively unlimited and the KVM doesn't - * explicitly flush the TLB when aging SPTEs. As a result, more pages - * are cached and the guest won't see the "idle" bit cleared. */ - if (still_idle >= pages / 10) { -#ifdef __x86_64__ - TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR), - "vCPU%d: Too many pages still idle (%lu out of %lu)", - vcpu_idx, still_idle, pages); -#endif - printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), " - "this will affect performance results.\n", - vcpu_idx, still_idle, pages); - } + if (still_idle >= pages / 10) + too_many_idle_pages(still_idle, pages, + overlap_memory_access ? -1 : vcpu_idx); close(page_idle_fd); close(pagemap_fd); } +int find_generation(struct memcg_stats *stats, long total_pages) +{ + /* + * For finding the generation that contains our pages, use the same + * 90% threshold that page_idle uses. + */ + int gen = lru_gen_find_generation(stats, total_pages * 9 / 10); + + if (gen >= 0) + return gen; + + if (!idle_pages_warn_only) { + TEST_FAIL("Could not find a generation with 90%% of guest memory (%ld pages).", + total_pages * 9 / 10); + return gen; + } + + /* + * We couldn't find a generation with 90% of guest memory, which can + * happen if access tracking is unreliable. Simply look for a majority + * of pages. + */ + puts("WARNING: Couldn't find a generation with 90% of guest memory. " + "Performance results may not be accurate."); + gen = lru_gen_find_generation(stats, total_pages / 2); + TEST_ASSERT(gen >= 0, + "Could not find a generation with 50%% of guest memory (%ld pages).", + total_pages / 2); + return gen; +} + +static void lru_gen_mark_memory_idle(struct kvm_vm *vm) +{ + struct timespec ts_start; + struct timespec ts_elapsed; + struct memcg_stats stats; + int new_gen; + + /* Make a new generation */ + clock_gettime(CLOCK_MONOTONIC, &ts_start); + lru_gen_do_aging(&stats, TEST_MEMCG_NAME); + ts_elapsed = timespec_elapsed(ts_start); + + /* Check the generation again */ + new_gen = find_generation(&stats, test_pages); + + /* + * This function should only be invoked with newly-accessed pages, + * so pages should always move to a newer generation. + */ + if (new_gen <= lru_gen_last_gen) { + /* We did not move to a newer generation. */ + long idle_pages = lru_gen_sum_memcg_stats_for_gen(lru_gen_last_gen, + &stats); + + too_many_idle_pages(min_t(long, idle_pages, test_pages), + test_pages, -1); + } + pr_info("%-30s: %ld.%09lds\n", + "Mark memory idle (lru_gen)", ts_elapsed.tv_sec, + ts_elapsed.tv_nsec); + lru_gen_last_gen = new_gen; +} + static void assert_ucall(struct kvm_vcpu *vcpu, uint64_t expected_ucall) { struct ucall uc; @@ -237,7 +335,7 @@ static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args) assert_ucall(vcpu, UCALL_SYNC); break; case ITERATION_MARK_IDLE: - mark_vcpu_memory_idle(vm, vcpu_args); + pageidle_mark_vcpu_memory_idle(vm, vcpu_args); break; } @@ -289,15 +387,18 @@ static void access_memory(struct kvm_vm *vm, int nr_vcpus, static void mark_memory_idle(struct kvm_vm *vm, int nr_vcpus) { + if (use_lru_gen) + return lru_gen_mark_memory_idle(vm); + /* * Even though this parallelizes the work across vCPUs, this is still a * very slow operation because page_idle forces the test to mark one pfn - * at a time and the clear_young notifier serializes on the KVM MMU + * at a time and the clear_young notifier may serialize on the KVM MMU * lock. */ pr_debug("Marking VM memory idle (slow)...\n"); iteration_work = ITERATION_MARK_IDLE; - run_iteration(vm, nr_vcpus, "Mark memory idle"); + run_iteration(vm, nr_vcpus, "Mark memory idle (page_idle)"); } static void run_test(enum vm_guest_mode mode, void *arg) @@ -309,11 +410,38 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm = memstress_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1, params->backing_src, !overlap_memory_access); + /* + * If guest_page_size is larger than the host's page size, the + * guest (memstress) will only fault in a subset of the host's pages. + */ + test_pages = params->nr_vcpus * params->vcpu_memory_bytes / + max(memstress_args.guest_page_size, + (uint64_t)getpagesize()); + memstress_start_vcpu_threads(nr_vcpus, vcpu_thread_main); pr_info("\n"); access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory"); + if (use_lru_gen) { + struct memcg_stats stats; + + /* + * Do a page table scan now. Following initial population, aging + * may not cause the pages to move to a newer generation. Do + * an aging pass now so that future aging passes always move + * pages to a newer generation. + */ + printf("Initial aging pass (lru_gen)\n"); + lru_gen_do_aging(&stats, TEST_MEMCG_NAME); + TEST_ASSERT(lru_gen_sum_memcg_stats(&stats) >= test_pages, + "Not all pages accounted for (looking for %ld). " + "Was the memcg set up correctly?", test_pages); + access_memory(vm, nr_vcpus, ACCESS_WRITE, "Re-populating memory"); + lru_gen_read_memcg_stats(&stats, TEST_MEMCG_NAME); + lru_gen_last_gen = find_generation(&stats, test_pages); + } + /* As a control, read and write to the populated memory first. */ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to populated memory"); access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from populated memory"); @@ -328,6 +456,37 @@ static void run_test(enum vm_guest_mode mode, void *arg) memstress_destroy_vm(vm); } +static int access_tracking_unreliable(void) +{ +#ifdef __x86_64__ + /* + * When running nested, the TLB size may be effectively unlimited (for + * example, this is the case when running on KVM L0), and KVM doesn't + * explicitly flush the TLB when aging SPTEs. As a result, more pages + * are cached and the guest won't see the "idle" bit cleared. + */ + if (this_cpu_has(X86_FEATURE_HYPERVISOR)) { + puts("Skipping idle page count sanity check, because the test is run nested"); + return 1; + } +#endif + /* + * When NUMA balancing is enabled, guest memory will be unmapped to get + * NUMA faults, dropping the Accessed bits. + */ + if (is_numa_balancing_enabled()) { + puts("Skipping idle page count sanity check, because NUMA balancing is enabled"); + return 1; + } + return 0; +} + +static int run_test_for_each_guest_mode(const char *cgroup, void *arg) +{ + for_each_guest_mode(run_test, arg); + return 0; +} + static void help(char *name) { puts(""); @@ -342,11 +501,22 @@ static void help(char *name) printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n"); + printf(" -w: Control whether the test warns or fails if more than 10%%\n" + " of pages are still seen as idle/old after accessing guest\n" + " memory. >0 == warn only, 0 == fail, <0 == auto. For auto\n" + " mode, the test fails by default, but switches to warn only\n" + " if NUMA balancing is enabled or the test detects it's running\n" + " in a VM.\n"); backing_src_help("-s"); puts(""); exit(0); } +void destroy_cgroup(char *cg) +{ + printf("Destroying cgroup: %s\n", cg); +} + int main(int argc, char *argv[]) { struct test_params params = { @@ -354,12 +524,13 @@ int main(int argc, char *argv[]) .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE, .nr_vcpus = 1, }; + char *new_cg = NULL; int page_idle_fd; int opt; guest_modes_append_default(); - while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) { + while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg); @@ -376,6 +547,11 @@ int main(int argc, char *argv[]) case 's': params.backing_src = parse_backing_src_type(optarg); break; + case 'w': + idle_pages_warn_only = + atoi_non_negative("Idle pages warning", + optarg); + break; case 'h': default: help(argv[0]); @@ -383,12 +559,53 @@ int main(int argc, char *argv[]) } } - page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); - __TEST_REQUIRE(page_idle_fd >= 0, - "CONFIG_IDLE_PAGE_TRACKING is not enabled"); - close(page_idle_fd); + if (idle_pages_warn_only == -1) + idle_pages_warn_only = access_tracking_unreliable(); + + if (lru_gen_usable()) { + bool cg_created = true; + int ret; - for_each_guest_mode(run_test, ¶ms); + puts("Using lru_gen for aging"); + use_lru_gen = true; + + if (cg_find_controller_root(cgroup_root, sizeof(cgroup_root), "memory")) + ksft_exit_skip("Cannot find memory cgroup controller\n"); + + new_cg = cg_name(cgroup_root, TEST_MEMCG_NAME); + printf("Creating cgroup: %s\n", new_cg); + if (cg_create(new_cg)) { + if (errno == EEXIST) { + printf("Found existing cgroup"); + cg_created = false; + } else { + ksft_exit_skip("could not create new cgroup: %s\n", new_cg); + } + } + + /* + * This will fork off a new process to run the test within + * a new memcg, so we need to properly propagate the return + * value up. + */ + ret = cg_run(new_cg, &run_test_for_each_guest_mode, ¶ms); + if (cg_created) + cg_destroy(new_cg); + if (ret < 0) + TEST_FAIL("child did not spawn or was abnormally killed"); + if (ret) + return ret; + } else { + page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); + __TEST_REQUIRE(page_idle_fd >= 0, + "Couldn't open /sys/kernel/mm/page_idle/bitmap. " + "Is CONFIG_IDLE_PAGE_TRACKING enabled?"); + + close(page_idle_fd); + + puts("Using page_idle for aging"); + run_test_for_each_guest_mode(NULL, ¶ms); + } return 0; } diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 93013564428b..bee65ca08721 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -555,6 +555,41 @@ void kvm_get_stat(struct kvm_binary_stats *stats, const char *name, #define vm_get_stat(vm, stat) __get_stat(&(vm)->stats, stat) #define vcpu_get_stat(vcpu, stat) __get_stat(&(vcpu)->stats, stat) +static inline bool read_smt_control(char *buf, size_t buf_size) +{ + FILE *f = fopen("/sys/devices/system/cpu/smt/control", "r"); + bool ret; + + if (!f) + return false; + + ret = fread(buf, sizeof(*buf), buf_size, f) > 0; + fclose(f); + + return ret; +} + +static inline bool is_smt_possible(void) +{ + char buf[16]; + + if (read_smt_control(buf, sizeof(buf)) && + (!strncmp(buf, "forceoff", 8) || !strncmp(buf, "notsupported", 12))) + return false; + + return true; +} + +static inline bool is_smt_on(void) +{ + char buf[16]; + + if (read_smt_control(buf, sizeof(buf)) && !strncmp(buf, "on", 2)) + return true; + + return false; +} + void vm_create_irqchip(struct kvm_vm *vm); static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, diff --git a/tools/testing/selftests/kvm/include/lru_gen_util.h b/tools/testing/selftests/kvm/include/lru_gen_util.h new file mode 100644 index 000000000000..d32ff5d8ffd0 --- /dev/null +++ b/tools/testing/selftests/kvm/include/lru_gen_util.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tools for integrating with lru_gen, like parsing the lru_gen debugfs output. + * + * Copyright (C) 2025, Google LLC. + */ +#ifndef SELFTEST_KVM_LRU_GEN_UTIL_H +#define SELFTEST_KVM_LRU_GEN_UTIL_H + +#include <inttypes.h> +#include <limits.h> +#include <stdlib.h> + +#include "test_util.h" + +#define MAX_NR_GENS 16 /* MAX_NR_GENS in include/linux/mmzone.h */ +#define MAX_NR_NODES 4 /* Maximum number of nodes supported by the test */ + +#define LRU_GEN_DEBUGFS "/sys/kernel/debug/lru_gen" +#define LRU_GEN_ENABLED_PATH "/sys/kernel/mm/lru_gen/enabled" +#define LRU_GEN_ENABLED 1 +#define LRU_GEN_MM_WALK 2 + +struct generation_stats { + int gen; + long age_ms; + long nr_anon; + long nr_file; +}; + +struct node_stats { + int node; + int nr_gens; /* Number of populated gens entries. */ + struct generation_stats gens[MAX_NR_GENS]; +}; + +struct memcg_stats { + unsigned long memcg_id; + int nr_nodes; /* Number of populated nodes entries. */ + struct node_stats nodes[MAX_NR_NODES]; +}; + +void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg); +long lru_gen_sum_memcg_stats(const struct memcg_stats *stats); +long lru_gen_sum_memcg_stats_for_gen(int gen, const struct memcg_stats *stats); +void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg); +int lru_gen_find_generation(const struct memcg_stats *stats, + unsigned long total_pages); +bool lru_gen_usable(void); + +#endif /* SELFTEST_KVM_LRU_GEN_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 77d13d7920cb..c6ef895fbd9a 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -153,6 +153,7 @@ bool is_backing_src_hugetlb(uint32_t i); void backing_src_help(const char *flag); enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); long get_run_delay(void); +bool is_numa_balancing_enabled(void); /* * Whether or not the given source type is shared memory (as opposed to diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 32ab6ca7ec32..b11b5a53ebd5 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -203,6 +203,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30) #define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1) #define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3) +#define X86_FEATURE_SEV_SNP KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 4) #define X86_FEATURE_PERFMON_V2 KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0) #define X86_FEATURE_LBR_PMC_FREEZE KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2) diff --git a/tools/testing/selftests/kvm/include/x86/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h index 82c11c81a956..008b4169f5e2 100644 --- a/tools/testing/selftests/kvm/include/x86/sev.h +++ b/tools/testing/selftests/kvm/include/x86/sev.h @@ -25,19 +25,51 @@ enum sev_guest_state { #define SEV_POLICY_NO_DBG (1UL << 0) #define SEV_POLICY_ES (1UL << 2) +#define SNP_POLICY_SMT (1ULL << 16) +#define SNP_POLICY_RSVD_MBO (1ULL << 17) +#define SNP_POLICY_DBG (1ULL << 19) + #define GHCB_MSR_TERM_REQ 0x100 +static inline bool is_sev_snp_vm(struct kvm_vm *vm) +{ + return vm->type == KVM_X86_SNP_VM; +} + +static inline bool is_sev_es_vm(struct kvm_vm *vm) +{ + return is_sev_snp_vm(vm) || vm->type == KVM_X86_SEV_ES_VM; +} + +static inline bool is_sev_vm(struct kvm_vm *vm) +{ + return is_sev_es_vm(vm) || vm->type == KVM_X86_SEV_VM; +} + void sev_vm_launch(struct kvm_vm *vm, uint32_t policy); void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement); void sev_vm_launch_finish(struct kvm_vm *vm); +void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy); +void snp_vm_launch_update(struct kvm_vm *vm); +void snp_vm_launch_finish(struct kvm_vm *vm); struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, struct kvm_vcpu **cpu); -void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement); +void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement); kvm_static_assert(SEV_RET_SUCCESS == 0); /* + * A SEV-SNP VM requires the policy reserved bit to always be set. + * The SMT policy bit is also required to be set based on SMT being + * available and active on the system. + */ +static inline u64 snp_default_policy(void) +{ + return SNP_POLICY_RSVD_MBO | (is_smt_on() ? SNP_POLICY_SMT : 0); +} + +/* * The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long" * instead of a proper struct. The size of the parameter is embedded in the * ioctl number, i.e. is ABI and thus immutable. Hack around the mess by @@ -70,6 +102,12 @@ kvm_static_assert(SEV_RET_SUCCESS == 0); void sev_vm_init(struct kvm_vm *vm); void sev_es_vm_init(struct kvm_vm *vm); +void snp_vm_init(struct kvm_vm *vm); + +static inline void vmgexit(void) +{ + __asm__ __volatile__("rep; vmmcall"); +} static inline void sev_register_encrypted_memory(struct kvm_vm *vm, struct userspace_mem_region *region) @@ -93,4 +131,17 @@ static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa, vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data); } +static inline void snp_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa, + uint64_t hva, uint64_t size, uint8_t type) +{ + struct kvm_sev_snp_launch_update update_data = { + .uaddr = hva, + .gfn_start = gpa >> PAGE_SHIFT, + .len = size, + .type = type, + }; + + vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_UPDATE, &update_data); +} + #endif /* SELFTEST_KVM_SEV_H */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 5649cf2f40e8..a055343a7bf7 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -447,6 +447,15 @@ void kvm_set_files_rlimit(uint32_t nr_vcpus) } +static bool is_guest_memfd_required(struct vm_shape shape) +{ +#ifdef __x86_64__ + return shape.type == KVM_X86_SNP_VM; +#else + return false; +#endif +} + struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, uint64_t nr_extra_pages) { @@ -454,7 +463,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, nr_extra_pages); struct userspace_mem_region *slot0; struct kvm_vm *vm; - int i; + int i, flags; kvm_set_files_rlimit(nr_runnable_vcpus); @@ -463,7 +472,15 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, vm = ____vm_create(shape); - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0); + /* + * Force GUEST_MEMFD for the primary memory region if necessary, e.g. + * for CoCo VMs that require GUEST_MEMFD backed private memory. + */ + flags = 0; + if (is_guest_memfd_required(shape)) + flags |= KVM_MEM_GUEST_MEMFD; + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, flags); for (i = 0; i < NR_MEM_REGIONS; i++) vm->memslots[i] = 0; diff --git a/tools/testing/selftests/kvm/lib/lru_gen_util.c b/tools/testing/selftests/kvm/lib/lru_gen_util.c new file mode 100644 index 000000000000..46a14fd63d9e --- /dev/null +++ b/tools/testing/selftests/kvm/lib/lru_gen_util.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025, Google LLC. + */ + +#include <time.h> + +#include "lru_gen_util.h" + +/* + * Tracks state while we parse memcg lru_gen stats. The file we're parsing is + * structured like this (some extra whitespace elided): + * + * memcg (id) (path) + * node (id) + * (gen_nr) (age_in_ms) (nr_anon_pages) (nr_file_pages) + */ +struct memcg_stats_parse_context { + bool consumed; /* Whether or not this line was consumed */ + /* Next parse handler to invoke */ + void (*next_handler)(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); + int current_node_idx; /* Current index in nodes array */ + const char *name; /* The name of the memcg we're looking for */ +}; + +static void memcg_stats_handle_searching(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); +static void memcg_stats_handle_in_memcg(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); +static void memcg_stats_handle_in_node(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); + +struct split_iterator { + char *str; + char *save; +}; + +static char *split_next(struct split_iterator *it) +{ + char *ret = strtok_r(it->str, " \t\n\r", &it->save); + + it->str = NULL; + return ret; +} + +static void memcg_stats_handle_searching(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line) +{ + struct split_iterator it = { .str = line }; + char *prefix = split_next(&it); + char *memcg_id = split_next(&it); + char *memcg_name = split_next(&it); + char *end; + + ctx->consumed = true; + + if (!prefix || strcmp("memcg", prefix)) + return; /* Not a memcg line (maybe empty), skip */ + + TEST_ASSERT(memcg_id && memcg_name, + "malformed memcg line; no memcg id or memcg_name"); + + if (strcmp(memcg_name + 1, ctx->name)) + return; /* Wrong memcg, skip */ + + /* Found it! */ + + stats->memcg_id = strtoul(memcg_id, &end, 10); + TEST_ASSERT(*end == '\0', "malformed memcg id '%s'", memcg_id); + if (!stats->memcg_id) + return; /* Removed memcg? */ + + ctx->next_handler = memcg_stats_handle_in_memcg; +} + +static void memcg_stats_handle_in_memcg(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line) +{ + struct split_iterator it = { .str = line }; + char *prefix = split_next(&it); + char *id = split_next(&it); + long found_node_id; + char *end; + + ctx->consumed = true; + ctx->current_node_idx = -1; + + if (!prefix) + return; /* Skip empty lines */ + + if (!strcmp("memcg", prefix)) { + /* Memcg done, found next one; stop. */ + ctx->next_handler = NULL; + return; + } else if (strcmp("node", prefix)) + TEST_ASSERT(false, "found malformed line after 'memcg ...'," + "token: '%s'", prefix); + + /* At this point we know we have a node line. Parse the ID. */ + + TEST_ASSERT(id, "malformed node line; no node id"); + + found_node_id = strtol(id, &end, 10); + TEST_ASSERT(*end == '\0', "malformed node id '%s'", id); + + ctx->current_node_idx = stats->nr_nodes++; + TEST_ASSERT(ctx->current_node_idx < MAX_NR_NODES, + "memcg has stats for too many nodes, max is %d", + MAX_NR_NODES); + stats->nodes[ctx->current_node_idx].node = found_node_id; + + ctx->next_handler = memcg_stats_handle_in_node; +} + +static void memcg_stats_handle_in_node(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line) +{ + char *my_line = strdup(line); + struct split_iterator it = { .str = my_line }; + char *gen, *age, *nr_anon, *nr_file; + struct node_stats *node_stats; + struct generation_stats *gen_stats; + char *end; + + TEST_ASSERT(it.str, "failed to copy input line"); + + gen = split_next(&it); + + if (!gen) + goto out_consume; /* Skip empty lines */ + + if (!strcmp("memcg", gen) || !strcmp("node", gen)) { + /* + * Reached next memcg or node section. Don't consume, let the + * other handler deal with this. + */ + ctx->next_handler = memcg_stats_handle_in_memcg; + goto out; + } + + node_stats = &stats->nodes[ctx->current_node_idx]; + TEST_ASSERT(node_stats->nr_gens < MAX_NR_GENS, + "found too many generation lines; max is %d", + MAX_NR_GENS); + gen_stats = &node_stats->gens[node_stats->nr_gens++]; + + age = split_next(&it); + nr_anon = split_next(&it); + nr_file = split_next(&it); + + TEST_ASSERT(age && nr_anon && nr_file, + "malformed generation line; not enough tokens"); + + gen_stats->gen = (int)strtol(gen, &end, 10); + TEST_ASSERT(*end == '\0', "malformed generation number '%s'", gen); + + gen_stats->age_ms = strtol(age, &end, 10); + TEST_ASSERT(*end == '\0', "malformed generation age '%s'", age); + + gen_stats->nr_anon = strtol(nr_anon, &end, 10); + TEST_ASSERT(*end == '\0', "malformed anonymous page count '%s'", + nr_anon); + + gen_stats->nr_file = strtol(nr_file, &end, 10); + TEST_ASSERT(*end == '\0', "malformed file page count '%s'", nr_file); + +out_consume: + ctx->consumed = true; +out: + free(my_line); +} + +static void print_memcg_stats(const struct memcg_stats *stats, const char *name) +{ + int node, gen; + + pr_debug("stats for memcg %s (id %lu):\n", name, stats->memcg_id); + for (node = 0; node < stats->nr_nodes; ++node) { + pr_debug("\tnode %d\n", stats->nodes[node].node); + for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) { + const struct generation_stats *gstats = + &stats->nodes[node].gens[gen]; + + pr_debug("\t\tgen %d\tage_ms %ld" + "\tnr_anon %ld\tnr_file %ld\n", + gstats->gen, gstats->age_ms, gstats->nr_anon, + gstats->nr_file); + } + } +} + +/* Re-read lru_gen debugfs information for @memcg into @stats. */ +void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg) +{ + FILE *f; + ssize_t read = 0; + char *line = NULL; + size_t bufsz; + struct memcg_stats_parse_context ctx = { + .next_handler = memcg_stats_handle_searching, + .name = memcg, + }; + + memset(stats, 0, sizeof(struct memcg_stats)); + + f = fopen(LRU_GEN_DEBUGFS, "r"); + TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS); + + while (ctx.next_handler && (read = getline(&line, &bufsz, f)) > 0) { + ctx.consumed = false; + + do { + ctx.next_handler(stats, &ctx, line); + if (!ctx.next_handler) + break; + } while (!ctx.consumed); + } + + if (read < 0 && !feof(f)) + TEST_ASSERT(false, "getline(%s) failed", LRU_GEN_DEBUGFS); + + TEST_ASSERT(stats->memcg_id > 0, "Couldn't find memcg: %s\n" + "Did the memcg get created in the proper mount?", + memcg); + if (line) + free(line); + TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS); + + print_memcg_stats(stats, memcg); +} + +/* + * Find all pages tracked by lru_gen for this memcg in generation @target_gen. + * + * If @target_gen is negative, look for all generations. + */ +long lru_gen_sum_memcg_stats_for_gen(int target_gen, + const struct memcg_stats *stats) +{ + int node, gen; + long total_nr = 0; + + for (node = 0; node < stats->nr_nodes; ++node) { + const struct node_stats *node_stats = &stats->nodes[node]; + + for (gen = 0; gen < node_stats->nr_gens; ++gen) { + const struct generation_stats *gen_stats = + &node_stats->gens[gen]; + + if (target_gen >= 0 && gen_stats->gen != target_gen) + continue; + + total_nr += gen_stats->nr_anon + gen_stats->nr_file; + } + } + + return total_nr; +} + +/* Find all pages tracked by lru_gen for this memcg. */ +long lru_gen_sum_memcg_stats(const struct memcg_stats *stats) +{ + return lru_gen_sum_memcg_stats_for_gen(-1, stats); +} + +/* + * If lru_gen aging should force page table scanning. + * + * If you want to set this to false, you will need to do eviction + * before doing extra aging passes. + */ +static const bool force_scan = true; + +static void run_aging_impl(unsigned long memcg_id, int node_id, int max_gen) +{ + FILE *f = fopen(LRU_GEN_DEBUGFS, "w"); + char *command; + size_t sz; + + TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS); + sz = asprintf(&command, "+ %lu %d %d 1 %d\n", + memcg_id, node_id, max_gen, force_scan); + TEST_ASSERT(sz > 0, "creating aging command failed"); + + pr_debug("Running aging command: %s", command); + if (fwrite(command, sizeof(char), sz, f) < sz) { + TEST_ASSERT(false, "writing aging command %s to %s failed", + command, LRU_GEN_DEBUGFS); + } + + TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS); +} + +void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg) +{ + int node, gen; + + pr_debug("lru_gen: invoking aging...\n"); + + /* Must read memcg stats to construct the proper aging command. */ + lru_gen_read_memcg_stats(stats, memcg); + + for (node = 0; node < stats->nr_nodes; ++node) { + int max_gen = 0; + + for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) { + int this_gen = stats->nodes[node].gens[gen].gen; + + max_gen = max_gen > this_gen ? max_gen : this_gen; + } + + run_aging_impl(stats->memcg_id, stats->nodes[node].node, + max_gen); + } + + /* Re-read so callers get updated information */ + lru_gen_read_memcg_stats(stats, memcg); +} + +/* + * Find which generation contains at least @pages pages, assuming that + * such a generation exists. + */ +int lru_gen_find_generation(const struct memcg_stats *stats, + unsigned long pages) +{ + int node, gen, gen_idx, min_gen = INT_MAX, max_gen = -1; + + for (node = 0; node < stats->nr_nodes; ++node) + for (gen_idx = 0; gen_idx < stats->nodes[node].nr_gens; + ++gen_idx) { + gen = stats->nodes[node].gens[gen_idx].gen; + max_gen = gen > max_gen ? gen : max_gen; + min_gen = gen < min_gen ? gen : min_gen; + } + + for (gen = min_gen; gen <= max_gen; ++gen) + /* See if this generation has enough pages. */ + if (lru_gen_sum_memcg_stats_for_gen(gen, stats) > pages) + return gen; + + return -1; +} + +bool lru_gen_usable(void) +{ + long required_features = LRU_GEN_ENABLED | LRU_GEN_MM_WALK; + int lru_gen_fd, lru_gen_debug_fd; + char mglru_feature_str[8] = {}; + long mglru_features; + + lru_gen_fd = open(LRU_GEN_ENABLED_PATH, O_RDONLY); + if (lru_gen_fd < 0) { + puts("lru_gen: Could not open " LRU_GEN_ENABLED_PATH); + return false; + } + if (read(lru_gen_fd, &mglru_feature_str, 7) < 7) { + puts("lru_gen: Could not read from " LRU_GEN_ENABLED_PATH); + close(lru_gen_fd); + return false; + } + close(lru_gen_fd); + + mglru_features = strtol(mglru_feature_str, NULL, 16); + if ((mglru_features & required_features) != required_features) { + printf("lru_gen: missing features, got: 0x%lx, expected: 0x%lx\n", + mglru_features, required_features); + printf("lru_gen: Try 'echo 0x%lx > /sys/kernel/mm/lru_gen/enabled'\n", + required_features); + return false; + } + + lru_gen_debug_fd = open(LRU_GEN_DEBUGFS, O_RDWR); + __TEST_REQUIRE(lru_gen_debug_fd >= 0, + "lru_gen: Could not open " LRU_GEN_DEBUGFS ", " + "but lru_gen is enabled, so cannot use page_idle."); + close(lru_gen_debug_fd); + return true; +} diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 8ed0b74ae837..03eb99af9b8d 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -132,37 +132,57 @@ void print_skip(const char *fmt, ...) puts(", skipping test"); } -bool thp_configured(void) +static bool test_sysfs_path(const char *path) { - int ret; struct stat statbuf; + int ret; - ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf); + ret = stat(path, &statbuf); TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT), - "Error in stating /sys/kernel/mm/transparent_hugepage"); + "Error in stat()ing '%s'", path); return ret == 0; } -size_t get_trans_hugepagesz(void) +bool thp_configured(void) +{ + return test_sysfs_path("/sys/kernel/mm/transparent_hugepage"); +} + +static size_t get_sysfs_val(const char *path) { size_t size; FILE *f; int ret; - TEST_ASSERT(thp_configured(), "THP is not configured in host kernel"); - - f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r"); - TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size"); + f = fopen(path, "r"); + TEST_ASSERT(f, "Error opening '%s'", path); ret = fscanf(f, "%ld", &size); + TEST_ASSERT(ret > 0, "Error reading '%s'", path); + + /* Re-scan the input stream to verify the entire file was read. */ ret = fscanf(f, "%ld", &size); - TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size"); - fclose(f); + TEST_ASSERT(ret < 1, "Error reading '%s'", path); + fclose(f); return size; } +size_t get_trans_hugepagesz(void) +{ + TEST_ASSERT(thp_configured(), "THP is not configured in host kernel"); + + return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"); +} + +bool is_numa_balancing_enabled(void) +{ + if (!test_sysfs_path("/proc/sys/kernel/numa_balancing")) + return false; + return get_sysfs_val("/proc/sys/kernel/numa_balancing") == 1; +} + size_t get_def_hugetlb_pagesz(void) { char buf[64]; diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index bd5a802fa7a5..a92dc1dad085 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -639,7 +639,7 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm) sync_global_to_guest(vm, host_cpu_is_amd); sync_global_to_guest(vm, is_forced_emulation_enabled); - if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { + if (is_sev_vm(vm)) { struct kvm_sev_init init = { 0 }; vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); @@ -1156,7 +1156,7 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) void kvm_init_vm_address_properties(struct kvm_vm *vm) { - if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { + if (is_sev_vm(vm)) { vm->arch.sev_fd = open_sev_dev_path_or_exit(); vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT)); vm->gpa_tag_mask = vm->arch.c_bit; diff --git a/tools/testing/selftests/kvm/lib/x86/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c index e9535ee20b7f..c3a9838f4806 100644 --- a/tools/testing/selftests/kvm/lib/x86/sev.c +++ b/tools/testing/selftests/kvm/lib/x86/sev.c @@ -14,7 +14,8 @@ * and find the first range, but that's correct because the condition * expression would cause us to quit the loop. */ -static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region) +static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region, + uint8_t page_type, bool private) { const struct sparsebit *protected_phy_pages = region->protected_phy_pages; const vm_paddr_t gpa_base = region->region.guest_phys_addr; @@ -24,25 +25,35 @@ static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *regio if (!sparsebit_any_set(protected_phy_pages)) return; - sev_register_encrypted_memory(vm, region); + if (!is_sev_snp_vm(vm)) + sev_register_encrypted_memory(vm, region); sparsebit_for_each_set_range(protected_phy_pages, i, j) { const uint64_t size = (j - i + 1) * vm->page_size; const uint64_t offset = (i - lowest_page_in_region) * vm->page_size; - sev_launch_update_data(vm, gpa_base + offset, size); + if (private) + vm_mem_set_private(vm, gpa_base + offset, size); + + if (is_sev_snp_vm(vm)) + snp_launch_update_data(vm, gpa_base + offset, + (uint64_t)addr_gpa2hva(vm, gpa_base + offset), + size, page_type); + else + sev_launch_update_data(vm, gpa_base + offset, size); + } } void sev_vm_init(struct kvm_vm *vm) { if (vm->type == KVM_X86_DEFAULT_VM) { - assert(vm->arch.sev_fd == -1); + TEST_ASSERT_EQ(vm->arch.sev_fd, -1); vm->arch.sev_fd = open_sev_dev_path_or_exit(); vm_sev_ioctl(vm, KVM_SEV_INIT, NULL); } else { struct kvm_sev_init init = { 0 }; - assert(vm->type == KVM_X86_SEV_VM); + TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_VM); vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); } } @@ -50,16 +61,24 @@ void sev_vm_init(struct kvm_vm *vm) void sev_es_vm_init(struct kvm_vm *vm) { if (vm->type == KVM_X86_DEFAULT_VM) { - assert(vm->arch.sev_fd == -1); + TEST_ASSERT_EQ(vm->arch.sev_fd, -1); vm->arch.sev_fd = open_sev_dev_path_or_exit(); vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL); } else { struct kvm_sev_init init = { 0 }; - assert(vm->type == KVM_X86_SEV_ES_VM); + TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_ES_VM); vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); } } +void snp_vm_init(struct kvm_vm *vm) +{ + struct kvm_sev_init init = { 0 }; + + TEST_ASSERT_EQ(vm->type, KVM_X86_SNP_VM); + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); +} + void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) { struct kvm_sev_launch_start launch_start = { @@ -76,7 +95,7 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE); hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) - encrypt_region(vm, region); + encrypt_region(vm, region, KVM_SEV_PAGE_TYPE_INVALID, false); if (policy & SEV_POLICY_ES) vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); @@ -112,6 +131,33 @@ void sev_vm_launch_finish(struct kvm_vm *vm) TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING); } +void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy) +{ + struct kvm_sev_snp_launch_start launch_start = { + .policy = policy, + }; + + vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_START, &launch_start); +} + +void snp_vm_launch_update(struct kvm_vm *vm) +{ + struct userspace_mem_region *region; + int ctr; + + hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) + encrypt_region(vm, region, KVM_SEV_SNP_PAGE_TYPE_NORMAL, true); + + vm->arch.is_pt_protected = true; +} + +void snp_vm_launch_finish(struct kvm_vm *vm) +{ + struct kvm_sev_snp_launch_finish launch_finish = { 0 }; + + vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_FINISH, &launch_finish); +} + struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, struct kvm_vcpu **cpu) { @@ -128,8 +174,20 @@ struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, return vm; } -void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement) +void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement) { + if (is_sev_snp_vm(vm)) { + vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, BIT(KVM_HC_MAP_GPA_RANGE)); + + snp_vm_launch_start(vm, policy); + + snp_vm_launch_update(vm); + + snp_vm_launch_finish(vm); + + return; + } + sev_vm_launch(vm, policy); if (!measurement) diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c new file mode 100644 index 000000000000..2ac89d6c1e46 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/fastops_test.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +/* + * Execute a fastop() instruction, with or without forced emulation. BT bit 0 + * to set RFLAGS.CF based on whether or not the input is even or odd, so that + * instructions like ADC and SBB are deterministic. + */ +#define guest_execute_fastop_1(FEP, insn, __val, __flags) \ +({ \ + __asm__ __volatile__("bt $0, %[val]\n\t" \ + FEP insn " %[val]\n\t" \ + "pushfq\n\t" \ + "pop %[flags]\n\t" \ + : [val]"+r"(__val), [flags]"=r"(__flags) \ + : : "cc", "memory"); \ +}) + +#define guest_test_fastop_1(insn, type_t, __val) \ +({ \ + type_t val = __val, ex_val = __val, input = __val; \ + uint64_t flags, ex_flags; \ + \ + guest_execute_fastop_1("", insn, ex_val, ex_flags); \ + guest_execute_fastop_1(KVM_FEP, insn, val, flags); \ + \ + __GUEST_ASSERT(val == ex_val, \ + "Wanted 0x%lx for '%s 0x%lx', got 0x%lx", \ + (uint64_t)ex_val, insn, (uint64_t)input, (uint64_t)val); \ + __GUEST_ASSERT(flags == ex_flags, \ + "Wanted flags 0x%lx for '%s 0x%lx', got 0x%lx", \ + ex_flags, insn, (uint64_t)input, flags); \ +}) + +#define guest_execute_fastop_2(FEP, insn, __input, __output, __flags) \ +({ \ + __asm__ __volatile__("bt $0, %[output]\n\t" \ + FEP insn " %[input], %[output]\n\t" \ + "pushfq\n\t" \ + "pop %[flags]\n\t" \ + : [output]"+r"(__output), [flags]"=r"(__flags) \ + : [input]"r"(__input) : "cc", "memory"); \ +}) + +#define guest_test_fastop_2(insn, type_t, __val1, __val2) \ +({ \ + type_t input = __val1, input2 = __val2, output = __val2, ex_output = __val2; \ + uint64_t flags, ex_flags; \ + \ + guest_execute_fastop_2("", insn, input, ex_output, ex_flags); \ + guest_execute_fastop_2(KVM_FEP, insn, input, output, flags); \ + \ + __GUEST_ASSERT(output == ex_output, \ + "Wanted 0x%lx for '%s 0x%lx 0x%lx', got 0x%lx", \ + (uint64_t)ex_output, insn, (uint64_t)input, \ + (uint64_t)input2, (uint64_t)output); \ + __GUEST_ASSERT(flags == ex_flags, \ + "Wanted flags 0x%lx for '%s 0x%lx, 0x%lx', got 0x%lx", \ + ex_flags, insn, (uint64_t)input, (uint64_t)input2, flags); \ +}) + +#define guest_execute_fastop_cl(FEP, insn, __shift, __output, __flags) \ +({ \ + __asm__ __volatile__("bt $0, %[output]\n\t" \ + FEP insn " %%cl, %[output]\n\t" \ + "pushfq\n\t" \ + "pop %[flags]\n\t" \ + : [output]"+r"(__output), [flags]"=r"(__flags) \ + : "c"(__shift) : "cc", "memory"); \ +}) + +#define guest_test_fastop_cl(insn, type_t, __val1, __val2) \ +({ \ + type_t output = __val2, ex_output = __val2, input = __val2; \ + uint8_t shift = __val1; \ + uint64_t flags, ex_flags; \ + \ + guest_execute_fastop_cl("", insn, shift, ex_output, ex_flags); \ + guest_execute_fastop_cl(KVM_FEP, insn, shift, output, flags); \ + \ + __GUEST_ASSERT(output == ex_output, \ + "Wanted 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \ + (uint64_t)ex_output, insn, shift, (uint64_t)input, \ + (uint64_t)output); \ + __GUEST_ASSERT(flags == ex_flags, \ + "Wanted flags 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \ + ex_flags, insn, shift, (uint64_t)input, flags); \ +}) + +static const uint64_t vals[] = { + 0, + 1, + 2, + 4, + 7, + 0x5555555555555555, + 0xaaaaaaaaaaaaaaaa, + 0xfefefefefefefefe, + 0xffffffffffffffff, +}; + +#define guest_test_fastops(type_t, suffix) \ +do { \ + int i, j; \ + \ + for (i = 0; i < ARRAY_SIZE(vals); i++) { \ + guest_test_fastop_1("dec" suffix, type_t, vals[i]); \ + guest_test_fastop_1("inc" suffix, type_t, vals[i]); \ + guest_test_fastop_1("neg" suffix, type_t, vals[i]); \ + guest_test_fastop_1("not" suffix, type_t, vals[i]); \ + \ + for (j = 0; j < ARRAY_SIZE(vals); j++) { \ + guest_test_fastop_2("add" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("adc" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("and" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("bsf" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("bsr" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("bt" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("btc" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("btr" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("bts" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("cmp" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("imul" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("or" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("sbb" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("sub" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("test" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("xor" suffix, type_t, vals[i], vals[j]); \ + \ + guest_test_fastop_cl("rol" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("ror" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("rcl" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("rcr" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("sar" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("shl" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("shr" suffix, type_t, vals[i], vals[j]); \ + } \ + } \ +} while (0) + +static void guest_code(void) +{ + guest_test_fastops(uint16_t, "w"); + guest_test_fastops(uint32_t, "l"); + guest_test_fastops(uint64_t, "q"); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(is_forced_emulation_enabled); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c index 4e920705681a..c863a689aa98 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c @@ -22,25 +22,6 @@ static void guest_code(void) { } -static bool smt_possible(void) -{ - char buf[16]; - FILE *f; - bool res = true; - - f = fopen("/sys/devices/system/cpu/smt/control", "r"); - if (f) { - if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) { - if (!strncmp(buf, "forceoff", 8) || - !strncmp(buf, "notsupported", 12)) - res = false; - } - fclose(f); - } - - return res; -} - static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected) { const bool has_irqchip = !vcpu || vcpu->vm->has_irqchip; @@ -93,7 +74,7 @@ static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected) case 0x40000004: test_val = entry->eax & (1UL << 18); - TEST_ASSERT(!!test_val == !smt_possible(), + TEST_ASSERT(!!test_val == !is_smt_possible(), "NoNonArchitecturalCoreSharing bit" " doesn't reflect SMT setting"); diff --git a/tools/testing/selftests/kvm/x86/kvm_buslock_test.c b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c new file mode 100644 index 000000000000..d88500c118eb --- /dev/null +++ b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. + */ +#include <linux/atomic.h> + +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "vmx.h" +#include "test_util.h" + +#define NR_BUS_LOCKS_PER_LEVEL 100 +#define CACHE_LINE_SIZE 64 + +/* + * To generate a bus lock, carve out a buffer that precisely occupies two cache + * lines and perform an atomic access that splits the two lines. + */ +static u8 buffer[CACHE_LINE_SIZE * 2] __aligned(CACHE_LINE_SIZE); +static atomic_t *val = (void *)&buffer[CACHE_LINE_SIZE - (sizeof(*val) / 2)]; + +static void guest_generate_buslocks(void) +{ + for (int i = 0; i < NR_BUS_LOCKS_PER_LEVEL; i++) + atomic_inc(val); +} + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code(void) +{ + guest_generate_buslocks(); + GUEST_DONE(); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + run_guest(vmcb, svm->vmcb_gpa); +} + +static void l1_vmx_code(struct vmx_pages *vmx) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true); + GUEST_ASSERT_EQ(load_vmcs(vmx), true); + + prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code)); + GUEST_ASSERT(!vmlaunch()); +} + +static void guest_code(void *test_data) +{ + guest_generate_buslocks(); + + if (this_cpu_has(X86_FEATURE_SVM)) + l1_svm_code(test_data); + else if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(test_data); + else + GUEST_DONE(); + + TEST_FAIL("L2 should have signaled 'done'"); +} + +int main(int argc, char *argv[]) +{ + const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX); + vm_vaddr_t nested_test_data_gva; + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + int i, bus_locks = 0; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_BUS_LOCK_EXIT)); + + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_X86_BUS_LOCK_EXIT, KVM_BUS_LOCK_DETECTION_EXIT); + vcpu = vm_vcpu_add(vm, 0, guest_code); + + if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_test_data_gva); + else + vcpu_alloc_vmx(vm, &nested_test_data_gva); + + vcpu_args_set(vcpu, 1, nested_test_data_gva); + + run = vcpu->run; + + for (i = 0; i <= NR_BUS_LOCKS_PER_LEVEL * (1 + has_nested); i++) { + struct ucall uc; + + vcpu_run(vcpu); + + if (run->exit_reason == KVM_EXIT_IO) { + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + goto done; + case UCALL_SYNC: + continue; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } + } + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_BUS_LOCK); + + /* + * Verify the counter is actually getting incremented, e.g. that + * KVM isn't skipping the instruction. On Intel, the exit is + * trap-like, i.e. the counter should already have been + * incremented. On AMD, it's fault-like, i.e. the counter will + * be incremented when the guest re-executes the instruction. + */ + sync_global_from_guest(vm, *val); + TEST_ASSERT_EQ(atomic_read(val), bus_locks + host_cpu_is_intel); + + bus_locks++; + } + TEST_FAIL("Didn't receive UCALL_DONE, took %u bus lock exits\n", bus_locks); +done: + TEST_ASSERT_EQ(i, bus_locks); + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c index 3fb967f40c6a..b238615196ad 100644 --- a/tools/testing/selftests/kvm/x86/sev_init2_tests.c +++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c @@ -28,6 +28,7 @@ int kvm_fd; u64 supported_vmsa_features; bool have_sev_es; +bool have_snp; static int __sev_ioctl(int vm_fd, int cmd_id, void *data) { @@ -83,6 +84,9 @@ void test_vm_types(void) if (have_sev_es) test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){}); + if (have_snp) + test_init2(KVM_X86_SNP_VM, &(struct kvm_sev_init){}); + test_init2_invalid(0, &(struct kvm_sev_init){}, "VM type is KVM_X86_DEFAULT_VM"); if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)) @@ -138,15 +142,24 @@ int main(int argc, char *argv[]) "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)", kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM); + have_snp = kvm_cpu_has(X86_FEATURE_SEV_SNP); + TEST_ASSERT(have_snp == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SNP_VM)), + "sev-snp: KVM_CAP_VM_TYPES (%x) indicates SNP support (bit %d), but CPUID does not", + kvm_check_cap(KVM_CAP_VM_TYPES), KVM_X86_SNP_VM); + test_vm_types(); test_flags(KVM_X86_SEV_VM); if (have_sev_es) test_flags(KVM_X86_SEV_ES_VM); + if (have_snp) + test_flags(KVM_X86_SNP_VM); test_features(KVM_X86_SEV_VM, 0); if (have_sev_es) test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features); + if (have_snp) + test_features(KVM_X86_SNP_VM, supported_vmsa_features); return 0; } diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index d97816dc476a..77256c89bb8d 100644 --- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -16,6 +16,18 @@ #define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM) +static void guest_snp_code(void) +{ + uint64_t sev_msr = rdmsr(MSR_AMD64_SEV); + + GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ENABLED); + GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ES_ENABLED); + GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_SNP_ENABLED); + + wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ); + vmgexit(); +} + static void guest_sev_es_code(void) { /* TODO: Check CPUID after GHCB-based hypercall support is added. */ @@ -27,7 +39,7 @@ static void guest_sev_es_code(void) * force "termination" to signal "done" via the GHCB MSR protocol. */ wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ); - __asm__ __volatile__("rep; vmmcall"); + vmgexit(); } static void guest_sev_code(void) @@ -62,7 +74,7 @@ static void compare_xsave(u8 *from_host, u8 *from_guest) abort(); } -static void test_sync_vmsa(uint32_t policy) +static void test_sync_vmsa(uint32_t type, uint64_t policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -72,7 +84,7 @@ static void test_sync_vmsa(uint32_t policy) double x87val = M_PI; struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 }; - vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu); + vm = vm_sev_create_with_one_vcpu(type, guest_code_xsave, &vcpu); gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR, MEM_REGION_TEST_DATA); hva = addr_gva2hva(vm, gva); @@ -89,7 +101,7 @@ static void test_sync_vmsa(uint32_t policy) : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"); vcpu_xsave_set(vcpu, &xsave); - vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL); + vm_sev_launch(vm, policy, NULL); /* This page is shared, so make it decrypted. */ memset(hva, 0, 4096); @@ -108,14 +120,12 @@ static void test_sync_vmsa(uint32_t policy) kvm_vm_free(vm); } -static void test_sev(void *guest_code, uint64_t policy) +static void test_sev(void *guest_code, uint32_t type, uint64_t policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; - uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; - vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu); /* TODO: Validate the measurement is as expected. */ @@ -124,7 +134,7 @@ static void test_sev(void *guest_code, uint64_t policy) for (;;) { vcpu_run(vcpu); - if (policy & SEV_POLICY_ES) { + if (is_sev_es_vm(vm)) { TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT, "Wanted SYSTEM_EVENT, got %s", exit_reason_str(vcpu->run->exit_reason)); @@ -161,16 +171,14 @@ static void guest_shutdown_code(void) __asm__ __volatile__("ud2"); } -static void test_sev_es_shutdown(void) +static void test_sev_shutdown(uint32_t type, uint64_t policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - uint32_t type = KVM_X86_SEV_ES_VM; - vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu); - vm_sev_launch(vm, SEV_POLICY_ES, NULL); + vm_sev_launch(vm, policy, NULL); vcpu_run(vcpu); TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN, @@ -180,27 +188,42 @@ static void test_sev_es_shutdown(void) kvm_vm_free(vm); } -int main(int argc, char *argv[]) +static void test_sev_smoke(void *guest, uint32_t type, uint64_t policy) { const u64 xf_mask = XFEATURE_MASK_X87_AVX; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV)); - - test_sev(guest_sev_code, SEV_POLICY_NO_DBG); - test_sev(guest_sev_code, 0); + if (type == KVM_X86_SNP_VM) + test_sev(guest, type, policy | SNP_POLICY_DBG); + else + test_sev(guest, type, policy | SEV_POLICY_NO_DBG); + test_sev(guest, type, policy); - if (kvm_cpu_has(X86_FEATURE_SEV_ES)) { - test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG); - test_sev(guest_sev_es_code, SEV_POLICY_ES); + if (type == KVM_X86_SEV_VM) + return; - test_sev_es_shutdown(); + test_sev_shutdown(type, policy); - if (kvm_has_cap(KVM_CAP_XCRS) && - (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) { - test_sync_vmsa(0); - test_sync_vmsa(SEV_POLICY_NO_DBG); - } + if (kvm_has_cap(KVM_CAP_XCRS) && + (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) { + test_sync_vmsa(type, policy); + if (type == KVM_X86_SNP_VM) + test_sync_vmsa(type, policy | SNP_POLICY_DBG); + else + test_sync_vmsa(type, policy | SEV_POLICY_NO_DBG); } +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV)); + + test_sev_smoke(guest_sev_code, KVM_X86_SEV_VM, 0); + + if (kvm_cpu_has(X86_FEATURE_SEV_ES)) + test_sev_smoke(guest_sev_es_code, KVM_X86_SEV_ES_VM, SEV_POLICY_ES); + + if (kvm_cpu_has(X86_FEATURE_SEV_SNP)) + test_sev_smoke(guest_snp_code, KVM_X86_SNP_VM, snp_default_policy()); return 0; } diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index c5241b193db8..824266982aa3 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -20,6 +20,7 @@ mremap_test on-fault-limit transhuge-stress pagemap_ioctl +pfnmap *.tmp* protection_keys protection_keys_32 @@ -58,3 +59,4 @@ hugetlb_dio pkey_sighandler_tests_32 pkey_sighandler_tests_64 guard-regions +merge diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 8270895039d1..ae6f994d3add 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -84,6 +84,7 @@ TEST_GEN_FILES += mremap_test TEST_GEN_FILES += mseal_test TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += pagemap_ioctl +TEST_GEN_FILES += pfnmap TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress @@ -98,6 +99,7 @@ TEST_GEN_FILES += hugetlb_madv_vs_map TEST_GEN_FILES += hugetlb_dio TEST_GEN_FILES += droppable TEST_GEN_FILES += guard-regions +TEST_GEN_FILES += merge ifneq ($(ARCH),arm64) TEST_GEN_FILES += soft-dirty diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index b6cfe0a4b7df..dbbcc5eb3dce 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -112,9 +112,12 @@ struct comm_pipes { static int setup_comm_pipes(struct comm_pipes *comm_pipes) { - if (pipe(comm_pipes->child_ready) < 0) + if (pipe(comm_pipes->child_ready) < 0) { + ksft_perror("pipe()"); return -errno; + } if (pipe(comm_pipes->parent_ready) < 0) { + ksft_perror("pipe()"); close(comm_pipes->child_ready[0]); close(comm_pipes->child_ready[1]); return -errno; @@ -207,13 +210,14 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); return; } ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } else if (!ret) { exit(fn(mem, size, &comm_pipes)); @@ -228,9 +232,18 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, * write-faults by directly mapping pages writable. */ ret = mprotect(mem, size, PROT_READ); - ret |= mprotect(mem, size, PROT_READ|PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); + write(comm_pipes.parent_ready[1], "0", 1); + wait(&ret); + goto close_comm_pipes; + } + + ret = mprotect(mem, size, PROT_READ|PROT_WRITE); + if (ret) { + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); goto close_comm_pipes; @@ -248,16 +261,16 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, ret = -EINVAL; if (!ret) { - ksft_test_result_pass("No leak from parent into child\n"); + log_test_result(KSFT_PASS); } else if (xfail) { /* * With hugetlb, some vmsplice() tests are currently expected to * fail because (a) harder to fix and (b) nobody really cares. * Flag them as expected failure for now. */ - ksft_test_result_xfail("Leak from parent into child\n"); + log_test_result(KSFT_XFAIL); } else { - ksft_test_result_fail("Leak from parent into child\n"); + log_test_result(KSFT_FAIL); } close_comm_pipes: close_comm_pipes(&comm_pipes); @@ -306,26 +319,29 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); goto free; } if (pipe(fds) < 0) { - ksft_test_result_fail("pipe() failed\n"); + ksft_perror("pipe() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } if (before_fork) { transferred = vmsplice(fds[1], &iov, 1, 0); if (transferred <= 0) { - ksft_test_result_fail("vmsplice() failed\n"); + ksft_print_msg("vmsplice() failed\n"); + log_test_result(KSFT_FAIL); goto close_pipe; } } ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed\n"); + log_test_result(KSFT_FAIL); goto close_pipe; } else if (!ret) { write(comm_pipes.child_ready[1], "0", 1); @@ -339,7 +355,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, if (!before_fork) { transferred = vmsplice(fds[1], &iov, 1, 0); if (transferred <= 0) { - ksft_test_result_fail("vmsplice() failed\n"); + ksft_perror("vmsplice() failed"); + log_test_result(KSFT_FAIL); wait(&ret); goto close_pipe; } @@ -348,7 +365,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, while (read(comm_pipes.child_ready[0], &buf, 1) != 1) ; if (munmap(mem, size) < 0) { - ksft_test_result_fail("munmap() failed\n"); + ksft_perror("munmap() failed"); + log_test_result(KSFT_FAIL); goto close_pipe; } write(comm_pipes.parent_ready[1], "0", 1); @@ -356,7 +374,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, /* Wait until the child is done writing. */ wait(&ret); if (!WIFEXITED(ret)) { - ksft_test_result_fail("wait() failed\n"); + ksft_perror("wait() failed"); + log_test_result(KSFT_FAIL); goto close_pipe; } @@ -364,22 +383,23 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, for (total = 0; total < transferred; total += cur) { cur = read(fds[0], new + total, transferred - total); if (cur < 0) { - ksft_test_result_fail("read() failed\n"); + ksft_perror("read() failed"); + log_test_result(KSFT_FAIL); goto close_pipe; } } if (!memcmp(old, new, transferred)) { - ksft_test_result_pass("No leak from child into parent\n"); + log_test_result(KSFT_PASS); } else if (xfail) { /* * With hugetlb, some vmsplice() tests are currently expected to * fail because (a) harder to fix and (b) nobody really cares. * Flag them as expected failure for now. */ - ksft_test_result_xfail("Leak from child into parent\n"); + log_test_result(KSFT_XFAIL); } else { - ksft_test_result_fail("Leak from child into parent\n"); + log_test_result(KSFT_FAIL); } close_pipe: close(fds[0]); @@ -416,13 +436,14 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); return; } file = tmpfile(); if (!file) { - ksft_test_result_fail("tmpfile() failed\n"); + ksft_perror("tmpfile() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } fd = fileno(file); @@ -430,14 +451,16 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) tmp = malloc(size); if (!tmp) { - ksft_test_result_fail("malloc() failed\n"); + ksft_print_msg("malloc() failed\n"); + log_test_result(KSFT_FAIL); goto close_file; } /* Skip on errors, as we might just lack kernel support. */ ret = io_uring_queue_init(1, &ring, 0); if (ret < 0) { - ksft_test_result_skip("io_uring_queue_init() failed\n"); + ksft_print_msg("io_uring_queue_init() failed\n"); + log_test_result(KSFT_SKIP); goto free_tmp; } @@ -452,7 +475,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) iov.iov_len = size; ret = io_uring_register_buffers(&ring, &iov, 1); if (ret) { - ksft_test_result_skip("io_uring_register_buffers() failed\n"); + ksft_print_msg("io_uring_register_buffers() failed\n"); + log_test_result(KSFT_SKIP); goto queue_exit; } @@ -463,7 +487,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) */ ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto unregister_buffers; } else if (!ret) { write(comm_pipes.child_ready[1], "0", 1); @@ -483,10 +508,17 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) * if the page is mapped R/O vs. R/W). */ ret = mprotect(mem, size, PROT_READ); + if (ret) { + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); + goto unregister_buffers; + } + clear_softdirty(); - ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); + ret = mprotect(mem, size, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto unregister_buffers; } } @@ -498,25 +530,29 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) memset(mem, 0xff, size); sqe = io_uring_get_sqe(&ring); if (!sqe) { - ksft_test_result_fail("io_uring_get_sqe() failed\n"); + ksft_print_msg("io_uring_get_sqe() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0); ret = io_uring_submit(&ring); if (ret < 0) { - ksft_test_result_fail("io_uring_submit() failed\n"); + ksft_print_msg("io_uring_submit() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { - ksft_test_result_fail("io_uring_wait_cqe() failed\n"); + ksft_print_msg("io_uring_wait_cqe() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } if (cqe->res != size) { - ksft_test_result_fail("write_fixed failed\n"); + ksft_print_msg("write_fixed failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } io_uring_cqe_seen(&ring, cqe); @@ -526,15 +562,18 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) while (total < size) { cur = pread(fd, tmp + total, size - total, total); if (cur < 0) { - ksft_test_result_fail("pread() failed\n"); + ksft_print_msg("pread() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } total += cur; } /* Finally, check if we read what we expected. */ - ksft_test_result(!memcmp(mem, tmp, size), - "Longterm R/W pin is reliable\n"); + if (!memcmp(mem, tmp, size)) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); quit_child: if (use_fork) { @@ -582,19 +621,21 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, int ret; if (gup_fd < 0) { - ksft_test_result_skip("gup_test not available\n"); + ksft_print_msg("gup_test not available\n"); + log_test_result(KSFT_SKIP); return; } tmp = malloc(size); if (!tmp) { - ksft_test_result_fail("malloc() failed\n"); + ksft_print_msg("malloc() failed\n"); + log_test_result(KSFT_FAIL); return; } ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); goto free_tmp; } @@ -609,7 +650,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, */ ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } else if (!ret) { write(comm_pipes.child_ready[1], "0", 1); @@ -646,7 +688,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, clear_softdirty(); ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } break; @@ -661,9 +704,11 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); if (ret) { if (errno == EINVAL) - ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n"); + ret = KSFT_SKIP; else - ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n"); + ret = KSFT_FAIL; + ksft_perror("PIN_LONGTERM_TEST_START failed"); + log_test_result(ret); goto wait; } @@ -676,22 +721,26 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, */ tmp_val = (__u64)(uintptr_t)tmp; ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val); - if (ret) - ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n"); - else - ksft_test_result(!memcmp(mem, tmp, size), - "Longterm R/O pin is reliable\n"); + if (ret) { + ksft_perror("PIN_LONGTERM_TEST_READ failed"); + log_test_result(KSFT_FAIL); + } else { + if (!memcmp(mem, tmp, size)) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); + } ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); if (ret) - ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n"); + ksft_perror("PIN_LONGTERM_TEST_STOP failed"); wait: switch (test) { case RO_PIN_TEST_SHARED: write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); if (!WIFEXITED(ret)) - ksft_print_msg("[INFO] wait() failed\n"); + ksft_perror("wait() failed"); break; default: break; @@ -746,14 +795,16 @@ static void do_run_with_base_page(test_fn fn, bool swapout) mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } ret = madvise(mem, pagesize, MADV_NOHUGEPAGE); /* Ignore if not around on a kernel. */ if (ret && errno != EINVAL) { - ksft_test_result_fail("MADV_NOHUGEPAGE failed\n"); + ksft_perror("MADV_NOHUGEPAGE failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -763,7 +814,8 @@ static void do_run_with_base_page(test_fn fn, bool swapout) if (swapout) { madvise(mem, pagesize, MADV_PAGEOUT); if (!pagemap_is_swapped(pagemap_fd, mem)) { - ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n"); + log_test_result(KSFT_SKIP); goto munmap; } } @@ -775,13 +827,13 @@ munmap: static void run_with_base_page(test_fn fn, const char *desc) { - ksft_print_msg("[RUN] %s ... with base page\n", desc); + log_test_start("%s ... with base page", desc); do_run_with_base_page(fn, false); } static void run_with_base_page_swap(test_fn fn, const char *desc) { - ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc); + log_test_start("%s ... with swapped out base page", desc); do_run_with_base_page(fn, true); } @@ -807,7 +859,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } @@ -816,7 +869,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) ret = madvise(mem, thpsize, MADV_HUGEPAGE); if (ret) { - ksft_test_result_fail("MADV_HUGEPAGE failed\n"); + ksft_perror("MADV_HUGEPAGE failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -826,7 +880,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ mem[0] = 1; if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) { - ksft_test_result_skip("Did not get a THP populated\n"); + ksft_print_msg("Did not get a THP populated\n"); + log_test_result(KSFT_SKIP); goto munmap; } memset(mem, 1, thpsize); @@ -846,12 +901,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ ret = mprotect(mem + pagesize, pagesize, PROT_READ); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto munmap; } ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto munmap; } break; @@ -863,7 +920,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED); if (ret) { - ksft_test_result_fail("MADV_DONTNEED failed\n"); + ksft_perror("MADV_DONTNEED failed"); + log_test_result(KSFT_FAIL); goto munmap; } size = pagesize; @@ -877,13 +935,15 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) mremap_mem = mmap(NULL, mremap_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mremap_mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } tmp = mremap(mem + mremap_size, mremap_size, mremap_size, MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem); if (tmp != mremap_mem) { - ksft_test_result_fail("mremap() failed\n"); + ksft_perror("mremap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } size = mremap_size; @@ -896,12 +956,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK); if (ret) { - ksft_test_result_fail("MADV_DONTFORK failed\n"); + ksft_perror("MADV_DONTFORK failed"); + log_test_result(KSFT_FAIL); goto munmap; } ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto munmap; } else if (!ret) { exit(0); @@ -910,7 +972,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) /* Allow for sharing all pages again. */ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK); if (ret) { - ksft_test_result_fail("MADV_DOFORK failed\n"); + ksft_perror("MADV_DOFORK failed"); + log_test_result(KSFT_FAIL); goto munmap; } break; @@ -924,7 +987,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) case THP_RUN_SINGLE_PTE_SWAPOUT: madvise(mem, size, MADV_PAGEOUT); if (!range_is_swapped(mem, size)) { - ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n"); + log_test_result(KSFT_SKIP); goto munmap; } break; @@ -941,56 +1005,56 @@ munmap: static void run_with_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n", + log_test_start("%s ... with THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PMD, size); } static void run_with_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n", + log_test_start("%s ... with swapped-out THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size); } static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n", + log_test_start("%s ... with PTE-mapped THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PTE, size); } static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n", + log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size); } static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of THP (%zu kB)\n", + log_test_start("%s ... with single PTE of THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size); } static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n", + log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size); } static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n", + log_test_start("%s ... with partially mremap()'ed THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size); } static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially shared THP (%zu kB)\n", + log_test_start("%s ... with partially shared THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size); } @@ -1000,14 +1064,15 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; char *mem, *dummy; - ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc, + log_test_start("%s ... with hugetlb (%zu kB)", desc, hugetlbsize / 1024); flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT; mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); if (mem == MAP_FAILED) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); return; } @@ -1020,7 +1085,8 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) */ dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); if (dummy == MAP_FAILED) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); goto munmap; } munmap(dummy, hugetlbsize); @@ -1226,7 +1292,7 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); return; } @@ -1236,12 +1302,14 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, */ ret = mprotect(mem + pagesize, pagesize, PROT_READ); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } @@ -1250,8 +1318,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Collapse before actually COW-sharing the page. */ ret = madvise(mem, size, MADV_COLLAPSE); if (ret) { - ksft_test_result_skip("MADV_COLLAPSE failed: %s\n", - strerror(errno)); + ksft_perror("MADV_COLLAPSE failed"); + log_test_result(KSFT_SKIP); goto close_comm_pipes; } break; @@ -1262,7 +1330,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Don't COW-share the upper part of the THP. */ ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK); if (ret) { - ksft_test_result_fail("MADV_DONTFORK failed\n"); + ksft_perror("MADV_DONTFORK failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } break; @@ -1270,7 +1339,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Don't COW-share the lower part of the THP. */ ret = madvise(mem, size / 2, MADV_DONTFORK); if (ret) { - ksft_test_result_fail("MADV_DONTFORK failed\n"); + ksft_perror("MADV_DONTFORK failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } break; @@ -1280,7 +1350,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } else if (!ret) { switch (test) { @@ -1314,7 +1385,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, */ ret = madvise(mem, size, MADV_DOFORK); if (ret) { - ksft_test_result_fail("MADV_DOFORK failed\n"); + ksft_perror("MADV_DOFORK failed"); + log_test_result(KSFT_FAIL); write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); goto close_comm_pipes; @@ -1324,8 +1396,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Collapse before anyone modified the COW-shared page. */ ret = madvise(mem, size, MADV_COLLAPSE); if (ret) { - ksft_test_result_skip("MADV_COLLAPSE failed: %s\n", - strerror(errno)); + ksft_perror("MADV_COLLAPSE failed"); + log_test_result(KSFT_SKIP); write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); goto close_comm_pipes; @@ -1345,7 +1417,10 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, else ret = -EINVAL; - ksft_test_result(!ret, "No leak from parent into child\n"); + if (!ret) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); close_comm_pipes: close_comm_pipes(&comm_pipes); } @@ -1430,7 +1505,7 @@ static void run_anon_thp_test_cases(void) for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) { struct test_case const *test_case = &anon_thp_test_cases[i]; - ksft_print_msg("[RUN] %s\n", test_case->desc); + log_test_start("%s", test_case->desc); do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize); } } @@ -1453,8 +1528,10 @@ static void test_cow(char *mem, const char *smem, size_t size) memset(mem, 0xff, size); /* See if we still read the old values via the other mapping. */ - ksft_test_result(!memcmp(smem, old, size), - "Other mapping not modified\n"); + if (!memcmp(smem, old, size)) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); free(old); } @@ -1472,18 +1549,20 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc) { char *mem, *smem, tmp; - ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc); + log_test_start("%s ... with shared zeropage", desc); mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); if (smem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1504,10 +1583,11 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) size_t mmap_size; int ret; - ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc); + log_test_start("%s ... with huge zeropage", desc); if (!has_huge_zeropage) { - ksft_test_result_skip("Huge zeropage not enabled\n"); + ksft_print_msg("Huge zeropage not enabled\n"); + log_test_result(KSFT_SKIP); return; } @@ -1516,13 +1596,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } mmap_smem = mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_smem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1531,9 +1613,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1)); ret = madvise(mem, pmdsize, MADV_HUGEPAGE); + if (ret != 0) { + ksft_perror("madvise()"); + log_test_result(KSFT_FAIL); + goto munmap; + } ret |= madvise(smem, pmdsize, MADV_HUGEPAGE); - if (ret) { - ksft_test_result_fail("MADV_HUGEPAGE failed\n"); + if (ret != 0) { + ksft_perror("madvise()"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1562,29 +1650,33 @@ static void run_with_memfd(non_anon_test_fn fn, const char *desc) char *mem, *smem, tmp; int fd; - ksft_print_msg("[RUN] %s ... with memfd\n", desc); + log_test_start("%s ... with memfd", desc); fd = memfd_create("test", 0); if (fd < 0) { - ksft_test_result_fail("memfd_create() failed\n"); + ksft_perror("memfd_create() failed"); + log_test_result(KSFT_FAIL); return; } /* File consists of a single page filled with zeroes. */ if (fallocate(fd, 0, 0, pagesize)) { - ksft_test_result_fail("fallocate() failed\n"); + ksft_perror("fallocate() failed"); + log_test_result(KSFT_FAIL); goto close; } /* Create a private mapping of the memfd. */ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto close; } smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); if (smem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1607,35 +1699,40 @@ static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) FILE *file; int fd; - ksft_print_msg("[RUN] %s ... with tmpfile\n", desc); + log_test_start("%s ... with tmpfile", desc); file = tmpfile(); if (!file) { - ksft_test_result_fail("tmpfile() failed\n"); + ksft_perror("tmpfile() failed"); + log_test_result(KSFT_FAIL); return; } fd = fileno(file); if (fd < 0) { - ksft_test_result_skip("fileno() failed\n"); + ksft_perror("fileno() failed"); + log_test_result(KSFT_SKIP); return; } /* File consists of a single page filled with zeroes. */ if (fallocate(fd, 0, 0, pagesize)) { - ksft_test_result_fail("fallocate() failed\n"); + ksft_perror("fallocate() failed"); + log_test_result(KSFT_FAIL); goto close; } /* Create a private mapping of the memfd. */ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto close; } smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); if (smem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1659,20 +1756,22 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, char *mem, *smem, tmp; int fd; - ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc, + log_test_start("%s ... with memfd hugetlb (%zu kB)", desc, hugetlbsize / 1024); flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; fd = memfd_create("test", flags); if (fd < 0) { - ksft_test_result_skip("memfd_create() failed\n"); + ksft_perror("memfd_create() failed"); + log_test_result(KSFT_SKIP); return; } /* File consists of a single page filled with zeroes. */ if (fallocate(fd, 0, 0, hugetlbsize)) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); goto close; } @@ -1680,12 +1779,14 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); goto close; } smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0); if (smem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1771,7 +1872,6 @@ static int tests_per_non_anon_test_case(void) int main(int argc, char **argv) { - int err; struct thp_settings default_settings; ksft_print_header(); @@ -1811,9 +1911,5 @@ int main(int argc, char **argv) thp_restore_settings(); } - err = ksft_get_fail_cnt(); - if (err) - ksft_exit_fail_msg("%d out of %d tests failed\n", - err, ksft_test_num()); - ksft_exit_pass(); + ksft_finished(); } diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index eba43ead13ae..93af3d3760f9 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -8,6 +8,7 @@ #include <fcntl.h> #include <linux/limits.h> #include <linux/userfaultfd.h> +#include <linux/fs.h> #include <setjmp.h> #include <signal.h> #include <stdbool.h> @@ -1452,8 +1453,21 @@ TEST_F(guard_regions, uffd) /* Set up uffd. */ uffd = userfaultfd(0); - if (uffd == -1 && errno == EPERM) - ksft_exit_skip("No userfaultfd permissions, try running as root.\n"); + if (uffd == -1) { + switch (errno) { + case EPERM: + SKIP(return, "No userfaultfd permissions, try running as root."); + break; + case ENOSYS: + SKIP(return, "userfaultfd is not supported/not enabled."); + break; + default: + ksft_exit_fail_msg("userfaultfd failed with %s\n", + strerror(errno)); + break; + } + } + ASSERT_NE(uffd, -1); ASSERT_EQ(ioctl(uffd, UFFDIO_API, &api), 0); @@ -2075,4 +2089,60 @@ TEST_F(guard_regions, pagemap) ASSERT_EQ(munmap(ptr, 10 * page_size), 0); } +/* + * Assert that PAGEMAP_SCAN correctly reports guard region ranges. + */ +TEST_F(guard_regions, pagemap_scan) +{ + const unsigned long page_size = self->page_size; + struct page_region pm_regs[10]; + struct pm_scan_arg pm_scan_args = { + .size = sizeof(struct pm_scan_arg), + .category_anyof_mask = PAGE_IS_GUARD, + .return_mask = PAGE_IS_GUARD, + .vec = (long)&pm_regs, + .vec_len = ARRAY_SIZE(pm_regs), + }; + int proc_fd, i; + char *ptr; + + proc_fd = open("/proc/self/pagemap", O_RDONLY); + ASSERT_NE(proc_fd, -1); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + pm_scan_args.start = (long)ptr; + pm_scan_args.end = (long)ptr + 10 * page_size; + ASSERT_EQ(ioctl(proc_fd, PAGEMAP_SCAN, &pm_scan_args), 0); + ASSERT_EQ(pm_scan_args.walk_end, (long)ptr + 10 * page_size); + + /* Install a guard region in every other page. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(syscall(__NR_madvise, ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* + * Assert ioctl() returns the count of located regions, where each + * region spans every other page within the range of 10 pages. + */ + ASSERT_EQ(ioctl(proc_fd, PAGEMAP_SCAN, &pm_scan_args), 5); + ASSERT_EQ(pm_scan_args.walk_end, (long)ptr + 10 * page_size); + + /* Re-read from pagemap, and assert guard regions are detected. */ + for (i = 0; i < 5; i++) { + long ptr_p = (long)&ptr[2 * i * page_size]; + + ASSERT_EQ(pm_regs[i].start, ptr_p); + ASSERT_EQ(pm_regs[i].end, ptr_p + page_size); + ASSERT_EQ(pm_regs[i].categories, PAGE_IS_GUARD); + } + + ASSERT_EQ(close(proc_fd), 0); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 21595b20bbc3..8a97ac5176a4 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -93,33 +93,48 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) __fsword_t fs_type = get_fs_type(fd); bool should_work; char *mem; + int result = KSFT_PASS; int ret; + if (fd < 0) { + result = KSFT_FAIL; + goto report; + } + if (ftruncate(fd, size)) { if (errno == ENOENT) { skip_test_dodgy_fs("ftruncate()"); } else { - ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno)); + ksft_print_msg("ftruncate() failed (%s)\n", + strerror(errno)); + result = KSFT_FAIL; + goto report; } return; } if (fallocate(fd, 0, 0, size)) { - if (size == pagesize) - ksft_test_result_fail("fallocate() failed (%s)\n", strerror(errno)); - else - ksft_test_result_skip("need more free huge pages\n"); - return; + if (size == pagesize) { + ksft_print_msg("fallocate() failed (%s)\n", strerror(errno)); + result = KSFT_FAIL; + } else { + ksft_print_msg("need more free huge pages\n"); + result = KSFT_SKIP; + } + goto report; } mem = mmap(NULL, size, PROT_READ | PROT_WRITE, shared ? MAP_SHARED : MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - if (size == pagesize || shared) - ksft_test_result_fail("mmap() failed (%s)\n", strerror(errno)); - else - ksft_test_result_skip("need more free huge pages\n"); - return; + if (size == pagesize || shared) { + ksft_print_msg("mmap() failed (%s)\n", strerror(errno)); + result = KSFT_FAIL; + } else { + ksft_print_msg("need more free huge pages\n"); + result = KSFT_SKIP; + } + goto report; } /* Fault in the page such that GUP-fast can pin it directly. */ @@ -134,7 +149,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) */ ret = mprotect(mem, size, PROT_READ); if (ret) { - ksft_test_result_fail("mprotect() failed (%s)\n", strerror(errno)); + ksft_print_msg("mprotect() failed (%s)\n", strerror(errno)); + result = KSFT_FAIL; goto munmap; } /* FALLTHROUGH */ @@ -147,18 +163,20 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) type == TEST_TYPE_RW_FAST; if (gup_fd < 0) { - ksft_test_result_skip("gup_test not available\n"); + ksft_print_msg("gup_test not available\n"); + result = KSFT_SKIP; break; } if (rw && shared && fs_is_unknown(fs_type)) { - ksft_test_result_skip("Unknown filesystem\n"); + ksft_print_msg("Unknown filesystem\n"); + result = KSFT_SKIP; return; } /* * R/O pinning or pinning in a private mapping is always * expected to work. Otherwise, we expect long-term R/W pinning - * to only succeed for special fielesystems. + * to only succeed for special filesystems. */ should_work = !shared || !rw || fs_supports_writable_longterm_pinning(fs_type); @@ -169,14 +187,19 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0; ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); if (ret && errno == EINVAL) { - ksft_test_result_skip("PIN_LONGTERM_TEST_START failed (EINVAL)n"); + ksft_print_msg("PIN_LONGTERM_TEST_START failed (EINVAL)n"); + result = KSFT_SKIP; break; } else if (ret && errno == EFAULT) { - ksft_test_result(!should_work, "Should have failed\n"); + if (should_work) + result = KSFT_FAIL; + else + result = KSFT_PASS; break; } else if (ret) { - ksft_test_result_fail("PIN_LONGTERM_TEST_START failed (%s)\n", - strerror(errno)); + ksft_print_msg("PIN_LONGTERM_TEST_START failed (%s)\n", + strerror(errno)); + result = KSFT_FAIL; break; } @@ -189,7 +212,10 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) * some previously unsupported filesystems, we might want to * perform some additional tests for possible data corruptions. */ - ksft_test_result(should_work, "Should have worked\n"); + if (should_work) + result = KSFT_PASS; + else + result = KSFT_FAIL; break; } #ifdef LOCAL_CONFIG_HAVE_LIBURING @@ -199,8 +225,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* io_uring always pins pages writable. */ if (shared && fs_is_unknown(fs_type)) { - ksft_test_result_skip("Unknown filesystem\n"); - return; + ksft_print_msg("Unknown filesystem\n"); + result = KSFT_SKIP; + goto report; } should_work = !shared || fs_supports_writable_longterm_pinning(fs_type); @@ -208,8 +235,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* Skip on errors, as we might just lack kernel support. */ ret = io_uring_queue_init(1, &ring, 0); if (ret < 0) { - ksft_test_result_skip("io_uring_queue_init() failed (%s)\n", - strerror(-ret)); + ksft_print_msg("io_uring_queue_init() failed (%s)\n", + strerror(-ret)); + result = KSFT_SKIP; break; } /* @@ -222,17 +250,28 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* Only new kernels return EFAULT. */ if (ret && (errno == ENOSPC || errno == EOPNOTSUPP || errno == EFAULT)) { - ksft_test_result(!should_work, "Should have failed (%s)\n", - strerror(errno)); + if (should_work) { + ksft_print_msg("Should have failed (%s)\n", + strerror(errno)); + result = KSFT_FAIL; + } else { + result = KSFT_PASS; + } } else if (ret) { /* * We might just lack support or have insufficient * MEMLOCK limits. */ - ksft_test_result_skip("io_uring_register_buffers() failed (%s)\n", - strerror(-ret)); + ksft_print_msg("io_uring_register_buffers() failed (%s)\n", + strerror(-ret)); + result = KSFT_SKIP; } else { - ksft_test_result(should_work, "Should have worked\n"); + if (should_work) { + result = KSFT_PASS; + } else { + ksft_print_msg("Should have worked\n"); + result = KSFT_FAIL; + } io_uring_unregister_buffers(&ring); } @@ -246,6 +285,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) munmap: munmap(mem, size); +report: + log_test_result(result); } typedef void (*test_fn)(int fd, size_t size); @@ -254,13 +295,11 @@ static void run_with_memfd(test_fn fn, const char *desc) { int fd; - ksft_print_msg("[RUN] %s ... with memfd\n", desc); + log_test_start("%s ... with memfd", desc); fd = memfd_create("test", 0); - if (fd < 0) { - ksft_test_result_fail("memfd_create() failed (%s)\n", strerror(errno)); - return; - } + if (fd < 0) + ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); fn(fd, pagesize); close(fd); @@ -271,23 +310,23 @@ static void run_with_tmpfile(test_fn fn, const char *desc) FILE *file; int fd; - ksft_print_msg("[RUN] %s ... with tmpfile\n", desc); + log_test_start("%s ... with tmpfile", desc); file = tmpfile(); if (!file) { - ksft_test_result_fail("tmpfile() failed (%s)\n", strerror(errno)); - return; - } - - fd = fileno(file); - if (fd < 0) { - ksft_test_result_fail("fileno() failed (%s)\n", strerror(errno)); - goto close; + ksft_print_msg("tmpfile() failed (%s)\n", strerror(errno)); + fd = -1; + } else { + fd = fileno(file); + if (fd < 0) { + ksft_print_msg("fileno() failed (%s)\n", strerror(errno)); + } } fn(fd, pagesize); -close: - fclose(file); + + if (file) + fclose(file); } static void run_with_local_tmpfile(test_fn fn, const char *desc) @@ -295,22 +334,22 @@ static void run_with_local_tmpfile(test_fn fn, const char *desc) char filename[] = __FILE__"_tmpfile_XXXXXX"; int fd; - ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc); + log_test_start("%s ... with local tmpfile", desc); fd = mkstemp(filename); - if (fd < 0) { - ksft_test_result_fail("mkstemp() failed (%s)\n", strerror(errno)); - return; - } + if (fd < 0) + ksft_print_msg("mkstemp() failed (%s)\n", strerror(errno)); if (unlink(filename)) { - ksft_test_result_fail("unlink() failed (%s)\n", strerror(errno)); - goto close; + ksft_print_msg("unlink() failed (%s)\n", strerror(errno)); + close(fd); + fd = -1; } fn(fd, pagesize); -close: - close(fd); + + if (fd >= 0) + close(fd); } static void run_with_memfd_hugetlb(test_fn fn, const char *desc, @@ -319,15 +358,14 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc, int flags = MFD_HUGETLB; int fd; - ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc, + log_test_start("%s ... with memfd hugetlb (%zu kB)", desc, hugetlbsize / 1024); flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; fd = memfd_create("test", flags); if (fd < 0) { - ksft_test_result_skip("memfd_create() failed (%s)\n", strerror(errno)); - return; + ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); } fn(fd, hugetlbsize); @@ -455,7 +493,7 @@ static int tests_per_test_case(void) int main(int argc, char **argv) { - int i, err; + int i; pagesize = getpagesize(); nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, @@ -469,9 +507,5 @@ int main(int argc, char **argv) for (i = 0; i < ARRAY_SIZE(test_cases); i++) run_test_case(&test_cases[i]); - err = ksft_get_fail_cnt(); - if (err) - ksft_exit_fail_msg("%d out of %d tests failed\n", - err, ksft_test_num()); - ksft_exit_pass(); + ksft_finished(); } diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh index 0b0d4ba1af27..0dd31892ff67 100755 --- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh +++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh @@ -36,7 +36,7 @@ else do_umount=1 fi fi -MNT='/mnt/huge/' +MNT='/mnt/huge' function get_machine_hugepage_size() { hpz=$(grep -i hugepagesize /proc/meminfo) @@ -56,10 +56,45 @@ function cleanup() { rmdir "$CGROUP_ROOT"/a/b 2>/dev/null rmdir "$CGROUP_ROOT"/a 2>/dev/null rmdir "$CGROUP_ROOT"/test1 2>/dev/null - echo 0 >/proc/sys/vm/nr_hugepages + echo $nr_hugepgs >/proc/sys/vm/nr_hugepages set -e } +function assert_with_retry() { + local actual_path="$1" + local expected="$2" + local tolerance=$((7 * 1024 * 1024)) + local timeout=20 + local interval=1 + local start_time + local now + local elapsed + local actual + + start_time=$(date +%s) + + while true; do + actual="$(cat "$actual_path")" + + if [[ $actual -ge $(($expected - $tolerance)) ]] && + [[ $actual -le $(($expected + $tolerance)) ]]; then + return 0 + fi + + now=$(date +%s) + elapsed=$((now - start_time)) + + if [[ $elapsed -ge $timeout ]]; then + echo "actual = $((${actual%% *} / 1024 / 1024)) MB" + echo "expected = $((${expected%% *} / 1024 / 1024)) MB" + cleanup + exit 1 + fi + + sleep $interval + done +} + function assert_state() { local expected_a="$1" local expected_a_hugetlb="$2" @@ -70,58 +105,13 @@ function assert_state() { expected_b="$3" expected_b_hugetlb="$4" fi - local tolerance=$((5 * 1024 * 1024)) - - local actual_a - actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)" - if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] || - [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then - echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB - echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB - echo fail - - cleanup - exit 1 - fi - - local actual_a_hugetlb - actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)" - if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] || - [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then - echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB - echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB - echo fail - - cleanup - exit 1 - fi - - if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then - return - fi - - local actual_b - actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)" - if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] || - [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then - echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB - echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB - echo fail - - cleanup - exit 1 - fi - local actual_b_hugetlb - actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)" - if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] || - [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then - echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB - echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB - echo fail + assert_with_retry "$CGROUP_ROOT/a/memory.$usage_file" "$expected_a" + assert_with_retry "$CGROUP_ROOT/a/hugetlb.${MB}MB.$usage_file" "$expected_a_hugetlb" - cleanup - exit 1 + if [[ -n "$expected_b" && -n "$expected_b_hugetlb" ]]; then + assert_with_retry "$CGROUP_ROOT/a/b/memory.$usage_file" "$expected_b" + assert_with_retry "$CGROUP_ROOT/a/b/hugetlb.${MB}MB.$usage_file" "$expected_b_hugetlb" fi } @@ -175,7 +165,6 @@ size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages. cleanup echo -echo echo Test charge, rmdir, uncharge setup echo mkdir @@ -195,7 +184,6 @@ cleanup echo done echo -echo if [[ ! $cgroup2 ]]; then echo "Test parent and child hugetlb usage" setup @@ -212,7 +200,6 @@ if [[ ! $cgroup2 ]]; then assert_state 0 $(($size * 2)) 0 $size rmdir "$CGROUP_ROOT"/a/b - sleep 5 echo Assert memory reparent correctly. assert_state 0 $(($size * 2)) @@ -225,7 +212,6 @@ if [[ ! $cgroup2 ]]; then fi echo -echo echo "Test child only hugetlb usage" echo setup setup diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c index ef7d911da13e..b6fabd5c27ed 100644 --- a/tools/testing/selftests/mm/madv_populate.c +++ b/tools/testing/selftests/mm/madv_populate.c @@ -172,12 +172,12 @@ static void test_populate_read(void) if (addr == MAP_FAILED) ksft_exit_fail_msg("mmap failed\n"); ksft_test_result(range_is_not_populated(addr, SIZE), - "range initially not populated\n"); + "read range initially not populated\n"); ret = madvise(addr, SIZE, MADV_POPULATE_READ); ksft_test_result(!ret, "MADV_POPULATE_READ\n"); ksft_test_result(range_is_populated(addr, SIZE), - "range is populated\n"); + "read range is populated\n"); munmap(addr, SIZE); } @@ -194,12 +194,12 @@ static void test_populate_write(void) if (addr == MAP_FAILED) ksft_exit_fail_msg("mmap failed\n"); ksft_test_result(range_is_not_populated(addr, SIZE), - "range initially not populated\n"); + "write range initially not populated\n"); ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); ksft_test_result(!ret, "MADV_POPULATE_WRITE\n"); ksft_test_result(range_is_populated(addr, SIZE), - "range is populated\n"); + "write range is populated\n"); munmap(addr, SIZE); } @@ -247,19 +247,19 @@ static void test_softdirty(void) /* Clear any softdirty bits. */ clear_softdirty(); ksft_test_result(range_is_not_softdirty(addr, SIZE), - "range is not softdirty\n"); + "cleared range is not softdirty\n"); /* Populating READ should set softdirty. */ ret = madvise(addr, SIZE, MADV_POPULATE_READ); - ksft_test_result(!ret, "MADV_POPULATE_READ\n"); + ksft_test_result(!ret, "softdirty MADV_POPULATE_READ\n"); ksft_test_result(range_is_not_softdirty(addr, SIZE), - "range is not softdirty\n"); + "range is not softdirty after MADV_POPULATE_READ\n"); /* Populating WRITE should set softdirty. */ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); - ksft_test_result(!ret, "MADV_POPULATE_WRITE\n"); + ksft_test_result(!ret, "softdirty MADV_POPULATE_WRITE\n"); ksft_test_result(range_is_softdirty(addr, SIZE), - "range is softdirty\n"); + "range is softdirty after MADV_POPULATE_WRITE \n"); munmap(addr, SIZE); } diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c index d53de2486080..1e9980b8993c 100644 --- a/tools/testing/selftests/mm/map_fixed_noreplace.c +++ b/tools/testing/selftests/mm/map_fixed_noreplace.c @@ -96,7 +96,7 @@ int main(void) ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n"); } ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); - ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n"); + ksft_test_result_pass("Second mmap() 5*PAGE_SIZE at base\n"); /* * Second mapping contained within first: diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c new file mode 100644 index 000000000000..c76646cdf6e6 --- /dev/null +++ b/tools/testing/selftests/mm/merge.c @@ -0,0 +1,455 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE +#include "../kselftest_harness.h" +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/wait.h> +#include "vm_util.h" + +FIXTURE(merge) +{ + unsigned int page_size; + char *carveout; + struct procmap_fd procmap; +}; + +FIXTURE_SETUP(merge) +{ + self->page_size = psize(); + /* Carve out PROT_NONE region to map over. */ + self->carveout = mmap(NULL, 12 * self->page_size, PROT_NONE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(self->carveout, MAP_FAILED); + /* Setup PROCMAP_QUERY interface. */ + ASSERT_EQ(open_self_procmap(&self->procmap), 0); +} + +FIXTURE_TEARDOWN(merge) +{ + ASSERT_EQ(munmap(self->carveout, 12 * self->page_size), 0); + ASSERT_EQ(close_procmap(&self->procmap), 0); +} + +TEST_F(merge, mprotect_unfaulted_left) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr; + + /* + * Map 10 pages of R/W memory within. MAP_NORESERVE so we don't hit + * merge failure due to lack of VM_ACCOUNT flag by mistake. + * + * |-----------------------| + * | unfaulted | + * |-----------------------| + */ + ptr = mmap(&carveout[page_size], 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* + * Now make the first 5 pages read-only, splitting the VMA: + * + * RO RW + * |-----------|-----------| + * | unfaulted | unfaulted | + * |-----------|-----------| + */ + ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ), 0); + /* + * Fault in the first of the last 5 pages so it gets an anon_vma and + * thus the whole VMA becomes 'faulted': + * + * RO RW + * |-----------|-----------| + * | unfaulted | faulted | + * |-----------|-----------| + */ + ptr[5 * page_size] = 'x'; + /* + * Now mprotect() the RW region read-only, we should merge (though for + * ~15 years we did not! :): + * + * RO + * |-----------------------| + * | faulted | + * |-----------------------| + */ + ASSERT_EQ(mprotect(&ptr[5 * page_size], 5 * page_size, PROT_READ), 0); + + /* Assert that the merge succeeded using PROCMAP_QUERY. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); +} + +TEST_F(merge, mprotect_unfaulted_right) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr; + + /* + * |-----------------------| + * | unfaulted | + * |-----------------------| + */ + ptr = mmap(&carveout[page_size], 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* + * Now make the last 5 pages read-only, splitting the VMA: + * + * RW RO + * |-----------|-----------| + * | unfaulted | unfaulted | + * |-----------|-----------| + */ + ASSERT_EQ(mprotect(&ptr[5 * page_size], 5 * page_size, PROT_READ), 0); + /* + * Fault in the first of the first 5 pages so it gets an anon_vma and + * thus the whole VMA becomes 'faulted': + * + * RW RO + * |-----------|-----------| + * | faulted | unfaulted | + * |-----------|-----------| + */ + ptr[0] = 'x'; + /* + * Now mprotect() the RW region read-only, we should merge: + * + * RO + * |-----------------------| + * | faulted | + * |-----------------------| + */ + ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ), 0); + + /* Assert that the merge succeeded using PROCMAP_QUERY. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); +} + +TEST_F(merge, mprotect_unfaulted_both) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr; + + /* + * |-----------------------| + * | unfaulted | + * |-----------------------| + */ + ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* + * Now make the first and last 3 pages read-only, splitting the VMA: + * + * RO RW RO + * |-----------|-----------|-----------| + * | unfaulted | unfaulted | unfaulted | + * |-----------|-----------|-----------| + */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); + ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0); + /* + * Fault in the first of the middle 3 pages so it gets an anon_vma and + * thus the whole VMA becomes 'faulted': + * + * RO RW RO + * |-----------|-----------|-----------| + * | unfaulted | faulted | unfaulted | + * |-----------|-----------|-----------| + */ + ptr[3 * page_size] = 'x'; + /* + * Now mprotect() the RW region read-only, we should merge: + * + * RO + * |-----------------------| + * | faulted | + * |-----------------------| + */ + ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0); + + /* Assert that the merge succeeded using PROCMAP_QUERY. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size); +} + +TEST_F(merge, mprotect_faulted_left_unfaulted_right) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr; + + /* + * |-----------------------| + * | unfaulted | + * |-----------------------| + */ + ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* + * Now make the last 3 pages read-only, splitting the VMA: + * + * RW RO + * |-----------------------|-----------| + * | unfaulted | unfaulted | + * |-----------------------|-----------| + */ + ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0); + /* + * Fault in the first of the first 6 pages so it gets an anon_vma and + * thus the whole VMA becomes 'faulted': + * + * RW RO + * |-----------------------|-----------| + * | unfaulted | unfaulted | + * |-----------------------|-----------| + */ + ptr[0] = 'x'; + /* + * Now make the first 3 pages read-only, splitting the VMA: + * + * RO RW RO + * |-----------|-----------|-----------| + * | faulted | faulted | unfaulted | + * |-----------|-----------|-----------| + */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); + /* + * Now mprotect() the RW region read-only, we should merge: + * + * RO + * |-----------------------| + * | faulted | + * |-----------------------| + */ + ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0); + + /* Assert that the merge succeeded using PROCMAP_QUERY. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size); +} + +TEST_F(merge, mprotect_unfaulted_left_faulted_right) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr; + + /* + * |-----------------------| + * | unfaulted | + * |-----------------------| + */ + ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* + * Now make the first 3 pages read-only, splitting the VMA: + * + * RO RW + * |-----------|-----------------------| + * | unfaulted | unfaulted | + * |-----------|-----------------------| + */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); + /* + * Fault in the first of the last 6 pages so it gets an anon_vma and + * thus the whole VMA becomes 'faulted': + * + * RO RW + * |-----------|-----------------------| + * | unfaulted | faulted | + * |-----------|-----------------------| + */ + ptr[3 * page_size] = 'x'; + /* + * Now make the last 3 pages read-only, splitting the VMA: + * + * RO RW RO + * |-----------|-----------|-----------| + * | unfaulted | faulted | faulted | + * |-----------|-----------|-----------| + */ + ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0); + /* + * Now mprotect() the RW region read-only, we should merge: + * + * RO + * |-----------------------| + * | faulted | + * |-----------------------| + */ + ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0); + + /* Assert that the merge succeeded using PROCMAP_QUERY. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size); +} + +TEST_F(merge, forked_target_vma) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + pid_t pid; + char *ptr, *ptr2; + int i; + + /* + * |-----------| + * | unfaulted | + * |-----------| + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Fault in process. + * + * |-----------| + * | faulted | + * |-----------| + */ + ptr[0] = 'x'; + + pid = fork(); + ASSERT_NE(pid, -1); + + if (pid != 0) { + wait(NULL); + return; + } + + /* Child process below: */ + + /* Reopen for child. */ + ASSERT_EQ(close_procmap(&self->procmap), 0); + ASSERT_EQ(open_self_procmap(&self->procmap), 0); + + /* unCOWing everything does not cause the AVC to go away. */ + for (i = 0; i < 5 * page_size; i += page_size) + ptr[i] = 'x'; + + /* + * Map in adjacent VMA in child. + * + * forked + * |-----------|-----------| + * | faulted | unfaulted | + * |-----------|-----------| + * ptr ptr2 + */ + ptr2 = mmap(&ptr[5 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Make sure not merged. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 5 * page_size); +} + +TEST_F(merge, forked_source_vma) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + pid_t pid; + char *ptr, *ptr2; + int i; + + /* + * |-----------|------------| + * | unfaulted | <unmapped> | + * |-----------|------------| + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Fault in process. + * + * |-----------|------------| + * | faulted | <unmapped> | + * |-----------|------------| + */ + ptr[0] = 'x'; + + pid = fork(); + ASSERT_NE(pid, -1); + + if (pid != 0) { + wait(NULL); + return; + } + + /* Child process below: */ + + /* Reopen for child. */ + ASSERT_EQ(close_procmap(&self->procmap), 0); + ASSERT_EQ(open_self_procmap(&self->procmap), 0); + + /* unCOWing everything does not cause the AVC to go away. */ + for (i = 0; i < 5 * page_size; i += page_size) + ptr[i] = 'x'; + + /* + * Map in adjacent VMA in child, ptr2 after ptr, but incompatible. + * + * forked RW RWX + * |-----------|-----------| + * | faulted | unfaulted | + * |-----------|-----------| + * ptr ptr2 + */ + ptr2 = mmap(&carveout[6 * page_size], 5 * page_size, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Make sure not merged. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr2)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size); + + /* + * Now mprotect forked region to RWX so it becomes the source for the + * merge to unfaulted region: + * + * forked RWX RWX + * |-----------|-----------| + * | faulted | unfaulted | + * |-----------|-----------| + * ptr ptr2 + * + * This should NOT result in a merge, as ptr was forked. + */ + ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ | PROT_WRITE | PROT_EXEC), 0); + /* Again, make sure not merged. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr2)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/mlock2-tests.c b/tools/testing/selftests/mm/mlock2-tests.c index 7f0d50fa361d..3e90ff37e336 100644 --- a/tools/testing/selftests/mm/mlock2-tests.c +++ b/tools/testing/selftests/mm/mlock2-tests.c @@ -196,7 +196,7 @@ static void test_mlock_lock(void) ksft_exit_fail_msg("munlock(): %s\n", strerror(errno)); } - ksft_test_result(!unlock_lock_check(map), "%s: Locked\n", __func__); + ksft_test_result(!unlock_lock_check(map), "%s: Unlocked\n", __func__); munmap(map, 2 * page_size); } diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index 57b4bba2b45f..b07acc86f4f0 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -34,7 +34,7 @@ #define PAGEMAP "/proc/self/pagemap" int pagemap_fd; int uffd; -unsigned int page_size; +unsigned long page_size; unsigned int hpage_size; const char *progname; @@ -112,7 +112,7 @@ int init_uffd(void) return 0; } -int wp_init(void *lpBaseAddress, int dwRegionSize) +int wp_init(void *lpBaseAddress, long dwRegionSize) { struct uffdio_register uffdio_register; struct uffdio_writeprotect wp; @@ -136,7 +136,7 @@ int wp_init(void *lpBaseAddress, int dwRegionSize) return 0; } -int wp_free(void *lpBaseAddress, int dwRegionSize) +int wp_free(void *lpBaseAddress, long dwRegionSize) { struct uffdio_register uffdio_register; @@ -184,7 +184,7 @@ void *gethugetlb_mem(int size, int *shmid) int userfaultfd_tests(void) { - int mem_size, vec_size, written, num_pages = 16; + long mem_size, vec_size, written, num_pages = 16; char *mem, *vec; mem_size = num_pages * page_size; @@ -213,7 +213,7 @@ int userfaultfd_tests(void) written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (written < 0) - ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", written, errno, strerror(errno)); ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", __func__); @@ -995,7 +995,7 @@ int unmapped_region_tests(void) { void *start = (void *)0x10000000; int written, len = 0x00040000; - int vec_size = len / page_size; + long vec_size = len / page_size; struct page_region *vec = malloc(sizeof(struct page_region) * vec_size); /* 1. Get written pages */ @@ -1051,7 +1051,7 @@ static void test_simple(void) int sanity_tests(void) { unsigned long long mem_size, vec_size; - int ret, fd, i, buf_size; + long ret, fd, i, buf_size; struct page_region *vec; char *mem, *fmem; struct stat sbuf; @@ -1160,7 +1160,7 @@ int sanity_tests(void) ret = stat(progname, &sbuf); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c new file mode 100644 index 000000000000..866ac023baf5 --- /dev/null +++ b/tools/testing/selftests/mm/pfnmap.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Basic VM_PFNMAP tests relying on mmap() of '/dev/mem' + * + * Copyright 2025, Red Hat, Inc. + * + * Author(s): David Hildenbrand <david@redhat.com> + */ +#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <unistd.h> +#include <errno.h> +#include <stdio.h> +#include <ctype.h> +#include <fcntl.h> +#include <signal.h> +#include <setjmp.h> +#include <linux/mman.h> +#include <sys/mman.h> +#include <sys/wait.h> + +#include "../kselftest_harness.h" +#include "vm_util.h" + +static sigjmp_buf sigjmp_buf_env; + +static void signal_handler(int sig) +{ + siglongjmp(sigjmp_buf_env, -EFAULT); +} + +static int test_read_access(char *addr, size_t size, size_t pagesize) +{ + size_t offs; + int ret; + + if (signal(SIGSEGV, signal_handler) == SIG_ERR) + return -EINVAL; + + ret = sigsetjmp(sigjmp_buf_env, 1); + if (!ret) { + for (offs = 0; offs < size; offs += pagesize) + /* Force a read that the compiler cannot optimize out. */ + *((volatile char *)(addr + offs)); + } + if (signal(SIGSEGV, SIG_DFL) == SIG_ERR) + return -EINVAL; + + return ret; +} + +static int find_ram_target(off_t *phys_addr, + unsigned long long pagesize) +{ + unsigned long long start, end; + char line[80], *end_ptr; + FILE *file; + + /* Search /proc/iomem for the first suitable "System RAM" range. */ + file = fopen("/proc/iomem", "r"); + if (!file) + return -errno; + + while (fgets(line, sizeof(line), file)) { + /* Ignore any child nodes. */ + if (!isalnum(line[0])) + continue; + + if (!strstr(line, "System RAM\n")) + continue; + + start = strtoull(line, &end_ptr, 16); + /* Skip over the "-" */ + end_ptr++; + /* Make end "exclusive". */ + end = strtoull(end_ptr, NULL, 16) + 1; + + /* Actual addresses are not exported */ + if (!start && !end) + break; + + /* We need full pages. */ + start = (start + pagesize - 1) & ~(pagesize - 1); + end &= ~(pagesize - 1); + + if (start != (off_t)start) + break; + + /* We need two pages. */ + if (end > start + 2 * pagesize) { + fclose(file); + *phys_addr = start; + return 0; + } + } + return -ENOENT; +} + +FIXTURE(pfnmap) +{ + off_t phys_addr; + size_t pagesize; + int dev_mem_fd; + char *addr1; + size_t size1; + char *addr2; + size_t size2; +}; + +FIXTURE_SETUP(pfnmap) +{ + self->pagesize = getpagesize(); + + /* We'll require two physical pages throughout our tests ... */ + if (find_ram_target(&self->phys_addr, self->pagesize)) + SKIP(return, "Cannot find ram target in '/proc/iomem'\n"); + + self->dev_mem_fd = open("/dev/mem", O_RDONLY); + if (self->dev_mem_fd < 0) + SKIP(return, "Cannot open '/dev/mem'\n"); + + self->size1 = self->pagesize * 2; + self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED, + self->dev_mem_fd, self->phys_addr); + if (self->addr1 == MAP_FAILED) + SKIP(return, "Cannot mmap '/dev/mem'\n"); + + /* ... and want to be able to read from them. */ + if (test_read_access(self->addr1, self->size1, self->pagesize)) + SKIP(return, "Cannot read-access mmap'ed '/dev/mem'\n"); + + self->size2 = 0; + self->addr2 = MAP_FAILED; +} + +FIXTURE_TEARDOWN(pfnmap) +{ + if (self->addr2 != MAP_FAILED) + munmap(self->addr2, self->size2); + if (self->addr1 != MAP_FAILED) + munmap(self->addr1, self->size1); + if (self->dev_mem_fd >= 0) + close(self->dev_mem_fd); +} + +TEST_F(pfnmap, madvise_disallowed) +{ + int advices[] = { + MADV_DONTNEED, + MADV_DONTNEED_LOCKED, + MADV_FREE, + MADV_WIPEONFORK, + MADV_COLD, + MADV_PAGEOUT, + MADV_POPULATE_READ, + MADV_POPULATE_WRITE, + }; + int i; + + /* All these advices must be rejected. */ + for (i = 0; i < ARRAY_SIZE(advices); i++) { + EXPECT_LT(madvise(self->addr1, self->pagesize, advices[i]), 0); + EXPECT_EQ(errno, EINVAL); + } +} + +TEST_F(pfnmap, munmap_split) +{ + /* + * Unmap the first page. This munmap() call is not really expected to + * fail, but we might be able to trigger other internal issues. + */ + ASSERT_EQ(munmap(self->addr1, self->pagesize), 0); + + /* + * Remap the first page while the second page is still mapped. This + * makes sure that any PAT tracking on x86 will allow for mmap()'ing + * a page again while some parts of the first mmap() are still + * around. + */ + self->size2 = self->pagesize; + self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED, + self->dev_mem_fd, self->phys_addr); + ASSERT_NE(self->addr2, MAP_FAILED); +} + +TEST_F(pfnmap, mremap_fixed) +{ + char *ret; + + /* Reserve a destination area. */ + self->size2 = self->size1; + self->addr2 = mmap(NULL, self->size2, PROT_READ, MAP_ANON | MAP_PRIVATE, + -1, 0); + ASSERT_NE(self->addr2, MAP_FAILED); + + /* mremap() over our destination. */ + ret = mremap(self->addr1, self->size1, self->size2, + MREMAP_FIXED | MREMAP_MAYMOVE, self->addr2); + ASSERT_NE(ret, MAP_FAILED); +} + +TEST_F(pfnmap, mremap_shrink) +{ + char *ret; + + /* Shrinking is expected to work. */ + ret = mremap(self->addr1, self->size1, self->size1 - self->pagesize, 0); + ASSERT_NE(ret, MAP_FAILED); +} + +TEST_F(pfnmap, mremap_expand) +{ + /* + * Growing is not expected to work, and getting it right would + * be challenging. So this test primarily serves as an early warning + * that something that probably should never work suddenly works. + */ + self->size2 = self->size1 + self->pagesize; + self->addr2 = mremap(self->addr1, self->size1, self->size2, MREMAP_MAYMOVE); + ASSERT_EQ(self->addr2, MAP_FAILED); +} + +TEST_F(pfnmap, fork) +{ + pid_t pid; + int ret; + + /* fork() a child and test if the child can access the pages. */ + pid = fork(); + ASSERT_GE(pid, 0); + + if (!pid) { + EXPECT_EQ(test_read_access(self->addr1, self->size1, + self->pagesize), 0); + exit(0); + } + + wait(&ret); + if (WIFEXITED(ret)) + ret = WEXITSTATUS(ret); + else + ret = -EINVAL; + ASSERT_EQ(ret, 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 9aff33b10999..dddd1dd8af14 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -63,6 +63,8 @@ separated by spaces: test soft dirty page bit semantics - pagemap test pagemap_scan IOCTL +- pfnmap + tests for VM_PFNMAP handling - cow test copy-on-write semantics - thp @@ -79,6 +81,8 @@ separated by spaces: test prctl(PR_SET_MDWE, ...) - page_frag test handling of page fragment allocation and freeing +- vma_merge + test VMA merge cases behave as expected example: ./run_vmtests.sh -t "hmm mmap ksm" EOF @@ -421,6 +425,8 @@ CATEGORY="madv_guard" run_test ./guard-regions # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate +CATEGORY="vma_merge" run_test ./merge + if [ -x ./memfd_secret ] then (echo 0 > /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix @@ -468,6 +474,8 @@ fi CATEGORY="pagemap" run_test ./pagemap_ioctl +CATEGORY="pfnmap" run_test ./pfnmap + # COW tests CATEGORY="cow" run_test ./cow diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index cd5174d735be..a41bc1234b37 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -127,7 +127,7 @@ void test_mmap(unsigned long size, unsigned flags) show(size); ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES, - "%s mmap %lu\n", __func__, size); + "%s mmap %lu %x\n", __func__, size, flags); if (munmap(map, size * NUM_PAGES)) ksft_exit_fail_msg("%s: unmap %s\n", __func__, strerror(errno)); @@ -165,7 +165,7 @@ void test_shmget(unsigned long size, unsigned flags) show(size); ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES, - "%s: mmap %lu\n", __func__, size); + "%s: mmap %lu %x\n", __func__, size, flags); if (shmdt(map)) ksft_exit_fail_msg("%s: shmdt: %s\n", __func__, strerror(errno)); } diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index e8fd9011c2a3..c73fd5d455c8 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -1231,6 +1231,182 @@ static void uffd_move_pmd_split_test(uffd_test_args_t *targs) uffd_move_pmd_handle_fault); } +static bool +uffdio_verify_results(const char *name, int ret, int error, long result) +{ + /* + * Should always return -1 with errno=EAGAIN, with corresponding + * result field updated in ioctl() args to be -EAGAIN too + * (e.g. copy.copy field for UFFDIO_COPY). + */ + if (ret != -1) { + uffd_test_fail("%s should have returned -1", name); + return false; + } + + if (error != EAGAIN) { + uffd_test_fail("%s should have errno==EAGAIN", name); + return false; + } + + if (result != -EAGAIN) { + uffd_test_fail("%s should have been updated for -EAGAIN", + name); + return false; + } + + return true; +} + +/* + * This defines a function to test one ioctl. Note that here "field" can + * be 1 or anything not -EAGAIN. With that initial value set, we can + * verify later that it should be updated by kernel (when -EAGAIN + * returned), by checking whether it is also updated to -EAGAIN. + */ +#define DEFINE_MMAP_CHANGING_TEST(name, ioctl_name, field) \ + static bool uffdio_mmap_changing_test_##name(int fd) \ + { \ + int ret; \ + struct uffdio_##name args = { \ + .field = 1, \ + }; \ + ret = ioctl(fd, ioctl_name, &args); \ + return uffdio_verify_results(#ioctl_name, ret, errno, args.field); \ + } + +DEFINE_MMAP_CHANGING_TEST(zeropage, UFFDIO_ZEROPAGE, zeropage) +DEFINE_MMAP_CHANGING_TEST(copy, UFFDIO_COPY, copy) +DEFINE_MMAP_CHANGING_TEST(move, UFFDIO_MOVE, move) +DEFINE_MMAP_CHANGING_TEST(poison, UFFDIO_POISON, updated) +DEFINE_MMAP_CHANGING_TEST(continue, UFFDIO_CONTINUE, mapped) + +typedef enum { + /* We actually do not care about any state except UNINTERRUPTIBLE.. */ + THR_STATE_UNKNOWN = 0, + THR_STATE_UNINTERRUPTIBLE, +} thread_state; + +static void sleep_short(void) +{ + usleep(1000); +} + +static thread_state thread_state_get(pid_t tid) +{ + const char *header = "State:\t"; + char tmp[256], *p, c; + FILE *fp; + + snprintf(tmp, sizeof(tmp), "/proc/%d/status", tid); + fp = fopen(tmp, "r"); + + if (!fp) + return THR_STATE_UNKNOWN; + + while (fgets(tmp, sizeof(tmp), fp)) { + p = strstr(tmp, header); + if (p) { + /* For example, "State:\tD (disk sleep)" */ + c = *(p + sizeof(header) - 1); + return c == 'D' ? + THR_STATE_UNINTERRUPTIBLE : THR_STATE_UNKNOWN; + } + } + + return THR_STATE_UNKNOWN; +} + +static void thread_state_until(pid_t tid, thread_state state) +{ + thread_state s; + + do { + s = thread_state_get(tid); + sleep_short(); + } while (s != state); +} + +static void *uffd_mmap_changing_thread(void *opaque) +{ + volatile pid_t *pid = opaque; + int ret; + + /* Unfortunately, it's only fetch-able from the thread itself.. */ + assert(*pid == 0); + *pid = syscall(SYS_gettid); + + /* Inject an event, this will hang solid until the event read */ + ret = madvise(area_dst, page_size, MADV_REMOVE); + if (ret) + err("madvise(MADV_REMOVE) failed"); + + return NULL; +} + +static void uffd_consume_message(int fd) +{ + struct uffd_msg msg = { 0 }; + + while (uffd_read_msg(fd, &msg)); +} + +static void uffd_mmap_changing_test(uffd_test_args_t *targs) +{ + /* + * This stores the real PID (which can be different from how tid is + * defined..) for the child thread, 0 means not initialized. + */ + pid_t pid = 0; + pthread_t tid; + int ret; + + if (uffd_register(uffd, area_dst, nr_pages * page_size, + true, false, false)) + err("uffd_register() failed"); + + /* Create a thread to generate the racy event */ + ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &pid); + if (ret) + err("pthread_create() failed"); + + /* + * Wait until the thread setup the pid. Use volatile to make sure + * it reads from RAM not regs. + */ + while (!(volatile pid_t)pid) + sleep_short(); + + /* Wait until the thread hangs at REMOVE event */ + thread_state_until(pid, THR_STATE_UNINTERRUPTIBLE); + + if (!uffdio_mmap_changing_test_copy(uffd)) + return; + + if (!uffdio_mmap_changing_test_zeropage(uffd)) + return; + + if (!uffdio_mmap_changing_test_move(uffd)) + return; + + if (!uffdio_mmap_changing_test_poison(uffd)) + return; + + if (!uffdio_mmap_changing_test_continue(uffd)) + return; + + /* + * All succeeded above! Recycle everything. Start by reading the + * event so as to kick the thread roll again.. + */ + uffd_consume_message(uffd); + + ret = pthread_join(tid, NULL); + assert(ret == 0); + + uffd_test_pass(); +} + static int prevent_hugepages(const char **errmsg) { /* This should be done before source area is populated */ @@ -1470,6 +1646,32 @@ uffd_test_case_t uffd_tests[] = { .mem_targets = MEM_ALL, .uffd_feature_required = UFFD_FEATURE_POISON, }, + { + .name = "mmap-changing", + .uffd_fn = uffd_mmap_changing_test, + /* + * There's no point running this test over all mem types as + * they share the same code paths. + * + * Choose shmem for simplicity, because (1) shmem supports + * MINOR mode to cover UFFDIO_CONTINUE, and (2) shmem is + * almost always available (unlike hugetlb). Here we + * abused SHMEM for UFFDIO_MOVE, but the test we want to + * cover doesn't yet need the correct memory type.. + */ + .mem_targets = MEM_SHMEM, + /* + * Any UFFD_FEATURE_EVENT_* should work to trigger the + * race logically, but choose the simplest (REMOVE). + * + * Meanwhile, since we'll cover quite a few new ioctl()s + * (CONTINUE, POISON, MOVE), skip this test for old kernels + * by choosing all of them. + */ + .uffd_feature_required = UFFD_FEATURE_EVENT_REMOVE | + UFFD_FEATURE_MOVE | UFFD_FEATURE_POISON | + UFFD_FEATURE_MINOR_SHMEM, + }, }; static void usage(const char *prog) diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index 1f92e8caceac..325de53966b6 100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh @@ -7,23 +7,20 @@ # real test to check that the kernel is configured to support at least 5 # pagetable levels. -# 1 means the test failed -exitcode=1 - # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -fail() +skip() { echo "$1" - exit $exitcode + exit $ksft_skip } check_supported_x86_64() { local config="/proc/config.gz" [[ -f "${config}" ]] || config="/boot/config-$(uname -r)" - [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot" + [[ -f "${config}" ]] || skip "Cannot find kernel config in /proc or /boot" # gzip -dcfq automatically handles both compressed and plaintext input. # See man 1 gzip under '-f'. @@ -33,11 +30,9 @@ check_supported_x86_64() else {print 1}; exit}' /proc/cpuinfo 2>/dev/null) if [[ "${pg_table_levels}" -lt 5 ]]; then - echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" - exit $ksft_skip + skip "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then - echo "$0: CPU does not have the necessary la57 flag to support page table level 5" - exit $ksft_skip + skip "$0: CPU does not have the necessary la57 flag to support page table level 5" fi } @@ -45,24 +40,21 @@ check_supported_ppc64() { local config="/proc/config.gz" [[ -f "${config}" ]] || config="/boot/config-$(uname -r)" - [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot" + [[ -f "${config}" ]] || skip "Cannot find kernel config in /proc or /boot" local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) if [[ "${pg_table_levels}" -lt 5 ]]; then - echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" - exit $ksft_skip + skip "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" fi local mmu_support=$(grep -m1 "mmu" /proc/cpuinfo | awk '{print $3}') if [[ "$mmu_support" != "radix" ]]; then - echo "$0: System does not use Radix MMU, required for 5-level paging" - exit $ksft_skip + skip "$0: System does not use Radix MMU, required for 5-level paging" fi local hugepages_total=$(awk '/HugePages_Total/ {print $2}' /proc/meminfo) if [[ "${hugepages_total}" -eq 0 ]]; then - echo "$0: HugePages are not enabled, required for some tests" - exit $ksft_skip + skip "$0: HugePages are not enabled, required for some tests" fi } diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index a36734fb62f3..61d7bf1f8c62 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <string.h> +#include <errno.h> #include <fcntl.h> #include <dirent.h> #include <inttypes.h> @@ -424,3 +425,64 @@ bool check_vmflag_io(void *addr) flags += flaglen; } } + +/* + * Open an fd at /proc/$pid/maps and configure procmap_out ready for + * PROCMAP_QUERY query. Returns 0 on success, or an error code otherwise. + */ +int open_procmap(pid_t pid, struct procmap_fd *procmap_out) +{ + char path[256]; + int ret = 0; + + memset(procmap_out, '\0', sizeof(*procmap_out)); + sprintf(path, "/proc/%d/maps", pid); + procmap_out->query.size = sizeof(procmap_out->query); + procmap_out->fd = open(path, O_RDONLY); + if (procmap_out->fd < 0) + ret = -errno; + + return ret; +} + +/* Perform PROCMAP_QUERY. Returns 0 on success, or an error code otherwise. */ +int query_procmap(struct procmap_fd *procmap) +{ + int ret = 0; + + if (ioctl(procmap->fd, PROCMAP_QUERY, &procmap->query) == -1) + ret = -errno; + + return ret; +} + +/* + * Try to find the VMA at specified address, returns true if found, false if not + * found, and the test is failed if any other error occurs. + * + * On success, procmap->query is populated with the results. + */ +bool find_vma_procmap(struct procmap_fd *procmap, void *address) +{ + int err; + + procmap->query.query_flags = 0; + procmap->query.query_addr = (unsigned long)address; + err = query_procmap(procmap); + if (!err) + return true; + + if (err != -ENOENT) + ksft_exit_fail_msg("%s: Error %d on ioctl(PROCMAP_QUERY)\n", + __func__, err); + return false; +} + +/* + * Close fd used by PROCMAP_QUERY mechanism. Returns 0 on success, or an error + * code otherwise. + */ +int close_procmap(struct procmap_fd *procmap) +{ + return close(procmap->fd); +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index 6effafdc4d8a..adb5d294a220 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -3,9 +3,11 @@ #include <stdbool.h> #include <sys/mman.h> #include <err.h> +#include <stdarg.h> #include <strings.h> /* ffsl() */ #include <unistd.h> /* _SC_PAGESIZE */ #include "../kselftest.h" +#include <linux/fs.h> #define BIT_ULL(nr) (1ULL << (nr)) #define PM_SOFT_DIRTY BIT_ULL(55) @@ -19,6 +21,15 @@ extern unsigned int __page_size; extern unsigned int __page_shift; +/* + * Represents an open fd and PROCMAP_QUERY state for binary (via ioctl) + * /proc/$pid/[s]maps lookup. + */ +struct procmap_fd { + int fd; + struct procmap_query query; +}; + static inline unsigned int psize(void) { if (!__page_size) @@ -73,6 +84,36 @@ int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor, uint64_t *ioctls); unsigned long get_free_hugepages(void); bool check_vmflag_io(void *addr); +int open_procmap(pid_t pid, struct procmap_fd *procmap_out); +int query_procmap(struct procmap_fd *procmap); +bool find_vma_procmap(struct procmap_fd *procmap, void *address); +int close_procmap(struct procmap_fd *procmap); + +static inline int open_self_procmap(struct procmap_fd *procmap_out) +{ + pid_t pid = getpid(); + + return open_procmap(pid, procmap_out); +} + +/* These helpers need to be inline to match the kselftest.h idiom. */ +static char test_name[1024]; + +static inline void log_test_start(const char *name, ...) +{ + va_list args; + va_start(args, name); + + vsnprintf(test_name, sizeof(test_name), name, args); + ksft_print_msg("[RUN] %s\n", test_name); + + va_end(args); +} + +static inline void log_test_result(int result) +{ + ksft_test_result_report(result, "%s\n", test_name); +} /* * On ppc64 this will only work with radix 2M hugepage size diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile index 1c631740a730..c5e0b76ba6ac 100644 --- a/tools/testing/selftests/ptrace/Makefile +++ b/tools/testing/selftests/ptrace/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS += -std=c99 -pthread -Wall $(KHDR_INCLUDES) -TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess get_set_sud +TEST_GEN_PROGS := get_syscall_info set_syscall_info peeksiginfo vmaccess get_set_sud include ../lib.mk diff --git a/tools/testing/selftests/ptrace/set_syscall_info.c b/tools/testing/selftests/ptrace/set_syscall_info.c new file mode 100644 index 000000000000..4198248ef874 --- /dev/null +++ b/tools/testing/selftests/ptrace/set_syscall_info.c @@ -0,0 +1,519 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2018-2025 Dmitry V. Levin <ldv@strace.io> + * All rights reserved. + * + * Check whether PTRACE_SET_SYSCALL_INFO semantics implemented in the kernel + * matches userspace expectations. + */ + +#include "../kselftest_harness.h" +#include <err.h> +#include <fcntl.h> +#include <signal.h> +#include <asm/unistd.h> +#include <linux/types.h> +#include <linux/ptrace.h> + +#if defined(_MIPS_SIM) && _MIPS_SIM == _MIPS_SIM_NABI32 +/* + * MIPS N32 is the only architecture where __kernel_ulong_t + * does not match the bitness of syscall arguments. + */ +typedef unsigned long long kernel_ulong_t; +#else +typedef __kernel_ulong_t kernel_ulong_t; +#endif + +struct si_entry { + int nr; + kernel_ulong_t args[6]; +}; +struct si_exit { + unsigned int is_error; + int rval; +}; + +static unsigned int ptrace_stop; +static pid_t tracee_pid; + +static int +kill_tracee(pid_t pid) +{ + if (!pid) + return 0; + + int saved_errno = errno; + + int rc = kill(pid, SIGKILL); + + errno = saved_errno; + return rc; +} + +static long +sys_ptrace(int request, pid_t pid, unsigned long addr, unsigned long data) +{ + return syscall(__NR_ptrace, request, pid, addr, data); +} + +#define LOG_KILL_TRACEE(fmt, ...) \ + do { \ + kill_tracee(tracee_pid); \ + TH_LOG("wait #%d: " fmt, \ + ptrace_stop, ##__VA_ARGS__); \ + } while (0) + +static void +check_psi_entry(struct __test_metadata *_metadata, + const struct ptrace_syscall_info *info, + const struct si_entry *exp_entry, + const char *text) +{ + unsigned int i; + int exp_nr = exp_entry->nr; +#if defined __s390__ || defined __s390x__ + /* s390 is the only architecture that has 16-bit syscall numbers */ + exp_nr &= 0xffff; +#endif + + ASSERT_EQ(PTRACE_SYSCALL_INFO_ENTRY, info->op) { + LOG_KILL_TRACEE("%s: entry stop mismatch", text); + } + ASSERT_TRUE(info->arch) { + LOG_KILL_TRACEE("%s: entry stop mismatch", text); + } + ASSERT_TRUE(info->instruction_pointer) { + LOG_KILL_TRACEE("%s: entry stop mismatch", text); + } + ASSERT_TRUE(info->stack_pointer) { + LOG_KILL_TRACEE("%s: entry stop mismatch", text); + } + ASSERT_EQ(exp_nr, info->entry.nr) { + LOG_KILL_TRACEE("%s: syscall nr mismatch", text); + } + for (i = 0; i < ARRAY_SIZE(exp_entry->args); ++i) { + ASSERT_EQ(exp_entry->args[i], info->entry.args[i]) { + LOG_KILL_TRACEE("%s: syscall arg #%u mismatch", + text, i); + } + } +} + +static void +check_psi_exit(struct __test_metadata *_metadata, + const struct ptrace_syscall_info *info, + const struct si_exit *exp_exit, + const char *text) +{ + ASSERT_EQ(PTRACE_SYSCALL_INFO_EXIT, info->op) { + LOG_KILL_TRACEE("%s: exit stop mismatch", text); + } + ASSERT_TRUE(info->arch) { + LOG_KILL_TRACEE("%s: exit stop mismatch", text); + } + ASSERT_TRUE(info->instruction_pointer) { + LOG_KILL_TRACEE("%s: exit stop mismatch", text); + } + ASSERT_TRUE(info->stack_pointer) { + LOG_KILL_TRACEE("%s: exit stop mismatch", text); + } + ASSERT_EQ(exp_exit->is_error, info->exit.is_error) { + LOG_KILL_TRACEE("%s: exit stop mismatch", text); + } + ASSERT_EQ(exp_exit->rval, info->exit.rval) { + LOG_KILL_TRACEE("%s: exit stop mismatch", text); + } +} + +TEST(set_syscall_info) +{ + const pid_t tracer_pid = getpid(); + const kernel_ulong_t dummy[] = { + (kernel_ulong_t) 0xdad0bef0bad0fed0ULL, + (kernel_ulong_t) 0xdad1bef1bad1fed1ULL, + (kernel_ulong_t) 0xdad2bef2bad2fed2ULL, + (kernel_ulong_t) 0xdad3bef3bad3fed3ULL, + (kernel_ulong_t) 0xdad4bef4bad4fed4ULL, + (kernel_ulong_t) 0xdad5bef5bad5fed5ULL, + }; + int splice_in[2], splice_out[2]; + + ASSERT_EQ(0, pipe(splice_in)); + ASSERT_EQ(0, pipe(splice_out)); + ASSERT_EQ(sizeof(dummy), write(splice_in[1], dummy, sizeof(dummy))); + + const struct { + struct si_entry entry[2]; + struct si_exit exit[2]; + } si[] = { + /* change scno, keep non-error rval */ + { + { + { + __NR_gettid, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + }, { + __NR_getppid, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + } + }, { + { 0, tracer_pid }, { 0, tracer_pid } + } + }, + + /* set scno to -1, keep error rval */ + { + { + { + __NR_chdir, + { + (uintptr_t) ".", + dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + }, { + -1, + { + (uintptr_t) ".", + dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + } + }, { + { 1, -ENOSYS }, { 1, -ENOSYS } + } + }, + + /* keep scno, change non-error rval */ + { + { + { + __NR_getppid, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + }, { + __NR_getppid, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + } + }, { + { 0, tracer_pid }, { 0, tracer_pid + 1 } + } + }, + + /* change arg1, keep non-error rval */ + { + { + { + __NR_chdir, + { + (uintptr_t) "", + dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + }, { + __NR_chdir, + { + (uintptr_t) ".", + dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + } + }, { + { 0, 0 }, { 0, 0 } + } + }, + + /* set scno to -1, change error rval to non-error */ + { + { + { + __NR_gettid, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + }, { + -1, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + } + }, { + { 1, -ENOSYS }, { 0, tracer_pid } + } + }, + + /* change scno, change non-error rval to error */ + { + { + { + __NR_chdir, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + }, { + __NR_getppid, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + } + }, { + { 0, tracer_pid }, { 1, -EISDIR } + } + }, + + /* change scno and all args, change non-error rval */ + { + { + { + __NR_gettid, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + }, { + __NR_splice, + { + splice_in[0], 0, splice_out[1], 0, + sizeof(dummy), SPLICE_F_NONBLOCK + } + } + }, { + { 0, sizeof(dummy) }, { 0, sizeof(dummy) + 1 } + } + }, + + /* change arg1, no exit stop */ + { + { + { + __NR_exit_group, + { + dummy[0], dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + }, { + __NR_exit_group, + { + 0, dummy[1], dummy[2], + dummy[3], dummy[4], dummy[5] + } + } + }, { + { 0, 0 }, { 0, 0 } + } + }, + }; + + long rc; + unsigned int i; + + tracee_pid = fork(); + + ASSERT_LE(0, tracee_pid) { + TH_LOG("fork: %m"); + } + + if (tracee_pid == 0) { + /* get the pid before PTRACE_TRACEME */ + tracee_pid = getpid(); + ASSERT_EQ(0, sys_ptrace(PTRACE_TRACEME, 0, 0, 0)) { + TH_LOG("PTRACE_TRACEME: %m"); + } + ASSERT_EQ(0, kill(tracee_pid, SIGSTOP)) { + /* cannot happen */ + TH_LOG("kill SIGSTOP: %m"); + } + for (i = 0; i < ARRAY_SIZE(si); ++i) { + rc = syscall(si[i].entry[0].nr, + si[i].entry[0].args[0], + si[i].entry[0].args[1], + si[i].entry[0].args[2], + si[i].entry[0].args[3], + si[i].entry[0].args[4], + si[i].entry[0].args[5]); + if (si[i].exit[1].is_error) { + if (rc != -1 || errno != -si[i].exit[1].rval) + break; + } else { + if (rc != si[i].exit[1].rval) + break; + } + } + /* + * Something went wrong, but in this state tracee + * cannot reliably issue syscalls, so just crash. + */ + *(volatile unsigned char *) (uintptr_t) i = 42; + /* unreachable */ + _exit(i + 1); + } + + for (ptrace_stop = 0; ; ++ptrace_stop) { + struct ptrace_syscall_info info = { + .op = 0xff /* invalid PTRACE_SYSCALL_INFO_* op */ + }; + const size_t size = sizeof(info); + const int expected_entry_size = + (void *) &info.entry.args[6] - (void *) &info; + const int expected_exit_size = + (void *) (&info.exit.is_error + 1) - + (void *) &info; + int status; + + ASSERT_EQ(tracee_pid, wait(&status)) { + /* cannot happen */ + LOG_KILL_TRACEE("wait: %m"); + } + if (WIFEXITED(status)) { + tracee_pid = 0; /* the tracee is no more */ + ASSERT_EQ(0, WEXITSTATUS(status)) { + LOG_KILL_TRACEE("unexpected exit status %u", + WEXITSTATUS(status)); + } + break; + } + ASSERT_FALSE(WIFSIGNALED(status)) { + tracee_pid = 0; /* the tracee is no more */ + LOG_KILL_TRACEE("unexpected signal %u", + WTERMSIG(status)); + } + ASSERT_TRUE(WIFSTOPPED(status)) { + /* cannot happen */ + LOG_KILL_TRACEE("unexpected wait status %#x", status); + } + + ASSERT_LT(ptrace_stop, ARRAY_SIZE(si) * 2) { + LOG_KILL_TRACEE("ptrace stop overflow"); + } + + switch (WSTOPSIG(status)) { + case SIGSTOP: + ASSERT_EQ(0, ptrace_stop) { + LOG_KILL_TRACEE("unexpected signal stop"); + } + ASSERT_EQ(0, sys_ptrace(PTRACE_SETOPTIONS, tracee_pid, + 0, PTRACE_O_TRACESYSGOOD)) { + LOG_KILL_TRACEE("PTRACE_SETOPTIONS: %m"); + } + break; + + case SIGTRAP | 0x80: + ASSERT_LT(0, ptrace_stop) { + LOG_KILL_TRACEE("unexpected syscall stop"); + } + ASSERT_LT(0, (rc = sys_ptrace(PTRACE_GET_SYSCALL_INFO, + tracee_pid, size, + (uintptr_t) &info))) { + LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #1: %m"); + } + if (ptrace_stop & 1) { + /* entering syscall */ + const struct si_entry *exp_entry = + &si[ptrace_stop / 2].entry[0]; + const struct si_entry *set_entry = + &si[ptrace_stop / 2].entry[1]; + + /* check ptrace_syscall_info before the changes */ + ASSERT_EQ(expected_entry_size, rc) { + LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #1" + ": entry stop mismatch"); + } + check_psi_entry(_metadata, &info, exp_entry, + "PTRACE_GET_SYSCALL_INFO #1"); + + /* apply the changes */ + info.entry.nr = set_entry->nr; + for (i = 0; i < ARRAY_SIZE(set_entry->args); ++i) + info.entry.args[i] = set_entry->args[i]; + ASSERT_EQ(0, sys_ptrace(PTRACE_SET_SYSCALL_INFO, + tracee_pid, size, + (uintptr_t) &info)) { + LOG_KILL_TRACEE("PTRACE_SET_SYSCALL_INFO: %m"); + } + + /* check ptrace_syscall_info after the changes */ + memset(&info, 0, sizeof(info)); + info.op = 0xff; + ASSERT_LT(0, (rc = sys_ptrace(PTRACE_GET_SYSCALL_INFO, + tracee_pid, size, + (uintptr_t) &info))) { + LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO: %m"); + } + ASSERT_EQ(expected_entry_size, rc) { + LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #2" + ": entry stop mismatch"); + } + check_psi_entry(_metadata, &info, set_entry, + "PTRACE_GET_SYSCALL_INFO #2"); + } else { + /* exiting syscall */ + const struct si_exit *exp_exit = + &si[ptrace_stop / 2 - 1].exit[0]; + const struct si_exit *set_exit = + &si[ptrace_stop / 2 - 1].exit[1]; + + /* check ptrace_syscall_info before the changes */ + ASSERT_EQ(expected_exit_size, rc) { + LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #1" + ": exit stop mismatch"); + } + check_psi_exit(_metadata, &info, exp_exit, + "PTRACE_GET_SYSCALL_INFO #1"); + + /* apply the changes */ + info.exit.is_error = set_exit->is_error; + info.exit.rval = set_exit->rval; + ASSERT_EQ(0, sys_ptrace(PTRACE_SET_SYSCALL_INFO, + tracee_pid, size, + (uintptr_t) &info)) { + LOG_KILL_TRACEE("PTRACE_SET_SYSCALL_INFO: %m"); + } + + /* check ptrace_syscall_info after the changes */ + memset(&info, 0, sizeof(info)); + info.op = 0xff; + ASSERT_LT(0, (rc = sys_ptrace(PTRACE_GET_SYSCALL_INFO, + tracee_pid, size, + (uintptr_t) &info))) { + LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #2: %m"); + } + ASSERT_EQ(expected_exit_size, rc) { + LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #2" + ": exit stop mismatch"); + } + check_psi_exit(_metadata, &info, set_exit, + "PTRACE_GET_SYSCALL_INFO #2"); + } + break; + + default: + LOG_KILL_TRACEE("unexpected stop signal %u", + WSTOPSIG(status)); + abort(); + } + + ASSERT_EQ(0, sys_ptrace(PTRACE_SYSCALL, tracee_pid, 0, 0)) { + LOG_KILL_TRACEE("PTRACE_SYSCALL: %m"); + } + } + + ASSERT_EQ(ptrace_stop, ARRAY_SIZE(si) * 2); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c index 0326b39a11b9..30cab5d425d2 100644 --- a/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c +++ b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c @@ -56,7 +56,7 @@ int main(int argc, char **argv) } if (write(fd, "1\n", 2) < 0) { - perror("Can' enable power floor notifications\n"); + perror("Can't enable power floor notifications\n"); exit(1); } diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c index 217c3a641c53..a40097232967 100644 --- a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c +++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c @@ -37,7 +37,7 @@ void workload_hint_exit(int signum) } if (write(fd, "0\n", 2) < 0) { - perror("Can' disable workload hints\n"); + perror("Can't disable workload hints\n"); exit(1); } @@ -99,7 +99,7 @@ int main(int argc, char **argv) } if (write(fd, "1\n", 2) < 0) { - perror("Can' enable workload hints\n"); + perror("Can't enable workload hints\n"); exit(1); } diff --git a/tools/testing/vma/Makefile b/tools/testing/vma/Makefile index 860fd2311dcc..66f3831a668f 100644 --- a/tools/testing/vma/Makefile +++ b/tools/testing/vma/Makefile @@ -9,7 +9,7 @@ include ../shared/shared.mk OFILES = $(SHARED_OFILES) vma.o maple-shim.o TARGETS = vma -vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma.h +vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h vma: $(OFILES) $(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS) diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c index 11f761769b5b..2be7597a2ac2 100644 --- a/tools/testing/vma/vma.c +++ b/tools/testing/vma/vma.c @@ -28,6 +28,8 @@ unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT; * Directly import the VMA implementation here. Our vma_internal.h wrapper * provides userland-equivalent functionality for everything vma.c uses. */ +#include "../../../mm/vma_init.c" +#include "../../../mm/vma_exec.c" #include "../../../mm/vma.c" const struct vm_operations_struct vma_dummy_vm_ops; @@ -90,6 +92,12 @@ static int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma) return res; } +static void detach_free_vma(struct vm_area_struct *vma) +{ + vma_mark_detached(vma); + vm_area_free(vma); +} + /* Helper function to allocate a VMA and link it to the tree. */ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, unsigned long start, @@ -103,7 +111,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, return NULL; if (attach_vma(mm, vma)) { - vm_area_free(vma); + detach_free_vma(vma); return NULL; } @@ -185,6 +193,15 @@ static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start, vmg->__adjust_next_start = false; } +/* Helper function to set both the VMG range and its anon_vma. */ +static void vmg_set_range_anon_vma(struct vma_merge_struct *vmg, unsigned long start, + unsigned long end, pgoff_t pgoff, vm_flags_t flags, + struct anon_vma *anon_vma) +{ + vmg_set_range(vmg, start, end, pgoff, flags); + vmg->anon_vma = anon_vma; +} + /* * Helper function to try to merge a new VMA. * @@ -239,7 +256,7 @@ static int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi) vma_iter_set(vmi, 0); for_each_vma(*vmi, vma) { - vm_area_free(vma); + detach_free_vma(vma); count++; } @@ -265,6 +282,22 @@ static void dummy_close(struct vm_area_struct *) { } +static void __vma_set_dummy_anon_vma(struct vm_area_struct *vma, + struct anon_vma_chain *avc, + struct anon_vma *anon_vma) +{ + vma->anon_vma = anon_vma; + INIT_LIST_HEAD(&vma->anon_vma_chain); + list_add(&avc->same_vma, &vma->anon_vma_chain); + avc->anon_vma = vma->anon_vma; +} + +static void vma_set_dummy_anon_vma(struct vm_area_struct *vma, + struct anon_vma_chain *avc) +{ + __vma_set_dummy_anon_vma(vma, avc, &dummy_anon_vma); +} + static bool test_simple_merge(void) { struct vm_area_struct *vma; @@ -293,7 +326,7 @@ static bool test_simple_merge(void) ASSERT_EQ(vma->vm_pgoff, 0); ASSERT_EQ(vma->vm_flags, flags); - vm_area_free(vma); + detach_free_vma(vma); mtree_destroy(&mm.mm_mt); return true; @@ -335,7 +368,7 @@ static bool test_simple_modify(void) ASSERT_EQ(vma->vm_end, 0x1000); ASSERT_EQ(vma->vm_pgoff, 0); - vm_area_free(vma); + detach_free_vma(vma); vma_iter_clear(&vmi); vma = vma_next(&vmi); @@ -344,7 +377,7 @@ static bool test_simple_modify(void) ASSERT_EQ(vma->vm_end, 0x2000); ASSERT_EQ(vma->vm_pgoff, 1); - vm_area_free(vma); + detach_free_vma(vma); vma_iter_clear(&vmi); vma = vma_next(&vmi); @@ -353,7 +386,7 @@ static bool test_simple_modify(void) ASSERT_EQ(vma->vm_end, 0x3000); ASSERT_EQ(vma->vm_pgoff, 2); - vm_area_free(vma); + detach_free_vma(vma); mtree_destroy(&mm.mm_mt); return true; @@ -381,7 +414,7 @@ static bool test_simple_expand(void) ASSERT_EQ(vma->vm_end, 0x3000); ASSERT_EQ(vma->vm_pgoff, 0); - vm_area_free(vma); + detach_free_vma(vma); mtree_destroy(&mm.mm_mt); return true; @@ -402,7 +435,7 @@ static bool test_simple_shrink(void) ASSERT_EQ(vma->vm_end, 0x1000); ASSERT_EQ(vma->vm_pgoff, 0); - vm_area_free(vma); + detach_free_vma(vma); mtree_destroy(&mm.mm_mt); return true; @@ -593,7 +626,7 @@ static bool test_merge_new(void) ASSERT_EQ(vma->vm_pgoff, 0); ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); - vm_area_free(vma); + detach_free_vma(vma); count++; } @@ -953,6 +986,7 @@ static bool test_merge_existing(void) const struct vm_operations_struct vm_ops = { .close = dummy_close, }; + struct anon_vma_chain avc = {}; /* * Merge right case - partial span. @@ -968,10 +1002,10 @@ static bool test_merge_existing(void) vma->vm_ops = &vm_ops; /* This should have no impact. */ vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags); vma_next->vm_ops = &vm_ops; /* This should have no impact. */ - vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags); + vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, flags, &dummy_anon_vma); vmg.middle = vma; vmg.prev = vma; - vma->anon_vma = &dummy_anon_vma; + vma_set_dummy_anon_vma(vma, &avc); ASSERT_EQ(merge_existing(&vmg), vma_next); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); ASSERT_EQ(vma_next->vm_start, 0x3000); @@ -1001,9 +1035,9 @@ static bool test_merge_existing(void) vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, flags); vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags); vma_next->vm_ops = &vm_ops; /* This should have no impact. */ - vmg_set_range(&vmg, 0x2000, 0x6000, 2, flags); + vmg_set_range_anon_vma(&vmg, 0x2000, 0x6000, 2, flags, &dummy_anon_vma); vmg.middle = vma; - vma->anon_vma = &dummy_anon_vma; + vma_set_dummy_anon_vma(vma, &avc); ASSERT_EQ(merge_existing(&vmg), vma_next); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); ASSERT_EQ(vma_next->vm_start, 0x2000); @@ -1030,11 +1064,10 @@ static bool test_merge_existing(void) vma_prev->vm_ops = &vm_ops; /* This should have no impact. */ vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags); vma->vm_ops = &vm_ops; /* This should have no impact. */ - vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags); + vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, flags, &dummy_anon_vma); vmg.prev = vma_prev; vmg.middle = vma; - vma->anon_vma = &dummy_anon_vma; - + vma_set_dummy_anon_vma(vma, &avc); ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); ASSERT_EQ(vma_prev->vm_start, 0); @@ -1064,10 +1097,10 @@ static bool test_merge_existing(void) vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); vma_prev->vm_ops = &vm_ops; /* This should have no impact. */ vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags); - vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags); + vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, &dummy_anon_vma); vmg.prev = vma_prev; vmg.middle = vma; - vma->anon_vma = &dummy_anon_vma; + vma_set_dummy_anon_vma(vma, &avc); ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); ASSERT_EQ(vma_prev->vm_start, 0); @@ -1094,10 +1127,10 @@ static bool test_merge_existing(void) vma_prev->vm_ops = &vm_ops; /* This should have no impact. */ vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags); vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags); - vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags); + vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, &dummy_anon_vma); vmg.prev = vma_prev; vmg.middle = vma; - vma->anon_vma = &dummy_anon_vma; + vma_set_dummy_anon_vma(vma, &avc); ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); ASSERT_EQ(vma_prev->vm_start, 0); @@ -1180,12 +1213,9 @@ static bool test_anon_vma_non_mergeable(void) .mm = &mm, .vmi = &vmi, }; - struct anon_vma_chain dummy_anon_vma_chain1 = { - .anon_vma = &dummy_anon_vma, - }; - struct anon_vma_chain dummy_anon_vma_chain2 = { - .anon_vma = &dummy_anon_vma, - }; + struct anon_vma_chain dummy_anon_vma_chain_1 = {}; + struct anon_vma_chain dummy_anon_vma_chain_2 = {}; + struct anon_vma dummy_anon_vma_2; /* * In the case of modified VMA merge, merging both left and right VMAs @@ -1209,24 +1239,11 @@ static bool test_anon_vma_non_mergeable(void) * * However, when prev is compared to next, the merge should fail. */ - - INIT_LIST_HEAD(&vma_prev->anon_vma_chain); - list_add(&dummy_anon_vma_chain1.same_vma, &vma_prev->anon_vma_chain); - ASSERT_TRUE(list_is_singular(&vma_prev->anon_vma_chain)); - vma_prev->anon_vma = &dummy_anon_vma; - ASSERT_TRUE(is_mergeable_anon_vma(NULL, vma_prev->anon_vma, vma_prev)); - - INIT_LIST_HEAD(&vma_next->anon_vma_chain); - list_add(&dummy_anon_vma_chain2.same_vma, &vma_next->anon_vma_chain); - ASSERT_TRUE(list_is_singular(&vma_next->anon_vma_chain)); - vma_next->anon_vma = (struct anon_vma *)2; - ASSERT_TRUE(is_mergeable_anon_vma(NULL, vma_next->anon_vma, vma_next)); - - ASSERT_FALSE(is_mergeable_anon_vma(vma_prev->anon_vma, vma_next->anon_vma, NULL)); - - vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags); + vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, NULL); vmg.prev = vma_prev; vmg.middle = vma; + vma_set_dummy_anon_vma(vma_prev, &dummy_anon_vma_chain_1); + __vma_set_dummy_anon_vma(vma_next, &dummy_anon_vma_chain_2, &dummy_anon_vma_2); ASSERT_EQ(merge_existing(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -1253,17 +1270,12 @@ static bool test_anon_vma_non_mergeable(void) vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags); - INIT_LIST_HEAD(&vma_prev->anon_vma_chain); - list_add(&dummy_anon_vma_chain1.same_vma, &vma_prev->anon_vma_chain); - vma_prev->anon_vma = (struct anon_vma *)1; - - INIT_LIST_HEAD(&vma_next->anon_vma_chain); - list_add(&dummy_anon_vma_chain2.same_vma, &vma_next->anon_vma_chain); - vma_next->anon_vma = (struct anon_vma *)2; - - vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags); + vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, NULL); vmg.prev = vma_prev; + vma_set_dummy_anon_vma(vma_prev, &dummy_anon_vma_chain_1); + __vma_set_dummy_anon_vma(vma_next, &dummy_anon_vma_chain_2, &dummy_anon_vma_2); + vmg.anon_vma = NULL; ASSERT_EQ(merge_new(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); ASSERT_EQ(vma_prev->vm_start, 0); @@ -1363,8 +1375,8 @@ static bool test_dup_anon_vma(void) vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, flags); - - vma->anon_vma = &dummy_anon_vma; + vmg.anon_vma = &dummy_anon_vma; + vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain); vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; vmg.middle = vma; @@ -1392,7 +1404,7 @@ static bool test_dup_anon_vma(void) vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); vma = alloc_and_link_vma(&mm, 0x3000, 0x8000, 3, flags); - vma->anon_vma = &dummy_anon_vma; + vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain); vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma_prev; vmg.middle = vma; @@ -1420,7 +1432,7 @@ static bool test_dup_anon_vma(void) vma = alloc_and_link_vma(&mm, 0, 0x5000, 0, flags); vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, flags); - vma->anon_vma = &dummy_anon_vma; + vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain); vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); vmg.prev = vma; vmg.middle = vma; @@ -1447,6 +1459,7 @@ static bool test_vmi_prealloc_fail(void) .mm = &mm, .vmi = &vmi, }; + struct anon_vma_chain avc = {}; struct vm_area_struct *vma_prev, *vma; /* @@ -1459,9 +1472,10 @@ static bool test_vmi_prealloc_fail(void) vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); vma->anon_vma = &dummy_anon_vma; - vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg_set_range_anon_vma(&vmg, 0x3000, 0x5000, 3, flags, &dummy_anon_vma); vmg.prev = vma_prev; vmg.middle = vma; + vma_set_dummy_anon_vma(vma, &avc); fail_prealloc = true; @@ -1661,6 +1675,7 @@ int main(void) int num_tests = 0, num_fail = 0; maple_tree_init(); + vma_state_init(); #define TEST(name) \ do { \ diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 572ab2cea763..441feb21aa5a 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -56,6 +56,8 @@ extern unsigned long dac_mmap_min_addr; #define VM_PFNMAP 0x00000400 #define VM_LOCKED 0x00002000 #define VM_IO 0x00004000 +#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */ +#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */ #define VM_DONTEXPAND 0x00040000 #define VM_LOCKONFAULT 0x00080000 #define VM_ACCOUNT 0x00100000 @@ -70,6 +72,20 @@ extern unsigned long dac_mmap_min_addr; #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) +#ifdef CONFIG_STACK_GROWSUP +#define VM_STACK VM_GROWSUP +#define VM_STACK_EARLY VM_GROWSDOWN +#else +#define VM_STACK VM_GROWSDOWN +#define VM_STACK_EARLY 0 +#endif + +#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE) +#define TASK_SIZE_LOW DEFAULT_MAP_WINDOW +#define TASK_SIZE_MAX DEFAULT_MAP_WINDOW +#define STACK_TOP TASK_SIZE_LOW +#define STACK_TOP_MAX TASK_SIZE_MAX + /* This mask represents all the VMA flag bits used by mlock */ #define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) @@ -82,6 +98,10 @@ extern unsigned long dac_mmap_min_addr; #define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK) +#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS +#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) +#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY) + #define RLIMIT_STACK 3 /* max stack size */ #define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */ @@ -135,6 +155,10 @@ typedef __bitwise unsigned int vm_fault_t; */ #define pr_warn_once pr_err +#define data_race(expr) expr + +#define ASSERT_EXCLUSIVE_WRITER(x) + struct kref { refcount_t refcount; }; @@ -229,12 +253,46 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access */ }; +struct vm_area_struct; + +/* + * Describes a VMA that is about to be mmap()'ed. Drivers may choose to + * manipulate mutable fields which will cause those fields to be updated in the + * resultant VMA. + * + * Helper functions are not required for manipulating any field. + */ +struct vm_area_desc { + /* Immutable state. */ + struct mm_struct *mm; + unsigned long start; + unsigned long end; + + /* Mutable fields. Populated with initial state. */ + pgoff_t pgoff; + struct file *file; + vm_flags_t vm_flags; + pgprot_t page_prot; + + /* Write-only fields. */ + const struct vm_operations_struct *vm_ops; + void *private_data; +}; + +struct file_operations { + int (*mmap)(struct file *, struct vm_area_struct *); + int (*mmap_prepare)(struct vm_area_desc *); +}; + struct file { struct address_space *f_mapping; + const struct file_operations *f_op; }; #define VMA_LOCK_OFFSET 0x40000000 +typedef struct { unsigned long v; } freeptr_t; + struct vm_area_struct { /* The first cache line has the info for VMA tree walking. */ @@ -244,9 +302,7 @@ struct vm_area_struct { unsigned long vm_start; unsigned long vm_end; }; -#ifdef CONFIG_PER_VMA_LOCK - struct rcu_head vm_rcu; /* Used for deferred freeing. */ -#endif + freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */ }; struct mm_struct *vm_mm; /* The address space we belong to. */ @@ -421,6 +477,87 @@ struct vm_unmapped_area_info { unsigned long start_gap; }; +struct pagetable_move_control { + struct vm_area_struct *old; /* Source VMA. */ + struct vm_area_struct *new; /* Destination VMA. */ + unsigned long old_addr; /* Address from which the move begins. */ + unsigned long old_end; /* Exclusive address at which old range ends. */ + unsigned long new_addr; /* Address to move page tables to. */ + unsigned long len_in; /* Bytes to remap specified by user. */ + + bool need_rmap_locks; /* Do rmap locks need to be taken? */ + bool for_stack; /* Is this an early temp stack being moved? */ +}; + +#define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_) \ + struct pagetable_move_control name = { \ + .old = old_, \ + .new = new_, \ + .old_addr = old_addr_, \ + .old_end = (old_addr_) + (len_), \ + .new_addr = new_addr_, \ + .len_in = len_, \ + } + +struct kmem_cache_args { + /** + * @align: The required alignment for the objects. + * + * %0 means no specific alignment is requested. + */ + unsigned int align; + /** + * @useroffset: Usercopy region offset. + * + * %0 is a valid offset, when @usersize is non-%0 + */ + unsigned int useroffset; + /** + * @usersize: Usercopy region size. + * + * %0 means no usercopy region is specified. + */ + unsigned int usersize; + /** + * @freeptr_offset: Custom offset for the free pointer + * in &SLAB_TYPESAFE_BY_RCU caches + * + * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer + * outside of the object. This might cause the object to grow in size. + * Cache creators that have a reason to avoid this can specify a custom + * free pointer offset in their struct where the free pointer will be + * placed. + * + * Note that placing the free pointer inside the object requires the + * caller to ensure that no fields are invalidated that are required to + * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for + * details). + * + * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset + * is specified, %use_freeptr_offset must be set %true. + * + * Note that @ctor currently isn't supported with custom free pointers + * as a @ctor requires an external free pointer. + */ + unsigned int freeptr_offset; + /** + * @use_freeptr_offset: Whether a @freeptr_offset is used. + */ + bool use_freeptr_offset; + /** + * @ctor: A constructor for the objects. + * + * The constructor is invoked for each object in a newly allocated slab + * page. It is the cache user's responsibility to free object in the + * same state as after calling the constructor, or deal appropriately + * with any differences between a freshly constructed and a reallocated + * object. + * + * %NULL means no constructor. + */ + void (*ctor)(void *); +}; + static inline void vma_iter_invalidate(struct vma_iterator *vmi) { mas_pause(&vmi->mas); @@ -505,31 +642,38 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) vma->vm_lock_seq = UINT_MAX; } -static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) -{ - struct vm_area_struct *vma = calloc(1, sizeof(struct vm_area_struct)); +struct kmem_cache { + const char *name; + size_t object_size; + struct kmem_cache_args *args; +}; - if (!vma) - return NULL; +static inline struct kmem_cache *__kmem_cache_create(const char *name, + size_t object_size, + struct kmem_cache_args *args) +{ + struct kmem_cache *ret = malloc(sizeof(struct kmem_cache)); - vma_init(vma, mm); + ret->name = name; + ret->object_size = object_size; + ret->args = args; - return vma; + return ret; } -static inline struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) -{ - struct vm_area_struct *new = calloc(1, sizeof(struct vm_area_struct)); +#define kmem_cache_create(__name, __object_size, __args, ...) \ + __kmem_cache_create((__name), (__object_size), (__args)) - if (!new) - return NULL; +static inline void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) +{ + (void)gfpflags; - memcpy(new, orig, sizeof(*new)); - refcount_set(&new->vm_refcnt, 0); - new->vm_lock_seq = UINT_MAX; - INIT_LIST_HEAD(&new->anon_vma_chain); + return calloc(s->object_size, 1); +} - return new; +static inline void kmem_cache_free(struct kmem_cache *s, void *x) +{ + free(x); } /* @@ -696,11 +840,6 @@ static inline void mpol_put(struct mempolicy *) { } -static inline void vm_area_free(struct vm_area_struct *vma) -{ - free(vma); -} - static inline void lru_add_drain(void) { } @@ -1018,11 +1157,6 @@ static inline void vm_flags_clear(struct vm_area_struct *vma, vma->__vm_flags &= ~flags; } -static inline int call_mmap(struct file *, struct vm_area_struct *) -{ - return 0; -} - static inline int shmem_zero_setup(struct vm_area_struct *) { return 0; @@ -1240,4 +1374,96 @@ static inline int mapping_map_writable(struct address_space *mapping) return 0; } +static inline unsigned long move_page_tables(struct pagetable_move_control *pmc) +{ + (void)pmc; + + return 0; +} + +static inline void free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) +{ + (void)tlb; + (void)addr; + (void)end; + (void)floor; + (void)ceiling; +} + +static inline int ksm_execve(struct mm_struct *mm) +{ + (void)mm; + + return 0; +} + +static inline void ksm_exit(struct mm_struct *mm) +{ + (void)mm; +} + +static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) +{ + (void)vma; + (void)reset_refcnt; +} + +static inline void vma_numab_state_init(struct vm_area_struct *vma) +{ + (void)vma; +} + +static inline void vma_numab_state_free(struct vm_area_struct *vma) +{ + (void)vma; +} + +static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma) +{ + (void)orig_vma; + (void)new_vma; +} + +static inline void free_anon_vma_name(struct vm_area_struct *vma) +{ + (void)vma; +} + +/* Did the driver provide valid mmap hook configuration? */ +static inline bool file_has_valid_mmap_hooks(struct file *file) +{ + bool has_mmap = file->f_op->mmap; + bool has_mmap_prepare = file->f_op->mmap_prepare; + + /* Hooks are mutually exclusive. */ + if (WARN_ON_ONCE(has_mmap && has_mmap_prepare)) + return false; + if (WARN_ON_ONCE(!has_mmap && !has_mmap_prepare)) + return false; + + return true; +} + +static inline int call_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (WARN_ON_ONCE(file->f_op->mmap_prepare)) + return -EINVAL; + + return file->f_op->mmap(file, vma); +} + +static inline int __call_mmap_prepare(struct file *file, + struct vm_area_desc *desc) +{ + return file->f_op->mmap_prepare(desc); +} + +static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ + (void)vma; +} + #endif /* __MM_VMA_INTERNAL_H */ |