Diffstat (limited to 'tools/testing')
-rw-r--r--  tools/testing/memblock/tests/alloc_api.c  22
-rw-r--r--  tools/testing/memblock/tests/alloc_helpers_api.c  4
-rw-r--r--  tools/testing/memblock/tests/alloc_nid_api.c  20
-rw-r--r--  tools/testing/radix-tree/maple.c  126
-rw-r--r--  tools/testing/selftests/cgroup/Makefile  21
-rw-r--r--  tools/testing/selftests/cgroup/lib/cgroup_util.c (renamed from tools/testing/selftests/cgroup/cgroup_util.c)  118
-rw-r--r--  tools/testing/selftests/cgroup/lib/include/cgroup_util.h (renamed from tools/testing/selftests/cgroup/cgroup_util.h)  13
-rw-r--r--  tools/testing/selftests/cgroup/lib/libcgroup.mk  19
-rw-r--r--  tools/testing/selftests/cgroup/test_memcontrol.c  100
-rw-r--r--  tools/testing/selftests/damon/Makefile  2
-rw-r--r--  tools/testing/selftests/damon/_chk_dependency.sh  52
-rw-r--r--  tools/testing/selftests/damon/_damon_sysfs.py  9
-rw-r--r--  tools/testing/selftests/damon/_debugfs_common.sh  64
-rw-r--r--  tools/testing/selftests/filesystems/anon_inode_test.c  2
-rw-r--r--  tools/testing/selftests/filesystems/eventfd/eventfd_test.c  7
-rw-r--r--  tools/testing/selftests/filesystems/file_stressor.c  2
-rw-r--r--  tools/testing/selftests/kmod/config  5
-rw-r--r--  tools/testing/selftests/kvm/Makefile.kvm  6
-rw-r--r--  tools/testing/selftests/kvm/access_tracking_perf_test.c  281
-rw-r--r--  tools/testing/selftests/kvm/include/kvm_util.h  35
-rw-r--r--  tools/testing/selftests/kvm/include/lru_gen_util.h  51
-rw-r--r--  tools/testing/selftests/kvm/include/test_util.h  1
-rw-r--r--  tools/testing/selftests/kvm/include/x86/processor.h  1
-rw-r--r--  tools/testing/selftests/kvm/include/x86/sev.h  53
-rw-r--r--  tools/testing/selftests/kvm/lib/kvm_util.c  21
-rw-r--r--  tools/testing/selftests/kvm/lib/lru_gen_util.c  387
-rw-r--r--  tools/testing/selftests/kvm/lib/test_util.c  42
-rw-r--r--  tools/testing/selftests/kvm/lib/x86/processor.c  4
-rw-r--r--  tools/testing/selftests/kvm/lib/x86/sev.c  76
-rw-r--r--  tools/testing/selftests/kvm/x86/fastops_test.c  165
-rw-r--r--  tools/testing/selftests/kvm/x86/hyperv_cpuid.c  21
-rw-r--r--  tools/testing/selftests/kvm/x86/kvm_buslock_test.c  135
-rw-r--r--  tools/testing/selftests/kvm/x86/sev_init2_tests.c  13
-rw-r--r--  tools/testing/selftests/kvm/x86/sev_smoke_test.c  75
-rw-r--r--  tools/testing/selftests/mm/.gitignore  2
-rw-r--r--  tools/testing/selftests/mm/Makefile  2
-rw-r--r--  tools/testing/selftests/mm/cow.c  340
-rw-r--r--  tools/testing/selftests/mm/guard-regions.c  74
-rw-r--r--  tools/testing/selftests/mm/gup_longterm.c  160
-rwxr-xr-x  tools/testing/selftests/mm/hugetlb_reparenting_test.sh  98
-rw-r--r--  tools/testing/selftests/mm/madv_populate.c  18
-rw-r--r--  tools/testing/selftests/mm/map_fixed_noreplace.c  2
-rw-r--r--  tools/testing/selftests/mm/merge.c  455
-rw-r--r--  tools/testing/selftests/mm/mlock2-tests.c  2
-rw-r--r--  tools/testing/selftests/mm/pagemap_ioctl.c  16
-rw-r--r--  tools/testing/selftests/mm/pfnmap.c  249
-rwxr-xr-x  tools/testing/selftests/mm/run_vmtests.sh  8
-rw-r--r--  tools/testing/selftests/mm/thuge-gen.c  4
-rw-r--r--  tools/testing/selftests/mm/uffd-unit-tests.c  202
-rwxr-xr-x  tools/testing/selftests/mm/va_high_addr_switch.sh  26
-rw-r--r--  tools/testing/selftests/mm/vm_util.c  62
-rw-r--r--  tools/testing/selftests/mm/vm_util.h  41
-rw-r--r--  tools/testing/selftests/ptrace/Makefile  2
-rw-r--r--  tools/testing/selftests/ptrace/set_syscall_info.c  519
-rw-r--r--  tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c  2
-rw-r--r--  tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c  4
-rw-r--r--  tools/testing/vma/Makefile  2
-rw-r--r--  tools/testing/vma/vma.c  127
-rw-r--r--  tools/testing/vma/vma_internal.h  286
59 files changed, 3910 insertions, 746 deletions
diff --git a/tools/testing/memblock/tests/alloc_api.c b/tools/testing/memblock/tests/alloc_api.c
index 68f1a75cd72c..c55f67dd367d 100644
--- a/tools/testing/memblock/tests/alloc_api.c
+++ b/tools/testing/memblock/tests/alloc_api.c
@@ -134,7 +134,7 @@ static int alloc_top_down_before_check(void)
PREFIX_PUSH();
setup_memblock();
- memblock_reserve(memblock_end_of_DRAM() - total_size, r1_size);
+ memblock_reserve_kern(memblock_end_of_DRAM() - total_size, r1_size);
allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES);
@@ -182,7 +182,7 @@ static int alloc_top_down_after_check(void)
total_size = r1.size + r2_size;
- memblock_reserve(r1.base, r1.size);
+ memblock_reserve_kern(r1.base, r1.size);
allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES);
@@ -231,8 +231,8 @@ static int alloc_top_down_second_fit_check(void)
total_size = r1.size + r2.size + r3_size;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES);
@@ -285,8 +285,8 @@ static int alloc_in_between_generic_check(void)
total_size = r1.size + r2.size + r3_size;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES);
@@ -422,7 +422,7 @@ static int alloc_limited_space_generic_check(void)
setup_memblock();
/* Simulate almost-full memory */
- memblock_reserve(memblock_start_of_DRAM(), reserved_size);
+ memblock_reserve_kern(memblock_start_of_DRAM(), reserved_size);
allocated_ptr = run_memblock_alloc(available_size, SMP_CACHE_BYTES);
@@ -608,7 +608,7 @@ static int alloc_bottom_up_before_check(void)
PREFIX_PUSH();
setup_memblock();
- memblock_reserve(memblock_start_of_DRAM() + r1_size, r2_size);
+ memblock_reserve_kern(memblock_start_of_DRAM() + r1_size, r2_size);
allocated_ptr = run_memblock_alloc(r1_size, SMP_CACHE_BYTES);
@@ -655,7 +655,7 @@ static int alloc_bottom_up_after_check(void)
total_size = r1.size + r2_size;
- memblock_reserve(r1.base, r1.size);
+ memblock_reserve_kern(r1.base, r1.size);
allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES);
@@ -705,8 +705,8 @@ static int alloc_bottom_up_second_fit_check(void)
total_size = r1.size + r2.size + r3_size;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES);
diff --git a/tools/testing/memblock/tests/alloc_helpers_api.c b/tools/testing/memblock/tests/alloc_helpers_api.c
index 3ef9486da8a0..e5362cfd2ff3 100644
--- a/tools/testing/memblock/tests/alloc_helpers_api.c
+++ b/tools/testing/memblock/tests/alloc_helpers_api.c
@@ -163,7 +163,7 @@ static int alloc_from_top_down_no_space_above_check(void)
min_addr = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2;
/* No space above this address */
- memblock_reserve(min_addr, r2_size);
+ memblock_reserve_kern(min_addr, r2_size);
allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr);
@@ -199,7 +199,7 @@ static int alloc_from_top_down_min_addr_cap_check(void)
start_addr = (phys_addr_t)memblock_start_of_DRAM();
min_addr = start_addr - SMP_CACHE_BYTES * 3;
- memblock_reserve(start_addr + r1_size, MEM_SIZE - r1_size);
+ memblock_reserve_kern(start_addr + r1_size, MEM_SIZE - r1_size);
allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr);
diff --git a/tools/testing/memblock/tests/alloc_nid_api.c b/tools/testing/memblock/tests/alloc_nid_api.c
index 49bb416d34ff..562e4701b0e0 100644
--- a/tools/testing/memblock/tests/alloc_nid_api.c
+++ b/tools/testing/memblock/tests/alloc_nid_api.c
@@ -324,7 +324,7 @@ static int alloc_nid_min_reserved_generic_check(void)
min_addr = max_addr - r2_size;
reserved_base = min_addr - r1_size;
- memblock_reserve(reserved_base, r1_size);
+ memblock_reserve_kern(reserved_base, r1_size);
allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES,
min_addr, max_addr,
@@ -374,7 +374,7 @@ static int alloc_nid_max_reserved_generic_check(void)
max_addr = memblock_end_of_DRAM() - r1_size;
min_addr = max_addr - r2_size;
- memblock_reserve(max_addr, r1_size);
+ memblock_reserve_kern(max_addr, r1_size);
allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES,
min_addr, max_addr,
@@ -436,8 +436,8 @@ static int alloc_nid_top_down_reserved_with_space_check(void)
min_addr = r2.base + r2.size;
max_addr = r1.base;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
min_addr, max_addr,
@@ -499,8 +499,8 @@ static int alloc_nid_reserved_full_merge_generic_check(void)
min_addr = r2.base + r2.size;
max_addr = r1.base;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
min_addr, max_addr,
@@ -563,8 +563,8 @@ static int alloc_nid_top_down_reserved_no_space_check(void)
min_addr = r2.base + r2.size;
max_addr = r1.base;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
min_addr, max_addr,
@@ -909,8 +909,8 @@ static int alloc_nid_bottom_up_reserved_with_space_check(void)
min_addr = r2.base + r2.size;
max_addr = r1.base;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
min_addr, max_addr,
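
Every conversion in these memblock tests swaps memblock_reserve() for memblock_reserve_kern(). Assuming the current memblock helpers, the difference is only the ownership flag, a minimal sketch:

	/* untyped reservation */
	memblock_reserve(base, size);

	/* same range, but flagged as kernel memory (MEMBLOCK_RSRV_KERN),
	 * matching how the kernel now accounts its own reservations */
	memblock_reserve_kern(base, size);
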
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index bc30050227fd..2c0b38301253 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -35475,15 +35475,65 @@ static void check_dfs_preorder(struct maple_tree *mt)
}
/* End of depth first search tests */
+/* get height of the lowest non-leaf node with free space */
+static unsigned char get_vacant_height(struct ma_wr_state *wr_mas, void *entry)
+{
+ struct ma_state *mas = wr_mas->mas;
+ char vacant_height = 0;
+ enum maple_type type;
+ unsigned long *pivots;
+ unsigned long min = 0;
+ unsigned long max = ULONG_MAX;
+ unsigned char offset;
+
+ /* start traversal */
+ mas_reset(mas);
+ mas_start(mas);
+ if (!xa_is_node(mas_root(mas)))
+ return 0;
+
+ type = mte_node_type(mas->node);
+ wr_mas->type = type;
+ while (!ma_is_leaf(type)) {
+ mas_node_walk(mas, mte_to_node(mas->node), type, &min, &max);
+ offset = mas->offset;
+ mas->end = mas_data_end(mas);
+ pivots = ma_pivots(mte_to_node(mas->node), type);
+
+ if (pivots) {
+ if (offset)
+ min = pivots[mas->offset - 1];
+ if (offset < mas->end)
+ max = pivots[mas->offset];
+ }
+ wr_mas->r_max = offset < mas->end ? pivots[offset] : mas->max;
+
+ /* detect spanning write */
+ if (mas_is_span_wr(wr_mas))
+ break;
+
+ if (mas->end < mt_slot_count(mas->node) - 1)
+ vacant_height = mas->depth + 1;
+
+ mas_descend(mas);
+ type = mte_node_type(mas->node);
+ mas->depth++;
+ }
+
+ return vacant_height;
+}
+
/* Preallocation testing */
static noinline void __init check_prealloc(struct maple_tree *mt)
{
unsigned long i, max = 100;
unsigned long allocated;
unsigned char height;
+ unsigned char vacant_height;
struct maple_node *mn;
void *ptr = check_prealloc;
MA_STATE(mas, mt, 10, 20);
+ MA_WR_STATE(wr_mas, &mas, ptr);
mt_set_non_kernel(1000);
for (i = 0; i <= max; i++)
@@ -35494,8 +35544,9 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
MT_BUG_ON(mt, allocated == 0);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
mas_destroy(&mas);
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
@@ -35503,8 +35554,9 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
MT_BUG_ON(mt, allocated == 0);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
mas_destroy(&mas);
allocated = mas_allocated(&mas);
@@ -35514,7 +35566,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
mn->parent = ma_parent_ptr(mn);
@@ -35527,7 +35580,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
@@ -35540,7 +35594,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
mas_push_node(&mas, mn);
@@ -35553,7 +35608,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
@@ -35578,7 +35634,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated != 1 + height * 2);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 2);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
mt_set_non_kernel(1);
@@ -35595,8 +35652,14 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
MT_BUG_ON(mt, allocated == 0);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ /*
+ * vacant height cannot be used to compute the number of nodes needed
+ * as the root contains two entries which means it is on the verge of
+ * insufficiency. The worst case full height of the tree is needed.
+ */
+ MT_BUG_ON(mt, allocated != height * 3 + 1);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
mas_set_range(&mas, 0, 200);
@@ -36248,6 +36311,45 @@ static noinline void __init check_mtree_dup(struct maple_tree *mt)
extern void test_kmem_cache_bulk(void);
+static inline void check_spanning_store_height(struct maple_tree *mt)
+{
+ int index = 0;
+ MA_STATE(mas, mt, 0, 0);
+ mas_lock(&mas);
+ while (mt_height(mt) != 3) {
+ mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL);
+ mas_set(&mas, ++index);
+ }
+ mas_set_range(&mas, 90, 140);
+ mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL);
+ MT_BUG_ON(mt, mas_mt_height(&mas) != 2);
+ mas_unlock(&mas);
+}
+
+/*
+ * Test to check the path of a spanning rebalance which results in
+ * a collapse where the rebalancing of the child node leads to
+ * insufficiency in the parent node.
+ */
+static void check_collapsing_rebalance(struct maple_tree *mt)
+{
+ int i = 0;
+ MA_STATE(mas, mt, ULONG_MAX, ULONG_MAX);
+
+ /* create a height 6 tree */
+ while (mt_height(mt) < 6) {
+ mtree_store_range(mt, i, i + 10, xa_mk_value(i), GFP_KERNEL);
+ i += 9;
+ }
+
+ /* delete all entries one at a time, starting from the right */
+ do {
+ mas_erase(&mas);
+ } while (mas_prev(&mas, 0) != NULL);
+
+ mtree_unlock(mt);
+}
+
/* callback function used for check_nomem_writer_race() */
static void writer2(void *maple_tree)
{
@@ -36415,6 +36517,14 @@ void farmer_tests(void)
mtree_destroy(&tree);
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_spanning_store_height(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_collapsing_rebalance(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
check_null_expand(&tree);
mtree_destroy(&tree);
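
The tightened preallocation bound above can be checked by hand: if the tree has height 4 and get_vacant_height() reports 2 (the lowest non-leaf node with a free slot sits two levels up), a worst-case store can only split the levels below the vacant node's parent, so mas_preallocate() needs 1 + (4 - 2) * 3 = 7 nodes rather than the old worst case of 1 + 4 * 3 = 13.
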
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 1b897152bab6..e01584c2189a 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -21,14 +21,15 @@ TEST_GEN_PROGS += test_zswap
LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h
include ../lib.mk
+include lib/libcgroup.mk
-$(OUTPUT)/test_core: cgroup_util.c
-$(OUTPUT)/test_cpu: cgroup_util.c
-$(OUTPUT)/test_cpuset: cgroup_util.c
-$(OUTPUT)/test_freezer: cgroup_util.c
-$(OUTPUT)/test_hugetlb_memcg: cgroup_util.c
-$(OUTPUT)/test_kill: cgroup_util.c
-$(OUTPUT)/test_kmem: cgroup_util.c
-$(OUTPUT)/test_memcontrol: cgroup_util.c
-$(OUTPUT)/test_pids: cgroup_util.c
-$(OUTPUT)/test_zswap: cgroup_util.c
+$(OUTPUT)/test_core: $(LIBCGROUP_O)
+$(OUTPUT)/test_cpu: $(LIBCGROUP_O)
+$(OUTPUT)/test_cpuset: $(LIBCGROUP_O)
+$(OUTPUT)/test_freezer: $(LIBCGROUP_O)
+$(OUTPUT)/test_hugetlb_memcg: $(LIBCGROUP_O)
+$(OUTPUT)/test_kill: $(LIBCGROUP_O)
+$(OUTPUT)/test_kmem: $(LIBCGROUP_O)
+$(OUTPUT)/test_memcontrol: $(LIBCGROUP_O)
+$(OUTPUT)/test_pids: $(LIBCGROUP_O)
+$(OUTPUT)/test_zswap: $(LIBCGROUP_O)
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c
index 1e2d46636a0c..8832f3d1cb61 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c
@@ -17,10 +17,10 @@
#include <unistd.h>
#include "cgroup_util.h"
-#include "../clone3/clone3_selftests.h"
+#include "../../clone3/clone3_selftests.h"
/* Returns read len on success, or -errno on failure. */
-static ssize_t read_text(const char *path, char *buf, size_t max_len)
+ssize_t read_text(const char *path, char *buf, size_t max_len)
{
ssize_t len;
int fd;
@@ -39,7 +39,7 @@ static ssize_t read_text(const char *path, char *buf, size_t max_len)
}
/* Returns written len on success, or -errno on failure. */
-static ssize_t write_text(const char *path, char *buf, ssize_t len)
+ssize_t write_text(const char *path, char *buf, ssize_t len)
{
int fd;
@@ -217,7 +217,8 @@ int cg_write_numeric(const char *cgroup, const char *control, long value)
return cg_write(cgroup, control, buf);
}
-int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
+static int cg_find_root(char *root, size_t len, const char *controller,
+ bool *nsdelegate)
{
char buf[10 * PAGE_SIZE];
char *fs, *mount, *type, *options;
@@ -236,18 +237,37 @@ int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
options = strtok(NULL, delim);
strtok(NULL, delim);
strtok(NULL, delim);
-
- if (strcmp(type, "cgroup2") == 0) {
- strncpy(root, mount, len);
- if (nsdelegate)
- *nsdelegate = !!strstr(options, "nsdelegate");
- return 0;
+ if (strcmp(type, "cgroup") == 0) {
+ if (!controller || !strstr(options, controller))
+ continue;
+ } else if (strcmp(type, "cgroup2") == 0) {
+ if (controller &&
+ cg_read_strstr(mount, "cgroup.controllers", controller))
+ continue;
+ } else {
+ continue;
}
+ strncpy(root, mount, len);
+
+ if (nsdelegate)
+ *nsdelegate = !!strstr(options, "nsdelegate");
+ return 0;
+
}
return -1;
}
+int cg_find_controller_root(char *root, size_t len, const char *controller)
+{
+ return cg_find_root(root, len, controller, NULL);
+}
+
+int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
+{
+ return cg_find_root(root, len, NULL, nsdelegate);
+}
+
int cg_create(const char *cgroup)
{
return mkdir(cgroup, 0755);
@@ -488,84 +508,6 @@ int cg_run_nowait(const char *cgroup,
return pid;
}
-int get_temp_fd(void)
-{
- return open(".", O_TMPFILE | O_RDWR | O_EXCL);
-}
-
-int alloc_pagecache(int fd, size_t size)
-{
- char buf[PAGE_SIZE];
- struct stat st;
- int i;
-
- if (fstat(fd, &st))
- goto cleanup;
-
- size += st.st_size;
-
- if (ftruncate(fd, size))
- goto cleanup;
-
- for (i = 0; i < size; i += sizeof(buf))
- read(fd, buf, sizeof(buf));
-
- return 0;
-
-cleanup:
- return -1;
-}
-
-int alloc_anon(const char *cgroup, void *arg)
-{
- size_t size = (unsigned long)arg;
- char *buf, *ptr;
-
- buf = malloc(size);
- for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
- *ptr = 0;
-
- free(buf);
- return 0;
-}
-
-int is_swap_enabled(void)
-{
- char buf[PAGE_SIZE];
- const char delim[] = "\n";
- int cnt = 0;
- char *line;
-
- if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
- return -1;
-
- for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
- cnt++;
-
- return cnt > 1;
-}
-
-int set_oom_adj_score(int pid, int score)
-{
- char path[PATH_MAX];
- int fd, len;
-
- sprintf(path, "/proc/%d/oom_score_adj", pid);
-
- fd = open(path, O_WRONLY | O_APPEND);
- if (fd < 0)
- return fd;
-
- len = dprintf(fd, "%d", score);
- if (len < 0) {
- close(fd);
- return len;
- }
-
- close(fd);
- return 0;
-}
-
int proc_mount_contains(const char *option)
{
char buf[4 * PAGE_SIZE];
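
The generalized root lookup now covers both hierarchies: a v1 "cgroup" mount matches when the controller name appears in its mount options, and a "cgroup2" mount matches when the controller is listed in its cgroup.controllers file. A minimal caller sketch (the skip message is illustrative):

	char root[PATH_MAX];

	if (cg_find_controller_root(root, sizeof(root), "memory"))
		ksft_exit_skip("no memory controller mounted\n");
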
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
index 19b131ee7707..adb2bc193183 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
@@ -2,9 +2,9 @@
#include <stdbool.h>
#include <stdlib.h>
-#include "../kselftest.h"
-
+#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
+#endif
#define MB(x) (x << 20)
@@ -21,6 +21,10 @@ static inline int values_close(long a, long b, int err)
return labs(a - b) <= (a + b) / 100 * err;
}
+extern ssize_t read_text(const char *path, char *buf, size_t max_len);
+extern ssize_t write_text(const char *path, char *buf, ssize_t len);
+
+extern int cg_find_controller_root(char *root, size_t len, const char *controller);
extern int cg_find_unified_root(char *root, size_t len, bool *nsdelegate);
extern char *cg_name(const char *root, const char *name);
extern char *cg_name_indexed(const char *root, const char *name, int index);
@@ -49,11 +53,6 @@ extern int cg_enter_current_thread(const char *cgroup);
extern int cg_run_nowait(const char *cgroup,
int (*fn)(const char *cgroup, void *arg),
void *arg);
-extern int get_temp_fd(void);
-extern int alloc_pagecache(int fd, size_t size);
-extern int alloc_anon(const char *cgroup, void *arg);
-extern int is_swap_enabled(void);
-extern int set_oom_adj_score(int pid, int score);
extern int cg_wait_for_proc_count(const char *cgroup, int count);
extern int cg_killall(const char *cgroup);
int proc_mount_contains(const char *option);
diff --git a/tools/testing/selftests/cgroup/lib/libcgroup.mk b/tools/testing/selftests/cgroup/lib/libcgroup.mk
new file mode 100644
index 000000000000..7a73007204c3
--- /dev/null
+++ b/tools/testing/selftests/cgroup/lib/libcgroup.mk
@@ -0,0 +1,19 @@
+CGROUP_DIR := $(selfdir)/cgroup
+
+LIBCGROUP_C := lib/cgroup_util.c
+
+LIBCGROUP_O := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBCGROUP_C))
+
+LIBCGROUP_O_DIRS := $(shell dirname $(LIBCGROUP_O) | uniq)
+
+CFLAGS += -I$(CGROUP_DIR)/lib/include
+
+EXTRA_HDRS := $(selfdir)/clone3/clone3_selftests.h
+
+$(LIBCGROUP_O_DIRS):
+ mkdir -p $@
+
+$(LIBCGROUP_O): $(OUTPUT)/%.o : $(CGROUP_DIR)/%.c $(EXTRA_HDRS) $(LIBCGROUP_O_DIRS)
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+EXTRA_CLEAN += $(LIBCGROUP_O)
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 16f5d74ae762..a680f773f2d5 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -24,6 +24,84 @@
static bool has_localevents;
static bool has_recursiveprot;
+int get_temp_fd(void)
+{
+ return open(".", O_TMPFILE | O_RDWR | O_EXCL);
+}
+
+int alloc_pagecache(int fd, size_t size)
+{
+ char buf[PAGE_SIZE];
+ struct stat st;
+ int i;
+
+ if (fstat(fd, &st))
+ goto cleanup;
+
+ size += st.st_size;
+
+ if (ftruncate(fd, size))
+ goto cleanup;
+
+ for (i = 0; i < size; i += sizeof(buf))
+ read(fd, buf, sizeof(buf));
+
+ return 0;
+
+cleanup:
+ return -1;
+}
+
+int alloc_anon(const char *cgroup, void *arg)
+{
+ size_t size = (unsigned long)arg;
+ char *buf, *ptr;
+
+ buf = malloc(size);
+ for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+ *ptr = 0;
+
+ free(buf);
+ return 0;
+}
+
+int is_swap_enabled(void)
+{
+ char buf[PAGE_SIZE];
+ const char delim[] = "\n";
+ int cnt = 0;
+ char *line;
+
+ if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
+ return -1;
+
+ for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
+ cnt++;
+
+ return cnt > 1;
+}
+
+int set_oom_adj_score(int pid, int score)
+{
+ char path[PATH_MAX];
+ int fd, len;
+
+ sprintf(path, "/proc/%d/oom_score_adj", pid);
+
+ fd = open(path, O_WRONLY | O_APPEND);
+ if (fd < 0)
+ return fd;
+
+ len = dprintf(fd, "%d", score);
+ if (len < 0) {
+ close(fd);
+ return len;
+ }
+
+ close(fd);
+ return 0;
+}
+
/*
* This test creates two nested cgroups with and without enabling
* the memory controller.
@@ -380,10 +458,11 @@ static bool reclaim_until(const char *memcg, long goal);
*
* Then it checks actual memory usages and expects that:
* A/B memory.current ~= 50M
- * A/B/C memory.current ~= 29M
- * A/B/D memory.current ~= 21M
- * A/B/E memory.current ~= 0
- * A/B/F memory.current = 0
+ * A/B/C memory.current ~= 29M [memory.events:low > 0]
+ * A/B/D memory.current ~= 21M [memory.events:low > 0]
+ * A/B/E memory.current ~= 0 [memory.events:low == 0 if !memory_recursiveprot,
+ * undefined otherwise]
+ * A/B/F memory.current = 0 [memory.events:low == 0]
* (for origin of the numbers, see model in memcg_protection.m.)
*
* After that it tries to allocate more than there is
@@ -495,10 +574,10 @@ static int test_memcg_protection(const char *root, bool min)
for (i = 0; i < ARRAY_SIZE(children); i++)
c[i] = cg_read_long(children[i], "memory.current");
- if (!values_close(c[0], MB(29), 10))
+ if (!values_close(c[0], MB(29), 15))
goto cleanup;
- if (!values_close(c[1], MB(21), 10))
+ if (!values_close(c[1], MB(21), 20))
goto cleanup;
if (c[3] != 0)
@@ -525,7 +604,14 @@ static int test_memcg_protection(const char *root, bool min)
goto cleanup;
}
+ /*
+ * Child 2 has memory.low=0, but some low protection may still be
+ * distributed down from its parent with memory.low=50M if cgroup2
+ * memory_recursiveprot mount option is enabled. Ignore the low
+ * event count in this case.
+ */
for (i = 0; i < ARRAY_SIZE(children); i++) {
+ int ignore_low_events_index = has_recursiveprot ? 2 : -1;
int no_low_events_index = 1;
long low, oom;
@@ -534,6 +620,8 @@ static int test_memcg_protection(const char *root, bool min)
if (oom)
goto cleanup;
+ if (i == ignore_low_events_index)
+ continue;
if (i <= no_low_events_index && low <= 0)
goto cleanup;
if (i > no_low_events_index && low)
diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index ecbf07afc6dd..ff21524be458 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -3,7 +3,7 @@
TEST_GEN_FILES += access_memory access_memory_even
-TEST_FILES = _chk_dependency.sh _damon_sysfs.py
+TEST_FILES = _damon_sysfs.py
# functionality tests
TEST_PROGS += sysfs.sh
diff --git a/tools/testing/selftests/damon/_chk_dependency.sh b/tools/testing/selftests/damon/_chk_dependency.sh
deleted file mode 100644
index dda3a87dc00a..000000000000
--- a/tools/testing/selftests/damon/_chk_dependency.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-DBGFS=$(grep debugfs /proc/mounts --max-count 1 | awk '{print $2}')
-if [ "$DBGFS" = "" ]
-then
- echo "debugfs not mounted"
- exit $ksft_skip
-fi
-
-DBGFS+="/damon"
-
-if [ $EUID -ne 0 ];
-then
- echo "Run as root"
- exit $ksft_skip
-fi
-
-if [ ! -d "$DBGFS" ]
-then
- echo "$DBGFS not found"
- exit $ksft_skip
-fi
-
-if [ -f "$DBGFS/monitor_on_DEPRECATED" ]
-then
- monitor_on_file="monitor_on_DEPRECATED"
-else
- monitor_on_file="monitor_on"
-fi
-
-for f in attrs target_ids "$monitor_on_file"
-do
- if [ ! -f "$DBGFS/$f" ]
- then
- echo "$f not found"
- exit 1
- fi
-done
-
-permission_error="Operation not permitted"
-for f in attrs target_ids "$monitor_on_file"
-do
- status=$( cat "$DBGFS/$f" 2>&1 )
- if [ "${status#*$permission_error}" != "$status" ]; then
- echo "Permission for reading $DBGFS/$f denied; maybe secureboot enabled?"
- exit $ksft_skip
- fi
-done
diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py
index 6e136dc3df19..5b1cb6b3ce4e 100644
--- a/tools/testing/selftests/damon/_damon_sysfs.py
+++ b/tools/testing/selftests/damon/_damon_sysfs.py
@@ -15,6 +15,10 @@ if sysfs_root is None:
print('Seems sysfs not mounted?')
exit(ksft_skip)
+if not os.path.exists(sysfs_root):
+ print('Seems DAMON disabled?')
+ exit(ksft_skip)
+
def write_file(path, string):
"Returns error string if failed, or None otherwise"
string = '%s' % string
@@ -420,11 +424,16 @@ class Kdamond:
tried_regions = []
tried_regions_dir = os.path.join(
scheme.sysfs_dir(), 'tried_regions')
+ region_indices = []
for filename in os.listdir(
os.path.join(scheme.sysfs_dir(), 'tried_regions')):
tried_region_dir = os.path.join(tried_regions_dir, filename)
if not os.path.isdir(tried_region_dir):
continue
+ region_indices.append(int(filename))
+ for region_idx in sorted(region_indices):
+ tried_region_dir = os.path.join(tried_regions_dir,
+ '%d' % region_idx)
region_values = []
for f in ['start', 'end', 'nr_accesses', 'age']:
content, err = read_file(
diff --git a/tools/testing/selftests/damon/_debugfs_common.sh b/tools/testing/selftests/damon/_debugfs_common.sh
deleted file mode 100644
index 54d45791b0d9..000000000000
--- a/tools/testing/selftests/damon/_debugfs_common.sh
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-test_write_result() {
- file=$1
- content=$2
- orig_content=$3
- expect_reason=$4
- expected=$5
-
- if [ "$expected" = "0" ]
- then
- echo "$content" > "$file"
- else
- echo "$content" > "$file" 2> /dev/null
- fi
- if [ $? -ne "$expected" ]
- then
- echo "writing $content to $file doesn't return $expected"
- echo "expected because: $expect_reason"
- echo "$orig_content" > "$file"
- exit 1
- fi
-}
-
-test_write_succ() {
- test_write_result "$1" "$2" "$3" "$4" 0
-}
-
-test_write_fail() {
- test_write_result "$1" "$2" "$3" "$4" 1
-}
-
-test_content() {
- file=$1
- orig_content=$2
- expected=$3
- expect_reason=$4
-
- content=$(cat "$file")
- if [ "$content" != "$expected" ]
- then
- echo "reading $file expected $expected but $content"
- echo "expected because: $expect_reason"
- echo "$orig_content" > "$file"
- exit 1
- fi
-}
-
-source ./_chk_dependency.sh
-
-damon_onoff="$DBGFS/monitor_on"
-if [ -f "$DBGFS/monitor_on_DEPRECATED" ]
-then
- damon_onoff="$DBGFS/monitor_on_DEPRECATED"
-else
- damon_onoff="$DBGFS/monitor_on"
-fi
-
-if [ $(cat "$damon_onoff") = "on" ]
-then
- echo "monitoring is on"
- exit $ksft_skip
-fi
diff --git a/tools/testing/selftests/filesystems/anon_inode_test.c b/tools/testing/selftests/filesystems/anon_inode_test.c
index e8e0ef1460d2..73e0a4d4fb2f 100644
--- a/tools/testing/selftests/filesystems/anon_inode_test.c
+++ b/tools/testing/selftests/filesystems/anon_inode_test.c
@@ -7,7 +7,7 @@
#include <sys/stat.h>
#include "../kselftest_harness.h"
-#include "overlayfs/wrappers.h"
+#include "wrappers.h"
TEST(anon_inode_no_chown)
{
diff --git a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
index 85acb4e3ef00..72d51ad0ee0e 100644
--- a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
+++ b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
@@ -50,7 +50,7 @@ TEST(eventfd_check_flag_rdwr)
ASSERT_GE(fd, 0);
flags = fcntl(fd, F_GETFL);
- // since the kernel automatically added O_RDWR.
+ // The kernel automatically adds the O_RDWR flag.
EXPECT_EQ(flags, O_RDWR);
close(fd);
@@ -85,7 +85,7 @@ TEST(eventfd_check_flag_nonblock)
close(fd);
}
-TEST(eventfd_chek_flag_cloexec_and_nonblock)
+TEST(eventfd_check_flag_cloexec_and_nonblock)
{
int fd, flags;
@@ -178,8 +178,7 @@ TEST(eventfd_check_flag_semaphore)
// The semaphore could only be obtained from fdinfo.
ret = verify_fdinfo(fd, &err, "eventfd-semaphore: ", 19, "1\n");
if (ret != 0)
- ksft_print_msg("eventfd-semaphore check failed, msg: %s\n",
- err.msg);
+ ksft_print_msg("eventfd semaphore flag check failed: %s\n", err.msg);
EXPECT_EQ(ret, 0);
close(fd);
diff --git a/tools/testing/selftests/filesystems/file_stressor.c b/tools/testing/selftests/filesystems/file_stressor.c
index 1136f93a9977..01dd89f8e52f 100644
--- a/tools/testing/selftests/filesystems/file_stressor.c
+++ b/tools/testing/selftests/filesystems/file_stressor.c
@@ -156,7 +156,7 @@ TEST_F_TIMEOUT(file_stressor, slab_typesafe_by_rcu, 900 * 2)
ssize_t nr_read;
/*
- * Concurrently read /proc/<pid>/fd/ which rougly does:
+ * Concurrently read /proc/<pid>/fd/ which roughly does:
*
* f = fget_task_next(p, &fd);
* if (!f)
diff --git a/tools/testing/selftests/kmod/config b/tools/testing/selftests/kmod/config
index 259f4fd6b5e2..1f1e63494af9 100644
--- a/tools/testing/selftests/kmod/config
+++ b/tools/testing/selftests/kmod/config
@@ -1,7 +1,2 @@
CONFIG_TEST_KMOD=m
CONFIG_TEST_LKM=m
-CONFIG_XFS_FS=m
-
-# For the module parameter force_init_test is used
-CONFIG_TUN=m
-CONFIG_BTRFS_FS=m
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 3e786080473d..38b95998e1e6 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -8,6 +8,7 @@ LIBKVM += lib/elf.c
LIBKVM += lib/guest_modes.c
LIBKVM += lib/io.c
LIBKVM += lib/kvm_util.c
+LIBKVM += lib/lru_gen_util.c
LIBKVM += lib/memstress.c
LIBKVM += lib/guest_sprintf.c
LIBKVM += lib/rbtree.c
@@ -70,6 +71,7 @@ TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
TEST_GEN_PROGS_x86 += x86/feature_msrs_test
TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test
+TEST_GEN_PROGS_x86 += x86/fastops_test
TEST_GEN_PROGS_x86 += x86/fix_hypercall_test
TEST_GEN_PROGS_x86 += x86/hwcr_msr_test
TEST_GEN_PROGS_x86 += x86/hyperv_clock
@@ -82,6 +84,7 @@ TEST_GEN_PROGS_x86 += x86/hyperv_svm_test
TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush
TEST_GEN_PROGS_x86 += x86/kvm_clock_test
TEST_GEN_PROGS_x86 += x86/kvm_pv_test
+TEST_GEN_PROGS_x86 += x86/kvm_buslock_test
TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
TEST_GEN_PROGS_x86 += x86/nested_emulation_test
TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
@@ -222,6 +225,7 @@ OVERRIDE_TARGETS = 1
# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`,
# which causes the environment variable to override the makefile).
include ../lib.mk
+include ../cgroup/lib/libcgroup.mk
INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
@@ -275,7 +279,7 @@ LIBKVM_S := $(filter %.S,$(LIBKVM))
LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
-LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) $(LIBCGROUP_O)
SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH)/%.o, $(SPLIT_TESTS))
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 447e619cf856..da7196fd1b23 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -7,9 +7,11 @@
* This test measures the performance effects of KVM's access tracking.
* Access tracking is driven by the MMU notifiers test_young, clear_young, and
* clear_flush_young. These notifiers do not have a direct userspace API,
- * however the clear_young notifier can be triggered by marking a pages as idle
- * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to
- * enable access tracking on guest memory.
+ * however the clear_young notifier can be triggered either by
+ * 1. marking pages as idle in /sys/kernel/mm/page_idle/bitmap, OR
+ * 2. adding a new MGLRU generation using the lru_gen debugfs file.
+ * This test leverages page_idle to enable access tracking on guest memory
+ * unless MGLRU is enabled, in which case MGLRU is used.
*
* To measure performance this test runs a VM with a configurable number of
* vCPUs that each touch every page in disjoint regions of memory. Performance
@@ -17,10 +19,11 @@
* predefined region.
*
* Note that a deterministic correctness test of access tracking is not possible
- * by using page_idle as it exists today. This is for a few reasons:
+ * by using page_idle or MGLRU aging as it exists today. This is for a few
+ * reasons:
*
- * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This
- * means subsequent guest accesses are not guaranteed to see page table
+ * 1. page_idle and MGLRU only issue clear_young notifiers, which lack a TLB flush.
+ * This means subsequent guest accesses are not guaranteed to see page table
* updates made by KVM until some time in the future.
*
* 2. page_idle only operates on LRU pages. Newly allocated pages are not
@@ -48,9 +51,17 @@
#include "guest_modes.h"
#include "processor.h"
+#include "cgroup_util.h"
+#include "lru_gen_util.h"
+
+static const char *TEST_MEMCG_NAME = "access_tracking_perf_test";
+
/* Global variable used to synchronize all of the vCPU threads. */
static int iteration;
+/* The cgroup memory controller root. Needed for lru_gen-based aging. */
+char cgroup_root[PATH_MAX];
+
/* Defines what vCPU threads should do during a given iteration. */
static enum {
/* Run the vCPU to access all its memory. */
@@ -65,6 +76,25 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
/* Whether to overlap the regions of memory vCPUs access. */
static bool overlap_memory_access;
+/*
+ * Whether the test should only warn if there are too many idle pages (i.e.,
+ * when that is expected).
+ * -1: Not yet set.
+ * 0: We do not expect too many idle pages, so FAIL if too many are found.
+ * 1: Having too many idle pages is expected, so merely print a warning if
+ * too many idle pages are found.
+ */
+static int idle_pages_warn_only = -1;
+
+/* Whether or not to use MGLRU instead of page_idle for access tracking */
+static bool use_lru_gen;
+
+/* Total number of pages to expect in the memcg after touching everything */
+static long test_pages;
+
+/* Last generation we found the pages in */
+static int lru_gen_last_gen = -1;
+
struct test_params {
/* The backing source for the region of memory. */
enum vm_mem_backing_src_type backing_src;
@@ -123,8 +153,24 @@ static void mark_page_idle(int page_idle_fd, uint64_t pfn)
"Set page_idle bits for PFN 0x%" PRIx64, pfn);
}
-static void mark_vcpu_memory_idle(struct kvm_vm *vm,
- struct memstress_vcpu_args *vcpu_args)
+static void too_many_idle_pages(long idle_pages, long total_pages, int vcpu_idx)
+{
+ char prefix[18] = {};
+
+ if (vcpu_idx >= 0)
+ snprintf(prefix, 18, "vCPU%d: ", vcpu_idx);
+
+ TEST_ASSERT(idle_pages_warn_only,
+ "%sToo many pages still idle (%lu out of %lu)",
+ prefix, idle_pages, total_pages);
+
+ printf("WARNING: %sToo many pages still idle (%lu out of %lu), "
+ "this will affect performance results.\n",
+ prefix, idle_pages, total_pages);
+}
+
+static void pageidle_mark_vcpu_memory_idle(struct kvm_vm *vm,
+ struct memstress_vcpu_args *vcpu_args)
{
int vcpu_idx = vcpu_args->vcpu_idx;
uint64_t base_gva = vcpu_args->gva;
@@ -177,27 +223,79 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm,
* arbitrary; high enough that we ensure most memory access went through
* access tracking but low enough as to not make the test too brittle
* over time and across architectures.
- *
- * When running the guest as a nested VM, "warn" instead of asserting
- * as the TLB size is effectively unlimited and the KVM doesn't
- * explicitly flush the TLB when aging SPTEs. As a result, more pages
- * are cached and the guest won't see the "idle" bit cleared.
*/
- if (still_idle >= pages / 10) {
-#ifdef __x86_64__
- TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR),
- "vCPU%d: Too many pages still idle (%lu out of %lu)",
- vcpu_idx, still_idle, pages);
-#endif
- printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), "
- "this will affect performance results.\n",
- vcpu_idx, still_idle, pages);
- }
+ if (still_idle >= pages / 10)
+ too_many_idle_pages(still_idle, pages,
+ overlap_memory_access ? -1 : vcpu_idx);
close(page_idle_fd);
close(pagemap_fd);
}
+int find_generation(struct memcg_stats *stats, long total_pages)
+{
+ /*
+ * For finding the generation that contains our pages, use the same
+ * 90% threshold that page_idle uses.
+ */
+ int gen = lru_gen_find_generation(stats, total_pages * 9 / 10);
+
+ if (gen >= 0)
+ return gen;
+
+ if (!idle_pages_warn_only) {
+ TEST_FAIL("Could not find a generation with 90%% of guest memory (%ld pages).",
+ total_pages * 9 / 10);
+ return gen;
+ }
+
+ /*
+ * We couldn't find a generation with 90% of guest memory, which can
+ * happen if access tracking is unreliable. Simply look for a majority
+ * of pages.
+ */
+ puts("WARNING: Couldn't find a generation with 90% of guest memory. "
+ "Performance results may not be accurate.");
+ gen = lru_gen_find_generation(stats, total_pages / 2);
+ TEST_ASSERT(gen >= 0,
+ "Could not find a generation with 50%% of guest memory (%ld pages).",
+ total_pages / 2);
+ return gen;
+}
+
+static void lru_gen_mark_memory_idle(struct kvm_vm *vm)
+{
+ struct timespec ts_start;
+ struct timespec ts_elapsed;
+ struct memcg_stats stats;
+ int new_gen;
+
+ /* Make a new generation */
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+ lru_gen_do_aging(&stats, TEST_MEMCG_NAME);
+ ts_elapsed = timespec_elapsed(ts_start);
+
+ /* Check the generation again */
+ new_gen = find_generation(&stats, test_pages);
+
+ /*
+ * This function should only be invoked with newly-accessed pages,
+ * so pages should always move to a newer generation.
+ */
+ if (new_gen <= lru_gen_last_gen) {
+ /* We did not move to a newer generation. */
+ long idle_pages = lru_gen_sum_memcg_stats_for_gen(lru_gen_last_gen,
+ &stats);
+
+ too_many_idle_pages(min_t(long, idle_pages, test_pages),
+ test_pages, -1);
+ }
+ pr_info("%-30s: %ld.%09lds\n",
+ "Mark memory idle (lru_gen)", ts_elapsed.tv_sec,
+ ts_elapsed.tv_nsec);
+ lru_gen_last_gen = new_gen;
+}
+
static void assert_ucall(struct kvm_vcpu *vcpu, uint64_t expected_ucall)
{
struct ucall uc;
@@ -237,7 +335,7 @@ static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args)
assert_ucall(vcpu, UCALL_SYNC);
break;
case ITERATION_MARK_IDLE:
- mark_vcpu_memory_idle(vm, vcpu_args);
+ pageidle_mark_vcpu_memory_idle(vm, vcpu_args);
break;
}
@@ -289,15 +387,18 @@ static void access_memory(struct kvm_vm *vm, int nr_vcpus,
static void mark_memory_idle(struct kvm_vm *vm, int nr_vcpus)
{
+ if (use_lru_gen)
+ return lru_gen_mark_memory_idle(vm);
+
/*
* Even though this parallelizes the work across vCPUs, this is still a
* very slow operation because page_idle forces the test to mark one pfn
- * at a time and the clear_young notifier serializes on the KVM MMU
+ * at a time and the clear_young notifier may serialize on the KVM MMU
* lock.
*/
pr_debug("Marking VM memory idle (slow)...\n");
iteration_work = ITERATION_MARK_IDLE;
- run_iteration(vm, nr_vcpus, "Mark memory idle");
+ run_iteration(vm, nr_vcpus, "Mark memory idle (page_idle)");
}
static void run_test(enum vm_guest_mode mode, void *arg)
@@ -309,11 +410,38 @@ static void run_test(enum vm_guest_mode mode, void *arg)
vm = memstress_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1,
params->backing_src, !overlap_memory_access);
+ /*
+ * If guest_page_size is larger than the host's page size, the
+ * guest (memstress) will only fault in a subset of the host's pages.
+ */
+ test_pages = params->nr_vcpus * params->vcpu_memory_bytes /
+ max(memstress_args.guest_page_size,
+ (uint64_t)getpagesize());
+
memstress_start_vcpu_threads(nr_vcpus, vcpu_thread_main);
pr_info("\n");
access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory");
+ if (use_lru_gen) {
+ struct memcg_stats stats;
+
+ /*
+ * Do a page table scan now. Following initial population, aging
+ * may not cause the pages to move to a newer generation. Do
+ * an aging pass now so that future aging passes always move
+ * pages to a newer generation.
+ */
+ printf("Initial aging pass (lru_gen)\n");
+ lru_gen_do_aging(&stats, TEST_MEMCG_NAME);
+ TEST_ASSERT(lru_gen_sum_memcg_stats(&stats) >= test_pages,
+ "Not all pages accounted for (looking for %ld). "
+ "Was the memcg set up correctly?", test_pages);
+ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Re-populating memory");
+ lru_gen_read_memcg_stats(&stats, TEST_MEMCG_NAME);
+ lru_gen_last_gen = find_generation(&stats, test_pages);
+ }
+
/* As a control, read and write to the populated memory first. */
access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to populated memory");
access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from populated memory");
@@ -328,6 +456,37 @@ static void run_test(enum vm_guest_mode mode, void *arg)
memstress_destroy_vm(vm);
}
+static int access_tracking_unreliable(void)
+{
+#ifdef __x86_64__
+ /*
+ * When running nested, the TLB size may be effectively unlimited (for
+ * example, this is the case when running on KVM L0), and KVM doesn't
+ * explicitly flush the TLB when aging SPTEs. As a result, more pages
+ * are cached and the guest won't see the "idle" bit cleared.
+ */
+ if (this_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ puts("Skipping idle page count sanity check, because the test is run nested");
+ return 1;
+ }
+#endif
+ /*
+ * When NUMA balancing is enabled, guest memory will be unmapped to get
+ * NUMA faults, dropping the Accessed bits.
+ */
+ if (is_numa_balancing_enabled()) {
+ puts("Skipping idle page count sanity check, because NUMA balancing is enabled");
+ return 1;
+ }
+ return 0;
+}
+
+static int run_test_for_each_guest_mode(const char *cgroup, void *arg)
+{
+ for_each_guest_mode(run_test, arg);
+ return 0;
+}
+
static void help(char *name)
{
puts("");
@@ -342,11 +501,22 @@ static void help(char *name)
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
+ printf(" -w: Control whether the test warns or fails if more than 10%%\n"
+ " of pages are still seen as idle/old after accessing guest\n"
+ " memory. >0 == warn only, 0 == fail, <0 == auto. For auto\n"
+ " mode, the test fails by default, but switches to warn only\n"
+ " if NUMA balancing is enabled or the test detects it's running\n"
+ " in a VM.\n");
backing_src_help("-s");
puts("");
exit(0);
}
+void destroy_cgroup(char *cg)
+{
+ printf("Destroying cgroup: %s\n", cg);
+}
+
int main(int argc, char *argv[])
{
struct test_params params = {
@@ -354,12 +524,13 @@ int main(int argc, char *argv[])
.vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
.nr_vcpus = 1,
};
+ char *new_cg = NULL;
int page_idle_fd;
int opt;
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
+ while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
@@ -376,6 +547,11 @@ int main(int argc, char *argv[])
case 's':
params.backing_src = parse_backing_src_type(optarg);
break;
+ case 'w':
+ idle_pages_warn_only =
+ atoi_non_negative("Idle pages warning",
+ optarg);
+ break;
case 'h':
default:
help(argv[0]);
@@ -383,12 +559,53 @@ int main(int argc, char *argv[])
}
}
- page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
- __TEST_REQUIRE(page_idle_fd >= 0,
- "CONFIG_IDLE_PAGE_TRACKING is not enabled");
- close(page_idle_fd);
+ if (idle_pages_warn_only == -1)
+ idle_pages_warn_only = access_tracking_unreliable();
+
+ if (lru_gen_usable()) {
+ bool cg_created = true;
+ int ret;
- for_each_guest_mode(run_test, &params);
+ puts("Using lru_gen for aging");
+ use_lru_gen = true;
+
+ if (cg_find_controller_root(cgroup_root, sizeof(cgroup_root), "memory"))
+ ksft_exit_skip("Cannot find memory cgroup controller\n");
+
+ new_cg = cg_name(cgroup_root, TEST_MEMCG_NAME);
+ printf("Creating cgroup: %s\n", new_cg);
+ if (cg_create(new_cg)) {
+ if (errno == EEXIST) {
+ printf("Found existing cgroup");
+ cg_created = false;
+ } else {
+ ksft_exit_skip("could not create new cgroup: %s\n", new_cg);
+ }
+ }
+
+ /*
+ * This will fork off a new process to run the test within
+ * a new memcg, so we need to properly propagate the return
+ * value up.
+ */
+ ret = cg_run(new_cg, &run_test_for_each_guest_mode, &params);
+ if (cg_created)
+ cg_destroy(new_cg);
+ if (ret < 0)
+ TEST_FAIL("child did not spawn or was abnormally killed");
+ if (ret)
+ return ret;
+ } else {
+ page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
+ __TEST_REQUIRE(page_idle_fd >= 0,
+ "Couldn't open /sys/kernel/mm/page_idle/bitmap. "
+ "Is CONFIG_IDLE_PAGE_TRACKING enabled?");
+
+ close(page_idle_fd);
+
+ puts("Using page_idle for aging");
+ run_test_for_each_guest_mode(NULL, &params);
+ }
return 0;
}
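
With the aging-backend selection in place, an illustrative run (flag values are examples, not defaults) looks like:

	./access_tracking_perf_test -v 8 -s anonymous -w 1

-w 1 downgrades the "too many idle pages" assertion to a warning; with -w unset, the test decides automatically and fails unless it detects nested virtualization or active NUMA balancing. When MGLRU is usable, the run is forked into a fresh "access_tracking_perf_test" memcg and aged through the lru_gen debugfs interface; otherwise it falls back to page_idle.
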
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 93013564428b..bee65ca08721 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -555,6 +555,41 @@ void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
#define vm_get_stat(vm, stat) __get_stat(&(vm)->stats, stat)
#define vcpu_get_stat(vcpu, stat) __get_stat(&(vcpu)->stats, stat)
+static inline bool read_smt_control(char *buf, size_t buf_size)
+{
+ FILE *f = fopen("/sys/devices/system/cpu/smt/control", "r");
+ bool ret;
+
+ if (!f)
+ return false;
+
+ ret = fread(buf, sizeof(*buf), buf_size, f) > 0;
+ fclose(f);
+
+ return ret;
+}
+
+static inline bool is_smt_possible(void)
+{
+ char buf[16];
+
+ if (read_smt_control(buf, sizeof(buf)) &&
+ (!strncmp(buf, "forceoff", 8) || !strncmp(buf, "notsupported", 12)))
+ return false;
+
+ return true;
+}
+
+static inline bool is_smt_on(void)
+{
+ char buf[16];
+
+ if (read_smt_control(buf, sizeof(buf)) && !strncmp(buf, "on", 2))
+ return true;
+
+ return false;
+}
+
void vm_create_irqchip(struct kvm_vm *vm);
static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
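
read_smt_control() reads /sys/devices/system/cpu/smt/control, which reports one of "on", "off", "forceoff", or "notsupported". is_smt_possible() only rules SMT out for the last two values, while is_smt_on() requires an exact "on". A hypothetical guard for a test that needs active sibling threads:

	/* Sketch: skip unless SMT is currently enabled; TEST_REQUIRE is the
	 * usual KVM-selftest skip mechanism. */
	TEST_REQUIRE(is_smt_on());
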
diff --git a/tools/testing/selftests/kvm/include/lru_gen_util.h b/tools/testing/selftests/kvm/include/lru_gen_util.h
new file mode 100644
index 000000000000..d32ff5d8ffd0
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/lru_gen_util.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tools for integrating with lru_gen, like parsing the lru_gen debugfs output.
+ *
+ * Copyright (C) 2025, Google LLC.
+ */
+#ifndef SELFTEST_KVM_LRU_GEN_UTIL_H
+#define SELFTEST_KVM_LRU_GEN_UTIL_H
+
+#include <inttypes.h>
+#include <limits.h>
+#include <stdlib.h>
+
+#include "test_util.h"
+
+#define MAX_NR_GENS 16 /* MAX_NR_GENS in include/linux/mmzone.h */
+#define MAX_NR_NODES 4 /* Maximum number of nodes supported by the test */
+
+#define LRU_GEN_DEBUGFS "/sys/kernel/debug/lru_gen"
+#define LRU_GEN_ENABLED_PATH "/sys/kernel/mm/lru_gen/enabled"
+#define LRU_GEN_ENABLED 1
+#define LRU_GEN_MM_WALK 2
+
+struct generation_stats {
+ int gen;
+ long age_ms;
+ long nr_anon;
+ long nr_file;
+};
+
+struct node_stats {
+ int node;
+ int nr_gens; /* Number of populated gens entries. */
+ struct generation_stats gens[MAX_NR_GENS];
+};
+
+struct memcg_stats {
+ unsigned long memcg_id;
+ int nr_nodes; /* Number of populated nodes entries. */
+ struct node_stats nodes[MAX_NR_NODES];
+};
+
+void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg);
+long lru_gen_sum_memcg_stats(const struct memcg_stats *stats);
+long lru_gen_sum_memcg_stats_for_gen(int gen, const struct memcg_stats *stats);
+void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg);
+int lru_gen_find_generation(const struct memcg_stats *stats,
+ unsigned long total_pages);
+bool lru_gen_usable(void);
+
+#endif /* SELFTEST_KVM_LRU_GEN_UTIL_H */
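
A minimal consumer sketch, modeled on how access_tracking_perf_test drives these helpers (the memcg name and page count are illustrative):

	struct memcg_stats stats;
	long expected_pages = 512;	/* illustrative */
	int gen;

	if (!lru_gen_usable())
		return;

	/* Add a new generation and re-read the memcg's lru_gen stats. */
	lru_gen_do_aging(&stats, "my_memcg");

	/* Find the generation holding ~90% of the expected pages. */
	gen = lru_gen_find_generation(&stats, expected_pages * 9 / 10);
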
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 77d13d7920cb..c6ef895fbd9a 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -153,6 +153,7 @@ bool is_backing_src_hugetlb(uint32_t i);
void backing_src_help(const char *flag);
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
long get_run_delay(void);
+bool is_numa_balancing_enabled(void);
/*
* Whether or not the given source type is shared memory (as opposed to
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index 32ab6ca7ec32..b11b5a53ebd5 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -203,6 +203,7 @@ struct kvm_x86_cpu_feature {
#define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30)
#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
+#define X86_FEATURE_SEV_SNP KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 4)
#define X86_FEATURE_PERFMON_V2 KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0)
#define X86_FEATURE_LBR_PMC_FREEZE KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2)
diff --git a/tools/testing/selftests/kvm/include/x86/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h
index 82c11c81a956..008b4169f5e2 100644
--- a/tools/testing/selftests/kvm/include/x86/sev.h
+++ b/tools/testing/selftests/kvm/include/x86/sev.h
@@ -25,19 +25,51 @@ enum sev_guest_state {
#define SEV_POLICY_NO_DBG (1UL << 0)
#define SEV_POLICY_ES (1UL << 2)
+#define SNP_POLICY_SMT (1ULL << 16)
+#define SNP_POLICY_RSVD_MBO (1ULL << 17)
+#define SNP_POLICY_DBG (1ULL << 19)
+
#define GHCB_MSR_TERM_REQ 0x100
+static inline bool is_sev_snp_vm(struct kvm_vm *vm)
+{
+ return vm->type == KVM_X86_SNP_VM;
+}
+
+static inline bool is_sev_es_vm(struct kvm_vm *vm)
+{
+ return is_sev_snp_vm(vm) || vm->type == KVM_X86_SEV_ES_VM;
+}
+
+static inline bool is_sev_vm(struct kvm_vm *vm)
+{
+ return is_sev_es_vm(vm) || vm->type == KVM_X86_SEV_VM;
+}
+
void sev_vm_launch(struct kvm_vm *vm, uint32_t policy);
void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement);
void sev_vm_launch_finish(struct kvm_vm *vm);
+void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy);
+void snp_vm_launch_update(struct kvm_vm *vm);
+void snp_vm_launch_finish(struct kvm_vm *vm);
struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
struct kvm_vcpu **cpu);
-void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement);
+void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement);
kvm_static_assert(SEV_RET_SUCCESS == 0);
/*
+ * A SEV-SNP VM requires the policy reserved bit to always be set.
+ * The SMT policy bit is also required to be set based on SMT being
+ * available and active on the system.
+ */
+static inline u64 snp_default_policy(void)
+{
+ return SNP_POLICY_RSVD_MBO | (is_smt_on() ? SNP_POLICY_SMT : 0);
+}
+
+/*
* The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long"
* instead of a proper struct. The size of the parameter is embedded in the
* ioctl number, i.e. is ABI and thus immutable. Hack around the mess by
@@ -70,6 +102,12 @@ kvm_static_assert(SEV_RET_SUCCESS == 0);
void sev_vm_init(struct kvm_vm *vm);
void sev_es_vm_init(struct kvm_vm *vm);
+void snp_vm_init(struct kvm_vm *vm);
+
+static inline void vmgexit(void)
+{
+ __asm__ __volatile__("rep; vmmcall");
+}
static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
struct userspace_mem_region *region)
@@ -93,4 +131,17 @@ static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data);
}
+static inline void snp_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
+ uint64_t hva, uint64_t size, uint8_t type)
+{
+ struct kvm_sev_snp_launch_update update_data = {
+ .uaddr = hva,
+ .gfn_start = gpa >> PAGE_SHIFT,
+ .len = size,
+ .type = type,
+ };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_UPDATE, &update_data);
+}
+
#endif /* SELFTEST_KVM_SEV_H */
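For illustration, a minimal standalone sketch of how the SNP policy bits above compose; example_snp_policy() and its parameters are hypothetical, only the SNP_POLICY_* values come from this header:

#include <stdbool.h>
#include <stdint.h>

#define SNP_POLICY_SMT      (1ULL << 16)
#define SNP_POLICY_RSVD_MBO (1ULL << 17)
#define SNP_POLICY_DBG      (1ULL << 19)

/* Mirrors snp_default_policy(): RSVD_MBO is mandatory, SMT follows the host. */
static uint64_t example_snp_policy(bool host_smt_on, bool allow_debug)
{
	uint64_t policy = SNP_POLICY_RSVD_MBO;

	if (host_smt_on)
		policy |= SNP_POLICY_SMT;
	if (allow_debug)
		policy |= SNP_POLICY_DBG;
	return policy;
}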
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 5649cf2f40e8..a055343a7bf7 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -447,6 +447,15 @@ void kvm_set_files_rlimit(uint32_t nr_vcpus)
}
+static bool is_guest_memfd_required(struct vm_shape shape)
+{
+#ifdef __x86_64__
+ return shape.type == KVM_X86_SNP_VM;
+#else
+ return false;
+#endif
+}
+
struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
uint64_t nr_extra_pages)
{
@@ -454,7 +463,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
nr_extra_pages);
struct userspace_mem_region *slot0;
struct kvm_vm *vm;
- int i;
+ int i, flags;
kvm_set_files_rlimit(nr_runnable_vcpus);
@@ -463,7 +472,15 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
vm = ____vm_create(shape);
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0);
+ /*
+ * Force GUEST_MEMFD for the primary memory region if necessary, e.g.
+ * for CoCo VMs that require GUEST_MEMFD backed private memory.
+ */
+ flags = 0;
+ if (is_guest_memfd_required(shape))
+ flags |= KVM_MEM_GUEST_MEMFD;
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, flags);
for (i = 0; i < NR_MEM_REGIONS; i++)
vm->memslots[i] = 0;
diff --git a/tools/testing/selftests/kvm/lib/lru_gen_util.c b/tools/testing/selftests/kvm/lib/lru_gen_util.c
new file mode 100644
index 000000000000..46a14fd63d9e
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/lru_gen_util.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ */
+
+#include <time.h>
+
+#include "lru_gen_util.h"
+
+/*
+ * Tracks state while we parse memcg lru_gen stats. The file we're parsing is
+ * structured like this (some extra whitespace elided):
+ *
+ * memcg (id) (path)
+ * node (id)
+ * (gen_nr) (age_in_ms) (nr_anon_pages) (nr_file_pages)
+ */
+struct memcg_stats_parse_context {
+ bool consumed; /* Whether or not this line was consumed */
+ /* Next parse handler to invoke */
+ void (*next_handler)(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+ int current_node_idx; /* Current index in nodes array */
+ const char *name; /* The name of the memcg we're looking for */
+};
+
+static void memcg_stats_handle_searching(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+static void memcg_stats_handle_in_memcg(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+static void memcg_stats_handle_in_node(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+
+struct split_iterator {
+ char *str;
+ char *save;
+};
+
+static char *split_next(struct split_iterator *it)
+{
+ char *ret = strtok_r(it->str, " \t\n\r", &it->save);
+
+ it->str = NULL;
+ return ret;
+}
+
+static void memcg_stats_handle_searching(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ struct split_iterator it = { .str = line };
+ char *prefix = split_next(&it);
+ char *memcg_id = split_next(&it);
+ char *memcg_name = split_next(&it);
+ char *end;
+
+ ctx->consumed = true;
+
+ if (!prefix || strcmp("memcg", prefix))
+ return; /* Not a memcg line (maybe empty), skip */
+
+ TEST_ASSERT(memcg_id && memcg_name,
+ "malformed memcg line; no memcg id or memcg_name");
+
+ if (strcmp(memcg_name + 1, ctx->name))
+ return; /* Wrong memcg, skip */
+
+ /* Found it! */
+
+ stats->memcg_id = strtoul(memcg_id, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed memcg id '%s'", memcg_id);
+ if (!stats->memcg_id)
+ return; /* Removed memcg? */
+
+ ctx->next_handler = memcg_stats_handle_in_memcg;
+}
+
+static void memcg_stats_handle_in_memcg(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ struct split_iterator it = { .str = line };
+ char *prefix = split_next(&it);
+ char *id = split_next(&it);
+ long found_node_id;
+ char *end;
+
+ ctx->consumed = true;
+ ctx->current_node_idx = -1;
+
+ if (!prefix)
+ return; /* Skip empty lines */
+
+ if (!strcmp("memcg", prefix)) {
+ /* Memcg done, found next one; stop. */
+ ctx->next_handler = NULL;
+ return;
+ } else if (strcmp("node", prefix))
+		TEST_ASSERT(false, "found malformed line after 'memcg ...', "
+			    "token: '%s'", prefix);
+
+ /* At this point we know we have a node line. Parse the ID. */
+
+ TEST_ASSERT(id, "malformed node line; no node id");
+
+ found_node_id = strtol(id, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed node id '%s'", id);
+
+ ctx->current_node_idx = stats->nr_nodes++;
+ TEST_ASSERT(ctx->current_node_idx < MAX_NR_NODES,
+ "memcg has stats for too many nodes, max is %d",
+ MAX_NR_NODES);
+ stats->nodes[ctx->current_node_idx].node = found_node_id;
+
+ ctx->next_handler = memcg_stats_handle_in_node;
+}
+
+static void memcg_stats_handle_in_node(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ char *my_line = strdup(line);
+ struct split_iterator it = { .str = my_line };
+ char *gen, *age, *nr_anon, *nr_file;
+ struct node_stats *node_stats;
+ struct generation_stats *gen_stats;
+ char *end;
+
+ TEST_ASSERT(it.str, "failed to copy input line");
+
+ gen = split_next(&it);
+
+ if (!gen)
+ goto out_consume; /* Skip empty lines */
+
+ if (!strcmp("memcg", gen) || !strcmp("node", gen)) {
+ /*
+ * Reached next memcg or node section. Don't consume, let the
+ * other handler deal with this.
+ */
+ ctx->next_handler = memcg_stats_handle_in_memcg;
+ goto out;
+ }
+
+ node_stats = &stats->nodes[ctx->current_node_idx];
+ TEST_ASSERT(node_stats->nr_gens < MAX_NR_GENS,
+ "found too many generation lines; max is %d",
+ MAX_NR_GENS);
+ gen_stats = &node_stats->gens[node_stats->nr_gens++];
+
+ age = split_next(&it);
+ nr_anon = split_next(&it);
+ nr_file = split_next(&it);
+
+ TEST_ASSERT(age && nr_anon && nr_file,
+ "malformed generation line; not enough tokens");
+
+ gen_stats->gen = (int)strtol(gen, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed generation number '%s'", gen);
+
+ gen_stats->age_ms = strtol(age, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed generation age '%s'", age);
+
+ gen_stats->nr_anon = strtol(nr_anon, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed anonymous page count '%s'",
+ nr_anon);
+
+ gen_stats->nr_file = strtol(nr_file, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed file page count '%s'", nr_file);
+
+out_consume:
+ ctx->consumed = true;
+out:
+ free(my_line);
+}
+
+static void print_memcg_stats(const struct memcg_stats *stats, const char *name)
+{
+ int node, gen;
+
+ pr_debug("stats for memcg %s (id %lu):\n", name, stats->memcg_id);
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ pr_debug("\tnode %d\n", stats->nodes[node].node);
+ for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) {
+ const struct generation_stats *gstats =
+ &stats->nodes[node].gens[gen];
+
+ pr_debug("\t\tgen %d\tage_ms %ld"
+ "\tnr_anon %ld\tnr_file %ld\n",
+ gstats->gen, gstats->age_ms, gstats->nr_anon,
+ gstats->nr_file);
+ }
+ }
+}
+
+/* Re-read lru_gen debugfs information for @memcg into @stats. */
+void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg)
+{
+ FILE *f;
+ ssize_t read = 0;
+ char *line = NULL;
+ size_t bufsz;
+ struct memcg_stats_parse_context ctx = {
+ .next_handler = memcg_stats_handle_searching,
+ .name = memcg,
+ };
+
+ memset(stats, 0, sizeof(struct memcg_stats));
+
+ f = fopen(LRU_GEN_DEBUGFS, "r");
+ TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS);
+
+ while (ctx.next_handler && (read = getline(&line, &bufsz, f)) > 0) {
+ ctx.consumed = false;
+
+ do {
+ ctx.next_handler(stats, &ctx, line);
+ if (!ctx.next_handler)
+ break;
+ } while (!ctx.consumed);
+ }
+
+ if (read < 0 && !feof(f))
+ TEST_ASSERT(false, "getline(%s) failed", LRU_GEN_DEBUGFS);
+
+ TEST_ASSERT(stats->memcg_id > 0, "Couldn't find memcg: %s\n"
+ "Did the memcg get created in the proper mount?",
+ memcg);
+ if (line)
+ free(line);
+ TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS);
+
+ print_memcg_stats(stats, memcg);
+}
+
+/*
+ * Find all pages tracked by lru_gen for this memcg in generation @target_gen.
+ *
+ * If @target_gen is negative, look for all generations.
+ */
+long lru_gen_sum_memcg_stats_for_gen(int target_gen,
+ const struct memcg_stats *stats)
+{
+ int node, gen;
+ long total_nr = 0;
+
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ const struct node_stats *node_stats = &stats->nodes[node];
+
+ for (gen = 0; gen < node_stats->nr_gens; ++gen) {
+ const struct generation_stats *gen_stats =
+ &node_stats->gens[gen];
+
+ if (target_gen >= 0 && gen_stats->gen != target_gen)
+ continue;
+
+ total_nr += gen_stats->nr_anon + gen_stats->nr_file;
+ }
+ }
+
+ return total_nr;
+}
+
+/* Find all pages tracked by lru_gen for this memcg. */
+long lru_gen_sum_memcg_stats(const struct memcg_stats *stats)
+{
+ return lru_gen_sum_memcg_stats_for_gen(-1, stats);
+}
+
+/*
+ * Whether lru_gen aging should force page table scanning.
+ *
+ * If this is set to false, eviction must be performed before any
+ * extra aging passes.
+ */
+static const bool force_scan = true;
+
+static void run_aging_impl(unsigned long memcg_id, int node_id, int max_gen)
+{
+ FILE *f = fopen(LRU_GEN_DEBUGFS, "w");
+ char *command;
+	int sz;	/* asprintf() returns int, -1 on failure */
+
+ TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS);
+ sz = asprintf(&command, "+ %lu %d %d 1 %d\n",
+ memcg_id, node_id, max_gen, force_scan);
+ TEST_ASSERT(sz > 0, "creating aging command failed");
+
+ pr_debug("Running aging command: %s", command);
+ if (fwrite(command, sizeof(char), sz, f) < sz) {
+ TEST_ASSERT(false, "writing aging command %s to %s failed",
+ command, LRU_GEN_DEBUGFS);
+ }
+
+ TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS);
+}
+
+void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg)
+{
+ int node, gen;
+
+ pr_debug("lru_gen: invoking aging...\n");
+
+ /* Must read memcg stats to construct the proper aging command. */
+ lru_gen_read_memcg_stats(stats, memcg);
+
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ int max_gen = 0;
+
+ for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) {
+ int this_gen = stats->nodes[node].gens[gen].gen;
+
+ max_gen = max_gen > this_gen ? max_gen : this_gen;
+ }
+
+ run_aging_impl(stats->memcg_id, stats->nodes[node].node,
+ max_gen);
+ }
+
+ /* Re-read so callers get updated information */
+ lru_gen_read_memcg_stats(stats, memcg);
+}
+
+/*
+ * Find the oldest generation that contains more than @pages pages;
+ * returns -1 if no such generation exists.
+ */
+int lru_gen_find_generation(const struct memcg_stats *stats,
+ unsigned long pages)
+{
+ int node, gen, gen_idx, min_gen = INT_MAX, max_gen = -1;
+
+ for (node = 0; node < stats->nr_nodes; ++node)
+ for (gen_idx = 0; gen_idx < stats->nodes[node].nr_gens;
+ ++gen_idx) {
+ gen = stats->nodes[node].gens[gen_idx].gen;
+ max_gen = gen > max_gen ? gen : max_gen;
+ min_gen = gen < min_gen ? gen : min_gen;
+ }
+
+ for (gen = min_gen; gen <= max_gen; ++gen)
+ /* See if this generation has enough pages. */
+ if (lru_gen_sum_memcg_stats_for_gen(gen, stats) > pages)
+ return gen;
+
+ return -1;
+}
+
+bool lru_gen_usable(void)
+{
+ long required_features = LRU_GEN_ENABLED | LRU_GEN_MM_WALK;
+ int lru_gen_fd, lru_gen_debug_fd;
+ char mglru_feature_str[8] = {};
+ long mglru_features;
+
+ lru_gen_fd = open(LRU_GEN_ENABLED_PATH, O_RDONLY);
+ if (lru_gen_fd < 0) {
+ puts("lru_gen: Could not open " LRU_GEN_ENABLED_PATH);
+ return false;
+ }
+ if (read(lru_gen_fd, &mglru_feature_str, 7) < 7) {
+ puts("lru_gen: Could not read from " LRU_GEN_ENABLED_PATH);
+ close(lru_gen_fd);
+ return false;
+ }
+ close(lru_gen_fd);
+
+ mglru_features = strtol(mglru_feature_str, NULL, 16);
+ if ((mglru_features & required_features) != required_features) {
+ printf("lru_gen: missing features, got: 0x%lx, expected: 0x%lx\n",
+ mglru_features, required_features);
+ printf("lru_gen: Try 'echo 0x%lx > /sys/kernel/mm/lru_gen/enabled'\n",
+ required_features);
+ return false;
+ }
+
+ lru_gen_debug_fd = open(LRU_GEN_DEBUGFS, O_RDWR);
+ __TEST_REQUIRE(lru_gen_debug_fd >= 0,
+ "lru_gen: Could not open " LRU_GEN_DEBUGFS ", "
+ "but lru_gen is enabled, so cannot use page_idle.");
+ close(lru_gen_debug_fd);
+ return true;
+}
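A minimal usage sketch of the parser and aging helpers this file adds, assuming the declarations from lru_gen_util.h above; the memcg name "test_memcg" is hypothetical:

/* Sketch: one forced aging pass, then summarize residency. */
static void example_aging_pass(void)
{
	struct memcg_stats stats;
	long total;

	if (!lru_gen_usable())
		return;	/* MGLRU disabled or lru_gen debugfs missing */

	/* Ages every node's max_gen; stats are re-read before returning. */
	lru_gen_do_aging(&stats, "test_memcg");

	total = lru_gen_sum_memcg_stats(&stats);
	pr_debug("memcg holds %ld pages across all generations\n", total);
}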
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 8ed0b74ae837..03eb99af9b8d 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -132,37 +132,57 @@ void print_skip(const char *fmt, ...)
puts(", skipping test");
}
-bool thp_configured(void)
+static bool test_sysfs_path(const char *path)
{
- int ret;
struct stat statbuf;
+ int ret;
- ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf);
+ ret = stat(path, &statbuf);
TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT),
- "Error in stating /sys/kernel/mm/transparent_hugepage");
+ "Error in stat()ing '%s'", path);
return ret == 0;
}
-size_t get_trans_hugepagesz(void)
+bool thp_configured(void)
+{
+ return test_sysfs_path("/sys/kernel/mm/transparent_hugepage");
+}
+
+static size_t get_sysfs_val(const char *path)
{
size_t size;
FILE *f;
int ret;
- TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
-
- f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");
- TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size");
+ f = fopen(path, "r");
+ TEST_ASSERT(f, "Error opening '%s'", path);
ret = fscanf(f, "%ld", &size);
+ TEST_ASSERT(ret > 0, "Error reading '%s'", path);
+
+ /* Re-scan the input stream to verify the entire file was read. */
ret = fscanf(f, "%ld", &size);
- TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size");
- fclose(f);
+ TEST_ASSERT(ret < 1, "Error reading '%s'", path);
+ fclose(f);
return size;
}
+size_t get_trans_hugepagesz(void)
+{
+ TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
+
+ return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size");
+}
+
+bool is_numa_balancing_enabled(void)
+{
+ if (!test_sysfs_path("/proc/sys/kernel/numa_balancing"))
+ return false;
+ return get_sysfs_val("/proc/sys/kernel/numa_balancing") == 1;
+}
+
size_t get_def_hugetlb_pagesz(void)
{
char buf[64];
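As a standalone illustration of what is_numa_balancing_enabled() reads: /proc/sys/kernel/numa_balancing holds a single integer that is 1 when automatic NUMA balancing is active. A self-contained sketch (the helper name here is hypothetical):

#include <stdbool.h>
#include <stdio.h>

static bool numa_balancing_on(void)
{
	FILE *f = fopen("/proc/sys/kernel/numa_balancing", "r");
	int val = 0;

	if (!f)
		return false;	/* kernel built without NUMA balancing */
	if (fscanf(f, "%d", &val) != 1)
		val = 0;
	fclose(f);
	return val == 1;
}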
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index bd5a802fa7a5..a92dc1dad085 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -639,7 +639,7 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm)
sync_global_to_guest(vm, host_cpu_is_amd);
sync_global_to_guest(vm, is_forced_emulation_enabled);
- if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+ if (is_sev_vm(vm)) {
struct kvm_sev_init init = { 0 };
vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
@@ -1156,7 +1156,7 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
void kvm_init_vm_address_properties(struct kvm_vm *vm)
{
- if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+ if (is_sev_vm(vm)) {
vm->arch.sev_fd = open_sev_dev_path_or_exit();
vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
vm->gpa_tag_mask = vm->arch.c_bit;
diff --git a/tools/testing/selftests/kvm/lib/x86/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c
index e9535ee20b7f..c3a9838f4806 100644
--- a/tools/testing/selftests/kvm/lib/x86/sev.c
+++ b/tools/testing/selftests/kvm/lib/x86/sev.c
@@ -14,7 +14,8 @@
* and find the first range, but that's correct because the condition
* expression would cause us to quit the loop.
*/
-static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region)
+static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region,
+ uint8_t page_type, bool private)
{
const struct sparsebit *protected_phy_pages = region->protected_phy_pages;
const vm_paddr_t gpa_base = region->region.guest_phys_addr;
@@ -24,25 +25,35 @@ static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *regio
if (!sparsebit_any_set(protected_phy_pages))
return;
- sev_register_encrypted_memory(vm, region);
+ if (!is_sev_snp_vm(vm))
+ sev_register_encrypted_memory(vm, region);
sparsebit_for_each_set_range(protected_phy_pages, i, j) {
const uint64_t size = (j - i + 1) * vm->page_size;
const uint64_t offset = (i - lowest_page_in_region) * vm->page_size;
- sev_launch_update_data(vm, gpa_base + offset, size);
+ if (private)
+ vm_mem_set_private(vm, gpa_base + offset, size);
+
+ if (is_sev_snp_vm(vm))
+ snp_launch_update_data(vm, gpa_base + offset,
+ (uint64_t)addr_gpa2hva(vm, gpa_base + offset),
+ size, page_type);
+ else
+ sev_launch_update_data(vm, gpa_base + offset, size);
}
}
void sev_vm_init(struct kvm_vm *vm)
{
if (vm->type == KVM_X86_DEFAULT_VM) {
- assert(vm->arch.sev_fd == -1);
+ TEST_ASSERT_EQ(vm->arch.sev_fd, -1);
vm->arch.sev_fd = open_sev_dev_path_or_exit();
vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
} else {
struct kvm_sev_init init = { 0 };
- assert(vm->type == KVM_X86_SEV_VM);
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_VM);
vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
}
}
@@ -50,16 +61,24 @@ void sev_vm_init(struct kvm_vm *vm)
void sev_es_vm_init(struct kvm_vm *vm)
{
if (vm->type == KVM_X86_DEFAULT_VM) {
- assert(vm->arch.sev_fd == -1);
+ TEST_ASSERT_EQ(vm->arch.sev_fd, -1);
vm->arch.sev_fd = open_sev_dev_path_or_exit();
vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
} else {
struct kvm_sev_init init = { 0 };
- assert(vm->type == KVM_X86_SEV_ES_VM);
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_ES_VM);
vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
}
}
+void snp_vm_init(struct kvm_vm *vm)
+{
+ struct kvm_sev_init init = { 0 };
+
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SNP_VM);
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+}
+
void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
{
struct kvm_sev_launch_start launch_start = {
@@ -76,7 +95,7 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE);
hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
- encrypt_region(vm, region);
+ encrypt_region(vm, region, KVM_SEV_PAGE_TYPE_INVALID, false);
if (policy & SEV_POLICY_ES)
vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
@@ -112,6 +131,33 @@ void sev_vm_launch_finish(struct kvm_vm *vm)
TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
}
+void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy)
+{
+ struct kvm_sev_snp_launch_start launch_start = {
+ .policy = policy,
+ };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_START, &launch_start);
+}
+
+void snp_vm_launch_update(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *region;
+ int ctr;
+
+ hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
+ encrypt_region(vm, region, KVM_SEV_SNP_PAGE_TYPE_NORMAL, true);
+
+ vm->arch.is_pt_protected = true;
+}
+
+void snp_vm_launch_finish(struct kvm_vm *vm)
+{
+ struct kvm_sev_snp_launch_finish launch_finish = { 0 };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_FINISH, &launch_finish);
+}
+
struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
struct kvm_vcpu **cpu)
{
@@ -128,8 +174,20 @@ struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
return vm;
}
-void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement)
+void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement)
{
+ if (is_sev_snp_vm(vm)) {
+ vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, BIT(KVM_HC_MAP_GPA_RANGE));
+
+ snp_vm_launch_start(vm, policy);
+
+ snp_vm_launch_update(vm);
+
+ snp_vm_launch_finish(vm);
+
+ return;
+ }
+
sev_vm_launch(vm, policy);
if (!measurement)
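Taken together, a sketch of the SNP launch flow a test now gets from vm_sev_launch(); example_snp_smoke() is hypothetical and assumes the selftest helpers above:

static void example_snp_smoke(void *guest_code)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	vm = vm_sev_create_with_one_vcpu(KVM_X86_SNP_VM, guest_code, &vcpu);

	/*
	 * Internally: enable KVM_HC_MAP_GPA_RANGE exits, then
	 * LAUNCH_START -> LAUNCH_UPDATE -> LAUNCH_FINISH.
	 */
	vm_sev_launch(vm, snp_default_policy(), NULL);

	vcpu_run(vcpu);
	kvm_vm_free(vm);
}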
diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c
new file mode 100644
index 000000000000..2ac89d6c1e46
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/fastops_test.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Execute a fastop() instruction, with or without forced emulation.  Use BT
+ * on bit 0 to seed RFLAGS.CF from the input's low bit, so that instructions
+ * like ADC and SBB behave deterministically.
+ */
+#define guest_execute_fastop_1(FEP, insn, __val, __flags) \
+({ \
+ __asm__ __volatile__("bt $0, %[val]\n\t" \
+ FEP insn " %[val]\n\t" \
+ "pushfq\n\t" \
+ "pop %[flags]\n\t" \
+ : [val]"+r"(__val), [flags]"=r"(__flags) \
+ : : "cc", "memory"); \
+})
+
+#define guest_test_fastop_1(insn, type_t, __val) \
+({ \
+ type_t val = __val, ex_val = __val, input = __val; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_1("", insn, ex_val, ex_flags); \
+ guest_execute_fastop_1(KVM_FEP, insn, val, flags); \
+ \
+ __GUEST_ASSERT(val == ex_val, \
+ "Wanted 0x%lx for '%s 0x%lx', got 0x%lx", \
+ (uint64_t)ex_val, insn, (uint64_t)input, (uint64_t)val); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%lx', got 0x%lx", \
+ ex_flags, insn, (uint64_t)input, flags); \
+})
+
+#define guest_execute_fastop_2(FEP, insn, __input, __output, __flags) \
+({ \
+ __asm__ __volatile__("bt $0, %[output]\n\t" \
+ FEP insn " %[input], %[output]\n\t" \
+ "pushfq\n\t" \
+ "pop %[flags]\n\t" \
+ : [output]"+r"(__output), [flags]"=r"(__flags) \
+ : [input]"r"(__input) : "cc", "memory"); \
+})
+
+#define guest_test_fastop_2(insn, type_t, __val1, __val2) \
+({ \
+ type_t input = __val1, input2 = __val2, output = __val2, ex_output = __val2; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_2("", insn, input, ex_output, ex_flags); \
+ guest_execute_fastop_2(KVM_FEP, insn, input, output, flags); \
+ \
+ __GUEST_ASSERT(output == ex_output, \
+ "Wanted 0x%lx for '%s 0x%lx 0x%lx', got 0x%lx", \
+ (uint64_t)ex_output, insn, (uint64_t)input, \
+ (uint64_t)input2, (uint64_t)output); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%lx, 0x%lx', got 0x%lx", \
+ ex_flags, insn, (uint64_t)input, (uint64_t)input2, flags); \
+})
+
+#define guest_execute_fastop_cl(FEP, insn, __shift, __output, __flags) \
+({ \
+ __asm__ __volatile__("bt $0, %[output]\n\t" \
+ FEP insn " %%cl, %[output]\n\t" \
+ "pushfq\n\t" \
+ "pop %[flags]\n\t" \
+ : [output]"+r"(__output), [flags]"=r"(__flags) \
+ : "c"(__shift) : "cc", "memory"); \
+})
+
+#define guest_test_fastop_cl(insn, type_t, __val1, __val2) \
+({ \
+ type_t output = __val2, ex_output = __val2, input = __val2; \
+ uint8_t shift = __val1; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_cl("", insn, shift, ex_output, ex_flags); \
+ guest_execute_fastop_cl(KVM_FEP, insn, shift, output, flags); \
+ \
+ __GUEST_ASSERT(output == ex_output, \
+ "Wanted 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \
+ (uint64_t)ex_output, insn, shift, (uint64_t)input, \
+ (uint64_t)output); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \
+ ex_flags, insn, shift, (uint64_t)input, flags); \
+})
+
+static const uint64_t vals[] = {
+ 0,
+ 1,
+ 2,
+ 4,
+ 7,
+ 0x5555555555555555,
+ 0xaaaaaaaaaaaaaaaa,
+ 0xfefefefefefefefe,
+ 0xffffffffffffffff,
+};
+
+#define guest_test_fastops(type_t, suffix) \
+do { \
+ int i, j; \
+ \
+ for (i = 0; i < ARRAY_SIZE(vals); i++) { \
+ guest_test_fastop_1("dec" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("inc" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("neg" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("not" suffix, type_t, vals[i]); \
+ \
+ for (j = 0; j < ARRAY_SIZE(vals); j++) { \
+ guest_test_fastop_2("add" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("adc" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("and" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bsf" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bsr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bt" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("btc" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("btr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bts" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("cmp" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("imul" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("or" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("sbb" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("sub" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("test" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("xor" suffix, type_t, vals[i], vals[j]); \
+ \
+ guest_test_fastop_cl("rol" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("ror" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("rcl" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("rcr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("sar" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("shl" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("shr" suffix, type_t, vals[i], vals[j]); \
+ } \
+ } \
+} while (0)
+
+static void guest_code(void)
+{
+ guest_test_fastops(uint16_t, "w");
+ guest_test_fastops(uint32_t, "l");
+ guest_test_fastops(uint64_t, "q");
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(is_forced_emulation_enabled);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+}
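The "bt $0" idiom above can be demonstrated in isolation. A standalone x86-64 sketch (the function name is hypothetical) showing how BT seeds RFLAGS.CF from the destination's low bit so ADC's carry-in is deterministic:

#include <stdint.h>
#include <stdio.h>

/* Returns b + a + (b & 1): BT copies bit 0 of b into CF, ADC consumes it. */
static uint64_t adc_with_seeded_cf(uint64_t a, uint64_t b)
{
	uint64_t out = b;

	__asm__ __volatile__("bt $0, %[out]\n\t"
			     "adc %[a], %[out]"
			     : [out] "+r" (out)
			     : [a] "r" (a)
			     : "cc");
	return out;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)adc_with_seeded_cf(5, 3)); /* 9 */
	return 0;
}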
diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
index 4e920705681a..c863a689aa98 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
@@ -22,25 +22,6 @@ static void guest_code(void)
{
}
-static bool smt_possible(void)
-{
- char buf[16];
- FILE *f;
- bool res = true;
-
- f = fopen("/sys/devices/system/cpu/smt/control", "r");
- if (f) {
- if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
- if (!strncmp(buf, "forceoff", 8) ||
- !strncmp(buf, "notsupported", 12))
- res = false;
- }
- fclose(f);
- }
-
- return res;
-}
-
static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
{
const bool has_irqchip = !vcpu || vcpu->vm->has_irqchip;
@@ -93,7 +74,7 @@ static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
case 0x40000004:
test_val = entry->eax & (1UL << 18);
- TEST_ASSERT(!!test_val == !smt_possible(),
+ TEST_ASSERT(!!test_val == !is_smt_possible(),
"NoNonArchitecturalCoreSharing bit"
" doesn't reflect SMT setting");
diff --git a/tools/testing/selftests/kvm/x86/kvm_buslock_test.c b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c
new file mode 100644
index 000000000000..d88500c118eb
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc.
+ */
+#include <linux/atomic.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "vmx.h"
+#include "test_util.h"
+
+#define NR_BUS_LOCKS_PER_LEVEL 100
+#define CACHE_LINE_SIZE 64
+
+/*
+ * To generate a bus lock, carve out a buffer that precisely occupies two cache
+ * lines and perform an atomic access that splits the two lines.
+ */
+static u8 buffer[CACHE_LINE_SIZE * 2] __aligned(CACHE_LINE_SIZE);
+static atomic_t *val = (void *)&buffer[CACHE_LINE_SIZE - (sizeof(*val) / 2)];
+
+static void guest_generate_buslocks(void)
+{
+ for (int i = 0; i < NR_BUS_LOCKS_PER_LEVEL; i++)
+ atomic_inc(val);
+}
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ guest_generate_buslocks();
+ GUEST_DONE();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ run_guest(vmcb, svm->vmcb_gpa);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+ GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+ prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+ GUEST_ASSERT(!vmlaunch());
+}
+
+static void guest_code(void *test_data)
+{
+ guest_generate_buslocks();
+
+ if (this_cpu_has(X86_FEATURE_SVM))
+ l1_svm_code(test_data);
+ else if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(test_data);
+ else
+ GUEST_DONE();
+
+ TEST_FAIL("L2 should have signaled 'done'");
+}
+
+int main(int argc, char *argv[])
+{
+ const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX);
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ int i, bus_locks = 0;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_BUS_LOCK_EXIT));
+
+ vm = vm_create(1);
+ vm_enable_cap(vm, KVM_CAP_X86_BUS_LOCK_EXIT, KVM_BUS_LOCK_DETECTION_EXIT);
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ run = vcpu->run;
+
+ for (i = 0; i <= NR_BUS_LOCKS_PER_LEVEL * (1 + has_nested); i++) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ if (run->exit_reason == KVM_EXIT_IO) {
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ goto done;
+ case UCALL_SYNC:
+ continue;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_BUS_LOCK);
+
+ /*
+ * Verify the counter is actually getting incremented, e.g. that
+ * KVM isn't skipping the instruction. On Intel, the exit is
+ * trap-like, i.e. the counter should already have been
+ * incremented. On AMD, it's fault-like, i.e. the counter will
+ * be incremented when the guest re-executes the instruction.
+ */
+ sync_global_from_guest(vm, *val);
+ TEST_ASSERT_EQ(atomic_read(val), bus_locks + host_cpu_is_intel);
+
+ bus_locks++;
+ }
+	TEST_FAIL("Didn't receive UCALL_DONE, took %d bus lock exits", bus_locks);
+done:
+ TEST_ASSERT_EQ(i, bus_locks);
+ kvm_vm_free(vm);
+ return 0;
+}
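The buffer layout used above can also be shown standalone: place a 4-byte value so its halves land on opposite sides of a cache-line boundary, and a locked read-modify-write on it becomes a bus lock. A hypothetical x86-only sketch (misaligned atomics rely on x86 behavior):

#include <stdint.h>
#include <stdio.h>

#define CACHE_LINE_SIZE 64

static _Alignas(CACHE_LINE_SIZE) uint8_t buffer[CACHE_LINE_SIZE * 2];

int main(void)
{
	/* sizeof(uint32_t) / 2 == 2, so bytes 62-65 straddle the boundary. */
	uint32_t *val = (uint32_t *)&buffer[CACHE_LINE_SIZE - sizeof(*val) / 2];

	printf("value starts at offset %td\n", (uint8_t *)val - buffer);
	__atomic_fetch_add(val, 1, __ATOMIC_SEQ_CST);	/* locked, split access */
	return 0;
}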
diff --git a/tools/testing/selftests/kvm/x86/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
index 3fb967f40c6a..b238615196ad 100644
--- a/tools/testing/selftests/kvm/x86/sev_init2_tests.c
+++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
@@ -28,6 +28,7 @@
int kvm_fd;
u64 supported_vmsa_features;
bool have_sev_es;
+bool have_snp;
static int __sev_ioctl(int vm_fd, int cmd_id, void *data)
{
@@ -83,6 +84,9 @@ void test_vm_types(void)
if (have_sev_es)
test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){});
+ if (have_snp)
+ test_init2(KVM_X86_SNP_VM, &(struct kvm_sev_init){});
+
test_init2_invalid(0, &(struct kvm_sev_init){},
"VM type is KVM_X86_DEFAULT_VM");
if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
@@ -138,15 +142,24 @@ int main(int argc, char *argv[])
"sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM);
+ have_snp = kvm_cpu_has(X86_FEATURE_SEV_SNP);
+ TEST_ASSERT(have_snp == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SNP_VM)),
+		    "sev-snp: KVM_CAP_VM_TYPES (%x) does not match CPUID SEV-SNP support (checking bit %d)",
+ kvm_check_cap(KVM_CAP_VM_TYPES), KVM_X86_SNP_VM);
+
test_vm_types();
test_flags(KVM_X86_SEV_VM);
if (have_sev_es)
test_flags(KVM_X86_SEV_ES_VM);
+ if (have_snp)
+ test_flags(KVM_X86_SNP_VM);
test_features(KVM_X86_SEV_VM, 0);
if (have_sev_es)
test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features);
+ if (have_snp)
+ test_features(KVM_X86_SNP_VM, supported_vmsa_features);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
index d97816dc476a..77256c89bb8d 100644
--- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
@@ -16,6 +16,18 @@
#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)
+static void guest_snp_code(void)
+{
+ uint64_t sev_msr = rdmsr(MSR_AMD64_SEV);
+
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ENABLED);
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ES_ENABLED);
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_SNP_ENABLED);
+
+ wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
+ vmgexit();
+}
+
static void guest_sev_es_code(void)
{
/* TODO: Check CPUID after GHCB-based hypercall support is added. */
@@ -27,7 +39,7 @@ static void guest_sev_es_code(void)
* force "termination" to signal "done" via the GHCB MSR protocol.
*/
wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
- __asm__ __volatile__("rep; vmmcall");
+ vmgexit();
}
static void guest_sev_code(void)
@@ -62,7 +74,7 @@ static void compare_xsave(u8 *from_host, u8 *from_guest)
abort();
}
-static void test_sync_vmsa(uint32_t policy)
+static void test_sync_vmsa(uint32_t type, uint64_t policy)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
@@ -72,7 +84,7 @@ static void test_sync_vmsa(uint32_t policy)
double x87val = M_PI;
struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 };
- vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu);
+ vm = vm_sev_create_with_one_vcpu(type, guest_code_xsave, &vcpu);
gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR,
MEM_REGION_TEST_DATA);
hva = addr_gva2hva(vm, gva);
@@ -89,7 +101,7 @@ static void test_sync_vmsa(uint32_t policy)
: "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
vcpu_xsave_set(vcpu, &xsave);
- vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL);
+ vm_sev_launch(vm, policy, NULL);
/* This page is shared, so make it decrypted. */
memset(hva, 0, 4096);
@@ -108,14 +120,12 @@ static void test_sync_vmsa(uint32_t policy)
kvm_vm_free(vm);
}
-static void test_sev(void *guest_code, uint64_t policy)
+static void test_sev(void *guest_code, uint32_t type, uint64_t policy)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct ucall uc;
- uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
-
vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu);
/* TODO: Validate the measurement is as expected. */
@@ -124,7 +134,7 @@ static void test_sev(void *guest_code, uint64_t policy)
for (;;) {
vcpu_run(vcpu);
- if (policy & SEV_POLICY_ES) {
+ if (is_sev_es_vm(vm)) {
TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
"Wanted SYSTEM_EVENT, got %s",
exit_reason_str(vcpu->run->exit_reason));
@@ -161,16 +171,14 @@ static void guest_shutdown_code(void)
__asm__ __volatile__("ud2");
}
-static void test_sev_es_shutdown(void)
+static void test_sev_shutdown(uint32_t type, uint64_t policy)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- uint32_t type = KVM_X86_SEV_ES_VM;
-
vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu);
- vm_sev_launch(vm, SEV_POLICY_ES, NULL);
+ vm_sev_launch(vm, policy, NULL);
vcpu_run(vcpu);
TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN,
@@ -180,27 +188,42 @@ static void test_sev_es_shutdown(void)
kvm_vm_free(vm);
}
-int main(int argc, char *argv[])
+static void test_sev_smoke(void *guest, uint32_t type, uint64_t policy)
{
const u64 xf_mask = XFEATURE_MASK_X87_AVX;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
-
- test_sev(guest_sev_code, SEV_POLICY_NO_DBG);
- test_sev(guest_sev_code, 0);
+ if (type == KVM_X86_SNP_VM)
+ test_sev(guest, type, policy | SNP_POLICY_DBG);
+ else
+ test_sev(guest, type, policy | SEV_POLICY_NO_DBG);
+ test_sev(guest, type, policy);
- if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
- test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
- test_sev(guest_sev_es_code, SEV_POLICY_ES);
+ if (type == KVM_X86_SEV_VM)
+ return;
- test_sev_es_shutdown();
+ test_sev_shutdown(type, policy);
- if (kvm_has_cap(KVM_CAP_XCRS) &&
- (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
- test_sync_vmsa(0);
- test_sync_vmsa(SEV_POLICY_NO_DBG);
- }
+ if (kvm_has_cap(KVM_CAP_XCRS) &&
+ (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
+ test_sync_vmsa(type, policy);
+ if (type == KVM_X86_SNP_VM)
+ test_sync_vmsa(type, policy | SNP_POLICY_DBG);
+ else
+ test_sync_vmsa(type, policy | SEV_POLICY_NO_DBG);
}
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+ test_sev_smoke(guest_sev_code, KVM_X86_SEV_VM, 0);
+
+ if (kvm_cpu_has(X86_FEATURE_SEV_ES))
+ test_sev_smoke(guest_sev_es_code, KVM_X86_SEV_ES_VM, SEV_POLICY_ES);
+
+ if (kvm_cpu_has(X86_FEATURE_SEV_SNP))
+ test_sev_smoke(guest_snp_code, KVM_X86_SNP_VM, snp_default_policy());
return 0;
}
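For reference, the MSR bits guest_snp_code() asserts on; layout per the AMD APM (MSR 0xc0010131), with the same names used above:

/*
 * MSR_AMD64_SEV, readable from inside the guest:
 *   bit 0 - SEV is active
 *   bit 1 - SEV-ES is active
 *   bit 2 - SEV-SNP is active
 * An SNP guest expects all three bits to be set.
 */
#define MSR_AMD64_SEV			0xc0010131
#define MSR_AMD64_SEV_ENABLED		(1ULL << 0)
#define MSR_AMD64_SEV_ES_ENABLED	(1ULL << 1)
#define MSR_AMD64_SEV_SNP_ENABLED	(1ULL << 2)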
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index c5241b193db8..824266982aa3 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -20,6 +20,7 @@ mremap_test
on-fault-limit
transhuge-stress
pagemap_ioctl
+pfnmap
*.tmp*
protection_keys
protection_keys_32
@@ -58,3 +59,4 @@ hugetlb_dio
pkey_sighandler_tests_32
pkey_sighandler_tests_64
guard-regions
+merge
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 8270895039d1..ae6f994d3add 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -84,6 +84,7 @@ TEST_GEN_FILES += mremap_test
TEST_GEN_FILES += mseal_test
TEST_GEN_FILES += on-fault-limit
TEST_GEN_FILES += pagemap_ioctl
+TEST_GEN_FILES += pfnmap
TEST_GEN_FILES += thuge-gen
TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += uffd-stress
@@ -98,6 +99,7 @@ TEST_GEN_FILES += hugetlb_madv_vs_map
TEST_GEN_FILES += hugetlb_dio
TEST_GEN_FILES += droppable
TEST_GEN_FILES += guard-regions
+TEST_GEN_FILES += merge
ifneq ($(ARCH),arm64)
TEST_GEN_FILES += soft-dirty
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index b6cfe0a4b7df..dbbcc5eb3dce 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -112,9 +112,12 @@ struct comm_pipes {
static int setup_comm_pipes(struct comm_pipes *comm_pipes)
{
- if (pipe(comm_pipes->child_ready) < 0)
+ if (pipe(comm_pipes->child_ready) < 0) {
+ ksft_perror("pipe()");
return -errno;
+ }
if (pipe(comm_pipes->parent_ready) < 0) {
+ ksft_perror("pipe()");
close(comm_pipes->child_ready[0]);
close(comm_pipes->child_ready[1]);
return -errno;
@@ -207,13 +210,14 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
ret = setup_comm_pipes(&comm_pipes);
if (ret) {
- ksft_test_result_fail("pipe() failed\n");
+ log_test_result(KSFT_FAIL);
return;
}
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
} else if (!ret) {
exit(fn(mem, size, &comm_pipes));
@@ -228,9 +232,18 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
* write-faults by directly mapping pages writable.
*/
ret = mprotect(mem, size, PROT_READ);
- ret |= mprotect(mem, size, PROT_READ|PROT_WRITE);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
+ write(comm_pipes.parent_ready[1], "0", 1);
+ wait(&ret);
+ goto close_comm_pipes;
+ }
+
+ ret = mprotect(mem, size, PROT_READ|PROT_WRITE);
+ if (ret) {
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
write(comm_pipes.parent_ready[1], "0", 1);
wait(&ret);
goto close_comm_pipes;
@@ -248,16 +261,16 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
ret = -EINVAL;
if (!ret) {
- ksft_test_result_pass("No leak from parent into child\n");
+ log_test_result(KSFT_PASS);
} else if (xfail) {
/*
* With hugetlb, some vmsplice() tests are currently expected to
* fail because (a) harder to fix and (b) nobody really cares.
* Flag them as expected failure for now.
*/
- ksft_test_result_xfail("Leak from parent into child\n");
+ log_test_result(KSFT_XFAIL);
} else {
- ksft_test_result_fail("Leak from parent into child\n");
+ log_test_result(KSFT_FAIL);
}
close_comm_pipes:
close_comm_pipes(&comm_pipes);
@@ -306,26 +319,29 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
ret = setup_comm_pipes(&comm_pipes);
if (ret) {
- ksft_test_result_fail("pipe() failed\n");
+ log_test_result(KSFT_FAIL);
goto free;
}
if (pipe(fds) < 0) {
- ksft_test_result_fail("pipe() failed\n");
+ ksft_perror("pipe() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
if (before_fork) {
transferred = vmsplice(fds[1], &iov, 1, 0);
if (transferred <= 0) {
- ksft_test_result_fail("vmsplice() failed\n");
+			ksft_perror("vmsplice() failed");
+ log_test_result(KSFT_FAIL);
goto close_pipe;
}
}
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+		ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
goto close_pipe;
} else if (!ret) {
write(comm_pipes.child_ready[1], "0", 1);
@@ -339,7 +355,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
if (!before_fork) {
transferred = vmsplice(fds[1], &iov, 1, 0);
if (transferred <= 0) {
- ksft_test_result_fail("vmsplice() failed\n");
+ ksft_perror("vmsplice() failed");
+ log_test_result(KSFT_FAIL);
wait(&ret);
goto close_pipe;
}
@@ -348,7 +365,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
;
if (munmap(mem, size) < 0) {
- ksft_test_result_fail("munmap() failed\n");
+ ksft_perror("munmap() failed");
+ log_test_result(KSFT_FAIL);
goto close_pipe;
}
write(comm_pipes.parent_ready[1], "0", 1);
@@ -356,7 +374,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
/* Wait until the child is done writing. */
wait(&ret);
if (!WIFEXITED(ret)) {
- ksft_test_result_fail("wait() failed\n");
+ ksft_perror("wait() failed");
+ log_test_result(KSFT_FAIL);
goto close_pipe;
}
@@ -364,22 +383,23 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
for (total = 0; total < transferred; total += cur) {
cur = read(fds[0], new + total, transferred - total);
if (cur < 0) {
- ksft_test_result_fail("read() failed\n");
+ ksft_perror("read() failed");
+ log_test_result(KSFT_FAIL);
goto close_pipe;
}
}
if (!memcmp(old, new, transferred)) {
- ksft_test_result_pass("No leak from child into parent\n");
+ log_test_result(KSFT_PASS);
} else if (xfail) {
/*
* With hugetlb, some vmsplice() tests are currently expected to
* fail because (a) harder to fix and (b) nobody really cares.
* Flag them as expected failure for now.
*/
- ksft_test_result_xfail("Leak from child into parent\n");
+ log_test_result(KSFT_XFAIL);
} else {
- ksft_test_result_fail("Leak from child into parent\n");
+ log_test_result(KSFT_FAIL);
}
close_pipe:
close(fds[0]);
@@ -416,13 +436,14 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
ret = setup_comm_pipes(&comm_pipes);
if (ret) {
- ksft_test_result_fail("pipe() failed\n");
+ log_test_result(KSFT_FAIL);
return;
}
file = tmpfile();
if (!file) {
- ksft_test_result_fail("tmpfile() failed\n");
+ ksft_perror("tmpfile() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
fd = fileno(file);
@@ -430,14 +451,16 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
tmp = malloc(size);
if (!tmp) {
- ksft_test_result_fail("malloc() failed\n");
+ ksft_print_msg("malloc() failed\n");
+ log_test_result(KSFT_FAIL);
goto close_file;
}
/* Skip on errors, as we might just lack kernel support. */
ret = io_uring_queue_init(1, &ring, 0);
if (ret < 0) {
- ksft_test_result_skip("io_uring_queue_init() failed\n");
+ ksft_print_msg("io_uring_queue_init() failed\n");
+ log_test_result(KSFT_SKIP);
goto free_tmp;
}
@@ -452,7 +475,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
iov.iov_len = size;
ret = io_uring_register_buffers(&ring, &iov, 1);
if (ret) {
- ksft_test_result_skip("io_uring_register_buffers() failed\n");
+ ksft_print_msg("io_uring_register_buffers() failed\n");
+ log_test_result(KSFT_SKIP);
goto queue_exit;
}
@@ -463,7 +487,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
*/
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
goto unregister_buffers;
} else if (!ret) {
write(comm_pipes.child_ready[1], "0", 1);
@@ -483,10 +508,17 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
* if the page is mapped R/O vs. R/W).
*/
ret = mprotect(mem, size, PROT_READ);
+ if (ret) {
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
+ goto unregister_buffers;
+ }
+
clear_softdirty();
- ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
+ ret = mprotect(mem, size, PROT_READ | PROT_WRITE);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto unregister_buffers;
}
}
@@ -498,25 +530,29 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
memset(mem, 0xff, size);
sqe = io_uring_get_sqe(&ring);
if (!sqe) {
- ksft_test_result_fail("io_uring_get_sqe() failed\n");
+ ksft_print_msg("io_uring_get_sqe() failed\n");
+ log_test_result(KSFT_FAIL);
goto quit_child;
}
io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);
ret = io_uring_submit(&ring);
if (ret < 0) {
- ksft_test_result_fail("io_uring_submit() failed\n");
+ ksft_print_msg("io_uring_submit() failed\n");
+ log_test_result(KSFT_FAIL);
goto quit_child;
}
ret = io_uring_wait_cqe(&ring, &cqe);
if (ret < 0) {
- ksft_test_result_fail("io_uring_wait_cqe() failed\n");
+ ksft_print_msg("io_uring_wait_cqe() failed\n");
+ log_test_result(KSFT_FAIL);
goto quit_child;
}
if (cqe->res != size) {
- ksft_test_result_fail("write_fixed failed\n");
+ ksft_print_msg("write_fixed failed\n");
+ log_test_result(KSFT_FAIL);
goto quit_child;
}
io_uring_cqe_seen(&ring, cqe);
@@ -526,15 +562,18 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork)
while (total < size) {
cur = pread(fd, tmp + total, size - total, total);
if (cur < 0) {
- ksft_test_result_fail("pread() failed\n");
+ ksft_print_msg("pread() failed\n");
+ log_test_result(KSFT_FAIL);
goto quit_child;
}
total += cur;
}
/* Finally, check if we read what we expected. */
- ksft_test_result(!memcmp(mem, tmp, size),
- "Longterm R/W pin is reliable\n");
+ if (!memcmp(mem, tmp, size))
+ log_test_result(KSFT_PASS);
+ else
+ log_test_result(KSFT_FAIL);
quit_child:
if (use_fork) {
@@ -582,19 +621,21 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
int ret;
if (gup_fd < 0) {
- ksft_test_result_skip("gup_test not available\n");
+ ksft_print_msg("gup_test not available\n");
+ log_test_result(KSFT_SKIP);
return;
}
tmp = malloc(size);
if (!tmp) {
- ksft_test_result_fail("malloc() failed\n");
+ ksft_print_msg("malloc() failed\n");
+ log_test_result(KSFT_FAIL);
return;
}
ret = setup_comm_pipes(&comm_pipes);
if (ret) {
- ksft_test_result_fail("pipe() failed\n");
+ log_test_result(KSFT_FAIL);
goto free_tmp;
}
@@ -609,7 +650,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
*/
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
} else if (!ret) {
write(comm_pipes.child_ready[1], "0", 1);
@@ -646,7 +688,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
clear_softdirty();
ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
break;
@@ -661,9 +704,11 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
if (ret) {
if (errno == EINVAL)
- ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
+ ret = KSFT_SKIP;
else
- ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
+ ret = KSFT_FAIL;
+ ksft_perror("PIN_LONGTERM_TEST_START failed");
+ log_test_result(ret);
goto wait;
}
@@ -676,22 +721,26 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
*/
tmp_val = (__u64)(uintptr_t)tmp;
ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
- if (ret)
- ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n");
- else
- ksft_test_result(!memcmp(mem, tmp, size),
- "Longterm R/O pin is reliable\n");
+ if (ret) {
+ ksft_perror("PIN_LONGTERM_TEST_READ failed");
+ log_test_result(KSFT_FAIL);
+ } else {
+ if (!memcmp(mem, tmp, size))
+ log_test_result(KSFT_PASS);
+ else
+ log_test_result(KSFT_FAIL);
+ }
ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
if (ret)
- ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
+ ksft_perror("PIN_LONGTERM_TEST_STOP failed");
wait:
switch (test) {
case RO_PIN_TEST_SHARED:
write(comm_pipes.parent_ready[1], "0", 1);
wait(&ret);
if (!WIFEXITED(ret))
- ksft_print_msg("[INFO] wait() failed\n");
+ ksft_perror("wait() failed");
break;
default:
break;
@@ -746,14 +795,16 @@ static void do_run_with_base_page(test_fn fn, bool swapout)
mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
return;
}
ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
/* Ignore if not around on a kernel. */
if (ret && errno != EINVAL) {
- ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
+ ksft_perror("MADV_NOHUGEPAGE failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -763,7 +814,8 @@ static void do_run_with_base_page(test_fn fn, bool swapout)
if (swapout) {
madvise(mem, pagesize, MADV_PAGEOUT);
if (!pagemap_is_swapped(pagemap_fd, mem)) {
- ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
+ ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
+ log_test_result(KSFT_SKIP);
goto munmap;
}
}
@@ -775,13 +827,13 @@ munmap:
static void run_with_base_page(test_fn fn, const char *desc)
{
- ksft_print_msg("[RUN] %s ... with base page\n", desc);
+ log_test_start("%s ... with base page", desc);
do_run_with_base_page(fn, false);
}
static void run_with_base_page_swap(test_fn fn, const char *desc)
{
- ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc);
+ log_test_start("%s ... with swapped out base page", desc);
do_run_with_base_page(fn, true);
}
@@ -807,7 +859,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mmap_mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
return;
}
@@ -816,7 +869,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
ret = madvise(mem, thpsize, MADV_HUGEPAGE);
if (ret) {
- ksft_test_result_fail("MADV_HUGEPAGE failed\n");
+ ksft_perror("MADV_HUGEPAGE failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -826,7 +880,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
*/
mem[0] = 1;
if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
- ksft_test_result_skip("Did not get a THP populated\n");
+ ksft_print_msg("Did not get a THP populated\n");
+ log_test_result(KSFT_SKIP);
goto munmap;
}
memset(mem, 1, thpsize);
@@ -846,12 +901,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
*/
ret = mprotect(mem + pagesize, pagesize, PROT_READ);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
break;
@@ -863,7 +920,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
*/
ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
if (ret) {
- ksft_test_result_fail("MADV_DONTNEED failed\n");
+ ksft_perror("MADV_DONTNEED failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
size = pagesize;
@@ -877,13 +935,15 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mremap_mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
if (tmp != mremap_mem) {
- ksft_test_result_fail("mremap() failed\n");
+ ksft_perror("mremap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
size = mremap_size;
@@ -896,12 +956,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
*/
ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
if (ret) {
- ksft_test_result_fail("MADV_DONTFORK failed\n");
+ ksft_perror("MADV_DONTFORK failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
} else if (!ret) {
exit(0);
@@ -910,7 +972,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
/* Allow for sharing all pages again. */
ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
if (ret) {
- ksft_test_result_fail("MADV_DOFORK failed\n");
+ ksft_perror("MADV_DOFORK failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
break;
@@ -924,7 +987,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
case THP_RUN_SINGLE_PTE_SWAPOUT:
madvise(mem, size, MADV_PAGEOUT);
if (!range_is_swapped(mem, size)) {
- ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
+ ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
+ log_test_result(KSFT_SKIP);
goto munmap;
}
break;
@@ -941,56 +1005,56 @@ munmap:
static void run_with_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n",
+ log_test_start("%s ... with THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PMD, size);
}
static void run_with_thp_swap(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n",
+ log_test_start("%s ... with swapped-out THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size);
}
static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n",
+ log_test_start("%s ... with PTE-mapped THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PTE, size);
}
static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n",
+ log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size);
}
static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with single PTE of THP (%zu kB)\n",
+ log_test_start("%s ... with single PTE of THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size);
}
static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n",
+ log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size);
}
static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n",
+ log_test_start("%s ... with partially mremap()'ed THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size);
}
static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with partially shared THP (%zu kB)\n",
+ log_test_start("%s ... with partially shared THP (%zu kB)",
desc, size / 1024);
do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size);
}
@@ -1000,14 +1064,15 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
char *mem, *dummy;
- ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc,
+ log_test_start("%s ... with hugetlb (%zu kB)", desc,
hugetlbsize / 1024);
flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;
mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
if (mem == MAP_FAILED) {
- ksft_test_result_skip("need more free huge pages\n");
+ ksft_perror("need more free huge pages");
+ log_test_result(KSFT_SKIP);
return;
}
@@ -1020,7 +1085,8 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
*/
dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
if (dummy == MAP_FAILED) {
- ksft_test_result_skip("need more free huge pages\n");
+ ksft_perror("need more free huge pages");
+ log_test_result(KSFT_SKIP);
goto munmap;
}
munmap(dummy, hugetlbsize);
@@ -1226,7 +1292,7 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
ret = setup_comm_pipes(&comm_pipes);
if (ret) {
- ksft_test_result_fail("pipe() failed\n");
+ log_test_result(KSFT_FAIL);
return;
}
@@ -1236,12 +1302,14 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
*/
ret = mprotect(mem + pagesize, pagesize, PROT_READ);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_perror("mprotect() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
@@ -1250,8 +1318,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
/* Collapse before actually COW-sharing the page. */
ret = madvise(mem, size, MADV_COLLAPSE);
if (ret) {
- ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
- strerror(errno));
+ ksft_perror("MADV_COLLAPSE failed");
+ log_test_result(KSFT_SKIP);
goto close_comm_pipes;
}
break;
@@ -1262,7 +1330,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
/* Don't COW-share the upper part of the THP. */
ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
if (ret) {
- ksft_test_result_fail("MADV_DONTFORK failed\n");
+ ksft_perror("MADV_DONTFORK failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
break;
@@ -1270,7 +1339,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
/* Don't COW-share the lower part of the THP. */
ret = madvise(mem, size / 2, MADV_DONTFORK);
if (ret) {
- ksft_test_result_fail("MADV_DONTFORK failed\n");
+ ksft_perror("MADV_DONTFORK failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
}
break;
@@ -1280,7 +1350,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
ret = fork();
if (ret < 0) {
- ksft_test_result_fail("fork() failed\n");
+ ksft_perror("fork() failed");
+ log_test_result(KSFT_FAIL);
goto close_comm_pipes;
} else if (!ret) {
switch (test) {
@@ -1314,7 +1385,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
*/
ret = madvise(mem, size, MADV_DOFORK);
if (ret) {
- ksft_test_result_fail("MADV_DOFORK failed\n");
+ ksft_perror("MADV_DOFORK failed");
+ log_test_result(KSFT_FAIL);
write(comm_pipes.parent_ready[1], "0", 1);
wait(&ret);
goto close_comm_pipes;
@@ -1324,8 +1396,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
/* Collapse before anyone modified the COW-shared page. */
ret = madvise(mem, size, MADV_COLLAPSE);
if (ret) {
- ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
- strerror(errno));
+ ksft_perror("MADV_COLLAPSE failed");
+ log_test_result(KSFT_SKIP);
write(comm_pipes.parent_ready[1], "0", 1);
wait(&ret);
goto close_comm_pipes;
@@ -1345,7 +1417,10 @@ static void do_test_anon_thp_collapse(char *mem, size_t size,
else
ret = -EINVAL;
- ksft_test_result(!ret, "No leak from parent into child\n");
+ if (!ret)
+ log_test_result(KSFT_PASS);
+ else
+ log_test_result(KSFT_FAIL);
close_comm_pipes:
close_comm_pipes(&comm_pipes);
}
@@ -1430,7 +1505,7 @@ static void run_anon_thp_test_cases(void)
for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
struct test_case const *test_case = &anon_thp_test_cases[i];
- ksft_print_msg("[RUN] %s\n", test_case->desc);
+ log_test_start("%s", test_case->desc);
do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize);
}
}
@@ -1453,8 +1528,10 @@ static void test_cow(char *mem, const char *smem, size_t size)
memset(mem, 0xff, size);
/* See if we still read the old values via the other mapping. */
- ksft_test_result(!memcmp(smem, old, size),
- "Other mapping not modified\n");
+ if (!memcmp(smem, old, size))
+ log_test_result(KSFT_PASS);
+ else
+ log_test_result(KSFT_FAIL);
free(old);
}
@@ -1472,18 +1549,20 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
{
char *mem, *smem, tmp;
- ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc);
+ log_test_start("%s ... with shared zeropage", desc);
mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANON, -1, 0);
if (mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
return;
}
smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
if (smem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -1504,10 +1583,11 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
size_t mmap_size;
int ret;
- ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc);
+ log_test_start("%s ... with huge zeropage", desc);
if (!has_huge_zeropage) {
- ksft_test_result_skip("Huge zeropage not enabled\n");
+ ksft_print_msg("Huge zeropage not enabled\n");
+ log_test_result(KSFT_SKIP);
return;
}
@@ -1516,13 +1596,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mmap_mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
return;
}
mmap_smem = mmap(NULL, mmap_size, PROT_READ,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mmap_smem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -1531,9 +1613,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1));
ret = madvise(mem, pmdsize, MADV_HUGEPAGE);
+ if (ret != 0) {
+ ksft_perror("madvise()");
+ log_test_result(KSFT_FAIL);
+ goto munmap;
+ }
ret |= madvise(smem, pmdsize, MADV_HUGEPAGE);
- if (ret) {
- ksft_test_result_fail("MADV_HUGEPAGE failed\n");
+ if (ret != 0) {
+ ksft_perror("madvise()");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -1562,29 +1650,33 @@ static void run_with_memfd(non_anon_test_fn fn, const char *desc)
char *mem, *smem, tmp;
int fd;
- ksft_print_msg("[RUN] %s ... with memfd\n", desc);
+ log_test_start("%s ... with memfd", desc);
fd = memfd_create("test", 0);
if (fd < 0) {
- ksft_test_result_fail("memfd_create() failed\n");
+ ksft_perror("memfd_create() failed");
+ log_test_result(KSFT_FAIL);
return;
}
/* File consists of a single page filled with zeroes. */
if (fallocate(fd, 0, 0, pagesize)) {
- ksft_test_result_fail("fallocate() failed\n");
+ ksft_perror("fallocate() failed");
+ log_test_result(KSFT_FAIL);
goto close;
}
/* Create a private mapping of the memfd. */
mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto close;
}
smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
if (smem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -1607,35 +1699,40 @@ static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
FILE *file;
int fd;
- ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
+ log_test_start("%s ... with tmpfile", desc);
file = tmpfile();
if (!file) {
- ksft_test_result_fail("tmpfile() failed\n");
+ ksft_perror("tmpfile() failed");
+ log_test_result(KSFT_FAIL);
return;
}
fd = fileno(file);
if (fd < 0) {
- ksft_test_result_skip("fileno() failed\n");
+ ksft_perror("fileno() failed");
+ log_test_result(KSFT_SKIP);
return;
}
/* File consists of a single page filled with zeroes. */
if (fallocate(fd, 0, 0, pagesize)) {
- ksft_test_result_fail("fallocate() failed\n");
+ ksft_perror("fallocate() failed");
+ log_test_result(KSFT_FAIL);
goto close;
}
/* Create a private mapping of the memfd. */
mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (mem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto close;
}
smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
if (smem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -1659,20 +1756,22 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
char *mem, *smem, tmp;
int fd;
- ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
+ log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
hugetlbsize / 1024);
flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
fd = memfd_create("test", flags);
if (fd < 0) {
- ksft_test_result_skip("memfd_create() failed\n");
+ ksft_perror("memfd_create() failed");
+ log_test_result(KSFT_SKIP);
return;
}
/* File consists of a single page filled with zeroes. */
if (fallocate(fd, 0, 0, hugetlbsize)) {
- ksft_test_result_skip("need more free huge pages\n");
+ ksft_perror("need more free huge pages");
+ log_test_result(KSFT_SKIP);
goto close;
}
@@ -1680,12 +1779,14 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
0);
if (mem == MAP_FAILED) {
- ksft_test_result_skip("need more free huge pages\n");
+ ksft_perror("need more free huge pages");
+ log_test_result(KSFT_SKIP);
goto close;
}
smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
if (smem == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
+ ksft_perror("mmap() failed");
+ log_test_result(KSFT_FAIL);
goto munmap;
}
@@ -1771,7 +1872,6 @@ static int tests_per_non_anon_test_case(void)
int main(int argc, char **argv)
{
- int err;
struct thp_settings default_settings;
ksft_print_header();
@@ -1811,9 +1911,5 @@ int main(int argc, char **argv)
thp_restore_settings();
}
- err = ksft_get_fail_cnt();
- if (err)
- ksft_exit_fail_msg("%d out of %d tests failed\n",
- err, ksft_test_num());
- ksft_exit_pass();
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c
index eba43ead13ae..93af3d3760f9 100644
--- a/tools/testing/selftests/mm/guard-regions.c
+++ b/tools/testing/selftests/mm/guard-regions.c
@@ -8,6 +8,7 @@
#include <fcntl.h>
#include <linux/limits.h>
#include <linux/userfaultfd.h>
+#include <linux/fs.h>
#include <setjmp.h>
#include <signal.h>
#include <stdbool.h>
@@ -1452,8 +1453,21 @@ TEST_F(guard_regions, uffd)
/* Set up uffd. */
uffd = userfaultfd(0);
- if (uffd == -1 && errno == EPERM)
- ksft_exit_skip("No userfaultfd permissions, try running as root.\n");
+ if (uffd == -1) {
+ switch (errno) {
+ case EPERM:
+ SKIP(return, "No userfaultfd permissions, try running as root.");
+ break;
+ case ENOSYS:
+ SKIP(return, "userfaultfd is not supported/not enabled.");
+ break;
+ default:
+ ksft_exit_fail_msg("userfaultfd failed with %s\n",
+ strerror(errno));
+ break;
+ }
+ }
+
ASSERT_NE(uffd, -1);
ASSERT_EQ(ioctl(uffd, UFFDIO_API, &api), 0);
@@ -2075,4 +2089,60 @@ TEST_F(guard_regions, pagemap)
ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
}
+/*
+ * Assert that PAGEMAP_SCAN correctly reports guard region ranges.
+ */
+TEST_F(guard_regions, pagemap_scan)
+{
+ const unsigned long page_size = self->page_size;
+ struct page_region pm_regs[10];
+ struct pm_scan_arg pm_scan_args = {
+ .size = sizeof(struct pm_scan_arg),
+ .category_anyof_mask = PAGE_IS_GUARD,
+ .return_mask = PAGE_IS_GUARD,
+ .vec = (long)&pm_regs,
+ .vec_len = ARRAY_SIZE(pm_regs),
+ };
+ int proc_fd, i;
+ char *ptr;
+
+ proc_fd = open("/proc/self/pagemap", O_RDONLY);
+ ASSERT_NE(proc_fd, -1);
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ pm_scan_args.start = (long)ptr;
+ pm_scan_args.end = (long)ptr + 10 * page_size;
+ ASSERT_EQ(ioctl(proc_fd, PAGEMAP_SCAN, &pm_scan_args), 0);
+ ASSERT_EQ(pm_scan_args.walk_end, (long)ptr + 10 * page_size);
+
+ /* Install a guard region in every other page. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(syscall(__NR_madvise, ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /*
+ * Assert ioctl() returns the count of located regions, where each
+ * region spans every other page within the range of 10 pages.
+ */
+ ASSERT_EQ(ioctl(proc_fd, PAGEMAP_SCAN, &pm_scan_args), 5);
+ ASSERT_EQ(pm_scan_args.walk_end, (long)ptr + 10 * page_size);
+
+ /* Re-read from pagemap, and assert guard regions are detected. */
+ for (i = 0; i < 5; i++) {
+ long ptr_p = (long)&ptr[2 * i * page_size];
+
+ ASSERT_EQ(pm_regs[i].start, ptr_p);
+ ASSERT_EQ(pm_regs[i].end, ptr_p + page_size);
+ ASSERT_EQ(pm_regs[i].categories, PAGE_IS_GUARD);
+ }
+
+ ASSERT_EQ(close(proc_fd), 0);
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
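+
+/*
+ * Annotation: because the guard regions sit on alternating pages they are
+ * never adjacent, so PAGEMAP_SCAN cannot coalesce them and reports exactly
+ * five single-page regions, as asserted above.
+ */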
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
index 21595b20bbc3..8a97ac5176a4 100644
--- a/tools/testing/selftests/mm/gup_longterm.c
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -93,33 +93,48 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
__fsword_t fs_type = get_fs_type(fd);
bool should_work;
char *mem;
+ int result = KSFT_PASS;
int ret;
+ if (fd < 0) {
+ result = KSFT_FAIL;
+ goto report;
+ }
+
if (ftruncate(fd, size)) {
if (errno == ENOENT) {
skip_test_dodgy_fs("ftruncate()");
} else {
- ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno));
+ ksft_print_msg("ftruncate() failed (%s)\n",
+ strerror(errno));
+ result = KSFT_FAIL;
+ goto report;
}
return;
}
if (fallocate(fd, 0, 0, size)) {
- if (size == pagesize)
- ksft_test_result_fail("fallocate() failed (%s)\n", strerror(errno));
- else
- ksft_test_result_skip("need more free huge pages\n");
- return;
+ if (size == pagesize) {
+ ksft_print_msg("fallocate() failed (%s)\n", strerror(errno));
+ result = KSFT_FAIL;
+ } else {
+ ksft_print_msg("need more free huge pages\n");
+ result = KSFT_SKIP;
+ }
+ goto report;
}
mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
shared ? MAP_SHARED : MAP_PRIVATE, fd, 0);
if (mem == MAP_FAILED) {
- if (size == pagesize || shared)
- ksft_test_result_fail("mmap() failed (%s)\n", strerror(errno));
- else
- ksft_test_result_skip("need more free huge pages\n");
- return;
+ if (size == pagesize || shared) {
+ ksft_print_msg("mmap() failed (%s)\n", strerror(errno));
+ result = KSFT_FAIL;
+ } else {
+ ksft_print_msg("need more free huge pages\n");
+ result = KSFT_SKIP;
+ }
+ goto report;
}
/* Fault in the page such that GUP-fast can pin it directly. */
@@ -134,7 +149,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
*/
ret = mprotect(mem, size, PROT_READ);
if (ret) {
- ksft_test_result_fail("mprotect() failed (%s)\n", strerror(errno));
+ ksft_print_msg("mprotect() failed (%s)\n", strerror(errno));
+ result = KSFT_FAIL;
goto munmap;
}
/* FALLTHROUGH */
@@ -147,18 +163,20 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
type == TEST_TYPE_RW_FAST;
if (gup_fd < 0) {
- ksft_test_result_skip("gup_test not available\n");
+ ksft_print_msg("gup_test not available\n");
+ result = KSFT_SKIP;
break;
}
if (rw && shared && fs_is_unknown(fs_type)) {
- ksft_test_result_skip("Unknown filesystem\n");
+ ksft_print_msg("Unknown filesystem\n");
+ result = KSFT_SKIP;
- return;
+ goto report;
}
/*
* R/O pinning or pinning in a private mapping is always
* expected to work. Otherwise, we expect long-term R/W pinning
- * to only succeed for special fielesystems.
+ * to only succeed for special filesystems.
*/
should_work = !shared || !rw ||
fs_supports_writable_longterm_pinning(fs_type);
@@ -169,14 +187,19 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0;
ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
if (ret && errno == EINVAL) {
- ksft_test_result_skip("PIN_LONGTERM_TEST_START failed (EINVAL)n");
+ ksft_print_msg("PIN_LONGTERM_TEST_START failed (EINVAL)n");
+ result = KSFT_SKIP;
break;
} else if (ret && errno == EFAULT) {
- ksft_test_result(!should_work, "Should have failed\n");
+ if (should_work)
+ result = KSFT_FAIL;
+ else
+ result = KSFT_PASS;
break;
} else if (ret) {
- ksft_test_result_fail("PIN_LONGTERM_TEST_START failed (%s)\n",
- strerror(errno));
+ ksft_print_msg("PIN_LONGTERM_TEST_START failed (%s)\n",
+ strerror(errno));
+ result = KSFT_FAIL;
break;
}
@@ -189,7 +212,10 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
* some previously unsupported filesystems, we might want to
* perform some additional tests for possible data corruptions.
*/
- ksft_test_result(should_work, "Should have worked\n");
+ if (should_work)
+ result = KSFT_PASS;
+ else
+ result = KSFT_FAIL;
break;
}
#ifdef LOCAL_CONFIG_HAVE_LIBURING
@@ -199,8 +225,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
/* io_uring always pins pages writable. */
if (shared && fs_is_unknown(fs_type)) {
- ksft_test_result_skip("Unknown filesystem\n");
- return;
+ ksft_print_msg("Unknown filesystem\n");
+ result = KSFT_SKIP;
+ goto report;
}
should_work = !shared ||
fs_supports_writable_longterm_pinning(fs_type);
@@ -208,8 +235,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
/* Skip on errors, as we might just lack kernel support. */
ret = io_uring_queue_init(1, &ring, 0);
if (ret < 0) {
- ksft_test_result_skip("io_uring_queue_init() failed (%s)\n",
- strerror(-ret));
+ ksft_print_msg("io_uring_queue_init() failed (%s)\n",
+ strerror(-ret));
+ result = KSFT_SKIP;
break;
}
/*
@@ -222,17 +250,28 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
/* Only new kernels return EFAULT. */
if (ret && (errno == ENOSPC || errno == EOPNOTSUPP ||
errno == EFAULT)) {
- ksft_test_result(!should_work, "Should have failed (%s)\n",
- strerror(errno));
+ if (should_work) {
+ ksft_print_msg("Should have failed (%s)\n",
+ strerror(errno));
+ result = KSFT_FAIL;
+ } else {
+ result = KSFT_PASS;
+ }
} else if (ret) {
/*
* We might just lack support or have insufficient
* MEMLOCK limits.
*/
- ksft_test_result_skip("io_uring_register_buffers() failed (%s)\n",
- strerror(-ret));
+ ksft_print_msg("io_uring_register_buffers() failed (%s)\n",
+ strerror(-ret));
+ result = KSFT_SKIP;
} else {
- ksft_test_result(should_work, "Should have worked\n");
+ if (should_work) {
+ result = KSFT_PASS;
+ } else {
+ ksft_print_msg("Should have worked\n");
+ result = KSFT_FAIL;
+ }
io_uring_unregister_buffers(&ring);
}
@@ -246,6 +285,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
munmap:
munmap(mem, size);
+report:
+ log_test_result(result);
}
typedef void (*test_fn)(int fd, size_t size);
@@ -254,13 +295,11 @@ static void run_with_memfd(test_fn fn, const char *desc)
{
int fd;
- ksft_print_msg("[RUN] %s ... with memfd\n", desc);
+ log_test_start("%s ... with memfd", desc);
fd = memfd_create("test", 0);
- if (fd < 0) {
- ksft_test_result_fail("memfd_create() failed (%s)\n", strerror(errno));
- return;
- }
+ if (fd < 0)
+ ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
fn(fd, pagesize);
close(fd);
@@ -271,23 +310,23 @@ static void run_with_tmpfile(test_fn fn, const char *desc)
FILE *file;
int fd;
- ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
+ log_test_start("%s ... with tmpfile", desc);
file = tmpfile();
if (!file) {
- ksft_test_result_fail("tmpfile() failed (%s)\n", strerror(errno));
- return;
- }
-
- fd = fileno(file);
- if (fd < 0) {
- ksft_test_result_fail("fileno() failed (%s)\n", strerror(errno));
- goto close;
+ ksft_print_msg("tmpfile() failed (%s)\n", strerror(errno));
+ fd = -1;
+ } else {
+ fd = fileno(file);
+ if (fd < 0) {
+ ksft_print_msg("fileno() failed (%s)\n", strerror(errno));
+ }
}
fn(fd, pagesize);
-close:
- fclose(file);
+
+ if (file)
+ fclose(file);
}
static void run_with_local_tmpfile(test_fn fn, const char *desc)
@@ -295,22 +334,22 @@ static void run_with_local_tmpfile(test_fn fn, const char *desc)
char filename[] = __FILE__"_tmpfile_XXXXXX";
int fd;
- ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc);
+ log_test_start("%s ... with local tmpfile", desc);
fd = mkstemp(filename);
- if (fd < 0) {
- ksft_test_result_fail("mkstemp() failed (%s)\n", strerror(errno));
- return;
- }
+ if (fd < 0)
+ ksft_print_msg("mkstemp() failed (%s)\n", strerror(errno));
if (unlink(filename)) {
- ksft_test_result_fail("unlink() failed (%s)\n", strerror(errno));
- goto close;
+ ksft_print_msg("unlink() failed (%s)\n", strerror(errno));
+ close(fd);
+ fd = -1;
}
fn(fd, pagesize);
-close:
- close(fd);
+
+ if (fd >= 0)
+ close(fd);
}
static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
@@ -319,15 +358,14 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
int flags = MFD_HUGETLB;
int fd;
- ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
+ log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
hugetlbsize / 1024);
flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
fd = memfd_create("test", flags);
if (fd < 0) {
- ksft_test_result_skip("memfd_create() failed (%s)\n", strerror(errno));
- return;
+ ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
}
fn(fd, hugetlbsize);
@@ -455,7 +493,7 @@ static int tests_per_test_case(void)
int main(int argc, char **argv)
{
- int i, err;
+ int i;
pagesize = getpagesize();
nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
@@ -469,9 +507,5 @@ int main(int argc, char **argv)
for (i = 0; i < ARRAY_SIZE(test_cases); i++)
run_test_case(&test_cases[i]);
- err = ksft_get_fail_cnt();
- if (err)
- ksft_exit_fail_msg("%d out of %d tests failed\n",
- err, ksft_test_num());
- ksft_exit_pass();
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
index 0b0d4ba1af27..0dd31892ff67 100755
--- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
@@ -36,7 +36,7 @@ else
do_umount=1
fi
fi
-MNT='/mnt/huge/'
+MNT='/mnt/huge'
function get_machine_hugepage_size() {
hpz=$(grep -i hugepagesize /proc/meminfo)
@@ -56,10 +56,45 @@ function cleanup() {
rmdir "$CGROUP_ROOT"/a/b 2>/dev/null
rmdir "$CGROUP_ROOT"/a 2>/dev/null
rmdir "$CGROUP_ROOT"/test1 2>/dev/null
- echo 0 >/proc/sys/vm/nr_hugepages
+ echo $nr_hugepgs >/proc/sys/vm/nr_hugepages
set -e
}
+function assert_with_retry() {
+ local actual_path="$1"
+ local expected="$2"
+ local tolerance=$((7 * 1024 * 1024))
+ local timeout=20
+ local interval=1
+ local start_time
+ local now
+ local elapsed
+ local actual
+
+ start_time=$(date +%s)
+
+ while true; do
+ actual="$(cat "$actual_path")"
+
+ if [[ $actual -ge $(($expected - $tolerance)) ]] &&
+ [[ $actual -le $(($expected + $tolerance)) ]]; then
+ return 0
+ fi
+
+ now=$(date +%s)
+ elapsed=$((now - start_time))
+
+ if [[ $elapsed -ge $timeout ]]; then
+ echo "actual = $((${actual%% *} / 1024 / 1024)) MB"
+ echo "expected = $((${expected%% *} / 1024 / 1024)) MB"
+ cleanup
+ exit 1
+ fi
+
+ sleep $interval
+ done
+}
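+
+# Annotation: typical usage, mirroring the calls in assert_state() below:
+#   assert_with_retry "$CGROUP_ROOT/a/memory.$usage_file" "$expected_a"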
+
function assert_state() {
local expected_a="$1"
local expected_a_hugetlb="$2"
@@ -70,58 +105,13 @@ function assert_state() {
expected_b="$3"
expected_b_hugetlb="$4"
fi
- local tolerance=$((5 * 1024 * 1024))
-
- local actual_a
- actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)"
- if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] ||
- [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then
- echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB
- echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB
- echo fail
-
- cleanup
- exit 1
- fi
-
- local actual_a_hugetlb
- actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)"
- if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] ||
- [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then
- echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB
- echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB
- echo fail
-
- cleanup
- exit 1
- fi
-
- if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then
- return
- fi
-
- local actual_b
- actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)"
- if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] ||
- [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then
- echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB
- echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB
- echo fail
-
- cleanup
- exit 1
- fi
- local actual_b_hugetlb
- actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)"
- if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] ||
- [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then
- echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB
- echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB
- echo fail
+ assert_with_retry "$CGROUP_ROOT/a/memory.$usage_file" "$expected_a"
+ assert_with_retry "$CGROUP_ROOT/a/hugetlb.${MB}MB.$usage_file" "$expected_a_hugetlb"
- cleanup
- exit 1
+ if [[ -n "$expected_b" && -n "$expected_b_hugetlb" ]]; then
+ assert_with_retry "$CGROUP_ROOT/a/b/memory.$usage_file" "$expected_b"
+ assert_with_retry "$CGROUP_ROOT/a/b/hugetlb.${MB}MB.$usage_file" "$expected_b_hugetlb"
fi
}
@@ -175,7 +165,6 @@ size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages.
cleanup
echo
-echo
echo Test charge, rmdir, uncharge
setup
echo mkdir
@@ -195,7 +184,6 @@ cleanup
echo done
echo
-echo
if [[ ! $cgroup2 ]]; then
echo "Test parent and child hugetlb usage"
setup
@@ -212,7 +200,6 @@ if [[ ! $cgroup2 ]]; then
assert_state 0 $(($size * 2)) 0 $size
rmdir "$CGROUP_ROOT"/a/b
- sleep 5
echo Assert memory reparent correctly.
assert_state 0 $(($size * 2))
@@ -225,7 +212,6 @@ if [[ ! $cgroup2 ]]; then
fi
echo
-echo
echo "Test child only hugetlb usage"
echo setup
setup
diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c
index ef7d911da13e..b6fabd5c27ed 100644
--- a/tools/testing/selftests/mm/madv_populate.c
+++ b/tools/testing/selftests/mm/madv_populate.c
@@ -172,12 +172,12 @@ static void test_populate_read(void)
if (addr == MAP_FAILED)
ksft_exit_fail_msg("mmap failed\n");
ksft_test_result(range_is_not_populated(addr, SIZE),
- "range initially not populated\n");
+ "read range initially not populated\n");
ret = madvise(addr, SIZE, MADV_POPULATE_READ);
ksft_test_result(!ret, "MADV_POPULATE_READ\n");
ksft_test_result(range_is_populated(addr, SIZE),
- "range is populated\n");
+ "read range is populated\n");
munmap(addr, SIZE);
}
@@ -194,12 +194,12 @@ static void test_populate_write(void)
if (addr == MAP_FAILED)
ksft_exit_fail_msg("mmap failed\n");
ksft_test_result(range_is_not_populated(addr, SIZE),
- "range initially not populated\n");
+ "write range initially not populated\n");
ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
ksft_test_result(!ret, "MADV_POPULATE_WRITE\n");
ksft_test_result(range_is_populated(addr, SIZE),
- "range is populated\n");
+ "write range is populated\n");
munmap(addr, SIZE);
}
@@ -247,19 +247,19 @@ static void test_softdirty(void)
/* Clear any softdirty bits. */
clear_softdirty();
ksft_test_result(range_is_not_softdirty(addr, SIZE),
- "range is not softdirty\n");
+ "cleared range is not softdirty\n");
/* Populating READ should not set softdirty. */
ret = madvise(addr, SIZE, MADV_POPULATE_READ);
- ksft_test_result(!ret, "MADV_POPULATE_READ\n");
+ ksft_test_result(!ret, "softdirty MADV_POPULATE_READ\n");
ksft_test_result(range_is_not_softdirty(addr, SIZE),
- "range is not softdirty\n");
+ "range is not softdirty after MADV_POPULATE_READ\n");
/* Populating WRITE should set softdirty. */
ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
- ksft_test_result(!ret, "MADV_POPULATE_WRITE\n");
+ ksft_test_result(!ret, "softdirty MADV_POPULATE_WRITE\n");
ksft_test_result(range_is_softdirty(addr, SIZE),
- "range is softdirty\n");
+ "range is softdirty after MADV_POPULATE_WRITE \n");
munmap(addr, SIZE);
}
diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c
index d53de2486080..1e9980b8993c 100644
--- a/tools/testing/selftests/mm/map_fixed_noreplace.c
+++ b/tools/testing/selftests/mm/map_fixed_noreplace.c
@@ -96,7 +96,7 @@ int main(void)
ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n");
}
ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
- ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n");
+ ksft_test_result_pass("Second mmap() 5*PAGE_SIZE at base\n");
/*
* Second mapping contained within first:
diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c
new file mode 100644
index 000000000000..c76646cdf6e6
--- /dev/null
+++ b/tools/testing/selftests/mm/merge.c
@@ -0,0 +1,455 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+#include "../kselftest_harness.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include "vm_util.h"
+
+FIXTURE(merge)
+{
+ unsigned int page_size;
+ char *carveout;
+ struct procmap_fd procmap;
+};
+
+FIXTURE_SETUP(merge)
+{
+ self->page_size = psize();
+ /* Carve out PROT_NONE region to map over. */
+ self->carveout = mmap(NULL, 12 * self->page_size, PROT_NONE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(self->carveout, MAP_FAILED);
+ /* Setup PROCMAP_QUERY interface. */
+ ASSERT_EQ(open_self_procmap(&self->procmap), 0);
+}
+
+FIXTURE_TEARDOWN(merge)
+{
+ ASSERT_EQ(munmap(self->carveout, 12 * self->page_size), 0);
+ ASSERT_EQ(close_procmap(&self->procmap), 0);
+}
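+
+/*
+ * Annotation: the tests below verify merge behaviour through the
+ * PROCMAP_QUERY ioctl (via the find_vma_procmap() helper from vm_util.h):
+ * after looking up the VMA containing an address, query.vma_start and
+ * query.vma_end describe the whole VMA, so a successful merge shows up as
+ * one VMA spanning the entire expected range.
+ */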
+
+TEST_F(merge, mprotect_unfaulted_left)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr;
+
+ /*
+ * Map 10 pages of R/W memory within. MAP_NORESERVE so we don't hit
+ * merge failure due to lack of VM_ACCOUNT flag by mistake.
+ *
+ * |-----------------------|
+ * | unfaulted |
+ * |-----------------------|
+ */
+ ptr = mmap(&carveout[page_size], 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /*
+ * Now make the first 5 pages read-only, splitting the VMA:
+ *
+ * RO RW
+ * |-----------|-----------|
+ * | unfaulted | unfaulted |
+ * |-----------|-----------|
+ */
+ ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ), 0);
+ /*
+ * Fault in the first of the last 5 pages so it gets an anon_vma and
+ * thus the whole VMA becomes 'faulted':
+ *
+ * RO RW
+ * |-----------|-----------|
+ * | unfaulted | faulted |
+ * |-----------|-----------|
+ */
+ ptr[5 * page_size] = 'x';
+ /*
+ * Now mprotect() the RW region read-only, we should merge (though for
+ * ~15 years we did not! :):
+ *
+ * RO
+ * |-----------------------|
+ * | faulted |
+ * |-----------------------|
+ */
+ ASSERT_EQ(mprotect(&ptr[5 * page_size], 5 * page_size, PROT_READ), 0);
+
+ /* Assert that the merge succeeded using PROCMAP_QUERY. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size);
+}
+
+TEST_F(merge, mprotect_unfaulted_right)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr;
+
+ /*
+ * |-----------------------|
+ * | unfaulted |
+ * |-----------------------|
+ */
+ ptr = mmap(&carveout[page_size], 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /*
+ * Now make the last 5 pages read-only, splitting the VMA:
+ *
+ * RW RO
+ * |-----------|-----------|
+ * | unfaulted | unfaulted |
+ * |-----------|-----------|
+ */
+ ASSERT_EQ(mprotect(&ptr[5 * page_size], 5 * page_size, PROT_READ), 0);
+ /*
+ * Fault in the first of the first 5 pages so it gets an anon_vma and
+ * thus the whole VMA becomes 'faulted':
+ *
+ * RW RO
+ * |-----------|-----------|
+ * | faulted | unfaulted |
+ * |-----------|-----------|
+ */
+ ptr[0] = 'x';
+ /*
+ * Now mprotect() the RW region read-only, we should merge:
+ *
+ * RO
+ * |-----------------------|
+ * | faulted |
+ * |-----------------------|
+ */
+ ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ), 0);
+
+ /* Assert that the merge succeeded using PROCMAP_QUERY. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size);
+}
+
+TEST_F(merge, mprotect_unfaulted_both)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr;
+
+ /*
+ * |-----------------------|
+ * | unfaulted |
+ * |-----------------------|
+ */
+ ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /*
+ * Now make the first and last 3 pages read-only, splitting the VMA:
+ *
+ * RO RW RO
+ * |-----------|-----------|-----------|
+ * | unfaulted | unfaulted | unfaulted |
+ * |-----------|-----------|-----------|
+ */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
+ ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0);
+ /*
+ * Fault in the first of the middle 3 pages so it gets an anon_vma and
+ * thus the whole VMA becomes 'faulted':
+ *
+ * RO RW RO
+ * |-----------|-----------|-----------|
+ * | unfaulted | faulted | unfaulted |
+ * |-----------|-----------|-----------|
+ */
+ ptr[3 * page_size] = 'x';
+ /*
+ * Now mprotect() the RW region read-only, we should merge:
+ *
+ * RO
+ * |-----------------------|
+ * | faulted |
+ * |-----------------------|
+ */
+ ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0);
+
+ /* Assert that the merge succeeded using PROCMAP_QUERY. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size);
+}
+
+TEST_F(merge, mprotect_faulted_left_unfaulted_right)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr;
+
+ /*
+ * |-----------------------|
+ * | unfaulted |
+ * |-----------------------|
+ */
+ ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /*
+ * Now make the last 3 pages read-only, splitting the VMA:
+ *
+ * RW RO
+ * |-----------------------|-----------|
+ * | unfaulted | unfaulted |
+ * |-----------------------|-----------|
+ */
+ ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0);
+ /*
+ * Fault in the first of the first 6 pages so it gets an anon_vma and
+ * thus the whole VMA becomes 'faulted':
+ *
+ * RW RO
+ * |-----------------------|-----------|
+ * | faulted | unfaulted |
+ * |-----------------------|-----------|
+ */
+ ptr[0] = 'x';
+ /*
+ * Now make the first 3 pages read-only, splitting the VMA:
+ *
+ * RO RW RO
+ * |-----------|-----------|-----------|
+ * | faulted | faulted | unfaulted |
+ * |-----------|-----------|-----------|
+ */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
+ /*
+ * Now mprotect() the RW region read-only, we should merge:
+ *
+ * RO
+ * |-----------------------|
+ * | faulted |
+ * |-----------------------|
+ */
+ ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0);
+
+ /* Assert that the merge succeeded using PROCMAP_QUERY. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size);
+}
+
+TEST_F(merge, mprotect_unfaulted_left_faulted_right)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr;
+
+ /*
+ * |-----------------------|
+ * | unfaulted |
+ * |-----------------------|
+ */
+ ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /*
+ * Now make the first 3 pages read-only, splitting the VMA:
+ *
+ * RO RW
+ * |-----------|-----------------------|
+ * | unfaulted | unfaulted |
+ * |-----------|-----------------------|
+ */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
+ /*
+ * Fault in the first of the last 6 pages so it gets an anon_vma and
+ * thus the whole VMA becomes 'faulted':
+ *
+ * RO RW
+ * |-----------|-----------------------|
+ * | unfaulted | faulted |
+ * |-----------|-----------------------|
+ */
+ ptr[3 * page_size] = 'x';
+ /*
+ * Now make the last 3 pages read-only, splitting the VMA:
+ *
+ * RO RW RO
+ * |-----------|-----------|-----------|
+ * | unfaulted | faulted | faulted |
+ * |-----------|-----------|-----------|
+ */
+ ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0);
+ /*
+ * Now mprotect() the RW region read-only, we should merge:
+ *
+ * RO
+ * |-----------------------|
+ * | faulted |
+ * |-----------------------|
+ */
+ ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0);
+
+ /* Assert that the merge succeeded using PROCMAP_QUERY. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size);
+}
+
+TEST_F(merge, forked_target_vma)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ pid_t pid;
+ char *ptr, *ptr2;
+ int i;
+
+ /*
+ * |-----------|
+ * | unfaulted |
+ * |-----------|
+ */
+ ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * Fault in process.
+ *
+ * |-----------|
+ * | faulted |
+ * |-----------|
+ */
+ ptr[0] = 'x';
+
+ pid = fork();
+ ASSERT_NE(pid, -1);
+
+ if (pid != 0) {
+ wait(NULL);
+ return;
+ }
+
+ /* Child process below: */
+
+ /* Reopen for child. */
+ ASSERT_EQ(close_procmap(&self->procmap), 0);
+ ASSERT_EQ(open_self_procmap(&self->procmap), 0);
+
+ /* unCOWing everything does not cause the AVC to go away. */
+ for (i = 0; i < 5 * page_size; i += page_size)
+ ptr[i] = 'x';
+
+ /*
+ * Map in adjacent VMA in child.
+ *
+ * forked
+ * |-----------|-----------|
+ * | faulted | unfaulted |
+ * |-----------|-----------|
+ * ptr ptr2
+ */
+ ptr2 = mmap(&ptr[5 * page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /* Make sure not merged. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 5 * page_size);
+}
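+
+/*
+ * Annotation: the write loop above un-COWs every page, yet the forked VMA
+ * keeps the anon_vma chain (AVC) it inherited from the parent, and it is
+ * that lingering AVC which prevents the merge asserted against here.
+ */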
+
+TEST_F(merge, forked_source_vma)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ pid_t pid;
+ char *ptr, *ptr2;
+ int i;
+
+ /*
+ * |-----------|------------|
+ * | unfaulted | <unmapped> |
+ * |-----------|------------|
+ */
+ ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * Fault in process.
+ *
+ * |-----------|------------|
+ * | faulted | <unmapped> |
+ * |-----------|------------|
+ */
+ ptr[0] = 'x';
+
+ pid = fork();
+ ASSERT_NE(pid, -1);
+
+ if (pid != 0) {
+ wait(NULL);
+ return;
+ }
+
+ /* Child process below: */
+
+ /* Reopen for child. */
+ ASSERT_EQ(close_procmap(&self->procmap), 0);
+ ASSERT_EQ(open_self_procmap(&self->procmap), 0);
+
+ /* unCOWing everything does not cause the AVC to go away. */
+ for (i = 0; i < 5 * page_size; i += page_size)
+ ptr[i] = 'x';
+
+ /*
+ * Map in adjacent VMA in child, ptr2 after ptr, but incompatible.
+ *
+ * forked RW RWX
+ * |-----------|-----------|
+ * | faulted | unfaulted |
+ * |-----------|-----------|
+ * ptr ptr2
+ */
+ ptr2 = mmap(&carveout[6 * page_size], 5 * page_size, PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /* Make sure not merged. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr2));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size);
+
+ /*
+ * Now mprotect forked region to RWX so it becomes the source for the
+ * merge to unfaulted region:
+ *
+ * forked RWX RWX
+ * |-----------|-----------|
+ * | faulted | unfaulted |
+ * |-----------|-----------|
+ * ptr ptr2
+ *
+ * This should NOT result in a merge, as ptr was forked.
+ */
+ ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ | PROT_WRITE | PROT_EXEC), 0);
+ /* Again, make sure not merged. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr2));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/mlock2-tests.c b/tools/testing/selftests/mm/mlock2-tests.c
index 7f0d50fa361d..3e90ff37e336 100644
--- a/tools/testing/selftests/mm/mlock2-tests.c
+++ b/tools/testing/selftests/mm/mlock2-tests.c
@@ -196,7 +196,7 @@ static void test_mlock_lock(void)
ksft_exit_fail_msg("munlock(): %s\n", strerror(errno));
}
- ksft_test_result(!unlock_lock_check(map), "%s: Locked\n", __func__);
+ ksft_test_result(!unlock_lock_check(map), "%s: Unlocked\n", __func__);
munmap(map, 2 * page_size);
}
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
index 57b4bba2b45f..b07acc86f4f0 100644
--- a/tools/testing/selftests/mm/pagemap_ioctl.c
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -34,7 +34,7 @@
#define PAGEMAP "/proc/self/pagemap"
int pagemap_fd;
int uffd;
-unsigned int page_size;
+unsigned long page_size;
unsigned int hpage_size;
const char *progname;
@@ -112,7 +112,7 @@ int init_uffd(void)
return 0;
}
-int wp_init(void *lpBaseAddress, int dwRegionSize)
+int wp_init(void *lpBaseAddress, long dwRegionSize)
{
struct uffdio_register uffdio_register;
struct uffdio_writeprotect wp;
@@ -136,7 +136,7 @@ int wp_init(void *lpBaseAddress, int dwRegionSize)
return 0;
}
-int wp_free(void *lpBaseAddress, int dwRegionSize)
+int wp_free(void *lpBaseAddress, long dwRegionSize)
{
struct uffdio_register uffdio_register;
@@ -184,7 +184,7 @@ void *gethugetlb_mem(int size, int *shmid)
int userfaultfd_tests(void)
{
- int mem_size, vec_size, written, num_pages = 16;
+ long mem_size, vec_size, written, num_pages = 16;
char *mem, *vec;
mem_size = num_pages * page_size;
@@ -213,7 +213,7 @@ int userfaultfd_tests(void)
written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
if (written < 0)
- ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", written, errno, strerror(errno));
ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", __func__);
@@ -995,7 +995,7 @@ int unmapped_region_tests(void)
{
void *start = (void *)0x10000000;
int written, len = 0x00040000;
- int vec_size = len / page_size;
+ long vec_size = len / page_size;
struct page_region *vec = malloc(sizeof(struct page_region) * vec_size);
/* 1. Get written pages */
@@ -1051,7 +1051,7 @@ static void test_simple(void)
int sanity_tests(void)
{
unsigned long long mem_size, vec_size;
- int ret, fd, i, buf_size;
+ long ret, fd, i, buf_size;
struct page_region *vec;
char *mem, *fmem;
struct stat sbuf;
@@ -1160,7 +1160,7 @@ int sanity_tests(void)
ret = stat(progname, &sbuf);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (fmem == MAP_FAILED)
diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c
new file mode 100644
index 000000000000..866ac023baf5
--- /dev/null
+++ b/tools/testing/selftests/mm/pfnmap.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Basic VM_PFNMAP tests relying on mmap() of '/dev/mem'
+ *
+ * Copyright 2025, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#include "../kselftest_harness.h"
+#include "vm_util.h"
+
+static sigjmp_buf sigjmp_buf_env;
+
+static void signal_handler(int sig)
+{
+ siglongjmp(sigjmp_buf_env, -EFAULT);
+}
+
+static int test_read_access(char *addr, size_t size, size_t pagesize)
+{
+ size_t offs;
+ int ret;
+
+ if (signal(SIGSEGV, signal_handler) == SIG_ERR)
+ return -EINVAL;
+
+ ret = sigsetjmp(sigjmp_buf_env, 1);
+ if (!ret) {
+ for (offs = 0; offs < size; offs += pagesize)
+ /* Force a read that the compiler cannot optimize out. */
+ *((volatile char *)(addr + offs));
+ }
+ if (signal(SIGSEGV, SIG_DFL) == SIG_ERR)
+ return -EINVAL;
+
+ return ret;
+}
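+
+/*
+ * Annotation: on a faulting access the handler longjmps back, making
+ * sigsetjmp() return -EFAULT; test_read_access() therefore returns
+ * -EFAULT for an unreadable mapping and 0 if every page was readable.
+ */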
+
+static int find_ram_target(off_t *phys_addr,
+ unsigned long long pagesize)
+{
+ unsigned long long start, end;
+ char line[80], *end_ptr;
+ FILE *file;
+
+ /* Search /proc/iomem for the first suitable "System RAM" range. */
+ file = fopen("/proc/iomem", "r");
+ if (!file)
+ return -errno;
+
+ while (fgets(line, sizeof(line), file)) {
+ /* Ignore any child nodes. */
+ if (!isalnum(line[0]))
+ continue;
+
+ if (!strstr(line, "System RAM\n"))
+ continue;
+
+ start = strtoull(line, &end_ptr, 16);
+ /* Skip over the "-" */
+ end_ptr++;
+ /* Make end "exclusive". */
+ end = strtoull(end_ptr, NULL, 16) + 1;
+
+ /* Actual addresses are not exported. */
+ if (!start && !end)
+ break;
+
+ /* We need full pages. */
+ start = (start + pagesize - 1) & ~(pagesize - 1);
+ end &= ~(pagesize - 1);
+
+ if (start != (off_t)start)
+ break;
+
+ /* We need two pages. */
+ if (end > start + 2 * pagesize) {
+ fclose(file);
+ *phys_addr = start;
+ return 0;
+ }
+ }
+ fclose(file);
+ return -ENOENT;
+}
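+
+/*
+ * Annotation: a matching top-level line in /proc/iomem looks like, e.g.,
+ *
+ *   00100000-bffdffff : System RAM
+ *
+ * The first strtoull() parses the start, end_ptr is advanced past the
+ * '-', and the second strtoull() parses the inclusive end, which is then
+ * converted to an exclusive, page-aligned bound.
+ */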
+
+FIXTURE(pfnmap)
+{
+ off_t phys_addr;
+ size_t pagesize;
+ int dev_mem_fd;
+ char *addr1;
+ size_t size1;
+ char *addr2;
+ size_t size2;
+};
+
+FIXTURE_SETUP(pfnmap)
+{
+ self->pagesize = getpagesize();
+
+ /* We'll require two physical pages throughout our tests ... */
+ if (find_ram_target(&self->phys_addr, self->pagesize))
+ SKIP(return, "Cannot find ram target in '/proc/iomem'\n");
+
+ self->dev_mem_fd = open("/dev/mem", O_RDONLY);
+ if (self->dev_mem_fd < 0)
+ SKIP(return, "Cannot open '/dev/mem'\n");
+
+ self->size1 = self->pagesize * 2;
+ self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED,
+ self->dev_mem_fd, self->phys_addr);
+ if (self->addr1 == MAP_FAILED)
+ SKIP(return, "Cannot mmap '/dev/mem'\n");
+
+ /* ... and want to be able to read from them. */
+ if (test_read_access(self->addr1, self->size1, self->pagesize))
+ SKIP(return, "Cannot read-access mmap'ed '/dev/mem'\n");
+
+ self->size2 = 0;
+ self->addr2 = MAP_FAILED;
+}
+
+FIXTURE_TEARDOWN(pfnmap)
+{
+ if (self->addr2 != MAP_FAILED)
+ munmap(self->addr2, self->size2);
+ if (self->addr1 != MAP_FAILED)
+ munmap(self->addr1, self->size1);
+ if (self->dev_mem_fd >= 0)
+ close(self->dev_mem_fd);
+}
+
+TEST_F(pfnmap, madvise_disallowed)
+{
+ int advices[] = {
+ MADV_DONTNEED,
+ MADV_DONTNEED_LOCKED,
+ MADV_FREE,
+ MADV_WIPEONFORK,
+ MADV_COLD,
+ MADV_PAGEOUT,
+ MADV_POPULATE_READ,
+ MADV_POPULATE_WRITE,
+ };
+ int i;
+
+ /* All these advices must be rejected. */
+ for (i = 0; i < ARRAY_SIZE(advices); i++) {
+ EXPECT_LT(madvise(self->addr1, self->pagesize, advices[i]), 0);
+ EXPECT_EQ(errno, EINVAL);
+ }
+}
+
+TEST_F(pfnmap, munmap_split)
+{
+ /*
+ * Unmap the first page. This munmap() call is not really expected to
+ * fail, but we might be able to trigger other internal issues.
+ */
+ ASSERT_EQ(munmap(self->addr1, self->pagesize), 0);
+
+ /*
+ * Remap the first page while the second page is still mapped. This
+ * makes sure that any PAT tracking on x86 will allow for mmap()'ing
+ * a page again while some parts of the first mmap() are still
+ * around.
+ */
+ self->size2 = self->pagesize;
+ self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED,
+ self->dev_mem_fd, self->phys_addr);
+ ASSERT_NE(self->addr2, MAP_FAILED);
+}
+
+TEST_F(pfnmap, mremap_fixed)
+{
+ char *ret;
+
+ /* Reserve a destination area. */
+ self->size2 = self->size1;
+ self->addr2 = mmap(NULL, self->size2, PROT_READ, MAP_ANON | MAP_PRIVATE,
+ -1, 0);
+ ASSERT_NE(self->addr2, MAP_FAILED);
+
+ /* mremap() over our destination. */
+ ret = mremap(self->addr1, self->size1, self->size2,
+ MREMAP_FIXED | MREMAP_MAYMOVE, self->addr2);
+ ASSERT_NE(ret, MAP_FAILED);
+}
+
+TEST_F(pfnmap, mremap_shrink)
+{
+ char *ret;
+
+ /* Shrinking is expected to work. */
+ ret = mremap(self->addr1, self->size1, self->size1 - self->pagesize, 0);
+ ASSERT_NE(ret, MAP_FAILED);
+}
+
+TEST_F(pfnmap, mremap_expand)
+{
+ /*
+ * Growing is not expected to work, and getting it right would
+ * be challenging. So this test primarily serves as an early warning
+ * that something that probably should never work suddenly works.
+ */
+ self->size2 = self->size1 + self->pagesize;
+ self->addr2 = mremap(self->addr1, self->size1, self->size2, MREMAP_MAYMOVE);
+ ASSERT_EQ(self->addr2, MAP_FAILED);
+}
+
+TEST_F(pfnmap, fork)
+{
+ pid_t pid;
+ int ret;
+
+ /* fork() a child and test if the child can access the pages. */
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (!pid) {
+ EXPECT_EQ(test_read_access(self->addr1, self->size1,
+ self->pagesize), 0);
+ exit(0);
+ }
+
+ wait(&ret);
+ if (WIFEXITED(ret))
+ ret = WEXITSTATUS(ret);
+ else
+ ret = -EINVAL;
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 9aff33b10999..dddd1dd8af14 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -63,6 +63,8 @@ separated by spaces:
test soft dirty page bit semantics
- pagemap
test pagemap_scan IOCTL
+- pfnmap
+ test VM_PFNMAP handling
- cow
test copy-on-write semantics
- thp
@@ -79,6 +81,8 @@ separated by spaces:
test prctl(PR_SET_MDWE, ...)
- page_frag
test handling of page fragment allocation and freeing
+- vma_merge
+ test that VMA merge cases behave as expected
example: ./run_vmtests.sh -t "hmm mmap ksm"
EOF
@@ -421,6 +425,8 @@ CATEGORY="madv_guard" run_test ./guard-regions
# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
CATEGORY="madv_populate" run_test ./madv_populate
+CATEGORY="vma_merge" run_test ./merge
+
if [ -x ./memfd_secret ]
then
(echo 0 > /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix
@@ -468,6 +474,8 @@ fi
CATEGORY="pagemap" run_test ./pagemap_ioctl
+CATEGORY="pfnmap" run_test ./pfnmap
+
# COW tests
CATEGORY="cow" run_test ./cow
diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c
index cd5174d735be..a41bc1234b37 100644
--- a/tools/testing/selftests/mm/thuge-gen.c
+++ b/tools/testing/selftests/mm/thuge-gen.c
@@ -127,7 +127,7 @@ void test_mmap(unsigned long size, unsigned flags)
show(size);
ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES,
- "%s mmap %lu\n", __func__, size);
+ "%s mmap %lu %x\n", __func__, size, flags);
if (munmap(map, size * NUM_PAGES))
ksft_exit_fail_msg("%s: unmap %s\n", __func__, strerror(errno));
@@ -165,7 +165,7 @@ void test_shmget(unsigned long size, unsigned flags)
show(size);
ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES,
- "%s: mmap %lu\n", __func__, size);
+ "%s: mmap %lu %x\n", __func__, size, flags);
if (shmdt(map))
ksft_exit_fail_msg("%s: shmdt: %s\n", __func__, strerror(errno));
}
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index e8fd9011c2a3..c73fd5d455c8 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -1231,6 +1231,182 @@ static void uffd_move_pmd_split_test(uffd_test_args_t *targs)
uffd_move_pmd_handle_fault);
}
+static bool
+uffdio_verify_results(const char *name, int ret, int error, long result)
+{
+ /*
+ * Should always return -1 with errno=EAGAIN, with corresponding
+ * result field updated in ioctl() args to be -EAGAIN too
+ * (e.g. copy.copy field for UFFDIO_COPY).
+ */
+ if (ret != -1) {
+ uffd_test_fail("%s should have returned -1", name);
+ return false;
+ }
+
+ if (error != EAGAIN) {
+ uffd_test_fail("%s should have errno==EAGAIN", name);
+ return false;
+ }
+
+ if (result != -EAGAIN) {
+ uffd_test_fail("%s should have been updated for -EAGAIN",
+ name);
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * This defines a function to test one ioctl. Note that here "field" can
+ * be 1 or anything not -EAGAIN. With that initial value set, we can
+ * verify later that it should be updated by kernel (when -EAGAIN
+ * returned), by checking whether it is also updated to -EAGAIN.
+ */
+#define DEFINE_MMAP_CHANGING_TEST(name, ioctl_name, field) \
+ static bool uffdio_mmap_changing_test_##name(int fd) \
+ { \
+ int ret; \
+ struct uffdio_##name args = { \
+ .field = 1, \
+ }; \
+ ret = ioctl(fd, ioctl_name, &args); \
+ return uffdio_verify_results(#ioctl_name, ret, errno, args.field); \
+ }
+
+DEFINE_MMAP_CHANGING_TEST(zeropage, UFFDIO_ZEROPAGE, zeropage)
+DEFINE_MMAP_CHANGING_TEST(copy, UFFDIO_COPY, copy)
+DEFINE_MMAP_CHANGING_TEST(move, UFFDIO_MOVE, move)
+DEFINE_MMAP_CHANGING_TEST(poison, UFFDIO_POISON, updated)
+DEFINE_MMAP_CHANGING_TEST(continue, UFFDIO_CONTINUE, mapped)
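+
+/*
+ * Annotation (illustrative, not part of the patch): the "zeropage"
+ * instance above expands to roughly:
+ *
+ *   static bool uffdio_mmap_changing_test_zeropage(int fd)
+ *   {
+ *           int ret;
+ *           struct uffdio_zeropage args = {
+ *                   .zeropage = 1,
+ *           };
+ *           ret = ioctl(fd, UFFDIO_ZEROPAGE, &args);
+ *           return uffdio_verify_results("UFFDIO_ZEROPAGE", ret, errno,
+ *                                        args.zeropage);
+ *   }
+ */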
+
+typedef enum {
+ /* We actually do not care about any state except UNINTERRUPTIBLE. */
+ THR_STATE_UNKNOWN = 0,
+ THR_STATE_UNINTERRUPTIBLE,
+} thread_state;
+
+static void sleep_short(void)
+{
+ usleep(1000);
+}
+
+static thread_state thread_state_get(pid_t tid)
+{
+ const char *header = "State:\t";
+ char tmp[256], *p, c;
+ FILE *fp;
+
+ snprintf(tmp, sizeof(tmp), "/proc/%d/status", tid);
+ fp = fopen(tmp, "r");
+
+ if (!fp)
+ return THR_STATE_UNKNOWN;
+
+ while (fgets(tmp, sizeof(tmp), fp)) {
+ p = strstr(tmp, header);
+ if (p) {
+ /* For example, "State:\tD (disk sleep)" */
+ c = p[strlen(header)];
+ fclose(fp);
+ return c == 'D' ?
+ THR_STATE_UNINTERRUPTIBLE : THR_STATE_UNKNOWN;
+ }
+ }
+
+ fclose(fp);
+ return THR_STATE_UNKNOWN;
+}
+
+static void thread_state_until(pid_t tid, thread_state state)
+{
+ thread_state s;
+
+ do {
+ s = thread_state_get(tid);
+ sleep_short();
+ } while (s != state);
+}
+
+static void *uffd_mmap_changing_thread(void *opaque)
+{
+ volatile pid_t *pid = opaque;
+ int ret;
+
+ /* Unfortunately, the tid is only fetchable from the thread itself. */
+ assert(*pid == 0);
+ *pid = syscall(SYS_gettid);
+
+ /* Inject an event, this will hang solid until the event read */
+ ret = madvise(area_dst, page_size, MADV_REMOVE);
+ if (ret)
+ err("madvise(MADV_REMOVE) failed");
+
+ return NULL;
+}
+
+static void uffd_consume_message(int fd)
+{
+ struct uffd_msg msg = { 0 };
+
+ while (uffd_read_msg(fd, &msg));
+}
+
+static void uffd_mmap_changing_test(uffd_test_args_t *targs)
+{
+ /*
+	 * This stores the kernel TID of the child thread (which can differ
+	 * from the pthread_t handle); 0 means not yet initialized.
+ */
+ pid_t pid = 0;
+ pthread_t tid;
+ int ret;
+
+ if (uffd_register(uffd, area_dst, nr_pages * page_size,
+ true, false, false))
+ err("uffd_register() failed");
+
+ /* Create a thread to generate the racy event */
+ ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &pid);
+ if (ret)
+ err("pthread_create() failed");
+
+ /*
+	 * Wait until the thread has set up the pid. Read it through a
+	 * volatile pointer so each iteration reloads the value from
+	 * memory rather than from a cached register.
+	 */
+	while (!*(volatile pid_t *)&pid)
+ sleep_short();
+
+ /* Wait until the thread hangs at REMOVE event */
+ thread_state_until(pid, THR_STATE_UNINTERRUPTIBLE);
+
+ if (!uffdio_mmap_changing_test_copy(uffd))
+ return;
+
+ if (!uffdio_mmap_changing_test_zeropage(uffd))
+ return;
+
+ if (!uffdio_mmap_changing_test_move(uffd))
+ return;
+
+ if (!uffdio_mmap_changing_test_poison(uffd))
+ return;
+
+ if (!uffdio_mmap_changing_test_continue(uffd))
+ return;
+
+ /*
+	 * All of the above succeeded! Clean everything up, starting by
+	 * reading the pending event so the blocked thread can resume.
+ */
+ uffd_consume_message(uffd);
+
+ ret = pthread_join(tid, NULL);
+ assert(ret == 0);
+
+ uffd_test_pass();
+}
+
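For anyone extending this test: if the pending event needed to be inspected rather than just drained, the raw read would look roughly like this (a sketch; UFFD_EVENT_REMOVE and the uffd_msg layout come from linux/userfaultfd.h):

	struct uffd_msg msg;

	if (read(uffd, &msg, sizeof(msg)) == sizeof(msg) &&
	    msg.event == UFFD_EVENT_REMOVE)
		printf("REMOVE event: 0x%llx-0x%llx\n",
		       (unsigned long long)msg.arg.remove.start,
		       (unsigned long long)msg.arg.remove.end);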
static int prevent_hugepages(const char **errmsg)
{
/* This should be done before source area is populated */
@@ -1470,6 +1646,32 @@ uffd_test_case_t uffd_tests[] = {
.mem_targets = MEM_ALL,
.uffd_feature_required = UFFD_FEATURE_POISON,
},
+ {
+ .name = "mmap-changing",
+ .uffd_fn = uffd_mmap_changing_test,
+ /*
+ * There's no point running this test over all mem types as
+ * they share the same code paths.
+ *
+ * Choose shmem for simplicity, because (1) shmem supports
+ * MINOR mode to cover UFFDIO_CONTINUE, and (2) shmem is
+	 * almost always available (unlike hugetlb). We abuse shmem
+	 * for UFFDIO_MOVE here, but the path this test covers does
+	 * not yet depend on the memory type being correct.
+ */
+ .mem_targets = MEM_SHMEM,
+ /*
+	 * Any UFFD_FEATURE_EVENT_* would logically work to trigger
+	 * the race, but we choose the simplest (REMOVE).
+	 *
+	 * Meanwhile, since we'll cover quite a few new ioctl()s
+	 * (CONTINUE, POISON, MOVE), skip this test on old kernels
+	 * by requiring all of the corresponding features.
+ */
+ .uffd_feature_required = UFFD_FEATURE_EVENT_REMOVE |
+ UFFD_FEATURE_MOVE | UFFD_FEATURE_POISON |
+ UFFD_FEATURE_MINOR_SHMEM,
+ },
};
static void usage(const char *prog)
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh
index 1f92e8caceac..325de53966b6 100755
--- a/tools/testing/selftests/mm/va_high_addr_switch.sh
+++ b/tools/testing/selftests/mm/va_high_addr_switch.sh
@@ -7,23 +7,20 @@
# real test to check that the kernel is configured to support at least 5
# pagetable levels.
-# 1 means the test failed
-exitcode=1
-
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
-fail()
+skip()
{
echo "$1"
- exit $exitcode
+ exit $ksft_skip
}
check_supported_x86_64()
{
local config="/proc/config.gz"
[[ -f "${config}" ]] || config="/boot/config-$(uname -r)"
- [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot"
+ [[ -f "${config}" ]] || skip "Cannot find kernel config in /proc or /boot"
# gzip -dcfq automatically handles both compressed and plaintext input.
# See man 1 gzip under '-f'.
@@ -33,11 +30,9 @@ check_supported_x86_64()
else {print 1}; exit}' /proc/cpuinfo 2>/dev/null)
if [[ "${pg_table_levels}" -lt 5 ]]; then
- echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
- exit $ksft_skip
+ skip "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then
- echo "$0: CPU does not have the necessary la57 flag to support page table level 5"
- exit $ksft_skip
+ skip "$0: CPU does not have the necessary la57 flag to support page table level 5"
fi
}
@@ -45,24 +40,21 @@ check_supported_ppc64()
{
local config="/proc/config.gz"
[[ -f "${config}" ]] || config="/boot/config-$(uname -r)"
- [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot"
+ [[ -f "${config}" ]] || skip "Cannot find kernel config in /proc or /boot"
local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2)
if [[ "${pg_table_levels}" -lt 5 ]]; then
- echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
- exit $ksft_skip
+ skip "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
fi
local mmu_support=$(grep -m1 "mmu" /proc/cpuinfo | awk '{print $3}')
if [[ "$mmu_support" != "radix" ]]; then
- echo "$0: System does not use Radix MMU, required for 5-level paging"
- exit $ksft_skip
+ skip "$0: System does not use Radix MMU, required for 5-level paging"
fi
local hugepages_total=$(awk '/HugePages_Total/ {print $2}' /proc/meminfo)
if [[ "${hugepages_total}" -eq 0 ]]; then
- echo "$0: HugePages are not enabled, required for some tests"
- exit $ksft_skip
+ skip "$0: HugePages are not enabled, required for some tests"
fi
}
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index a36734fb62f3..61d7bf1f8c62 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <string.h>
+#include <errno.h>
#include <fcntl.h>
#include <dirent.h>
#include <inttypes.h>
@@ -424,3 +425,64 @@ bool check_vmflag_io(void *addr)
flags += flaglen;
}
}
+
+/*
+ * Open an fd at /proc/$pid/maps and set up procmap_out ready for
+ * PROCMAP_QUERY queries. Returns 0 on success, or a negative error
+ * code otherwise.
+ */
+int open_procmap(pid_t pid, struct procmap_fd *procmap_out)
+{
+ char path[256];
+ int ret = 0;
+
+ memset(procmap_out, '\0', sizeof(*procmap_out));
+ sprintf(path, "/proc/%d/maps", pid);
+ procmap_out->query.size = sizeof(procmap_out->query);
+ procmap_out->fd = open(path, O_RDONLY);
+ if (procmap_out->fd < 0)
+ ret = -errno;
+
+ return ret;
+}
+
+/*
+ * Perform PROCMAP_QUERY. Returns 0 on success, or a negative error code
+ * otherwise.
+ */
+int query_procmap(struct procmap_fd *procmap)
+{
+ int ret = 0;
+
+ if (ioctl(procmap->fd, PROCMAP_QUERY, &procmap->query) == -1)
+ ret = -errno;
+
+ return ret;
+}
+
+/*
+ * Try to find the VMA at the specified address; returns true if found and
+ * false if not. Any other error fails the test outright.
+ *
+ * On success, procmap->query is populated with the results.
+ */
+bool find_vma_procmap(struct procmap_fd *procmap, void *address)
+{
+ int err;
+
+ procmap->query.query_flags = 0;
+ procmap->query.query_addr = (unsigned long)address;
+ err = query_procmap(procmap);
+ if (!err)
+ return true;
+
+ if (err != -ENOENT)
+ ksft_exit_fail_msg("%s: Error %d on ioctl(PROCMAP_QUERY)\n",
+ __func__, err);
+ return false;
+}
+
+/*
+ * Close the fd used by the PROCMAP_QUERY mechanism. Returns 0 on success,
+ * or an error code otherwise.
+ */
+int close_procmap(struct procmap_fd *procmap)
+{
+ return close(procmap->fd);
+}
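Taken together, the intended call pattern for these helpers is roughly the following (a sketch; vma_start and vma_end are fields of struct procmap_query from linux/fs.h):

	struct procmap_fd procmap;
	char buf[4096];

	if (open_self_procmap(&procmap))
		ksft_exit_fail_msg("open_self_procmap() failed\n");
	if (find_vma_procmap(&procmap, buf))	/* the stack VMA backing buf */
		ksft_print_msg("VMA: 0x%llx-0x%llx\n",
			       (unsigned long long)procmap.query.vma_start,
			       (unsigned long long)procmap.query.vma_end);
	close_procmap(&procmap);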
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index 6effafdc4d8a..adb5d294a220 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -3,9 +3,11 @@
#include <stdbool.h>
#include <sys/mman.h>
#include <err.h>
+#include <stdarg.h>
#include <strings.h> /* ffsl() */
#include <unistd.h> /* _SC_PAGESIZE */
#include "../kselftest.h"
+#include <linux/fs.h>
#define BIT_ULL(nr) (1ULL << (nr))
#define PM_SOFT_DIRTY BIT_ULL(55)
@@ -19,6 +21,15 @@
extern unsigned int __page_size;
extern unsigned int __page_shift;
+/*
+ * Represents an open fd and PROCMAP_QUERY state for binary (via ioctl)
+ * /proc/$pid/[s]maps lookup.
+ */
+struct procmap_fd {
+ int fd;
+ struct procmap_query query;
+};
+
static inline unsigned int psize(void)
{
if (!__page_size)
@@ -73,6 +84,36 @@ int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
bool miss, bool wp, bool minor, uint64_t *ioctls);
unsigned long get_free_hugepages(void);
bool check_vmflag_io(void *addr);
+int open_procmap(pid_t pid, struct procmap_fd *procmap_out);
+int query_procmap(struct procmap_fd *procmap);
+bool find_vma_procmap(struct procmap_fd *procmap, void *address);
+int close_procmap(struct procmap_fd *procmap);
+
+static inline int open_self_procmap(struct procmap_fd *procmap_out)
+{
+ pid_t pid = getpid();
+
+ return open_procmap(pid, procmap_out);
+}
+
+/* These helpers need to be inline to match the kselftest.h idiom. */
+static char test_name[1024];
+
+static inline void log_test_start(const char *name, ...)
+{
+ va_list args;
+ va_start(args, name);
+
+ vsnprintf(test_name, sizeof(test_name), name, args);
+ ksft_print_msg("[RUN] %s\n", test_name);
+
+ va_end(args);
+}
+
+static inline void log_test_result(int result)
+{
+ ksft_test_result_report(result, "%s\n", test_name);
+}
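Intended usage, sketched below; mode_name is illustrative and KSFT_PASS comes from kselftest.h:

	log_test_start("mmap mode %s", mode_name);	/* prints "[RUN] ..." */
	/* ... run the test ... */
	log_test_result(KSFT_PASS);	/* reports a TAP result under the saved name */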
/*
* On ppc64 this will only work with radix 2M hugepage size
diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile
index 1c631740a730..c5e0b76ba6ac 100644
--- a/tools/testing/selftests/ptrace/Makefile
+++ b/tools/testing/selftests/ptrace/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
CFLAGS += -std=c99 -pthread -Wall $(KHDR_INCLUDES)
-TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess get_set_sud
+TEST_GEN_PROGS := get_syscall_info set_syscall_info peeksiginfo vmaccess get_set_sud
include ../lib.mk
diff --git a/tools/testing/selftests/ptrace/set_syscall_info.c b/tools/testing/selftests/ptrace/set_syscall_info.c
new file mode 100644
index 000000000000..4198248ef874
--- /dev/null
+++ b/tools/testing/selftests/ptrace/set_syscall_info.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2018-2025 Dmitry V. Levin <ldv@strace.io>
+ * All rights reserved.
+ *
+ * Check whether the PTRACE_SET_SYSCALL_INFO semantics implemented in the
+ * kernel match userspace expectations.
+ */
+
+#include "../kselftest_harness.h"
+#include <err.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <asm/unistd.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+#if defined(_MIPS_SIM) && _MIPS_SIM == _MIPS_SIM_NABI32
+/*
+ * MIPS N32 is the only architecture where __kernel_ulong_t
+ * does not match the bitness of syscall arguments.
+ */
+typedef unsigned long long kernel_ulong_t;
+#else
+typedef __kernel_ulong_t kernel_ulong_t;
+#endif
+
+struct si_entry {
+ int nr;
+ kernel_ulong_t args[6];
+};
+struct si_exit {
+ unsigned int is_error;
+ int rval;
+};
+
+static unsigned int ptrace_stop;
+static pid_t tracee_pid;
+
+static int
+kill_tracee(pid_t pid)
+{
+ if (!pid)
+ return 0;
+
+ int saved_errno = errno;
+
+ int rc = kill(pid, SIGKILL);
+
+ errno = saved_errno;
+ return rc;
+}
+
+static long
+sys_ptrace(int request, pid_t pid, unsigned long addr, unsigned long data)
+{
+ return syscall(__NR_ptrace, request, pid, addr, data);
+}
+
+#define LOG_KILL_TRACEE(fmt, ...) \
+ do { \
+ kill_tracee(tracee_pid); \
+ TH_LOG("wait #%d: " fmt, \
+ ptrace_stop, ##__VA_ARGS__); \
+ } while (0)
+
+static void
+check_psi_entry(struct __test_metadata *_metadata,
+ const struct ptrace_syscall_info *info,
+ const struct si_entry *exp_entry,
+ const char *text)
+{
+ unsigned int i;
+ int exp_nr = exp_entry->nr;
+#if defined __s390__ || defined __s390x__
+ /* s390 is the only architecture that has 16-bit syscall numbers */
+ exp_nr &= 0xffff;
+#endif
+
+ ASSERT_EQ(PTRACE_SYSCALL_INFO_ENTRY, info->op) {
+ LOG_KILL_TRACEE("%s: entry stop mismatch", text);
+ }
+ ASSERT_TRUE(info->arch) {
+ LOG_KILL_TRACEE("%s: entry stop mismatch", text);
+ }
+ ASSERT_TRUE(info->instruction_pointer) {
+ LOG_KILL_TRACEE("%s: entry stop mismatch", text);
+ }
+ ASSERT_TRUE(info->stack_pointer) {
+ LOG_KILL_TRACEE("%s: entry stop mismatch", text);
+ }
+ ASSERT_EQ(exp_nr, info->entry.nr) {
+ LOG_KILL_TRACEE("%s: syscall nr mismatch", text);
+ }
+ for (i = 0; i < ARRAY_SIZE(exp_entry->args); ++i) {
+ ASSERT_EQ(exp_entry->args[i], info->entry.args[i]) {
+ LOG_KILL_TRACEE("%s: syscall arg #%u mismatch",
+ text, i);
+ }
+ }
+}
+
+static void
+check_psi_exit(struct __test_metadata *_metadata,
+ const struct ptrace_syscall_info *info,
+ const struct si_exit *exp_exit,
+ const char *text)
+{
+ ASSERT_EQ(PTRACE_SYSCALL_INFO_EXIT, info->op) {
+ LOG_KILL_TRACEE("%s: exit stop mismatch", text);
+ }
+ ASSERT_TRUE(info->arch) {
+ LOG_KILL_TRACEE("%s: exit stop mismatch", text);
+ }
+ ASSERT_TRUE(info->instruction_pointer) {
+ LOG_KILL_TRACEE("%s: exit stop mismatch", text);
+ }
+ ASSERT_TRUE(info->stack_pointer) {
+ LOG_KILL_TRACEE("%s: exit stop mismatch", text);
+ }
+ ASSERT_EQ(exp_exit->is_error, info->exit.is_error) {
+ LOG_KILL_TRACEE("%s: exit stop mismatch", text);
+ }
+ ASSERT_EQ(exp_exit->rval, info->exit.rval) {
+ LOG_KILL_TRACEE("%s: exit stop mismatch", text);
+ }
+}
+
+TEST(set_syscall_info)
+{
+ const pid_t tracer_pid = getpid();
+ const kernel_ulong_t dummy[] = {
+ (kernel_ulong_t) 0xdad0bef0bad0fed0ULL,
+ (kernel_ulong_t) 0xdad1bef1bad1fed1ULL,
+ (kernel_ulong_t) 0xdad2bef2bad2fed2ULL,
+ (kernel_ulong_t) 0xdad3bef3bad3fed3ULL,
+ (kernel_ulong_t) 0xdad4bef4bad4fed4ULL,
+ (kernel_ulong_t) 0xdad5bef5bad5fed5ULL,
+ };
+ int splice_in[2], splice_out[2];
+
+ ASSERT_EQ(0, pipe(splice_in));
+ ASSERT_EQ(0, pipe(splice_out));
+ ASSERT_EQ(sizeof(dummy), write(splice_in[1], dummy, sizeof(dummy)));
+
+ const struct {
+ struct si_entry entry[2];
+ struct si_exit exit[2];
+ } si[] = {
+ /* change scno, keep non-error rval */
+ {
+ {
+ {
+ __NR_gettid,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }, {
+ __NR_getppid,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }
+ }, {
+ { 0, tracer_pid }, { 0, tracer_pid }
+ }
+ },
+
+ /* set scno to -1, keep error rval */
+ {
+ {
+ {
+ __NR_chdir,
+ {
+ (uintptr_t) ".",
+ dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }, {
+ -1,
+ {
+ (uintptr_t) ".",
+ dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }
+ }, {
+ { 1, -ENOSYS }, { 1, -ENOSYS }
+ }
+ },
+
+ /* keep scno, change non-error rval */
+ {
+ {
+ {
+ __NR_getppid,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }, {
+ __NR_getppid,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }
+ }, {
+ { 0, tracer_pid }, { 0, tracer_pid + 1 }
+ }
+ },
+
+ /* change arg1, keep non-error rval */
+ {
+ {
+ {
+ __NR_chdir,
+ {
+ (uintptr_t) "",
+ dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }, {
+ __NR_chdir,
+ {
+ (uintptr_t) ".",
+ dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }
+ }, {
+ { 0, 0 }, { 0, 0 }
+ }
+ },
+
+ /* set scno to -1, change error rval to non-error */
+ {
+ {
+ {
+ __NR_gettid,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }, {
+ -1,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }
+ }, {
+ { 1, -ENOSYS }, { 0, tracer_pid }
+ }
+ },
+
+ /* change scno, change non-error rval to error */
+ {
+ {
+ {
+ __NR_chdir,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }, {
+ __NR_getppid,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }
+ }, {
+ { 0, tracer_pid }, { 1, -EISDIR }
+ }
+ },
+
+ /* change scno and all args, change non-error rval */
+ {
+ {
+ {
+ __NR_gettid,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }, {
+ __NR_splice,
+ {
+ splice_in[0], 0, splice_out[1], 0,
+ sizeof(dummy), SPLICE_F_NONBLOCK
+ }
+ }
+ }, {
+ { 0, sizeof(dummy) }, { 0, sizeof(dummy) + 1 }
+ }
+ },
+
+ /* change arg1, no exit stop */
+ {
+ {
+ {
+ __NR_exit_group,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }, {
+ __NR_exit_group,
+ {
+ 0, dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }
+ }, {
+ { 0, 0 }, { 0, 0 }
+ }
+ },
+ };
+
+ long rc;
+ unsigned int i;
+
+ tracee_pid = fork();
+
+ ASSERT_LE(0, tracee_pid) {
+ TH_LOG("fork: %m");
+ }
+
+ if (tracee_pid == 0) {
+ /* get the pid before PTRACE_TRACEME */
+ tracee_pid = getpid();
+ ASSERT_EQ(0, sys_ptrace(PTRACE_TRACEME, 0, 0, 0)) {
+ TH_LOG("PTRACE_TRACEME: %m");
+ }
+ ASSERT_EQ(0, kill(tracee_pid, SIGSTOP)) {
+ /* cannot happen */
+ TH_LOG("kill SIGSTOP: %m");
+ }
+ for (i = 0; i < ARRAY_SIZE(si); ++i) {
+ rc = syscall(si[i].entry[0].nr,
+ si[i].entry[0].args[0],
+ si[i].entry[0].args[1],
+ si[i].entry[0].args[2],
+ si[i].entry[0].args[3],
+ si[i].entry[0].args[4],
+ si[i].entry[0].args[5]);
+ if (si[i].exit[1].is_error) {
+ if (rc != -1 || errno != -si[i].exit[1].rval)
+ break;
+ } else {
+ if (rc != si[i].exit[1].rval)
+ break;
+ }
+ }
+ /*
+		 * Something went wrong, but in this state the tracee
+ * cannot reliably issue syscalls, so just crash.
+ */
+ *(volatile unsigned char *) (uintptr_t) i = 42;
+ /* unreachable */
+ _exit(i + 1);
+ }
+
+ for (ptrace_stop = 0; ; ++ptrace_stop) {
+ struct ptrace_syscall_info info = {
+ .op = 0xff /* invalid PTRACE_SYSCALL_INFO_* op */
+ };
+ const size_t size = sizeof(info);
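+		/*
+		 * PTRACE_GET_SYSCALL_INFO returns the number of bytes the
+		 * kernel makes available: for entry stops, everything up
+		 * to and including entry.args[5]; for exit stops,
+		 * everything up to and including exit.is_error.
+		 */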
+ const int expected_entry_size =
+ (void *) &info.entry.args[6] - (void *) &info;
+ const int expected_exit_size =
+ (void *) (&info.exit.is_error + 1) -
+ (void *) &info;
+ int status;
+
+ ASSERT_EQ(tracee_pid, wait(&status)) {
+ /* cannot happen */
+ LOG_KILL_TRACEE("wait: %m");
+ }
+ if (WIFEXITED(status)) {
+ tracee_pid = 0; /* the tracee is no more */
+ ASSERT_EQ(0, WEXITSTATUS(status)) {
+ LOG_KILL_TRACEE("unexpected exit status %u",
+ WEXITSTATUS(status));
+ }
+ break;
+ }
+ ASSERT_FALSE(WIFSIGNALED(status)) {
+ tracee_pid = 0; /* the tracee is no more */
+ LOG_KILL_TRACEE("unexpected signal %u",
+ WTERMSIG(status));
+ }
+ ASSERT_TRUE(WIFSTOPPED(status)) {
+ /* cannot happen */
+ LOG_KILL_TRACEE("unexpected wait status %#x", status);
+ }
+
+ ASSERT_LT(ptrace_stop, ARRAY_SIZE(si) * 2) {
+ LOG_KILL_TRACEE("ptrace stop overflow");
+ }
+
+ switch (WSTOPSIG(status)) {
+ case SIGSTOP:
+ ASSERT_EQ(0, ptrace_stop) {
+ LOG_KILL_TRACEE("unexpected signal stop");
+ }
+ ASSERT_EQ(0, sys_ptrace(PTRACE_SETOPTIONS, tracee_pid,
+ 0, PTRACE_O_TRACESYSGOOD)) {
+ LOG_KILL_TRACEE("PTRACE_SETOPTIONS: %m");
+ }
+ break;
+
+ case SIGTRAP | 0x80:
+ ASSERT_LT(0, ptrace_stop) {
+ LOG_KILL_TRACEE("unexpected syscall stop");
+ }
+ ASSERT_LT(0, (rc = sys_ptrace(PTRACE_GET_SYSCALL_INFO,
+ tracee_pid, size,
+ (uintptr_t) &info))) {
+ LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #1: %m");
+ }
+ if (ptrace_stop & 1) {
+ /* entering syscall */
+ const struct si_entry *exp_entry =
+ &si[ptrace_stop / 2].entry[0];
+ const struct si_entry *set_entry =
+ &si[ptrace_stop / 2].entry[1];
+
+ /* check ptrace_syscall_info before the changes */
+ ASSERT_EQ(expected_entry_size, rc) {
+ LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #1"
+ ": entry stop mismatch");
+ }
+ check_psi_entry(_metadata, &info, exp_entry,
+ "PTRACE_GET_SYSCALL_INFO #1");
+
+ /* apply the changes */
+ info.entry.nr = set_entry->nr;
+ for (i = 0; i < ARRAY_SIZE(set_entry->args); ++i)
+ info.entry.args[i] = set_entry->args[i];
+ ASSERT_EQ(0, sys_ptrace(PTRACE_SET_SYSCALL_INFO,
+ tracee_pid, size,
+ (uintptr_t) &info)) {
+ LOG_KILL_TRACEE("PTRACE_SET_SYSCALL_INFO: %m");
+ }
+
+ /* check ptrace_syscall_info after the changes */
+ memset(&info, 0, sizeof(info));
+ info.op = 0xff;
+ ASSERT_LT(0, (rc = sys_ptrace(PTRACE_GET_SYSCALL_INFO,
+ tracee_pid, size,
+ (uintptr_t) &info))) {
+ LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO: %m");
+ }
+ ASSERT_EQ(expected_entry_size, rc) {
+ LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #2"
+ ": entry stop mismatch");
+ }
+ check_psi_entry(_metadata, &info, set_entry,
+ "PTRACE_GET_SYSCALL_INFO #2");
+ } else {
+ /* exiting syscall */
+ const struct si_exit *exp_exit =
+ &si[ptrace_stop / 2 - 1].exit[0];
+ const struct si_exit *set_exit =
+ &si[ptrace_stop / 2 - 1].exit[1];
+
+ /* check ptrace_syscall_info before the changes */
+ ASSERT_EQ(expected_exit_size, rc) {
+ LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #1"
+ ": exit stop mismatch");
+ }
+ check_psi_exit(_metadata, &info, exp_exit,
+ "PTRACE_GET_SYSCALL_INFO #1");
+
+ /* apply the changes */
+ info.exit.is_error = set_exit->is_error;
+ info.exit.rval = set_exit->rval;
+ ASSERT_EQ(0, sys_ptrace(PTRACE_SET_SYSCALL_INFO,
+ tracee_pid, size,
+ (uintptr_t) &info)) {
+ LOG_KILL_TRACEE("PTRACE_SET_SYSCALL_INFO: %m");
+ }
+
+ /* check ptrace_syscall_info after the changes */
+ memset(&info, 0, sizeof(info));
+ info.op = 0xff;
+ ASSERT_LT(0, (rc = sys_ptrace(PTRACE_GET_SYSCALL_INFO,
+ tracee_pid, size,
+ (uintptr_t) &info))) {
+ LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #2: %m");
+ }
+ ASSERT_EQ(expected_exit_size, rc) {
+ LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #2"
+ ": exit stop mismatch");
+ }
+ check_psi_exit(_metadata, &info, set_exit,
+ "PTRACE_GET_SYSCALL_INFO #2");
+ }
+ break;
+
+ default:
+ LOG_KILL_TRACEE("unexpected stop signal %u",
+ WSTOPSIG(status));
+ abort();
+ }
+
+ ASSERT_EQ(0, sys_ptrace(PTRACE_SYSCALL, tracee_pid, 0, 0)) {
+ LOG_KILL_TRACEE("PTRACE_SYSCALL: %m");
+ }
+ }
+
+ ASSERT_EQ(ptrace_stop, ARRAY_SIZE(si) * 2);
+}
+
+TEST_HARNESS_MAIN
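Distilled from the loop above, the core of the new API is a symmetric fetch/modify/write-back sequence at a syscall stop, e.g. at syscall entry:

	struct ptrace_syscall_info info;

	sys_ptrace(PTRACE_GET_SYSCALL_INFO, pid, sizeof(info), (uintptr_t)&info);
	info.entry.nr = __NR_getppid;	/* redirect the syscall */
	for (unsigned int i = 0; i < 6; ++i)
		info.entry.args[i] = 0;
	sys_ptrace(PTRACE_SET_SYSCALL_INFO, pid, sizeof(info), (uintptr_t)&info);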
diff --git a/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c
index 0326b39a11b9..30cab5d425d2 100644
--- a/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c
+++ b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c
@@ -56,7 +56,7 @@ int main(int argc, char **argv)
}
if (write(fd, "1\n", 2) < 0) {
- perror("Can' enable power floor notifications\n");
+ perror("Can't enable power floor notifications\n");
exit(1);
}
diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c
index 217c3a641c53..a40097232967 100644
--- a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c
+++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c
@@ -37,7 +37,7 @@ void workload_hint_exit(int signum)
}
if (write(fd, "0\n", 2) < 0) {
- perror("Can' disable workload hints\n");
+ perror("Can't disable workload hints\n");
exit(1);
}
@@ -99,7 +99,7 @@ int main(int argc, char **argv)
}
if (write(fd, "1\n", 2) < 0) {
- perror("Can' enable workload hints\n");
+ perror("Can't enable workload hints\n");
exit(1);
}
diff --git a/tools/testing/vma/Makefile b/tools/testing/vma/Makefile
index 860fd2311dcc..66f3831a668f 100644
--- a/tools/testing/vma/Makefile
+++ b/tools/testing/vma/Makefile
@@ -9,7 +9,7 @@ include ../shared/shared.mk
OFILES = $(SHARED_OFILES) vma.o maple-shim.o
TARGETS = vma
-vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma.h
+vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h
vma: $(OFILES)
$(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS)
diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
index 11f761769b5b..2be7597a2ac2 100644
--- a/tools/testing/vma/vma.c
+++ b/tools/testing/vma/vma.c
@@ -28,6 +28,8 @@ unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
* Directly import the VMA implementation here. Our vma_internal.h wrapper
* provides userland-equivalent functionality for everything vma.c uses.
*/
+#include "../../../mm/vma_init.c"
+#include "../../../mm/vma_exec.c"
#include "../../../mm/vma.c"
const struct vm_operations_struct vma_dummy_vm_ops;
@@ -90,6 +92,12 @@ static int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma)
return res;
}
+static void detach_free_vma(struct vm_area_struct *vma)
+{
+ vma_mark_detached(vma);
+ vm_area_free(vma);
+}
+
/* Helper function to allocate a VMA and link it to the tree. */
static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
unsigned long start,
@@ -103,7 +111,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
return NULL;
if (attach_vma(mm, vma)) {
- vm_area_free(vma);
+ detach_free_vma(vma);
return NULL;
}
@@ -185,6 +193,15 @@ static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start,
vmg->__adjust_next_start = false;
}
+/* Helper function to set both the VMG range and its anon_vma. */
+static void vmg_set_range_anon_vma(struct vma_merge_struct *vmg, unsigned long start,
+ unsigned long end, pgoff_t pgoff, vm_flags_t flags,
+ struct anon_vma *anon_vma)
+{
+ vmg_set_range(vmg, start, end, pgoff, flags);
+ vmg->anon_vma = anon_vma;
+}
+
/*
* Helper function to try to merge a new VMA.
*
@@ -239,7 +256,7 @@ static int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi)
vma_iter_set(vmi, 0);
for_each_vma(*vmi, vma) {
- vm_area_free(vma);
+ detach_free_vma(vma);
count++;
}
@@ -265,6 +282,22 @@ static void dummy_close(struct vm_area_struct *)
{
}
+static void __vma_set_dummy_anon_vma(struct vm_area_struct *vma,
+ struct anon_vma_chain *avc,
+ struct anon_vma *anon_vma)
+{
+ vma->anon_vma = anon_vma;
+ INIT_LIST_HEAD(&vma->anon_vma_chain);
+ list_add(&avc->same_vma, &vma->anon_vma_chain);
+ avc->anon_vma = vma->anon_vma;
+}
+
+static void vma_set_dummy_anon_vma(struct vm_area_struct *vma,
+ struct anon_vma_chain *avc)
+{
+ __vma_set_dummy_anon_vma(vma, avc, &dummy_anon_vma);
+}
+
static bool test_simple_merge(void)
{
struct vm_area_struct *vma;
@@ -293,7 +326,7 @@ static bool test_simple_merge(void)
ASSERT_EQ(vma->vm_pgoff, 0);
ASSERT_EQ(vma->vm_flags, flags);
- vm_area_free(vma);
+ detach_free_vma(vma);
mtree_destroy(&mm.mm_mt);
return true;
@@ -335,7 +368,7 @@ static bool test_simple_modify(void)
ASSERT_EQ(vma->vm_end, 0x1000);
ASSERT_EQ(vma->vm_pgoff, 0);
- vm_area_free(vma);
+ detach_free_vma(vma);
vma_iter_clear(&vmi);
vma = vma_next(&vmi);
@@ -344,7 +377,7 @@ static bool test_simple_modify(void)
ASSERT_EQ(vma->vm_end, 0x2000);
ASSERT_EQ(vma->vm_pgoff, 1);
- vm_area_free(vma);
+ detach_free_vma(vma);
vma_iter_clear(&vmi);
vma = vma_next(&vmi);
@@ -353,7 +386,7 @@ static bool test_simple_modify(void)
ASSERT_EQ(vma->vm_end, 0x3000);
ASSERT_EQ(vma->vm_pgoff, 2);
- vm_area_free(vma);
+ detach_free_vma(vma);
mtree_destroy(&mm.mm_mt);
return true;
@@ -381,7 +414,7 @@ static bool test_simple_expand(void)
ASSERT_EQ(vma->vm_end, 0x3000);
ASSERT_EQ(vma->vm_pgoff, 0);
- vm_area_free(vma);
+ detach_free_vma(vma);
mtree_destroy(&mm.mm_mt);
return true;
@@ -402,7 +435,7 @@ static bool test_simple_shrink(void)
ASSERT_EQ(vma->vm_end, 0x1000);
ASSERT_EQ(vma->vm_pgoff, 0);
- vm_area_free(vma);
+ detach_free_vma(vma);
mtree_destroy(&mm.mm_mt);
return true;
@@ -593,7 +626,7 @@ static bool test_merge_new(void)
ASSERT_EQ(vma->vm_pgoff, 0);
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
- vm_area_free(vma);
+ detach_free_vma(vma);
count++;
}
@@ -953,6 +986,7 @@ static bool test_merge_existing(void)
const struct vm_operations_struct vm_ops = {
.close = dummy_close,
};
+ struct anon_vma_chain avc = {};
/*
* Merge right case - partial span.
@@ -968,10 +1002,10 @@ static bool test_merge_existing(void)
vma->vm_ops = &vm_ops; /* This should have no impact. */
vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags);
vma_next->vm_ops = &vm_ops; /* This should have no impact. */
- vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, flags, &dummy_anon_vma);
vmg.middle = vma;
vmg.prev = vma;
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &avc);
ASSERT_EQ(merge_existing(&vmg), vma_next);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_next->vm_start, 0x3000);
@@ -1001,9 +1035,9 @@ static bool test_merge_existing(void)
vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, flags);
vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags);
vma_next->vm_ops = &vm_ops; /* This should have no impact. */
- vmg_set_range(&vmg, 0x2000, 0x6000, 2, flags);
+ vmg_set_range_anon_vma(&vmg, 0x2000, 0x6000, 2, flags, &dummy_anon_vma);
vmg.middle = vma;
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &avc);
ASSERT_EQ(merge_existing(&vmg), vma_next);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_next->vm_start, 0x2000);
@@ -1030,11 +1064,10 @@ static bool test_merge_existing(void)
vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags);
vma->vm_ops = &vm_ops; /* This should have no impact. */
- vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, flags, &dummy_anon_vma);
vmg.prev = vma_prev;
vmg.middle = vma;
- vma->anon_vma = &dummy_anon_vma;
-
+ vma_set_dummy_anon_vma(vma, &avc);
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0);
@@ -1064,10 +1097,10 @@ static bool test_merge_existing(void)
vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags);
- vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, &dummy_anon_vma);
vmg.prev = vma_prev;
vmg.middle = vma;
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &avc);
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0);
@@ -1094,10 +1127,10 @@ static bool test_merge_existing(void)
vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags);
vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags);
- vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, &dummy_anon_vma);
vmg.prev = vma_prev;
vmg.middle = vma;
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &avc);
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0);
@@ -1180,12 +1213,9 @@ static bool test_anon_vma_non_mergeable(void)
.mm = &mm,
.vmi = &vmi,
};
- struct anon_vma_chain dummy_anon_vma_chain1 = {
- .anon_vma = &dummy_anon_vma,
- };
- struct anon_vma_chain dummy_anon_vma_chain2 = {
- .anon_vma = &dummy_anon_vma,
- };
+ struct anon_vma_chain dummy_anon_vma_chain_1 = {};
+ struct anon_vma_chain dummy_anon_vma_chain_2 = {};
+ struct anon_vma dummy_anon_vma_2;
/*
* In the case of modified VMA merge, merging both left and right VMAs
@@ -1209,24 +1239,11 @@ static bool test_anon_vma_non_mergeable(void)
*
* However, when prev is compared to next, the merge should fail.
*/
-
- INIT_LIST_HEAD(&vma_prev->anon_vma_chain);
- list_add(&dummy_anon_vma_chain1.same_vma, &vma_prev->anon_vma_chain);
- ASSERT_TRUE(list_is_singular(&vma_prev->anon_vma_chain));
- vma_prev->anon_vma = &dummy_anon_vma;
- ASSERT_TRUE(is_mergeable_anon_vma(NULL, vma_prev->anon_vma, vma_prev));
-
- INIT_LIST_HEAD(&vma_next->anon_vma_chain);
- list_add(&dummy_anon_vma_chain2.same_vma, &vma_next->anon_vma_chain);
- ASSERT_TRUE(list_is_singular(&vma_next->anon_vma_chain));
- vma_next->anon_vma = (struct anon_vma *)2;
- ASSERT_TRUE(is_mergeable_anon_vma(NULL, vma_next->anon_vma, vma_next));
-
- ASSERT_FALSE(is_mergeable_anon_vma(vma_prev->anon_vma, vma_next->anon_vma, NULL));
-
- vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, NULL);
vmg.prev = vma_prev;
vmg.middle = vma;
+ vma_set_dummy_anon_vma(vma_prev, &dummy_anon_vma_chain_1);
+ __vma_set_dummy_anon_vma(vma_next, &dummy_anon_vma_chain_2, &dummy_anon_vma_2);
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
@@ -1253,17 +1270,12 @@ static bool test_anon_vma_non_mergeable(void)
vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags);
- INIT_LIST_HEAD(&vma_prev->anon_vma_chain);
- list_add(&dummy_anon_vma_chain1.same_vma, &vma_prev->anon_vma_chain);
- vma_prev->anon_vma = (struct anon_vma *)1;
-
- INIT_LIST_HEAD(&vma_next->anon_vma_chain);
- list_add(&dummy_anon_vma_chain2.same_vma, &vma_next->anon_vma_chain);
- vma_next->anon_vma = (struct anon_vma *)2;
-
- vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, NULL);
vmg.prev = vma_prev;
+ vma_set_dummy_anon_vma(vma_prev, &dummy_anon_vma_chain_1);
+ __vma_set_dummy_anon_vma(vma_next, &dummy_anon_vma_chain_2, &dummy_anon_vma_2);
+ vmg.anon_vma = NULL;
ASSERT_EQ(merge_new(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0);
@@ -1363,8 +1375,8 @@ static bool test_dup_anon_vma(void)
vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags);
vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, flags);
-
- vma->anon_vma = &dummy_anon_vma;
+ vmg.anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain);
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
vmg.prev = vma_prev;
vmg.middle = vma;
@@ -1392,7 +1404,7 @@ static bool test_dup_anon_vma(void)
vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
vma = alloc_and_link_vma(&mm, 0x3000, 0x8000, 3, flags);
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain);
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
vmg.prev = vma_prev;
vmg.middle = vma;
@@ -1420,7 +1432,7 @@ static bool test_dup_anon_vma(void)
vma = alloc_and_link_vma(&mm, 0, 0x5000, 0, flags);
vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, flags);
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain);
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
vmg.prev = vma;
vmg.middle = vma;
@@ -1447,6 +1459,7 @@ static bool test_vmi_prealloc_fail(void)
.mm = &mm,
.vmi = &vmi,
};
+ struct anon_vma_chain avc = {};
struct vm_area_struct *vma_prev, *vma;
/*
@@ -1459,9 +1472,10 @@ static bool test_vmi_prealloc_fail(void)
vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags);
vma->anon_vma = &dummy_anon_vma;
- vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x5000, 3, flags, &dummy_anon_vma);
vmg.prev = vma_prev;
vmg.middle = vma;
+ vma_set_dummy_anon_vma(vma, &avc);
fail_prealloc = true;
@@ -1661,6 +1675,7 @@ int main(void)
int num_tests = 0, num_fail = 0;
maple_tree_init();
+ vma_state_init();
#define TEST(name) \
do { \
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 572ab2cea763..441feb21aa5a 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -56,6 +56,8 @@ extern unsigned long dac_mmap_min_addr;
#define VM_PFNMAP 0x00000400
#define VM_LOCKED 0x00002000
#define VM_IO 0x00004000
+#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */
+#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */
#define VM_DONTEXPAND 0x00040000
#define VM_LOCKONFAULT 0x00080000
#define VM_ACCOUNT 0x00100000
@@ -70,6 +72,20 @@ extern unsigned long dac_mmap_min_addr;
#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
+#ifdef CONFIG_STACK_GROWSUP
+#define VM_STACK VM_GROWSUP
+#define VM_STACK_EARLY VM_GROWSDOWN
+#else
+#define VM_STACK VM_GROWSDOWN
+#define VM_STACK_EARLY 0
+#endif
+
+#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
+#define TASK_SIZE_LOW DEFAULT_MAP_WINDOW
+#define TASK_SIZE_MAX DEFAULT_MAP_WINDOW
+#define STACK_TOP TASK_SIZE_LOW
+#define STACK_TOP_MAX TASK_SIZE_MAX
+
/* This mask represents all the VMA flag bits used by mlock */
#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT)
@@ -82,6 +98,10 @@ extern unsigned long dac_mmap_min_addr;
#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
+#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
+#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
+
#define RLIMIT_STACK 3 /* max stack size */
#define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */
@@ -135,6 +155,10 @@ typedef __bitwise unsigned int vm_fault_t;
*/
#define pr_warn_once pr_err
+#define data_race(expr) expr
+
+#define ASSERT_EXCLUSIVE_WRITER(x)
+
struct kref {
refcount_t refcount;
};
@@ -229,12 +253,46 @@ struct mm_struct {
unsigned long flags; /* Must use atomic bitops to access */
};
+struct vm_area_struct;
+
+/*
+ * Describes a VMA that is about to be mmap()'ed. Drivers may choose to
+ * manipulate mutable fields which will cause those fields to be updated in the
+ * resultant VMA.
+ *
+ * Helper functions are not required for manipulating any field.
+ */
+struct vm_area_desc {
+ /* Immutable state. */
+ struct mm_struct *mm;
+ unsigned long start;
+ unsigned long end;
+
+ /* Mutable fields. Populated with initial state. */
+ pgoff_t pgoff;
+ struct file *file;
+ vm_flags_t vm_flags;
+ pgprot_t page_prot;
+
+ /* Write-only fields. */
+ const struct vm_operations_struct *vm_ops;
+ void *private_data;
+};
+
+struct file_operations {
+ int (*mmap)(struct file *, struct vm_area_struct *);
+ int (*mmap_prepare)(struct vm_area_desc *);
+};
+
struct file {
struct address_space *f_mapping;
+ const struct file_operations *f_op;
};
#define VMA_LOCK_OFFSET 0x40000000
+typedef struct { unsigned long v; } freeptr_t;
+
struct vm_area_struct {
/* The first cache line has the info for VMA tree walking. */
@@ -244,9 +302,7 @@ struct vm_area_struct {
unsigned long vm_start;
unsigned long vm_end;
};
-#ifdef CONFIG_PER_VMA_LOCK
- struct rcu_head vm_rcu; /* Used for deferred freeing. */
-#endif
+ freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
};
struct mm_struct *vm_mm; /* The address space we belong to. */
@@ -421,6 +477,87 @@ struct vm_unmapped_area_info {
unsigned long start_gap;
};
+struct pagetable_move_control {
+ struct vm_area_struct *old; /* Source VMA. */
+ struct vm_area_struct *new; /* Destination VMA. */
+ unsigned long old_addr; /* Address from which the move begins. */
+ unsigned long old_end; /* Exclusive address at which old range ends. */
+ unsigned long new_addr; /* Address to move page tables to. */
+ unsigned long len_in; /* Bytes to remap specified by user. */
+
+ bool need_rmap_locks; /* Do rmap locks need to be taken? */
+ bool for_stack; /* Is this an early temp stack being moved? */
+};
+
+#define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_) \
+ struct pagetable_move_control name = { \
+ .old = old_, \
+ .new = new_, \
+ .old_addr = old_addr_, \
+ .old_end = (old_addr_) + (len_), \
+ .new_addr = new_addr_, \
+ .len_in = len_, \
+ }
+
+struct kmem_cache_args {
+ /**
+ * @align: The required alignment for the objects.
+ *
+ * %0 means no specific alignment is requested.
+ */
+ unsigned int align;
+ /**
+ * @useroffset: Usercopy region offset.
+ *
+ * %0 is a valid offset, when @usersize is non-%0
+ */
+ unsigned int useroffset;
+ /**
+ * @usersize: Usercopy region size.
+ *
+ * %0 means no usercopy region is specified.
+ */
+ unsigned int usersize;
+ /**
+ * @freeptr_offset: Custom offset for the free pointer
+ * in &SLAB_TYPESAFE_BY_RCU caches
+ *
+ * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
+ * outside of the object. This might cause the object to grow in size.
+ * Cache creators that have a reason to avoid this can specify a custom
+ * free pointer offset in their struct where the free pointer will be
+ * placed.
+ *
+ * Note that placing the free pointer inside the object requires the
+ * caller to ensure that no fields are invalidated that are required to
+ * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
+ * details).
+ *
+ * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
+ * is specified, %use_freeptr_offset must be set %true.
+ *
+ * Note that @ctor currently isn't supported with custom free pointers
+ * as a @ctor requires an external free pointer.
+ */
+ unsigned int freeptr_offset;
+ /**
+ * @use_freeptr_offset: Whether a @freeptr_offset is used.
+ */
+ bool use_freeptr_offset;
+ /**
+ * @ctor: A constructor for the objects.
+ *
+ * The constructor is invoked for each object in a newly allocated slab
+	 * page. It is the cache user's responsibility to free the object in the
+ * same state as after calling the constructor, or deal appropriately
+ * with any differences between a freshly constructed and a reallocated
+ * object.
+ *
+ * %NULL means no constructor.
+ */
+ void (*ctor)(void *);
+};
+
static inline void vma_iter_invalidate(struct vma_iterator *vmi)
{
mas_pause(&vmi->mas);
@@ -505,31 +642,38 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
vma->vm_lock_seq = UINT_MAX;
}
-static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
-{
- struct vm_area_struct *vma = calloc(1, sizeof(struct vm_area_struct));
+struct kmem_cache {
+ const char *name;
+ size_t object_size;
+ struct kmem_cache_args *args;
+};
- if (!vma)
- return NULL;
+static inline struct kmem_cache *__kmem_cache_create(const char *name,
+ size_t object_size,
+ struct kmem_cache_args *args)
+{
+ struct kmem_cache *ret = malloc(sizeof(struct kmem_cache));
- vma_init(vma, mm);
+ ret->name = name;
+ ret->object_size = object_size;
+ ret->args = args;
- return vma;
+ return ret;
}
-static inline struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
-{
- struct vm_area_struct *new = calloc(1, sizeof(struct vm_area_struct));
+#define kmem_cache_create(__name, __object_size, __args, ...) \
+ __kmem_cache_create((__name), (__object_size), (__args))
- if (!new)
- return NULL;
+static inline void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
+{
+ (void)gfpflags;
- memcpy(new, orig, sizeof(*new));
- refcount_set(&new->vm_refcnt, 0);
- new->vm_lock_seq = UINT_MAX;
- INIT_LIST_HEAD(&new->anon_vma_chain);
+	return calloc(1, s->object_size);
+}
- return new;
+static inline void kmem_cache_free(struct kmem_cache *s, void *x)
+{
+ free(x);
}
/*
@@ -696,11 +840,6 @@ static inline void mpol_put(struct mempolicy *)
{
}
-static inline void vm_area_free(struct vm_area_struct *vma)
-{
- free(vma);
-}
-
static inline void lru_add_drain(void)
{
}
@@ -1018,11 +1157,6 @@ static inline void vm_flags_clear(struct vm_area_struct *vma,
vma->__vm_flags &= ~flags;
}
-static inline int call_mmap(struct file *, struct vm_area_struct *)
-{
- return 0;
-}
-
static inline int shmem_zero_setup(struct vm_area_struct *)
{
return 0;
@@ -1240,4 +1374,96 @@ static inline int mapping_map_writable(struct address_space *mapping)
return 0;
}
+static inline unsigned long move_page_tables(struct pagetable_move_control *pmc)
+{
+ (void)pmc;
+
+ return 0;
+}
+
+static inline void free_pgd_range(struct mmu_gather *tlb,
+ unsigned long addr, unsigned long end,
+ unsigned long floor, unsigned long ceiling)
+{
+ (void)tlb;
+ (void)addr;
+ (void)end;
+ (void)floor;
+ (void)ceiling;
+}
+
+static inline int ksm_execve(struct mm_struct *mm)
+{
+ (void)mm;
+
+ return 0;
+}
+
+static inline void ksm_exit(struct mm_struct *mm)
+{
+ (void)mm;
+}
+
+static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
+{
+ (void)vma;
+ (void)reset_refcnt;
+}
+
+static inline void vma_numab_state_init(struct vm_area_struct *vma)
+{
+ (void)vma;
+}
+
+static inline void vma_numab_state_free(struct vm_area_struct *vma)
+{
+ (void)vma;
+}
+
+static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
+ struct vm_area_struct *new_vma)
+{
+ (void)orig_vma;
+ (void)new_vma;
+}
+
+static inline void free_anon_vma_name(struct vm_area_struct *vma)
+{
+ (void)vma;
+}
+
+/* Did the driver provide valid mmap hook configuration? */
+static inline bool file_has_valid_mmap_hooks(struct file *file)
+{
+ bool has_mmap = file->f_op->mmap;
+ bool has_mmap_prepare = file->f_op->mmap_prepare;
+
+ /* Hooks are mutually exclusive. */
+ if (WARN_ON_ONCE(has_mmap && has_mmap_prepare))
+ return false;
+ if (WARN_ON_ONCE(!has_mmap && !has_mmap_prepare))
+ return false;
+
+ return true;
+}
+
+static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ if (WARN_ON_ONCE(file->f_op->mmap_prepare))
+ return -EINVAL;
+
+ return file->f_op->mmap(file, vma);
+}
+
+static inline int __call_mmap_prepare(struct file *file,
+ struct vm_area_desc *desc)
+{
+ return file->f_op->mmap_prepare(desc);
+}
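A hypothetical caller would dispatch between the two hooks along these lines (the function name here is illustrative, not part of the header):

	static inline int do_file_mmap(struct file *file,
				       struct vm_area_desc *desc,
				       struct vm_area_struct *vma)
	{
		if (!file_has_valid_mmap_hooks(file))
			return -EINVAL;
		if (file->f_op->mmap_prepare)
			return __call_mmap_prepare(file, desc);
		return call_mmap(file, vma);
	}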
+
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+ (void)vma;
+}
+
#endif /* __MM_VMA_INTERNAL_H */