summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/include/uapi/linux/fs.h19
-rw-r--r--tools/testing/memblock/tests/alloc_api.c22
-rw-r--r--tools/testing/memblock/tests/alloc_helpers_api.c4
-rw-r--r--tools/testing/memblock/tests/alloc_nid_api.c20
-rw-r--r--tools/testing/radix-tree/maple.c126
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c22
-rw-r--r--tools/testing/selftests/damon/Makefile2
-rw-r--r--tools/testing/selftests/damon/_chk_dependency.sh52
-rw-r--r--tools/testing/selftests/damon/_damon_sysfs.py5
-rw-r--r--tools/testing/selftests/damon/_debugfs_common.sh64
-rw-r--r--tools/testing/selftests/filesystems/anon_inode_test.c2
-rw-r--r--tools/testing/selftests/filesystems/eventfd/eventfd_test.c7
-rw-r--r--tools/testing/selftests/filesystems/file_stressor.c2
-rw-r--r--tools/testing/selftests/kmod/config5
-rw-r--r--tools/testing/selftests/mm/.gitignore2
-rw-r--r--tools/testing/selftests/mm/Makefile2
-rw-r--r--tools/testing/selftests/mm/guard-regions.c57
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c2
-rwxr-xr-xtools/testing/selftests/mm/hugetlb_reparenting_test.sh98
-rw-r--r--tools/testing/selftests/mm/map_fixed_noreplace.c2
-rw-r--r--tools/testing/selftests/mm/merge.c455
-rw-r--r--tools/testing/selftests/mm/pagemap_ioctl.c16
-rw-r--r--tools/testing/selftests/mm/pfnmap.c196
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh8
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c202
-rw-r--r--tools/testing/selftests/mm/vm_util.c62
-rw-r--r--tools/testing/selftests/mm/vm_util.h21
-rw-r--r--tools/testing/selftests/ptrace/Makefile2
-rw-r--r--tools/testing/selftests/ptrace/set_syscall_info.c519
-rw-r--r--tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c2
-rw-r--r--tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c4
-rw-r--r--tools/testing/vma/Makefile2
-rw-r--r--tools/testing/vma/vma.c127
-rw-r--r--tools/testing/vma/vma_internal.h281
34 files changed, 2089 insertions, 323 deletions
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index 8a27bc5c7a7f..24ddf7bc4f25 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -40,6 +40,15 @@
#define BLOCK_SIZE_BITS 10
#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
+/* flags for integrity meta */
+#define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */
+#define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */
+#define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */
+
+#define IO_INTEGRITY_VALID_FLAGS (IO_INTEGRITY_CHK_GUARD | \
+ IO_INTEGRITY_CHK_REFTAG | \
+ IO_INTEGRITY_CHK_APPTAG)
+
#define SEEK_SET 0 /* seek relative to beginning of file */
#define SEEK_CUR 1 /* seek relative to current file position */
#define SEEK_END 2 /* seek relative to end of file */
@@ -329,9 +338,16 @@ typedef int __bitwise __kernel_rwf_t;
/* per-IO negation of O_APPEND */
#define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020)
+/* Atomic Write */
+#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040)
+
+/* buffered IO that drops the cache after reading or writing data */
+#define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080)
+
/* mask of flags supported by the kernel */
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
- RWF_APPEND | RWF_NOAPPEND)
+ RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
+ RWF_DONTCACHE)
#define PROCFS_IOCTL_MAGIC 'f'
@@ -347,6 +363,7 @@ typedef int __bitwise __kernel_rwf_t;
#define PAGE_IS_PFNZERO (1 << 5)
#define PAGE_IS_HUGE (1 << 6)
#define PAGE_IS_SOFT_DIRTY (1 << 7)
+#define PAGE_IS_GUARD (1 << 8)
/*
* struct page_region - Page region with flags
diff --git a/tools/testing/memblock/tests/alloc_api.c b/tools/testing/memblock/tests/alloc_api.c
index 68f1a75cd72c..c55f67dd367d 100644
--- a/tools/testing/memblock/tests/alloc_api.c
+++ b/tools/testing/memblock/tests/alloc_api.c
@@ -134,7 +134,7 @@ static int alloc_top_down_before_check(void)
PREFIX_PUSH();
setup_memblock();
- memblock_reserve(memblock_end_of_DRAM() - total_size, r1_size);
+ memblock_reserve_kern(memblock_end_of_DRAM() - total_size, r1_size);
allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES);
@@ -182,7 +182,7 @@ static int alloc_top_down_after_check(void)
total_size = r1.size + r2_size;
- memblock_reserve(r1.base, r1.size);
+ memblock_reserve_kern(r1.base, r1.size);
allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES);
@@ -231,8 +231,8 @@ static int alloc_top_down_second_fit_check(void)
total_size = r1.size + r2.size + r3_size;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES);
@@ -285,8 +285,8 @@ static int alloc_in_between_generic_check(void)
total_size = r1.size + r2.size + r3_size;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES);
@@ -422,7 +422,7 @@ static int alloc_limited_space_generic_check(void)
setup_memblock();
/* Simulate almost-full memory */
- memblock_reserve(memblock_start_of_DRAM(), reserved_size);
+ memblock_reserve_kern(memblock_start_of_DRAM(), reserved_size);
allocated_ptr = run_memblock_alloc(available_size, SMP_CACHE_BYTES);
@@ -608,7 +608,7 @@ static int alloc_bottom_up_before_check(void)
PREFIX_PUSH();
setup_memblock();
- memblock_reserve(memblock_start_of_DRAM() + r1_size, r2_size);
+ memblock_reserve_kern(memblock_start_of_DRAM() + r1_size, r2_size);
allocated_ptr = run_memblock_alloc(r1_size, SMP_CACHE_BYTES);
@@ -655,7 +655,7 @@ static int alloc_bottom_up_after_check(void)
total_size = r1.size + r2_size;
- memblock_reserve(r1.base, r1.size);
+ memblock_reserve_kern(r1.base, r1.size);
allocated_ptr = run_memblock_alloc(r2_size, SMP_CACHE_BYTES);
@@ -705,8 +705,8 @@ static int alloc_bottom_up_second_fit_check(void)
total_size = r1.size + r2.size + r3_size;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc(r3_size, SMP_CACHE_BYTES);
diff --git a/tools/testing/memblock/tests/alloc_helpers_api.c b/tools/testing/memblock/tests/alloc_helpers_api.c
index 3ef9486da8a0..e5362cfd2ff3 100644
--- a/tools/testing/memblock/tests/alloc_helpers_api.c
+++ b/tools/testing/memblock/tests/alloc_helpers_api.c
@@ -163,7 +163,7 @@ static int alloc_from_top_down_no_space_above_check(void)
min_addr = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2;
/* No space above this address */
- memblock_reserve(min_addr, r2_size);
+ memblock_reserve_kern(min_addr, r2_size);
allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr);
@@ -199,7 +199,7 @@ static int alloc_from_top_down_min_addr_cap_check(void)
start_addr = (phys_addr_t)memblock_start_of_DRAM();
min_addr = start_addr - SMP_CACHE_BYTES * 3;
- memblock_reserve(start_addr + r1_size, MEM_SIZE - r1_size);
+ memblock_reserve_kern(start_addr + r1_size, MEM_SIZE - r1_size);
allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr);
diff --git a/tools/testing/memblock/tests/alloc_nid_api.c b/tools/testing/memblock/tests/alloc_nid_api.c
index 49bb416d34ff..562e4701b0e0 100644
--- a/tools/testing/memblock/tests/alloc_nid_api.c
+++ b/tools/testing/memblock/tests/alloc_nid_api.c
@@ -324,7 +324,7 @@ static int alloc_nid_min_reserved_generic_check(void)
min_addr = max_addr - r2_size;
reserved_base = min_addr - r1_size;
- memblock_reserve(reserved_base, r1_size);
+ memblock_reserve_kern(reserved_base, r1_size);
allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES,
min_addr, max_addr,
@@ -374,7 +374,7 @@ static int alloc_nid_max_reserved_generic_check(void)
max_addr = memblock_end_of_DRAM() - r1_size;
min_addr = max_addr - r2_size;
- memblock_reserve(max_addr, r1_size);
+ memblock_reserve_kern(max_addr, r1_size);
allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES,
min_addr, max_addr,
@@ -436,8 +436,8 @@ static int alloc_nid_top_down_reserved_with_space_check(void)
min_addr = r2.base + r2.size;
max_addr = r1.base;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
min_addr, max_addr,
@@ -499,8 +499,8 @@ static int alloc_nid_reserved_full_merge_generic_check(void)
min_addr = r2.base + r2.size;
max_addr = r1.base;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
min_addr, max_addr,
@@ -563,8 +563,8 @@ static int alloc_nid_top_down_reserved_no_space_check(void)
min_addr = r2.base + r2.size;
max_addr = r1.base;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
min_addr, max_addr,
@@ -909,8 +909,8 @@ static int alloc_nid_bottom_up_reserved_with_space_check(void)
min_addr = r2.base + r2.size;
max_addr = r1.base;
- memblock_reserve(r1.base, r1.size);
- memblock_reserve(r2.base, r2.size);
+ memblock_reserve_kern(r1.base, r1.size);
+ memblock_reserve_kern(r2.base, r2.size);
allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
min_addr, max_addr,
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index bc30050227fd..2c0b38301253 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -35475,15 +35475,65 @@ static void check_dfs_preorder(struct maple_tree *mt)
}
/* End of depth first search tests */
+/* get height of the lowest non-leaf node with free space */
+static unsigned char get_vacant_height(struct ma_wr_state *wr_mas, void *entry)
+{
+ struct ma_state *mas = wr_mas->mas;
+ char vacant_height = 0;
+ enum maple_type type;
+ unsigned long *pivots;
+ unsigned long min = 0;
+ unsigned long max = ULONG_MAX;
+ unsigned char offset;
+
+ /* start traversal */
+ mas_reset(mas);
+ mas_start(mas);
+ if (!xa_is_node(mas_root(mas)))
+ return 0;
+
+ type = mte_node_type(mas->node);
+ wr_mas->type = type;
+ while (!ma_is_leaf(type)) {
+ mas_node_walk(mas, mte_to_node(mas->node), type, &min, &max);
+ offset = mas->offset;
+ mas->end = mas_data_end(mas);
+ pivots = ma_pivots(mte_to_node(mas->node), type);
+
+ if (pivots) {
+ if (offset)
+ min = pivots[mas->offset - 1];
+ if (offset < mas->end)
+ max = pivots[mas->offset];
+ }
+ wr_mas->r_max = offset < mas->end ? pivots[offset] : mas->max;
+
+ /* detect spanning write */
+ if (mas_is_span_wr(wr_mas))
+ break;
+
+ if (mas->end < mt_slot_count(mas->node) - 1)
+ vacant_height = mas->depth + 1;
+
+ mas_descend(mas);
+ type = mte_node_type(mas->node);
+ mas->depth++;
+ }
+
+ return vacant_height;
+}
+
/* Preallocation testing */
static noinline void __init check_prealloc(struct maple_tree *mt)
{
unsigned long i, max = 100;
unsigned long allocated;
unsigned char height;
+ unsigned char vacant_height;
struct maple_node *mn;
void *ptr = check_prealloc;
MA_STATE(mas, mt, 10, 20);
+ MA_WR_STATE(wr_mas, &mas, ptr);
mt_set_non_kernel(1000);
for (i = 0; i <= max; i++)
@@ -35494,8 +35544,9 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
MT_BUG_ON(mt, allocated == 0);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
mas_destroy(&mas);
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
@@ -35503,8 +35554,9 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
MT_BUG_ON(mt, allocated == 0);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
mas_destroy(&mas);
allocated = mas_allocated(&mas);
@@ -35514,7 +35566,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
mn->parent = ma_parent_ptr(mn);
@@ -35527,7 +35580,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
@@ -35540,7 +35594,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
mas_push_node(&mas, mn);
@@ -35553,7 +35608,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
@@ -35578,7 +35634,8 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated != 1 + height * 2);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
+ MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 2);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
mt_set_non_kernel(1);
@@ -35595,8 +35652,14 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
+ vacant_height = get_vacant_height(&wr_mas, ptr);
MT_BUG_ON(mt, allocated == 0);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ /*
+ * vacant height cannot be used to compute the number of nodes needed
+ * as the root contains two entries which means it is on the verge of
+ * insufficiency. The worst case full height of the tree is needed.
+ */
+ MT_BUG_ON(mt, allocated != height * 3 + 1);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
mas_set_range(&mas, 0, 200);
@@ -36248,6 +36311,45 @@ static noinline void __init check_mtree_dup(struct maple_tree *mt)
extern void test_kmem_cache_bulk(void);
+static inline void check_spanning_store_height(struct maple_tree *mt)
+{
+ int index = 0;
+ MA_STATE(mas, mt, 0, 0);
+ mas_lock(&mas);
+ while (mt_height(mt) != 3) {
+ mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL);
+ mas_set(&mas, ++index);
+ }
+ mas_set_range(&mas, 90, 140);
+ mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL);
+ MT_BUG_ON(mt, mas_mt_height(&mas) != 2);
+ mas_unlock(&mas);
+}
+
+/*
+ * Test to check the path of a spanning rebalance which results in
+ * a collapse where the rebalancing of the child node leads to
+ * insufficieny in the parent node.
+ */
+static void check_collapsing_rebalance(struct maple_tree *mt)
+{
+ int i = 0;
+ MA_STATE(mas, mt, ULONG_MAX, ULONG_MAX);
+
+ /* create a height 6 tree */
+ while (mt_height(mt) < 6) {
+ mtree_store_range(mt, i, i + 10, xa_mk_value(i), GFP_KERNEL);
+ i += 9;
+ }
+
+ /* delete all entries one at a time, starting from the right */
+ do {
+ mas_erase(&mas);
+ } while (mas_prev(&mas, 0) != NULL);
+
+ mtree_unlock(mt);
+}
+
/* callback function used for check_nomem_writer_race() */
static void writer2(void *maple_tree)
{
@@ -36415,6 +36517,14 @@ void farmer_tests(void)
mtree_destroy(&tree);
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_spanning_store_height(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
+ check_collapsing_rebalance(&tree);
+ mtree_destroy(&tree);
+
+ mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
check_null_expand(&tree);
mtree_destroy(&tree);
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 16f5d74ae762..d6534d7301a2 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -380,10 +380,11 @@ static bool reclaim_until(const char *memcg, long goal);
*
* Then it checks actual memory usages and expects that:
* A/B memory.current ~= 50M
- * A/B/C memory.current ~= 29M
- * A/B/D memory.current ~= 21M
- * A/B/E memory.current ~= 0
- * A/B/F memory.current = 0
+ * A/B/C memory.current ~= 29M [memory.events:low > 0]
+ * A/B/D memory.current ~= 21M [memory.events:low > 0]
+ * A/B/E memory.current ~= 0 [memory.events:low == 0 if !memory_recursiveprot,
+ * undefined otherwise]
+ * A/B/F memory.current = 0 [memory.events:low == 0]
* (for origin of the numbers, see model in memcg_protection.m.)
*
* After that it tries to allocate more than there is
@@ -495,10 +496,10 @@ static int test_memcg_protection(const char *root, bool min)
for (i = 0; i < ARRAY_SIZE(children); i++)
c[i] = cg_read_long(children[i], "memory.current");
- if (!values_close(c[0], MB(29), 10))
+ if (!values_close(c[0], MB(29), 15))
goto cleanup;
- if (!values_close(c[1], MB(21), 10))
+ if (!values_close(c[1], MB(21), 20))
goto cleanup;
if (c[3] != 0)
@@ -525,7 +526,14 @@ static int test_memcg_protection(const char *root, bool min)
goto cleanup;
}
+ /*
+ * Child 2 has memory.low=0, but some low protection may still be
+ * distributed down from its parent with memory.low=50M if cgroup2
+ * memory_recursiveprot mount option is enabled. Ignore the low
+ * event count in this case.
+ */
for (i = 0; i < ARRAY_SIZE(children); i++) {
+ int ignore_low_events_index = has_recursiveprot ? 2 : -1;
int no_low_events_index = 1;
long low, oom;
@@ -534,6 +542,8 @@ static int test_memcg_protection(const char *root, bool min)
if (oom)
goto cleanup;
+ if (i == ignore_low_events_index)
+ continue;
if (i <= no_low_events_index && low <= 0)
goto cleanup;
if (i > no_low_events_index && low)
diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index ecbf07afc6dd..ff21524be458 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -3,7 +3,7 @@
TEST_GEN_FILES += access_memory access_memory_even
-TEST_FILES = _chk_dependency.sh _damon_sysfs.py
+TEST_FILES = _damon_sysfs.py
# functionality tests
TEST_PROGS += sysfs.sh
diff --git a/tools/testing/selftests/damon/_chk_dependency.sh b/tools/testing/selftests/damon/_chk_dependency.sh
deleted file mode 100644
index dda3a87dc00a..000000000000
--- a/tools/testing/selftests/damon/_chk_dependency.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-DBGFS=$(grep debugfs /proc/mounts --max-count 1 | awk '{print $2}')
-if [ "$DBGFS" = "" ]
-then
- echo "debugfs not mounted"
- exit $ksft_skip
-fi
-
-DBGFS+="/damon"
-
-if [ $EUID -ne 0 ];
-then
- echo "Run as root"
- exit $ksft_skip
-fi
-
-if [ ! -d "$DBGFS" ]
-then
- echo "$DBGFS not found"
- exit $ksft_skip
-fi
-
-if [ -f "$DBGFS/monitor_on_DEPRECATED" ]
-then
- monitor_on_file="monitor_on_DEPRECATED"
-else
- monitor_on_file="monitor_on"
-fi
-
-for f in attrs target_ids "$monitor_on_file"
-do
- if [ ! -f "$DBGFS/$f" ]
- then
- echo "$f not found"
- exit 1
- fi
-done
-
-permission_error="Operation not permitted"
-for f in attrs target_ids "$monitor_on_file"
-do
- status=$( cat "$DBGFS/$f" 2>&1 )
- if [ "${status#*$permission_error}" != "$status" ]; then
- echo "Permission for reading $DBGFS/$f denied; maybe secureboot enabled?"
- exit $ksft_skip
- fi
-done
diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py
index 6e136dc3df19..1e587e0b1a39 100644
--- a/tools/testing/selftests/damon/_damon_sysfs.py
+++ b/tools/testing/selftests/damon/_damon_sysfs.py
@@ -420,11 +420,16 @@ class Kdamond:
tried_regions = []
tried_regions_dir = os.path.join(
scheme.sysfs_dir(), 'tried_regions')
+ region_indices = []
for filename in os.listdir(
os.path.join(scheme.sysfs_dir(), 'tried_regions')):
tried_region_dir = os.path.join(tried_regions_dir, filename)
if not os.path.isdir(tried_region_dir):
continue
+ region_indices.append(int(filename))
+ for region_idx in sorted(region_indices):
+ tried_region_dir = os.path.join(tried_regions_dir,
+ '%d' % region_idx)
region_values = []
for f in ['start', 'end', 'nr_accesses', 'age']:
content, err = read_file(
diff --git a/tools/testing/selftests/damon/_debugfs_common.sh b/tools/testing/selftests/damon/_debugfs_common.sh
deleted file mode 100644
index 54d45791b0d9..000000000000
--- a/tools/testing/selftests/damon/_debugfs_common.sh
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-test_write_result() {
- file=$1
- content=$2
- orig_content=$3
- expect_reason=$4
- expected=$5
-
- if [ "$expected" = "0" ]
- then
- echo "$content" > "$file"
- else
- echo "$content" > "$file" 2> /dev/null
- fi
- if [ $? -ne "$expected" ]
- then
- echo "writing $content to $file doesn't return $expected"
- echo "expected because: $expect_reason"
- echo "$orig_content" > "$file"
- exit 1
- fi
-}
-
-test_write_succ() {
- test_write_result "$1" "$2" "$3" "$4" 0
-}
-
-test_write_fail() {
- test_write_result "$1" "$2" "$3" "$4" 1
-}
-
-test_content() {
- file=$1
- orig_content=$2
- expected=$3
- expect_reason=$4
-
- content=$(cat "$file")
- if [ "$content" != "$expected" ]
- then
- echo "reading $file expected $expected but $content"
- echo "expected because: $expect_reason"
- echo "$orig_content" > "$file"
- exit 1
- fi
-}
-
-source ./_chk_dependency.sh
-
-damon_onoff="$DBGFS/monitor_on"
-if [ -f "$DBGFS/monitor_on_DEPRECATED" ]
-then
- damon_onoff="$DBGFS/monitor_on_DEPRECATED"
-else
- damon_onoff="$DBGFS/monitor_on"
-fi
-
-if [ $(cat "$damon_onoff") = "on" ]
-then
- echo "monitoring is on"
- exit $ksft_skip
-fi
diff --git a/tools/testing/selftests/filesystems/anon_inode_test.c b/tools/testing/selftests/filesystems/anon_inode_test.c
index e8e0ef1460d2..73e0a4d4fb2f 100644
--- a/tools/testing/selftests/filesystems/anon_inode_test.c
+++ b/tools/testing/selftests/filesystems/anon_inode_test.c
@@ -7,7 +7,7 @@
#include <sys/stat.h>
#include "../kselftest_harness.h"
-#include "overlayfs/wrappers.h"
+#include "wrappers.h"
TEST(anon_inode_no_chown)
{
diff --git a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
index 85acb4e3ef00..72d51ad0ee0e 100644
--- a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
+++ b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
@@ -50,7 +50,7 @@ TEST(eventfd_check_flag_rdwr)
ASSERT_GE(fd, 0);
flags = fcntl(fd, F_GETFL);
- // since the kernel automatically added O_RDWR.
+ // The kernel automatically adds the O_RDWR flag.
EXPECT_EQ(flags, O_RDWR);
close(fd);
@@ -85,7 +85,7 @@ TEST(eventfd_check_flag_nonblock)
close(fd);
}
-TEST(eventfd_chek_flag_cloexec_and_nonblock)
+TEST(eventfd_check_flag_cloexec_and_nonblock)
{
int fd, flags;
@@ -178,8 +178,7 @@ TEST(eventfd_check_flag_semaphore)
// The semaphore could only be obtained from fdinfo.
ret = verify_fdinfo(fd, &err, "eventfd-semaphore: ", 19, "1\n");
if (ret != 0)
- ksft_print_msg("eventfd-semaphore check failed, msg: %s\n",
- err.msg);
+ ksft_print_msg("eventfd semaphore flag check failed: %s\n", err.msg);
EXPECT_EQ(ret, 0);
close(fd);
diff --git a/tools/testing/selftests/filesystems/file_stressor.c b/tools/testing/selftests/filesystems/file_stressor.c
index 1136f93a9977..01dd89f8e52f 100644
--- a/tools/testing/selftests/filesystems/file_stressor.c
+++ b/tools/testing/selftests/filesystems/file_stressor.c
@@ -156,7 +156,7 @@ TEST_F_TIMEOUT(file_stressor, slab_typesafe_by_rcu, 900 * 2)
ssize_t nr_read;
/*
- * Concurrently read /proc/<pid>/fd/ which rougly does:
+ * Concurrently read /proc/<pid>/fd/ which roughly does:
*
* f = fget_task_next(p, &fd);
* if (!f)
diff --git a/tools/testing/selftests/kmod/config b/tools/testing/selftests/kmod/config
index 259f4fd6b5e2..1f1e63494af9 100644
--- a/tools/testing/selftests/kmod/config
+++ b/tools/testing/selftests/kmod/config
@@ -1,7 +1,2 @@
CONFIG_TEST_KMOD=m
CONFIG_TEST_LKM=m
-CONFIG_XFS_FS=m
-
-# For the module parameter force_init_test is used
-CONFIG_TUN=m
-CONFIG_BTRFS_FS=m
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index c5241b193db8..824266982aa3 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -20,6 +20,7 @@ mremap_test
on-fault-limit
transhuge-stress
pagemap_ioctl
+pfnmap
*.tmp*
protection_keys
protection_keys_32
@@ -58,3 +59,4 @@ hugetlb_dio
pkey_sighandler_tests_32
pkey_sighandler_tests_64
guard-regions
+merge
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 8270895039d1..ae6f994d3add 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -84,6 +84,7 @@ TEST_GEN_FILES += mremap_test
TEST_GEN_FILES += mseal_test
TEST_GEN_FILES += on-fault-limit
TEST_GEN_FILES += pagemap_ioctl
+TEST_GEN_FILES += pfnmap
TEST_GEN_FILES += thuge-gen
TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += uffd-stress
@@ -98,6 +99,7 @@ TEST_GEN_FILES += hugetlb_madv_vs_map
TEST_GEN_FILES += hugetlb_dio
TEST_GEN_FILES += droppable
TEST_GEN_FILES += guard-regions
+TEST_GEN_FILES += merge
ifneq ($(ARCH),arm64)
TEST_GEN_FILES += soft-dirty
diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c
index eba43ead13ae..0cd9d236649d 100644
--- a/tools/testing/selftests/mm/guard-regions.c
+++ b/tools/testing/selftests/mm/guard-regions.c
@@ -8,6 +8,7 @@
#include <fcntl.h>
#include <linux/limits.h>
#include <linux/userfaultfd.h>
+#include <linux/fs.h>
#include <setjmp.h>
#include <signal.h>
#include <stdbool.h>
@@ -2075,4 +2076,60 @@ TEST_F(guard_regions, pagemap)
ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
}
+/*
+ * Assert that PAGEMAP_SCAN correctly reports guard region ranges.
+ */
+TEST_F(guard_regions, pagemap_scan)
+{
+ const unsigned long page_size = self->page_size;
+ struct page_region pm_regs[10];
+ struct pm_scan_arg pm_scan_args = {
+ .size = sizeof(struct pm_scan_arg),
+ .category_anyof_mask = PAGE_IS_GUARD,
+ .return_mask = PAGE_IS_GUARD,
+ .vec = (long)&pm_regs,
+ .vec_len = ARRAY_SIZE(pm_regs),
+ };
+ int proc_fd, i;
+ char *ptr;
+
+ proc_fd = open("/proc/self/pagemap", O_RDONLY);
+ ASSERT_NE(proc_fd, -1);
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ pm_scan_args.start = (long)ptr;
+ pm_scan_args.end = (long)ptr + 10 * page_size;
+ ASSERT_EQ(ioctl(proc_fd, PAGEMAP_SCAN, &pm_scan_args), 0);
+ ASSERT_EQ(pm_scan_args.walk_end, (long)ptr + 10 * page_size);
+
+ /* Install a guard region in every other page. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(syscall(__NR_madvise, ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /*
+ * Assert ioctl() returns the count of located regions, where each
+ * region spans every other page within the range of 10 pages.
+ */
+ ASSERT_EQ(ioctl(proc_fd, PAGEMAP_SCAN, &pm_scan_args), 5);
+ ASSERT_EQ(pm_scan_args.walk_end, (long)ptr + 10 * page_size);
+
+ /* Re-read from pagemap, and assert guard regions are detected. */
+ for (i = 0; i < 5; i++) {
+ long ptr_p = (long)&ptr[2 * i * page_size];
+
+ ASSERT_EQ(pm_regs[i].start, ptr_p);
+ ASSERT_EQ(pm_regs[i].end, ptr_p + page_size);
+ ASSERT_EQ(pm_regs[i].categories, PAGE_IS_GUARD);
+ }
+
+ ASSERT_EQ(close(proc_fd), 0);
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
index 21595b20bbc3..d50ada0c7dbf 100644
--- a/tools/testing/selftests/mm/gup_longterm.c
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -158,7 +158,7 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
/*
* R/O pinning or pinning in a private mapping is always
* expected to work. Otherwise, we expect long-term R/W pinning
- * to only succeed for special fielesystems.
+ * to only succeed for special filesystems.
*/
should_work = !shared || !rw ||
fs_supports_writable_longterm_pinning(fs_type);
diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
index 0b0d4ba1af27..0dd31892ff67 100755
--- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
@@ -36,7 +36,7 @@ else
do_umount=1
fi
fi
-MNT='/mnt/huge/'
+MNT='/mnt/huge'
function get_machine_hugepage_size() {
hpz=$(grep -i hugepagesize /proc/meminfo)
@@ -56,10 +56,45 @@ function cleanup() {
rmdir "$CGROUP_ROOT"/a/b 2>/dev/null
rmdir "$CGROUP_ROOT"/a 2>/dev/null
rmdir "$CGROUP_ROOT"/test1 2>/dev/null
- echo 0 >/proc/sys/vm/nr_hugepages
+ echo $nr_hugepgs >/proc/sys/vm/nr_hugepages
set -e
}
+function assert_with_retry() {
+ local actual_path="$1"
+ local expected="$2"
+ local tolerance=$((7 * 1024 * 1024))
+ local timeout=20
+ local interval=1
+ local start_time
+ local now
+ local elapsed
+ local actual
+
+ start_time=$(date +%s)
+
+ while true; do
+ actual="$(cat "$actual_path")"
+
+ if [[ $actual -ge $(($expected - $tolerance)) ]] &&
+ [[ $actual -le $(($expected + $tolerance)) ]]; then
+ return 0
+ fi
+
+ now=$(date +%s)
+ elapsed=$((now - start_time))
+
+ if [[ $elapsed -ge $timeout ]]; then
+ echo "actual = $((${actual%% *} / 1024 / 1024)) MB"
+ echo "expected = $((${expected%% *} / 1024 / 1024)) MB"
+ cleanup
+ exit 1
+ fi
+
+ sleep $interval
+ done
+}
+
function assert_state() {
local expected_a="$1"
local expected_a_hugetlb="$2"
@@ -70,58 +105,13 @@ function assert_state() {
expected_b="$3"
expected_b_hugetlb="$4"
fi
- local tolerance=$((5 * 1024 * 1024))
-
- local actual_a
- actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)"
- if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] ||
- [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then
- echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB
- echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB
- echo fail
-
- cleanup
- exit 1
- fi
-
- local actual_a_hugetlb
- actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)"
- if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] ||
- [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then
- echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB
- echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB
- echo fail
-
- cleanup
- exit 1
- fi
-
- if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then
- return
- fi
-
- local actual_b
- actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)"
- if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] ||
- [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then
- echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB
- echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB
- echo fail
-
- cleanup
- exit 1
- fi
- local actual_b_hugetlb
- actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)"
- if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] ||
- [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then
- echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB
- echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB
- echo fail
+ assert_with_retry "$CGROUP_ROOT/a/memory.$usage_file" "$expected_a"
+ assert_with_retry "$CGROUP_ROOT/a/hugetlb.${MB}MB.$usage_file" "$expected_a_hugetlb"
- cleanup
- exit 1
+ if [[ -n "$expected_b" && -n "$expected_b_hugetlb" ]]; then
+ assert_with_retry "$CGROUP_ROOT/a/b/memory.$usage_file" "$expected_b"
+ assert_with_retry "$CGROUP_ROOT/a/b/hugetlb.${MB}MB.$usage_file" "$expected_b_hugetlb"
fi
}
@@ -175,7 +165,6 @@ size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages.
cleanup
echo
-echo
echo Test charge, rmdir, uncharge
setup
echo mkdir
@@ -195,7 +184,6 @@ cleanup
echo done
echo
-echo
if [[ ! $cgroup2 ]]; then
echo "Test parent and child hugetlb usage"
setup
@@ -212,7 +200,6 @@ if [[ ! $cgroup2 ]]; then
assert_state 0 $(($size * 2)) 0 $size
rmdir "$CGROUP_ROOT"/a/b
- sleep 5
echo Assert memory reparent correctly.
assert_state 0 $(($size * 2))
@@ -225,7 +212,6 @@ if [[ ! $cgroup2 ]]; then
fi
echo
-echo
echo "Test child only hugetlb usage"
echo setup
setup
diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c
index d53de2486080..1e9980b8993c 100644
--- a/tools/testing/selftests/mm/map_fixed_noreplace.c
+++ b/tools/testing/selftests/mm/map_fixed_noreplace.c
@@ -96,7 +96,7 @@ int main(void)
ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n");
}
ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
- ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n");
+ ksft_test_result_pass("Second mmap() 5*PAGE_SIZE at base\n");
/*
* Second mapping contained within first:
diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c
new file mode 100644
index 000000000000..c76646cdf6e6
--- /dev/null
+++ b/tools/testing/selftests/mm/merge.c
@@ -0,0 +1,455 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+#include "../kselftest_harness.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include "vm_util.h"
+
+FIXTURE(merge)
+{
+ unsigned int page_size;
+ char *carveout;
+ struct procmap_fd procmap;
+};
+
+FIXTURE_SETUP(merge)
+{
+ self->page_size = psize();
+ /* Carve out PROT_NONE region to map over. */
+ self->carveout = mmap(NULL, 12 * self->page_size, PROT_NONE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(self->carveout, MAP_FAILED);
+ /* Setup PROCMAP_QUERY interface. */
+ ASSERT_EQ(open_self_procmap(&self->procmap), 0);
+}
+
+FIXTURE_TEARDOWN(merge)
+{
+ ASSERT_EQ(munmap(self->carveout, 12 * self->page_size), 0);
+ ASSERT_EQ(close_procmap(&self->procmap), 0);
+}
+
+TEST_F(merge, mprotect_unfaulted_left)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr;
+
+ /*
+ * Map 10 pages of R/W memory within. MAP_NORESERVE so we don't hit
+ * merge failure due to lack of VM_ACCOUNT flag by mistake.
+ *
+ * |-----------------------|
+ * | unfaulted |
+ * |-----------------------|
+ */
+ ptr = mmap(&carveout[page_size], 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /*
+ * Now make the first 5 pages read-only, splitting the VMA:
+ *
+ * RO RW
+ * |-----------|-----------|
+ * | unfaulted | unfaulted |
+ * |-----------|-----------|
+ */
+ ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ), 0);
+ /*
+ * Fault in the first of the last 5 pages so it gets an anon_vma and
+ * thus the whole VMA becomes 'faulted':
+ *
+ * RO RW
+ * |-----------|-----------|
+ * | unfaulted | faulted |
+ * |-----------|-----------|
+ */
+ ptr[5 * page_size] = 'x';
+ /*
+ * Now mprotect() the RW region read-only, we should merge (though for
+ * ~15 years we did not! :):
+ *
+ * RO
+ * |-----------------------|
+ * | faulted |
+ * |-----------------------|
+ */
+ ASSERT_EQ(mprotect(&ptr[5 * page_size], 5 * page_size, PROT_READ), 0);
+
+ /* Assert that the merge succeeded using PROCMAP_QUERY. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size);
+}
+
+TEST_F(merge, mprotect_unfaulted_right)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr;
+
+ /*
+ * |-----------------------|
+ * | unfaulted |
+ * |-----------------------|
+ */
+ ptr = mmap(&carveout[page_size], 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /*
+ * Now make the last 5 pages read-only, splitting the VMA:
+ *
+ * RW RO
+ * |-----------|-----------|
+ * | unfaulted | unfaulted |
+ * |-----------|-----------|
+ */
+ ASSERT_EQ(mprotect(&ptr[5 * page_size], 5 * page_size, PROT_READ), 0);
+ /*
+ * Fault in the first of the first 5 pages so it gets an anon_vma and
+ * thus the whole VMA becomes 'faulted':
+ *
+ * RW RO
+ * |-----------|-----------|
+ * | faulted | unfaulted |
+ * |-----------|-----------|
+ */
+ ptr[0] = 'x';
+ /*
+ * Now mprotect() the RW region read-only, we should merge:
+ *
+ * RO
+ * |-----------------------|
+ * | faulted |
+ * |-----------------------|
+ */
+ ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ), 0);
+
+ /* Assert that the merge succeeded using PROCMAP_QUERY. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size);
+}
+
+TEST_F(merge, mprotect_unfaulted_both)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr;
+
+ /*
+ * |-----------------------|
+ * | unfaulted |
+ * |-----------------------|
+ */
+ ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /*
+ * Now make the first and last 3 pages read-only, splitting the VMA:
+ *
+ * RO RW RO
+ * |-----------|-----------|-----------|
+ * | unfaulted | unfaulted | unfaulted |
+ * |-----------|-----------|-----------|
+ */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
+ ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0);
+ /*
+ * Fault in the first of the middle 3 pages so it gets an anon_vma and
+ * thus the whole VMA becomes 'faulted':
+ *
+ * RO RW RO
+ * |-----------|-----------|-----------|
+ * | unfaulted | faulted | unfaulted |
+ * |-----------|-----------|-----------|
+ */
+ ptr[3 * page_size] = 'x';
+ /*
+ * Now mprotect() the RW region read-only, we should merge:
+ *
+ * RO
+ * |-----------------------|
+ * | faulted |
+ * |-----------------------|
+ */
+ ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0);
+
+ /* Assert that the merge succeeded using PROCMAP_QUERY. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size);
+}
+
+TEST_F(merge, mprotect_faulted_left_unfaulted_right)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr;
+
+ /*
+ * |-----------------------|
+ * | unfaulted |
+ * |-----------------------|
+ */
+ ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /*
+ * Now make the last 3 pages read-only, splitting the VMA:
+ *
+ * RW RO
+ * |-----------------------|-----------|
+ * | unfaulted | unfaulted |
+ * |-----------------------|-----------|
+ */
+ ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0);
+ /*
+ * Fault in the first of the first 6 pages so it gets an anon_vma and
+ * thus the whole VMA becomes 'faulted':
+ *
+ * RW RO
+ * |-----------------------|-----------|
+ * | unfaulted | unfaulted |
+ * |-----------------------|-----------|
+ */
+ ptr[0] = 'x';
+ /*
+ * Now make the first 3 pages read-only, splitting the VMA:
+ *
+ * RO RW RO
+ * |-----------|-----------|-----------|
+ * | faulted | faulted | unfaulted |
+ * |-----------|-----------|-----------|
+ */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
+ /*
+ * Now mprotect() the RW region read-only, we should merge:
+ *
+ * RO
+ * |-----------------------|
+ * | faulted |
+ * |-----------------------|
+ */
+ ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0);
+
+ /* Assert that the merge succeeded using PROCMAP_QUERY. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size);
+}
+
+TEST_F(merge, mprotect_unfaulted_left_faulted_right)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ char *ptr;
+
+ /*
+ * |-----------------------|
+ * | unfaulted |
+ * |-----------------------|
+ */
+ ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /*
+ * Now make the first 3 pages read-only, splitting the VMA:
+ *
+ * RO RW
+ * |-----------|-----------------------|
+ * | unfaulted | unfaulted |
+ * |-----------|-----------------------|
+ */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
+ /*
+ * Fault in the first of the last 6 pages so it gets an anon_vma and
+ * thus the whole VMA becomes 'faulted':
+ *
+ * RO RW
+ * |-----------|-----------------------|
+ * | unfaulted | faulted |
+ * |-----------|-----------------------|
+ */
+ ptr[3 * page_size] = 'x';
+ /*
+ * Now make the last 3 pages read-only, splitting the VMA:
+ *
+ * RO RW RO
+ * |-----------|-----------|-----------|
+ * | unfaulted | faulted | faulted |
+ * |-----------|-----------|-----------|
+ */
+ ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0);
+ /*
+ * Now mprotect() the RW region read-only, we should merge:
+ *
+ * RO
+ * |-----------------------|
+ * | faulted |
+ * |-----------------------|
+ */
+ ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0);
+
+ /* Assert that the merge succeeded using PROCMAP_QUERY. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size);
+}
+
+TEST_F(merge, forked_target_vma)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ pid_t pid;
+ char *ptr, *ptr2;
+ int i;
+
+ /*
+ * |-----------|
+ * | unfaulted |
+ * |-----------|
+ */
+ ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * Fault in process.
+ *
+ * |-----------|
+ * | faulted |
+ * |-----------|
+ */
+ ptr[0] = 'x';
+
+ pid = fork();
+ ASSERT_NE(pid, -1);
+
+ if (pid != 0) {
+ wait(NULL);
+ return;
+ }
+
+ /* Child process below: */
+
+ /* Reopen for child. */
+ ASSERT_EQ(close_procmap(&self->procmap), 0);
+ ASSERT_EQ(open_self_procmap(&self->procmap), 0);
+
+ /* unCOWing everything does not cause the AVC to go away. */
+ for (i = 0; i < 5 * page_size; i += page_size)
+ ptr[i] = 'x';
+
+ /*
+ * Map in adjacent VMA in child.
+ *
+ * forked
+ * |-----------|-----------|
+ * | faulted | unfaulted |
+ * |-----------|-----------|
+ * ptr ptr2
+ */
+ ptr2 = mmap(&ptr[5 * page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /* Make sure not merged. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 5 * page_size);
+}
+
+TEST_F(merge, forked_source_vma)
+{
+ unsigned int page_size = self->page_size;
+ char *carveout = self->carveout;
+ struct procmap_fd *procmap = &self->procmap;
+ pid_t pid;
+ char *ptr, *ptr2;
+ int i;
+
+ /*
+ * |-----------|------------|
+ * | unfaulted | <unmapped> |
+ * |-----------|------------|
+ */
+ ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * Fault in process.
+ *
+ * |-----------|------------|
+ * | faulted | <unmapped> |
+ * |-----------|------------|
+ */
+ ptr[0] = 'x';
+
+ pid = fork();
+ ASSERT_NE(pid, -1);
+
+ if (pid != 0) {
+ wait(NULL);
+ return;
+ }
+
+ /* Child process below: */
+
+ /* Reopen for child. */
+ ASSERT_EQ(close_procmap(&self->procmap), 0);
+ ASSERT_EQ(open_self_procmap(&self->procmap), 0);
+
+ /* unCOWing everything does not cause the AVC to go away. */
+ for (i = 0; i < 5 * page_size; i += page_size)
+ ptr[i] = 'x';
+
+ /*
+ * Map in adjacent VMA in child, ptr2 after ptr, but incompatible.
+ *
+ * forked RW RWX
+ * |-----------|-----------|
+ * | faulted | unfaulted |
+ * |-----------|-----------|
+ * ptr ptr2
+ */
+ ptr2 = mmap(&carveout[6 * page_size], 5 * page_size, PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /* Make sure not merged. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr2));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size);
+
+ /*
+ * Now mprotect forked region to RWX so it becomes the source for the
+ * merge to unfaulted region:
+ *
+ * forked RWX RWX
+ * |-----------|-----------|
+ * | faulted | unfaulted |
+ * |-----------|-----------|
+ * ptr ptr2
+ *
+ * This should NOT result in a merge, as ptr was forked.
+ */
+ ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ | PROT_WRITE | PROT_EXEC), 0);
+ /* Again, make sure not merged. */
+ ASSERT_TRUE(find_vma_procmap(procmap, ptr2));
+ ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2);
+ ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
index 57b4bba2b45f..b07acc86f4f0 100644
--- a/tools/testing/selftests/mm/pagemap_ioctl.c
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -34,7 +34,7 @@
#define PAGEMAP "/proc/self/pagemap"
int pagemap_fd;
int uffd;
-unsigned int page_size;
+unsigned long page_size;
unsigned int hpage_size;
const char *progname;
@@ -112,7 +112,7 @@ int init_uffd(void)
return 0;
}
-int wp_init(void *lpBaseAddress, int dwRegionSize)
+int wp_init(void *lpBaseAddress, long dwRegionSize)
{
struct uffdio_register uffdio_register;
struct uffdio_writeprotect wp;
@@ -136,7 +136,7 @@ int wp_init(void *lpBaseAddress, int dwRegionSize)
return 0;
}
-int wp_free(void *lpBaseAddress, int dwRegionSize)
+int wp_free(void *lpBaseAddress, long dwRegionSize)
{
struct uffdio_register uffdio_register;
@@ -184,7 +184,7 @@ void *gethugetlb_mem(int size, int *shmid)
int userfaultfd_tests(void)
{
- int mem_size, vec_size, written, num_pages = 16;
+ long mem_size, vec_size, written, num_pages = 16;
char *mem, *vec;
mem_size = num_pages * page_size;
@@ -213,7 +213,7 @@ int userfaultfd_tests(void)
written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
if (written < 0)
- ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", written, errno, strerror(errno));
ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", __func__);
@@ -995,7 +995,7 @@ int unmapped_region_tests(void)
{
void *start = (void *)0x10000000;
int written, len = 0x00040000;
- int vec_size = len / page_size;
+ long vec_size = len / page_size;
struct page_region *vec = malloc(sizeof(struct page_region) * vec_size);
/* 1. Get written pages */
@@ -1051,7 +1051,7 @@ static void test_simple(void)
int sanity_tests(void)
{
unsigned long long mem_size, vec_size;
- int ret, fd, i, buf_size;
+ long ret, fd, i, buf_size;
struct page_region *vec;
char *mem, *fmem;
struct stat sbuf;
@@ -1160,7 +1160,7 @@ int sanity_tests(void)
ret = stat(progname, &sbuf);
if (ret < 0)
- ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (fmem == MAP_FAILED)
diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c
new file mode 100644
index 000000000000..8a9d19b6020c
--- /dev/null
+++ b/tools/testing/selftests/mm/pfnmap.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Basic VM_PFNMAP tests relying on mmap() of '/dev/mem'
+ *
+ * Copyright 2025, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#include "../kselftest_harness.h"
+#include "vm_util.h"
+
+static sigjmp_buf sigjmp_buf_env;
+
+static void signal_handler(int sig)
+{
+ siglongjmp(sigjmp_buf_env, -EFAULT);
+}
+
+static int test_read_access(char *addr, size_t size, size_t pagesize)
+{
+ size_t offs;
+ int ret;
+
+ if (signal(SIGSEGV, signal_handler) == SIG_ERR)
+ return -EINVAL;
+
+ ret = sigsetjmp(sigjmp_buf_env, 1);
+ if (!ret) {
+ for (offs = 0; offs < size; offs += pagesize)
+ /* Force a read that the compiler cannot optimize out. */
+ *((volatile char *)(addr + offs));
+ }
+ if (signal(SIGSEGV, signal_handler) == SIG_ERR)
+ return -EINVAL;
+
+ return ret;
+}
+
+FIXTURE(pfnmap)
+{
+ size_t pagesize;
+ int dev_mem_fd;
+ char *addr1;
+ size_t size1;
+ char *addr2;
+ size_t size2;
+};
+
+FIXTURE_SETUP(pfnmap)
+{
+ self->pagesize = getpagesize();
+
+ self->dev_mem_fd = open("/dev/mem", O_RDONLY);
+ if (self->dev_mem_fd < 0)
+ SKIP(return, "Cannot open '/dev/mem'\n");
+
+ /* We'll require the first two pages throughout our tests ... */
+ self->size1 = self->pagesize * 2;
+ self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED,
+ self->dev_mem_fd, 0);
+ if (self->addr1 == MAP_FAILED)
+ SKIP(return, "Cannot mmap '/dev/mem'\n");
+
+ /* ... and want to be able to read from them. */
+ if (test_read_access(self->addr1, self->size1, self->pagesize))
+ SKIP(return, "Cannot read-access mmap'ed '/dev/mem'\n");
+
+ self->size2 = 0;
+ self->addr2 = MAP_FAILED;
+}
+
+FIXTURE_TEARDOWN(pfnmap)
+{
+ if (self->addr2 != MAP_FAILED)
+ munmap(self->addr2, self->size2);
+ if (self->addr1 != MAP_FAILED)
+ munmap(self->addr1, self->size1);
+ if (self->dev_mem_fd >= 0)
+ close(self->dev_mem_fd);
+}
+
+TEST_F(pfnmap, madvise_disallowed)
+{
+ int advices[] = {
+ MADV_DONTNEED,
+ MADV_DONTNEED_LOCKED,
+ MADV_FREE,
+ MADV_WIPEONFORK,
+ MADV_COLD,
+ MADV_PAGEOUT,
+ MADV_POPULATE_READ,
+ MADV_POPULATE_WRITE,
+ };
+ int i;
+
+ /* All these advices must be rejected. */
+ for (i = 0; i < ARRAY_SIZE(advices); i++) {
+ EXPECT_LT(madvise(self->addr1, self->pagesize, advices[i]), 0);
+ EXPECT_EQ(errno, EINVAL);
+ }
+}
+
+TEST_F(pfnmap, munmap_split)
+{
+ /*
+ * Unmap the first page. This munmap() call is not really expected to
+ * fail, but we might be able to trigger other internal issues.
+ */
+ ASSERT_EQ(munmap(self->addr1, self->pagesize), 0);
+
+ /*
+ * Remap the first page while the second page is still mapped. This
+ * makes sure that any PAT tracking on x86 will allow for mmap()'ing
+ * a page again while some parts of the first mmap() are still
+ * around.
+ */
+ self->size2 = self->pagesize;
+ self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED,
+ self->dev_mem_fd, 0);
+ ASSERT_NE(self->addr2, MAP_FAILED);
+}
+
+TEST_F(pfnmap, mremap_fixed)
+{
+ char *ret;
+
+ /* Reserve a destination area. */
+ self->size2 = self->size1;
+ self->addr2 = mmap(NULL, self->size2, PROT_READ, MAP_ANON | MAP_PRIVATE,
+ -1, 0);
+ ASSERT_NE(self->addr2, MAP_FAILED);
+
+ /* mremap() over our destination. */
+ ret = mremap(self->addr1, self->size1, self->size2,
+ MREMAP_FIXED | MREMAP_MAYMOVE, self->addr2);
+ ASSERT_NE(ret, MAP_FAILED);
+}
+
+TEST_F(pfnmap, mremap_shrink)
+{
+ char *ret;
+
+ /* Shrinking is expected to work. */
+ ret = mremap(self->addr1, self->size1, self->size1 - self->pagesize, 0);
+ ASSERT_NE(ret, MAP_FAILED);
+}
+
+TEST_F(pfnmap, mremap_expand)
+{
+ /*
+ * Growing is not expected to work, and getting it right would
+ * be challenging. So this test primarily serves as an early warning
+ * that something that probably should never work suddenly works.
+ */
+ self->size2 = self->size1 + self->pagesize;
+ self->addr2 = mremap(self->addr1, self->size1, self->size2, MREMAP_MAYMOVE);
+ ASSERT_EQ(self->addr2, MAP_FAILED);
+}
+
+TEST_F(pfnmap, fork)
+{
+ pid_t pid;
+ int ret;
+
+ /* fork() a child and test if the child can access the pages. */
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (!pid) {
+ EXPECT_EQ(test_read_access(self->addr1, self->size1,
+ self->pagesize), 0);
+ exit(0);
+ }
+
+ wait(&ret);
+ if (WIFEXITED(ret))
+ ret = WEXITSTATUS(ret);
+ else
+ ret = -EINVAL;
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 9aff33b10999..dddd1dd8af14 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -63,6 +63,8 @@ separated by spaces:
test soft dirty page bit semantics
- pagemap
test pagemap_scan IOCTL
+- pfnmap
+ tests for VM_PFNMAP handling
- cow
test copy-on-write semantics
- thp
@@ -79,6 +81,8 @@ separated by spaces:
test prctl(PR_SET_MDWE, ...)
- page_frag
test handling of page fragment allocation and freeing
+- vma_merge
+ test VMA merge cases behave as expected
example: ./run_vmtests.sh -t "hmm mmap ksm"
EOF
@@ -421,6 +425,8 @@ CATEGORY="madv_guard" run_test ./guard-regions
# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
CATEGORY="madv_populate" run_test ./madv_populate
+CATEGORY="vma_merge" run_test ./merge
+
if [ -x ./memfd_secret ]
then
(echo 0 > /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix
@@ -468,6 +474,8 @@ fi
CATEGORY="pagemap" run_test ./pagemap_ioctl
+CATEGORY="pfnmap" run_test ./pfnmap
+
# COW tests
CATEGORY="cow" run_test ./cow
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index e8fd9011c2a3..c73fd5d455c8 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -1231,6 +1231,182 @@ static void uffd_move_pmd_split_test(uffd_test_args_t *targs)
uffd_move_pmd_handle_fault);
}
+static bool
+uffdio_verify_results(const char *name, int ret, int error, long result)
+{
+ /*
+ * Should always return -1 with errno=EAGAIN, with corresponding
+ * result field updated in ioctl() args to be -EAGAIN too
+ * (e.g. copy.copy field for UFFDIO_COPY).
+ */
+ if (ret != -1) {
+ uffd_test_fail("%s should have returned -1", name);
+ return false;
+ }
+
+ if (error != EAGAIN) {
+ uffd_test_fail("%s should have errno==EAGAIN", name);
+ return false;
+ }
+
+ if (result != -EAGAIN) {
+ uffd_test_fail("%s should have been updated for -EAGAIN",
+ name);
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * This defines a function to test one ioctl. Note that here "field" can
+ * be 1 or anything not -EAGAIN. With that initial value set, we can
+ * verify later that it should be updated by kernel (when -EAGAIN
+ * returned), by checking whether it is also updated to -EAGAIN.
+ */
+#define DEFINE_MMAP_CHANGING_TEST(name, ioctl_name, field) \
+ static bool uffdio_mmap_changing_test_##name(int fd) \
+ { \
+ int ret; \
+ struct uffdio_##name args = { \
+ .field = 1, \
+ }; \
+ ret = ioctl(fd, ioctl_name, &args); \
+ return uffdio_verify_results(#ioctl_name, ret, errno, args.field); \
+ }
+
+DEFINE_MMAP_CHANGING_TEST(zeropage, UFFDIO_ZEROPAGE, zeropage)
+DEFINE_MMAP_CHANGING_TEST(copy, UFFDIO_COPY, copy)
+DEFINE_MMAP_CHANGING_TEST(move, UFFDIO_MOVE, move)
+DEFINE_MMAP_CHANGING_TEST(poison, UFFDIO_POISON, updated)
+DEFINE_MMAP_CHANGING_TEST(continue, UFFDIO_CONTINUE, mapped)
+
+typedef enum {
+ /* We actually do not care about any state except UNINTERRUPTIBLE.. */
+ THR_STATE_UNKNOWN = 0,
+ THR_STATE_UNINTERRUPTIBLE,
+} thread_state;
+
+static void sleep_short(void)
+{
+ usleep(1000);
+}
+
+static thread_state thread_state_get(pid_t tid)
+{
+ const char *header = "State:\t";
+ char tmp[256], *p, c;
+ FILE *fp;
+
+ snprintf(tmp, sizeof(tmp), "/proc/%d/status", tid);
+ fp = fopen(tmp, "r");
+
+ if (!fp)
+ return THR_STATE_UNKNOWN;
+
+ while (fgets(tmp, sizeof(tmp), fp)) {
+ p = strstr(tmp, header);
+ if (p) {
+ /* For example, "State:\tD (disk sleep)" */
+ c = *(p + sizeof(header) - 1);
+ return c == 'D' ?
+ THR_STATE_UNINTERRUPTIBLE : THR_STATE_UNKNOWN;
+ }
+ }
+
+ return THR_STATE_UNKNOWN;
+}
+
+static void thread_state_until(pid_t tid, thread_state state)
+{
+ thread_state s;
+
+ do {
+ s = thread_state_get(tid);
+ sleep_short();
+ } while (s != state);
+}
+
+static void *uffd_mmap_changing_thread(void *opaque)
+{
+ volatile pid_t *pid = opaque;
+ int ret;
+
+ /* Unfortunately, it's only fetch-able from the thread itself.. */
+ assert(*pid == 0);
+ *pid = syscall(SYS_gettid);
+
+ /* Inject an event, this will hang solid until the event read */
+ ret = madvise(area_dst, page_size, MADV_REMOVE);
+ if (ret)
+ err("madvise(MADV_REMOVE) failed");
+
+ return NULL;
+}
+
+static void uffd_consume_message(int fd)
+{
+ struct uffd_msg msg = { 0 };
+
+ while (uffd_read_msg(fd, &msg));
+}
+
+static void uffd_mmap_changing_test(uffd_test_args_t *targs)
+{
+ /*
+ * This stores the real PID (which can be different from how tid is
+ * defined..) for the child thread, 0 means not initialized.
+ */
+ pid_t pid = 0;
+ pthread_t tid;
+ int ret;
+
+ if (uffd_register(uffd, area_dst, nr_pages * page_size,
+ true, false, false))
+ err("uffd_register() failed");
+
+ /* Create a thread to generate the racy event */
+ ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &pid);
+ if (ret)
+ err("pthread_create() failed");
+
+ /*
+ * Wait until the thread setup the pid. Use volatile to make sure
+ * it reads from RAM not regs.
+ */
+ while (!(volatile pid_t)pid)
+ sleep_short();
+
+ /* Wait until the thread hangs at REMOVE event */
+ thread_state_until(pid, THR_STATE_UNINTERRUPTIBLE);
+
+ if (!uffdio_mmap_changing_test_copy(uffd))
+ return;
+
+ if (!uffdio_mmap_changing_test_zeropage(uffd))
+ return;
+
+ if (!uffdio_mmap_changing_test_move(uffd))
+ return;
+
+ if (!uffdio_mmap_changing_test_poison(uffd))
+ return;
+
+ if (!uffdio_mmap_changing_test_continue(uffd))
+ return;
+
+ /*
+ * All succeeded above! Recycle everything. Start by reading the
+ * event so as to kick the thread roll again..
+ */
+ uffd_consume_message(uffd);
+
+ ret = pthread_join(tid, NULL);
+ assert(ret == 0);
+
+ uffd_test_pass();
+}
+
static int prevent_hugepages(const char **errmsg)
{
/* This should be done before source area is populated */
@@ -1470,6 +1646,32 @@ uffd_test_case_t uffd_tests[] = {
.mem_targets = MEM_ALL,
.uffd_feature_required = UFFD_FEATURE_POISON,
},
+ {
+ .name = "mmap-changing",
+ .uffd_fn = uffd_mmap_changing_test,
+ /*
+ * There's no point running this test over all mem types as
+ * they share the same code paths.
+ *
+ * Choose shmem for simplicity, because (1) shmem supports
+ * MINOR mode to cover UFFDIO_CONTINUE, and (2) shmem is
+ * almost always available (unlike hugetlb). Here we
+ * abused SHMEM for UFFDIO_MOVE, but the test we want to
+ * cover doesn't yet need the correct memory type..
+ */
+ .mem_targets = MEM_SHMEM,
+ /*
+ * Any UFFD_FEATURE_EVENT_* should work to trigger the
+ * race logically, but choose the simplest (REMOVE).
+ *
+ * Meanwhile, since we'll cover quite a few new ioctl()s
+ * (CONTINUE, POISON, MOVE), skip this test for old kernels
+ * by choosing all of them.
+ */
+ .uffd_feature_required = UFFD_FEATURE_EVENT_REMOVE |
+ UFFD_FEATURE_MOVE | UFFD_FEATURE_POISON |
+ UFFD_FEATURE_MINOR_SHMEM,
+ },
};
static void usage(const char *prog)
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index a36734fb62f3..1357e2d6a7b6 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <string.h>
+#include <errno.h>
#include <fcntl.h>
#include <dirent.h>
#include <inttypes.h>
@@ -424,3 +425,64 @@ bool check_vmflag_io(void *addr)
flags += flaglen;
}
}
+
+/*
+ * Open an fd at /proc/$pid/maps and configure procmap_out ready for
+ * PROCMAP_QUERY query. Returns 0 on success, or an error code otherwise.
+ */
+int open_procmap(pid_t pid, struct procmap_fd *procmap_out)
+{
+ char path[256];
+ int ret = 0;
+
+ memset(procmap_out, '\0', sizeof(*procmap_out));
+ sprintf(path, "/proc/%d/maps", pid);
+ procmap_out->query.size = sizeof(procmap_out->query);
+ procmap_out->fd = open(path, O_RDONLY);
+ if (procmap_out < 0)
+ ret = -errno;
+
+ return ret;
+}
+
+/* Perform PROCMAP_QUERY. Returns 0 on success, or an error code otherwise. */
+int query_procmap(struct procmap_fd *procmap)
+{
+ int ret = 0;
+
+ if (ioctl(procmap->fd, PROCMAP_QUERY, &procmap->query) == -1)
+ ret = -errno;
+
+ return ret;
+}
+
+/*
+ * Try to find the VMA at specified address, returns true if found, false if not
+ * found, and the test is failed if any other error occurs.
+ *
+ * On success, procmap->query is populated with the results.
+ */
+bool find_vma_procmap(struct procmap_fd *procmap, void *address)
+{
+ int err;
+
+ procmap->query.query_flags = 0;
+ procmap->query.query_addr = (unsigned long)address;
+ err = query_procmap(procmap);
+ if (!err)
+ return true;
+
+ if (err != -ENOENT)
+ ksft_exit_fail_msg("%s: Error %d on ioctl(PROCMAP_QUERY)\n",
+ __func__, err);
+ return false;
+}
+
+/*
+ * Close fd used by PROCMAP_QUERY mechanism. Returns 0 on success, or an error
+ * code otherwise.
+ */
+int close_procmap(struct procmap_fd *procmap)
+{
+ return close(procmap->fd);
+}
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index 6effafdc4d8a..9211ba640d9c 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -6,6 +6,7 @@
#include <strings.h> /* ffsl() */
#include <unistd.h> /* _SC_PAGESIZE */
#include "../kselftest.h"
+#include <linux/fs.h>
#define BIT_ULL(nr) (1ULL << (nr))
#define PM_SOFT_DIRTY BIT_ULL(55)
@@ -19,6 +20,15 @@
extern unsigned int __page_size;
extern unsigned int __page_shift;
+/*
+ * Represents an open fd and PROCMAP_QUERY state for binary (via ioctl)
+ * /proc/$pid/[s]maps lookup.
+ */
+struct procmap_fd {
+ int fd;
+ struct procmap_query query;
+};
+
static inline unsigned int psize(void)
{
if (!__page_size)
@@ -73,6 +83,17 @@ int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
bool miss, bool wp, bool minor, uint64_t *ioctls);
unsigned long get_free_hugepages(void);
bool check_vmflag_io(void *addr);
+int open_procmap(pid_t pid, struct procmap_fd *procmap_out);
+int query_procmap(struct procmap_fd *procmap);
+bool find_vma_procmap(struct procmap_fd *procmap, void *address);
+int close_procmap(struct procmap_fd *procmap);
+
+static inline int open_self_procmap(struct procmap_fd *procmap_out)
+{
+ pid_t pid = getpid();
+
+ return open_procmap(pid, procmap_out);
+}
/*
* On ppc64 this will only work with radix 2M hugepage size
diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile
index 1c631740a730..c5e0b76ba6ac 100644
--- a/tools/testing/selftests/ptrace/Makefile
+++ b/tools/testing/selftests/ptrace/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
CFLAGS += -std=c99 -pthread -Wall $(KHDR_INCLUDES)
-TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess get_set_sud
+TEST_GEN_PROGS := get_syscall_info set_syscall_info peeksiginfo vmaccess get_set_sud
include ../lib.mk
diff --git a/tools/testing/selftests/ptrace/set_syscall_info.c b/tools/testing/selftests/ptrace/set_syscall_info.c
new file mode 100644
index 000000000000..4198248ef874
--- /dev/null
+++ b/tools/testing/selftests/ptrace/set_syscall_info.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2018-2025 Dmitry V. Levin <ldv@strace.io>
+ * All rights reserved.
+ *
+ * Check whether PTRACE_SET_SYSCALL_INFO semantics implemented in the kernel
+ * matches userspace expectations.
+ */
+
+#include "../kselftest_harness.h"
+#include <err.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <asm/unistd.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+#if defined(_MIPS_SIM) && _MIPS_SIM == _MIPS_SIM_NABI32
+/*
+ * MIPS N32 is the only architecture where __kernel_ulong_t
+ * does not match the bitness of syscall arguments.
+ */
+typedef unsigned long long kernel_ulong_t;
+#else
+typedef __kernel_ulong_t kernel_ulong_t;
+#endif
+
+struct si_entry {
+ int nr;
+ kernel_ulong_t args[6];
+};
+struct si_exit {
+ unsigned int is_error;
+ int rval;
+};
+
+static unsigned int ptrace_stop;
+static pid_t tracee_pid;
+
+static int
+kill_tracee(pid_t pid)
+{
+ if (!pid)
+ return 0;
+
+ int saved_errno = errno;
+
+ int rc = kill(pid, SIGKILL);
+
+ errno = saved_errno;
+ return rc;
+}
+
+static long
+sys_ptrace(int request, pid_t pid, unsigned long addr, unsigned long data)
+{
+ return syscall(__NR_ptrace, request, pid, addr, data);
+}
+
+#define LOG_KILL_TRACEE(fmt, ...) \
+ do { \
+ kill_tracee(tracee_pid); \
+ TH_LOG("wait #%d: " fmt, \
+ ptrace_stop, ##__VA_ARGS__); \
+ } while (0)
+
+static void
+check_psi_entry(struct __test_metadata *_metadata,
+ const struct ptrace_syscall_info *info,
+ const struct si_entry *exp_entry,
+ const char *text)
+{
+ unsigned int i;
+ int exp_nr = exp_entry->nr;
+#if defined __s390__ || defined __s390x__
+ /* s390 is the only architecture that has 16-bit syscall numbers */
+ exp_nr &= 0xffff;
+#endif
+
+ ASSERT_EQ(PTRACE_SYSCALL_INFO_ENTRY, info->op) {
+ LOG_KILL_TRACEE("%s: entry stop mismatch", text);
+ }
+ ASSERT_TRUE(info->arch) {
+ LOG_KILL_TRACEE("%s: entry stop mismatch", text);
+ }
+ ASSERT_TRUE(info->instruction_pointer) {
+ LOG_KILL_TRACEE("%s: entry stop mismatch", text);
+ }
+ ASSERT_TRUE(info->stack_pointer) {
+ LOG_KILL_TRACEE("%s: entry stop mismatch", text);
+ }
+ ASSERT_EQ(exp_nr, info->entry.nr) {
+ LOG_KILL_TRACEE("%s: syscall nr mismatch", text);
+ }
+ for (i = 0; i < ARRAY_SIZE(exp_entry->args); ++i) {
+ ASSERT_EQ(exp_entry->args[i], info->entry.args[i]) {
+ LOG_KILL_TRACEE("%s: syscall arg #%u mismatch",
+ text, i);
+ }
+ }
+}
+
+static void
+check_psi_exit(struct __test_metadata *_metadata,
+ const struct ptrace_syscall_info *info,
+ const struct si_exit *exp_exit,
+ const char *text)
+{
+ ASSERT_EQ(PTRACE_SYSCALL_INFO_EXIT, info->op) {
+ LOG_KILL_TRACEE("%s: exit stop mismatch", text);
+ }
+ ASSERT_TRUE(info->arch) {
+ LOG_KILL_TRACEE("%s: exit stop mismatch", text);
+ }
+ ASSERT_TRUE(info->instruction_pointer) {
+ LOG_KILL_TRACEE("%s: exit stop mismatch", text);
+ }
+ ASSERT_TRUE(info->stack_pointer) {
+ LOG_KILL_TRACEE("%s: exit stop mismatch", text);
+ }
+ ASSERT_EQ(exp_exit->is_error, info->exit.is_error) {
+ LOG_KILL_TRACEE("%s: exit stop mismatch", text);
+ }
+ ASSERT_EQ(exp_exit->rval, info->exit.rval) {
+ LOG_KILL_TRACEE("%s: exit stop mismatch", text);
+ }
+}
+
+TEST(set_syscall_info)
+{
+ const pid_t tracer_pid = getpid();
+ const kernel_ulong_t dummy[] = {
+ (kernel_ulong_t) 0xdad0bef0bad0fed0ULL,
+ (kernel_ulong_t) 0xdad1bef1bad1fed1ULL,
+ (kernel_ulong_t) 0xdad2bef2bad2fed2ULL,
+ (kernel_ulong_t) 0xdad3bef3bad3fed3ULL,
+ (kernel_ulong_t) 0xdad4bef4bad4fed4ULL,
+ (kernel_ulong_t) 0xdad5bef5bad5fed5ULL,
+ };
+ int splice_in[2], splice_out[2];
+
+ ASSERT_EQ(0, pipe(splice_in));
+ ASSERT_EQ(0, pipe(splice_out));
+ ASSERT_EQ(sizeof(dummy), write(splice_in[1], dummy, sizeof(dummy)));
+
+ const struct {
+ struct si_entry entry[2];
+ struct si_exit exit[2];
+ } si[] = {
+ /* change scno, keep non-error rval */
+ {
+ {
+ {
+ __NR_gettid,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }, {
+ __NR_getppid,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }
+ }, {
+ { 0, tracer_pid }, { 0, tracer_pid }
+ }
+ },
+
+ /* set scno to -1, keep error rval */
+ {
+ {
+ {
+ __NR_chdir,
+ {
+ (uintptr_t) ".",
+ dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }, {
+ -1,
+ {
+ (uintptr_t) ".",
+ dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }
+ }, {
+ { 1, -ENOSYS }, { 1, -ENOSYS }
+ }
+ },
+
+ /* keep scno, change non-error rval */
+ {
+ {
+ {
+ __NR_getppid,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }, {
+ __NR_getppid,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }
+ }, {
+ { 0, tracer_pid }, { 0, tracer_pid + 1 }
+ }
+ },
+
+ /* change arg1, keep non-error rval */
+ {
+ {
+ {
+ __NR_chdir,
+ {
+ (uintptr_t) "",
+ dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }, {
+ __NR_chdir,
+ {
+ (uintptr_t) ".",
+ dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }
+ }, {
+ { 0, 0 }, { 0, 0 }
+ }
+ },
+
+ /* set scno to -1, change error rval to non-error */
+ {
+ {
+ {
+ __NR_gettid,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }, {
+ -1,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }
+ }, {
+ { 1, -ENOSYS }, { 0, tracer_pid }
+ }
+ },
+
+ /* change scno, change non-error rval to error */
+ {
+ {
+ {
+ __NR_chdir,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }, {
+ __NR_getppid,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }
+ }, {
+ { 0, tracer_pid }, { 1, -EISDIR }
+ }
+ },
+
+ /* change scno and all args, change non-error rval */
+ {
+ {
+ {
+ __NR_gettid,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }, {
+ __NR_splice,
+ {
+ splice_in[0], 0, splice_out[1], 0,
+ sizeof(dummy), SPLICE_F_NONBLOCK
+ }
+ }
+ }, {
+ { 0, sizeof(dummy) }, { 0, sizeof(dummy) + 1 }
+ }
+ },
+
+ /* change arg1, no exit stop */
+ {
+ {
+ {
+ __NR_exit_group,
+ {
+ dummy[0], dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }, {
+ __NR_exit_group,
+ {
+ 0, dummy[1], dummy[2],
+ dummy[3], dummy[4], dummy[5]
+ }
+ }
+ }, {
+ { 0, 0 }, { 0, 0 }
+ }
+ },
+ };
+
+ long rc;
+ unsigned int i;
+
+ tracee_pid = fork();
+
+ ASSERT_LE(0, tracee_pid) {
+ TH_LOG("fork: %m");
+ }
+
+ if (tracee_pid == 0) {
+ /* get the pid before PTRACE_TRACEME */
+ tracee_pid = getpid();
+ ASSERT_EQ(0, sys_ptrace(PTRACE_TRACEME, 0, 0, 0)) {
+ TH_LOG("PTRACE_TRACEME: %m");
+ }
+ ASSERT_EQ(0, kill(tracee_pid, SIGSTOP)) {
+ /* cannot happen */
+ TH_LOG("kill SIGSTOP: %m");
+ }
+ for (i = 0; i < ARRAY_SIZE(si); ++i) {
+ rc = syscall(si[i].entry[0].nr,
+ si[i].entry[0].args[0],
+ si[i].entry[0].args[1],
+ si[i].entry[0].args[2],
+ si[i].entry[0].args[3],
+ si[i].entry[0].args[4],
+ si[i].entry[0].args[5]);
+ if (si[i].exit[1].is_error) {
+ if (rc != -1 || errno != -si[i].exit[1].rval)
+ break;
+ } else {
+ if (rc != si[i].exit[1].rval)
+ break;
+ }
+ }
+ /*
+ * Something went wrong, but in this state tracee
+ * cannot reliably issue syscalls, so just crash.
+ */
+ *(volatile unsigned char *) (uintptr_t) i = 42;
+ /* unreachable */
+ _exit(i + 1);
+ }
+
+ for (ptrace_stop = 0; ; ++ptrace_stop) {
+ struct ptrace_syscall_info info = {
+ .op = 0xff /* invalid PTRACE_SYSCALL_INFO_* op */
+ };
+ const size_t size = sizeof(info);
+ const int expected_entry_size =
+ (void *) &info.entry.args[6] - (void *) &info;
+ const int expected_exit_size =
+ (void *) (&info.exit.is_error + 1) -
+ (void *) &info;
+ int status;
+
+ ASSERT_EQ(tracee_pid, wait(&status)) {
+ /* cannot happen */
+ LOG_KILL_TRACEE("wait: %m");
+ }
+ if (WIFEXITED(status)) {
+ tracee_pid = 0; /* the tracee is no more */
+ ASSERT_EQ(0, WEXITSTATUS(status)) {
+ LOG_KILL_TRACEE("unexpected exit status %u",
+ WEXITSTATUS(status));
+ }
+ break;
+ }
+ ASSERT_FALSE(WIFSIGNALED(status)) {
+ tracee_pid = 0; /* the tracee is no more */
+ LOG_KILL_TRACEE("unexpected signal %u",
+ WTERMSIG(status));
+ }
+ ASSERT_TRUE(WIFSTOPPED(status)) {
+ /* cannot happen */
+ LOG_KILL_TRACEE("unexpected wait status %#x", status);
+ }
+
+ ASSERT_LT(ptrace_stop, ARRAY_SIZE(si) * 2) {
+ LOG_KILL_TRACEE("ptrace stop overflow");
+ }
+
+ switch (WSTOPSIG(status)) {
+ case SIGSTOP:
+ ASSERT_EQ(0, ptrace_stop) {
+ LOG_KILL_TRACEE("unexpected signal stop");
+ }
+ ASSERT_EQ(0, sys_ptrace(PTRACE_SETOPTIONS, tracee_pid,
+ 0, PTRACE_O_TRACESYSGOOD)) {
+ LOG_KILL_TRACEE("PTRACE_SETOPTIONS: %m");
+ }
+ break;
+
+ case SIGTRAP | 0x80:
+ ASSERT_LT(0, ptrace_stop) {
+ LOG_KILL_TRACEE("unexpected syscall stop");
+ }
+ ASSERT_LT(0, (rc = sys_ptrace(PTRACE_GET_SYSCALL_INFO,
+ tracee_pid, size,
+ (uintptr_t) &info))) {
+ LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #1: %m");
+ }
+ if (ptrace_stop & 1) {
+ /* entering syscall */
+ const struct si_entry *exp_entry =
+ &si[ptrace_stop / 2].entry[0];
+ const struct si_entry *set_entry =
+ &si[ptrace_stop / 2].entry[1];
+
+ /* check ptrace_syscall_info before the changes */
+ ASSERT_EQ(expected_entry_size, rc) {
+ LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #1"
+ ": entry stop mismatch");
+ }
+ check_psi_entry(_metadata, &info, exp_entry,
+ "PTRACE_GET_SYSCALL_INFO #1");
+
+ /* apply the changes */
+ info.entry.nr = set_entry->nr;
+ for (i = 0; i < ARRAY_SIZE(set_entry->args); ++i)
+ info.entry.args[i] = set_entry->args[i];
+ ASSERT_EQ(0, sys_ptrace(PTRACE_SET_SYSCALL_INFO,
+ tracee_pid, size,
+ (uintptr_t) &info)) {
+ LOG_KILL_TRACEE("PTRACE_SET_SYSCALL_INFO: %m");
+ }
+
+ /* check ptrace_syscall_info after the changes */
+ memset(&info, 0, sizeof(info));
+ info.op = 0xff;
+ ASSERT_LT(0, (rc = sys_ptrace(PTRACE_GET_SYSCALL_INFO,
+ tracee_pid, size,
+ (uintptr_t) &info))) {
+ LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO: %m");
+ }
+ ASSERT_EQ(expected_entry_size, rc) {
+ LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #2"
+ ": entry stop mismatch");
+ }
+ check_psi_entry(_metadata, &info, set_entry,
+ "PTRACE_GET_SYSCALL_INFO #2");
+ } else {
+ /* exiting syscall */
+ const struct si_exit *exp_exit =
+ &si[ptrace_stop / 2 - 1].exit[0];
+ const struct si_exit *set_exit =
+ &si[ptrace_stop / 2 - 1].exit[1];
+
+ /* check ptrace_syscall_info before the changes */
+ ASSERT_EQ(expected_exit_size, rc) {
+ LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #1"
+ ": exit stop mismatch");
+ }
+ check_psi_exit(_metadata, &info, exp_exit,
+ "PTRACE_GET_SYSCALL_INFO #1");
+
+ /* apply the changes */
+ info.exit.is_error = set_exit->is_error;
+ info.exit.rval = set_exit->rval;
+ ASSERT_EQ(0, sys_ptrace(PTRACE_SET_SYSCALL_INFO,
+ tracee_pid, size,
+ (uintptr_t) &info)) {
+ LOG_KILL_TRACEE("PTRACE_SET_SYSCALL_INFO: %m");
+ }
+
+ /* check ptrace_syscall_info after the changes */
+ memset(&info, 0, sizeof(info));
+ info.op = 0xff;
+ ASSERT_LT(0, (rc = sys_ptrace(PTRACE_GET_SYSCALL_INFO,
+ tracee_pid, size,
+ (uintptr_t) &info))) {
+ LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #2: %m");
+ }
+ ASSERT_EQ(expected_exit_size, rc) {
+ LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO #2"
+ ": exit stop mismatch");
+ }
+ check_psi_exit(_metadata, &info, set_exit,
+ "PTRACE_GET_SYSCALL_INFO #2");
+ }
+ break;
+
+ default:
+ LOG_KILL_TRACEE("unexpected stop signal %u",
+ WSTOPSIG(status));
+ abort();
+ }
+
+ ASSERT_EQ(0, sys_ptrace(PTRACE_SYSCALL, tracee_pid, 0, 0)) {
+ LOG_KILL_TRACEE("PTRACE_SYSCALL: %m");
+ }
+ }
+
+ ASSERT_EQ(ptrace_stop, ARRAY_SIZE(si) * 2);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c
index 0326b39a11b9..30cab5d425d2 100644
--- a/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c
+++ b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c
@@ -56,7 +56,7 @@ int main(int argc, char **argv)
}
if (write(fd, "1\n", 2) < 0) {
- perror("Can' enable power floor notifications\n");
+ perror("Can't enable power floor notifications\n");
exit(1);
}
diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c
index 217c3a641c53..a40097232967 100644
--- a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c
+++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c
@@ -37,7 +37,7 @@ void workload_hint_exit(int signum)
}
if (write(fd, "0\n", 2) < 0) {
- perror("Can' disable workload hints\n");
+ perror("Can't disable workload hints\n");
exit(1);
}
@@ -99,7 +99,7 @@ int main(int argc, char **argv)
}
if (write(fd, "1\n", 2) < 0) {
- perror("Can' enable workload hints\n");
+ perror("Can't enable workload hints\n");
exit(1);
}
diff --git a/tools/testing/vma/Makefile b/tools/testing/vma/Makefile
index 860fd2311dcc..66f3831a668f 100644
--- a/tools/testing/vma/Makefile
+++ b/tools/testing/vma/Makefile
@@ -9,7 +9,7 @@ include ../shared/shared.mk
OFILES = $(SHARED_OFILES) vma.o maple-shim.o
TARGETS = vma
-vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma.h
+vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_init.c ../../../mm/vma_exec.c ../../../mm/vma.h
vma: $(OFILES)
$(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS)
diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
index 11f761769b5b..2be7597a2ac2 100644
--- a/tools/testing/vma/vma.c
+++ b/tools/testing/vma/vma.c
@@ -28,6 +28,8 @@ unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
* Directly import the VMA implementation here. Our vma_internal.h wrapper
* provides userland-equivalent functionality for everything vma.c uses.
*/
+#include "../../../mm/vma_init.c"
+#include "../../../mm/vma_exec.c"
#include "../../../mm/vma.c"
const struct vm_operations_struct vma_dummy_vm_ops;
@@ -90,6 +92,12 @@ static int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma)
return res;
}
+static void detach_free_vma(struct vm_area_struct *vma)
+{
+ vma_mark_detached(vma);
+ vm_area_free(vma);
+}
+
/* Helper function to allocate a VMA and link it to the tree. */
static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
unsigned long start,
@@ -103,7 +111,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
return NULL;
if (attach_vma(mm, vma)) {
- vm_area_free(vma);
+ detach_free_vma(vma);
return NULL;
}
@@ -185,6 +193,15 @@ static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start,
vmg->__adjust_next_start = false;
}
+/* Helper function to set both the VMG range and its anon_vma. */
+static void vmg_set_range_anon_vma(struct vma_merge_struct *vmg, unsigned long start,
+ unsigned long end, pgoff_t pgoff, vm_flags_t flags,
+ struct anon_vma *anon_vma)
+{
+ vmg_set_range(vmg, start, end, pgoff, flags);
+ vmg->anon_vma = anon_vma;
+}
+
/*
* Helper function to try to merge a new VMA.
*
@@ -239,7 +256,7 @@ static int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi)
vma_iter_set(vmi, 0);
for_each_vma(*vmi, vma) {
- vm_area_free(vma);
+ detach_free_vma(vma);
count++;
}
@@ -265,6 +282,22 @@ static void dummy_close(struct vm_area_struct *)
{
}
+static void __vma_set_dummy_anon_vma(struct vm_area_struct *vma,
+ struct anon_vma_chain *avc,
+ struct anon_vma *anon_vma)
+{
+ vma->anon_vma = anon_vma;
+ INIT_LIST_HEAD(&vma->anon_vma_chain);
+ list_add(&avc->same_vma, &vma->anon_vma_chain);
+ avc->anon_vma = vma->anon_vma;
+}
+
+static void vma_set_dummy_anon_vma(struct vm_area_struct *vma,
+ struct anon_vma_chain *avc)
+{
+ __vma_set_dummy_anon_vma(vma, avc, &dummy_anon_vma);
+}
+
static bool test_simple_merge(void)
{
struct vm_area_struct *vma;
@@ -293,7 +326,7 @@ static bool test_simple_merge(void)
ASSERT_EQ(vma->vm_pgoff, 0);
ASSERT_EQ(vma->vm_flags, flags);
- vm_area_free(vma);
+ detach_free_vma(vma);
mtree_destroy(&mm.mm_mt);
return true;
@@ -335,7 +368,7 @@ static bool test_simple_modify(void)
ASSERT_EQ(vma->vm_end, 0x1000);
ASSERT_EQ(vma->vm_pgoff, 0);
- vm_area_free(vma);
+ detach_free_vma(vma);
vma_iter_clear(&vmi);
vma = vma_next(&vmi);
@@ -344,7 +377,7 @@ static bool test_simple_modify(void)
ASSERT_EQ(vma->vm_end, 0x2000);
ASSERT_EQ(vma->vm_pgoff, 1);
- vm_area_free(vma);
+ detach_free_vma(vma);
vma_iter_clear(&vmi);
vma = vma_next(&vmi);
@@ -353,7 +386,7 @@ static bool test_simple_modify(void)
ASSERT_EQ(vma->vm_end, 0x3000);
ASSERT_EQ(vma->vm_pgoff, 2);
- vm_area_free(vma);
+ detach_free_vma(vma);
mtree_destroy(&mm.mm_mt);
return true;
@@ -381,7 +414,7 @@ static bool test_simple_expand(void)
ASSERT_EQ(vma->vm_end, 0x3000);
ASSERT_EQ(vma->vm_pgoff, 0);
- vm_area_free(vma);
+ detach_free_vma(vma);
mtree_destroy(&mm.mm_mt);
return true;
@@ -402,7 +435,7 @@ static bool test_simple_shrink(void)
ASSERT_EQ(vma->vm_end, 0x1000);
ASSERT_EQ(vma->vm_pgoff, 0);
- vm_area_free(vma);
+ detach_free_vma(vma);
mtree_destroy(&mm.mm_mt);
return true;
@@ -593,7 +626,7 @@ static bool test_merge_new(void)
ASSERT_EQ(vma->vm_pgoff, 0);
ASSERT_EQ(vma->anon_vma, &dummy_anon_vma);
- vm_area_free(vma);
+ detach_free_vma(vma);
count++;
}
@@ -953,6 +986,7 @@ static bool test_merge_existing(void)
const struct vm_operations_struct vm_ops = {
.close = dummy_close,
};
+ struct anon_vma_chain avc = {};
/*
* Merge right case - partial span.
@@ -968,10 +1002,10 @@ static bool test_merge_existing(void)
vma->vm_ops = &vm_ops; /* This should have no impact. */
vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags);
vma_next->vm_ops = &vm_ops; /* This should have no impact. */
- vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, flags, &dummy_anon_vma);
vmg.middle = vma;
vmg.prev = vma;
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &avc);
ASSERT_EQ(merge_existing(&vmg), vma_next);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_next->vm_start, 0x3000);
@@ -1001,9 +1035,9 @@ static bool test_merge_existing(void)
vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, flags);
vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags);
vma_next->vm_ops = &vm_ops; /* This should have no impact. */
- vmg_set_range(&vmg, 0x2000, 0x6000, 2, flags);
+ vmg_set_range_anon_vma(&vmg, 0x2000, 0x6000, 2, flags, &dummy_anon_vma);
vmg.middle = vma;
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &avc);
ASSERT_EQ(merge_existing(&vmg), vma_next);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_next->vm_start, 0x2000);
@@ -1030,11 +1064,10 @@ static bool test_merge_existing(void)
vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags);
vma->vm_ops = &vm_ops; /* This should have no impact. */
- vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, flags, &dummy_anon_vma);
vmg.prev = vma_prev;
vmg.middle = vma;
- vma->anon_vma = &dummy_anon_vma;
-
+ vma_set_dummy_anon_vma(vma, &avc);
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0);
@@ -1064,10 +1097,10 @@ static bool test_merge_existing(void)
vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags);
- vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, &dummy_anon_vma);
vmg.prev = vma_prev;
vmg.middle = vma;
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &avc);
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0);
@@ -1094,10 +1127,10 @@ static bool test_merge_existing(void)
vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags);
vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags);
- vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, &dummy_anon_vma);
vmg.prev = vma_prev;
vmg.middle = vma;
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &avc);
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0);
@@ -1180,12 +1213,9 @@ static bool test_anon_vma_non_mergeable(void)
.mm = &mm,
.vmi = &vmi,
};
- struct anon_vma_chain dummy_anon_vma_chain1 = {
- .anon_vma = &dummy_anon_vma,
- };
- struct anon_vma_chain dummy_anon_vma_chain2 = {
- .anon_vma = &dummy_anon_vma,
- };
+ struct anon_vma_chain dummy_anon_vma_chain_1 = {};
+ struct anon_vma_chain dummy_anon_vma_chain_2 = {};
+ struct anon_vma dummy_anon_vma_2;
/*
* In the case of modified VMA merge, merging both left and right VMAs
@@ -1209,24 +1239,11 @@ static bool test_anon_vma_non_mergeable(void)
*
* However, when prev is compared to next, the merge should fail.
*/
-
- INIT_LIST_HEAD(&vma_prev->anon_vma_chain);
- list_add(&dummy_anon_vma_chain1.same_vma, &vma_prev->anon_vma_chain);
- ASSERT_TRUE(list_is_singular(&vma_prev->anon_vma_chain));
- vma_prev->anon_vma = &dummy_anon_vma;
- ASSERT_TRUE(is_mergeable_anon_vma(NULL, vma_prev->anon_vma, vma_prev));
-
- INIT_LIST_HEAD(&vma_next->anon_vma_chain);
- list_add(&dummy_anon_vma_chain2.same_vma, &vma_next->anon_vma_chain);
- ASSERT_TRUE(list_is_singular(&vma_next->anon_vma_chain));
- vma_next->anon_vma = (struct anon_vma *)2;
- ASSERT_TRUE(is_mergeable_anon_vma(NULL, vma_next->anon_vma, vma_next));
-
- ASSERT_FALSE(is_mergeable_anon_vma(vma_prev->anon_vma, vma_next->anon_vma, NULL));
-
- vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, NULL);
vmg.prev = vma_prev;
vmg.middle = vma;
+ vma_set_dummy_anon_vma(vma_prev, &dummy_anon_vma_chain_1);
+ __vma_set_dummy_anon_vma(vma_next, &dummy_anon_vma_chain_2, &dummy_anon_vma_2);
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
@@ -1253,17 +1270,12 @@ static bool test_anon_vma_non_mergeable(void)
vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags);
- INIT_LIST_HEAD(&vma_prev->anon_vma_chain);
- list_add(&dummy_anon_vma_chain1.same_vma, &vma_prev->anon_vma_chain);
- vma_prev->anon_vma = (struct anon_vma *)1;
-
- INIT_LIST_HEAD(&vma_next->anon_vma_chain);
- list_add(&dummy_anon_vma_chain2.same_vma, &vma_next->anon_vma_chain);
- vma_next->anon_vma = (struct anon_vma *)2;
-
- vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, NULL);
vmg.prev = vma_prev;
+ vma_set_dummy_anon_vma(vma_prev, &dummy_anon_vma_chain_1);
+ __vma_set_dummy_anon_vma(vma_next, &dummy_anon_vma_chain_2, &dummy_anon_vma_2);
+ vmg.anon_vma = NULL;
ASSERT_EQ(merge_new(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0);
@@ -1363,8 +1375,8 @@ static bool test_dup_anon_vma(void)
vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags);
vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, flags);
-
- vma->anon_vma = &dummy_anon_vma;
+ vmg.anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain);
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
vmg.prev = vma_prev;
vmg.middle = vma;
@@ -1392,7 +1404,7 @@ static bool test_dup_anon_vma(void)
vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
vma = alloc_and_link_vma(&mm, 0x3000, 0x8000, 3, flags);
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain);
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
vmg.prev = vma_prev;
vmg.middle = vma;
@@ -1420,7 +1432,7 @@ static bool test_dup_anon_vma(void)
vma = alloc_and_link_vma(&mm, 0, 0x5000, 0, flags);
vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, flags);
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain);
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
vmg.prev = vma;
vmg.middle = vma;
@@ -1447,6 +1459,7 @@ static bool test_vmi_prealloc_fail(void)
.mm = &mm,
.vmi = &vmi,
};
+ struct anon_vma_chain avc = {};
struct vm_area_struct *vma_prev, *vma;
/*
@@ -1459,9 +1472,10 @@ static bool test_vmi_prealloc_fail(void)
vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags);
vma->anon_vma = &dummy_anon_vma;
- vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x5000, 3, flags, &dummy_anon_vma);
vmg.prev = vma_prev;
vmg.middle = vma;
+ vma_set_dummy_anon_vma(vma, &avc);
fail_prealloc = true;
@@ -1661,6 +1675,7 @@ int main(void)
int num_tests = 0, num_fail = 0;
maple_tree_init();
+ vma_state_init();
#define TEST(name) \
do { \
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 572ab2cea763..f6e45e62da3a 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -56,6 +56,8 @@ extern unsigned long dac_mmap_min_addr;
#define VM_PFNMAP 0x00000400
#define VM_LOCKED 0x00002000
#define VM_IO 0x00004000
+#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */
+#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */
#define VM_DONTEXPAND 0x00040000
#define VM_LOCKONFAULT 0x00080000
#define VM_ACCOUNT 0x00100000
@@ -70,6 +72,20 @@ extern unsigned long dac_mmap_min_addr;
#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
+#ifdef CONFIG_STACK_GROWSUP
+#define VM_STACK VM_GROWSUP
+#define VM_STACK_EARLY VM_GROWSDOWN
+#else
+#define VM_STACK VM_GROWSDOWN
+#define VM_STACK_EARLY 0
+#endif
+
+#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
+#define TASK_SIZE_LOW DEFAULT_MAP_WINDOW
+#define TASK_SIZE_MAX DEFAULT_MAP_WINDOW
+#define STACK_TOP TASK_SIZE_LOW
+#define STACK_TOP_MAX TASK_SIZE_MAX
+
/* This mask represents all the VMA flag bits used by mlock */
#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT)
@@ -82,6 +98,10 @@ extern unsigned long dac_mmap_min_addr;
#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
+#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
+#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
+
#define RLIMIT_STACK 3 /* max stack size */
#define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */
@@ -135,6 +155,10 @@ typedef __bitwise unsigned int vm_fault_t;
*/
#define pr_warn_once pr_err
+#define data_race(expr) expr
+
+#define ASSERT_EXCLUSIVE_WRITER(x)
+
struct kref {
refcount_t refcount;
};
@@ -229,12 +253,46 @@ struct mm_struct {
unsigned long flags; /* Must use atomic bitops to access */
};
+struct vm_area_struct;
+
+/*
+ * Describes a VMA that is about to be mmap()'ed. Drivers may choose to
+ * manipulate mutable fields which will cause those fields to be updated in the
+ * resultant VMA.
+ *
+ * Helper functions are not required for manipulating any field.
+ */
+struct vm_area_desc {
+ /* Immutable state. */
+ struct mm_struct *mm;
+ unsigned long start;
+ unsigned long end;
+
+ /* Mutable fields. Populated with initial state. */
+ pgoff_t pgoff;
+ struct file *file;
+ vm_flags_t vm_flags;
+ pgprot_t page_prot;
+
+ /* Write-only fields. */
+ const struct vm_operations_struct *vm_ops;
+ void *private_data;
+};
+
+struct file_operations {
+ int (*mmap)(struct file *, struct vm_area_struct *);
+ int (*mmap_prepare)(struct vm_area_desc *);
+};
+
struct file {
struct address_space *f_mapping;
+ const struct file_operations *f_op;
};
#define VMA_LOCK_OFFSET 0x40000000
+typedef struct { unsigned long v; } freeptr_t;
+
struct vm_area_struct {
/* The first cache line has the info for VMA tree walking. */
@@ -244,9 +302,7 @@ struct vm_area_struct {
unsigned long vm_start;
unsigned long vm_end;
};
-#ifdef CONFIG_PER_VMA_LOCK
- struct rcu_head vm_rcu; /* Used for deferred freeing. */
-#endif
+ freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
};
struct mm_struct *vm_mm; /* The address space we belong to. */
@@ -421,6 +477,87 @@ struct vm_unmapped_area_info {
unsigned long start_gap;
};
+struct pagetable_move_control {
+ struct vm_area_struct *old; /* Source VMA. */
+ struct vm_area_struct *new; /* Destination VMA. */
+ unsigned long old_addr; /* Address from which the move begins. */
+ unsigned long old_end; /* Exclusive address at which old range ends. */
+ unsigned long new_addr; /* Address to move page tables to. */
+ unsigned long len_in; /* Bytes to remap specified by user. */
+
+ bool need_rmap_locks; /* Do rmap locks need to be taken? */
+ bool for_stack; /* Is this an early temp stack being moved? */
+};
+
+#define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_) \
+ struct pagetable_move_control name = { \
+ .old = old_, \
+ .new = new_, \
+ .old_addr = old_addr_, \
+ .old_end = (old_addr_) + (len_), \
+ .new_addr = new_addr_, \
+ .len_in = len_, \
+ }
+
+struct kmem_cache_args {
+ /**
+ * @align: The required alignment for the objects.
+ *
+ * %0 means no specific alignment is requested.
+ */
+ unsigned int align;
+ /**
+ * @useroffset: Usercopy region offset.
+ *
+ * %0 is a valid offset, when @usersize is non-%0
+ */
+ unsigned int useroffset;
+ /**
+ * @usersize: Usercopy region size.
+ *
+ * %0 means no usercopy region is specified.
+ */
+ unsigned int usersize;
+ /**
+ * @freeptr_offset: Custom offset for the free pointer
+ * in &SLAB_TYPESAFE_BY_RCU caches
+ *
+ * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
+ * outside of the object. This might cause the object to grow in size.
+ * Cache creators that have a reason to avoid this can specify a custom
+ * free pointer offset in their struct where the free pointer will be
+ * placed.
+ *
+ * Note that placing the free pointer inside the object requires the
+ * caller to ensure that no fields are invalidated that are required to
+ * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
+ * details).
+ *
+ * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
+ * is specified, %use_freeptr_offset must be set %true.
+ *
+ * Note that @ctor currently isn't supported with custom free pointers
+ * as a @ctor requires an external free pointer.
+ */
+ unsigned int freeptr_offset;
+ /**
+ * @use_freeptr_offset: Whether a @freeptr_offset is used.
+ */
+ bool use_freeptr_offset;
+ /**
+ * @ctor: A constructor for the objects.
+ *
+ * The constructor is invoked for each object in a newly allocated slab
+ * page. It is the cache user's responsibility to free object in the
+ * same state as after calling the constructor, or deal appropriately
+ * with any differences between a freshly constructed and a reallocated
+ * object.
+ *
+ * %NULL means no constructor.
+ */
+ void (*ctor)(void *);
+};
+
static inline void vma_iter_invalidate(struct vma_iterator *vmi)
{
mas_pause(&vmi->mas);
@@ -505,31 +642,38 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
vma->vm_lock_seq = UINT_MAX;
}
-static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
-{
- struct vm_area_struct *vma = calloc(1, sizeof(struct vm_area_struct));
+struct kmem_cache {
+ const char *name;
+ size_t object_size;
+ struct kmem_cache_args *args;
+};
- if (!vma)
- return NULL;
+static inline struct kmem_cache *__kmem_cache_create(const char *name,
+ size_t object_size,
+ struct kmem_cache_args *args)
+{
+ struct kmem_cache *ret = malloc(sizeof(struct kmem_cache));
- vma_init(vma, mm);
+ ret->name = name;
+ ret->object_size = object_size;
+ ret->args = args;
- return vma;
+ return ret;
}
-static inline struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
-{
- struct vm_area_struct *new = calloc(1, sizeof(struct vm_area_struct));
+#define kmem_cache_create(__name, __object_size, __args, ...) \
+ __kmem_cache_create((__name), (__object_size), (__args))
- if (!new)
- return NULL;
+static inline void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
+{
+ (void)gfpflags;
- memcpy(new, orig, sizeof(*new));
- refcount_set(&new->vm_refcnt, 0);
- new->vm_lock_seq = UINT_MAX;
- INIT_LIST_HEAD(&new->anon_vma_chain);
+ return calloc(s->object_size, 1);
+}
- return new;
+static inline void kmem_cache_free(struct kmem_cache *s, void *x)
+{
+ free(x);
}
/*
@@ -696,11 +840,6 @@ static inline void mpol_put(struct mempolicy *)
{
}
-static inline void vm_area_free(struct vm_area_struct *vma)
-{
- free(vma);
-}
-
static inline void lru_add_drain(void)
{
}
@@ -1018,11 +1157,6 @@ static inline void vm_flags_clear(struct vm_area_struct *vma,
vma->__vm_flags &= ~flags;
}
-static inline int call_mmap(struct file *, struct vm_area_struct *)
-{
- return 0;
-}
-
static inline int shmem_zero_setup(struct vm_area_struct *)
{
return 0;
@@ -1240,4 +1374,91 @@ static inline int mapping_map_writable(struct address_space *mapping)
return 0;
}
+static inline unsigned long move_page_tables(struct pagetable_move_control *pmc)
+{
+ (void)pmc;
+
+ return 0;
+}
+
+static inline void free_pgd_range(struct mmu_gather *tlb,
+ unsigned long addr, unsigned long end,
+ unsigned long floor, unsigned long ceiling)
+{
+ (void)tlb;
+ (void)addr;
+ (void)end;
+ (void)floor;
+ (void)ceiling;
+}
+
+static inline int ksm_execve(struct mm_struct *mm)
+{
+ (void)mm;
+
+ return 0;
+}
+
+static inline void ksm_exit(struct mm_struct *mm)
+{
+ (void)mm;
+}
+
+static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
+{
+ (void)vma;
+ (void)reset_refcnt;
+}
+
+static inline void vma_numab_state_init(struct vm_area_struct *vma)
+{
+ (void)vma;
+}
+
+static inline void vma_numab_state_free(struct vm_area_struct *vma)
+{
+ (void)vma;
+}
+
+static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
+ struct vm_area_struct *new_vma)
+{
+ (void)orig_vma;
+ (void)new_vma;
+}
+
+static inline void free_anon_vma_name(struct vm_area_struct *vma)
+{
+ (void)vma;
+}
+
+/* Did the driver provide valid mmap hook configuration? */
+static inline bool file_has_valid_mmap_hooks(struct file *file)
+{
+ bool has_mmap = file->f_op->mmap;
+ bool has_mmap_prepare = file->f_op->mmap_prepare;
+
+ /* Hooks are mutually exclusive. */
+ if (WARN_ON_ONCE(has_mmap && has_mmap_prepare))
+ return false;
+ if (WARN_ON_ONCE(!has_mmap && !has_mmap_prepare))
+ return false;
+
+ return true;
+}
+
+static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ if (WARN_ON_ONCE(file->f_op->mmap_prepare))
+ return -EINVAL;
+
+ return file->f_op->mmap(file, vma);
+}
+
+static inline int __call_mmap_prepare(struct file *file,
+ struct vm_area_desc *desc)
+{
+ return file->f_op->mmap_prepare(desc);
+}
+
#endif /* __MM_VMA_INTERNAL_H */