path: root/mm/mremap.c
author    Linus Torvalds <torvalds@linux-foundation.org>  2024-11-23 09:58:07 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>  2024-11-23 09:58:07 -0800
commit    5c00ff742bf5caf85f60e1c73999f99376fb865d (patch)
tree      fa484e83c27af79f1c0511e7e0673507461c9379 /mm/mremap.c
parent    228a1157fb9fec47eb135b51c0202b574e079ebf (diff)
parent    2532e6c74a67e65b95f310946e0c0e0a41b3a34b (diff)
Merge tag 'mm-stable-2024-11-18-19-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:

 - The series "zram: optimal post-processing target selection" from Sergey Senozhatsky improves zram's post-processing selection algorithm. This leads to improved memory savings.

 - Wei Yang has gone to town on the mapletree code, contributing several series which clean up the implementation:
     - "refine mas_mab_cp()"
     - "Reduce the space to be cleared for maple_big_node"
     - "maple_tree: simplify mas_push_node()"
     - "Following cleanup after introduce mas_wr_store_type()"
     - "refine storing null"

 - The series "selftests/mm: hugetlb_fault_after_madv improvements" from David Hildenbrand fixes this selftest for s390.

 - The series "introduce pte_offset_map_{ro|rw}_nolock()" from Qi Zheng implements some rationalizations and cleanups in the page mapping code.

 - The series "mm: optimize shadow entries removal" from Shakeel Butt optimizes the file truncation code by speeding up the handling of shadow entries.

 - The series "Remove PageKsm()" from Matthew Wilcox completes the migration of this flag over to being a folio-based flag.

 - The series "Unify hugetlb into arch_get_unmapped_area functions" from Oscar Salvador implements a bunch of consolidations and cleanups in the hugetlb code.

 - The series "Do not shatter hugezeropage on wp-fault" from Dev Jain takes away the wp-fault time practice of turning a huge zero page into small pages. Instead we replace the whole thing with a THP. More consistent, cleaner, and it potentially saves a large number of pagefaults.

 - The series "percpu: Add a test case and fix for clang" from Andy Shevchenko enhances and fixes the kernel's built-in percpu test code.

 - The series "mm/mremap: Remove extra vma tree walk" from Liam Howlett optimizes mremap() by avoiding doing things which we didn't need to do.

 - The series "Improve the tmpfs large folio read performance" from Baolin Wang teaches tmpfs to copy data into userspace at the folio size rather than as individual pages. A 20% speedup was observed.

 - The series "mm/damon/vaddr: Fix issue in damon_va_evenly_split_region()" from Zheng Yejian fixes DAMON splitting.

 - The series "memcg-v1: fully deprecate charge moving" from Shakeel Butt removes the long-deprecated memcg v1 charge moving feature.

 - The series "fix error handling in mmap_region() and refactor" from Lorenzo Stoakes cleans up some of the mmap() error handling and addresses some potential performance issues.

 - The series "x86/module: use large ROX pages for text allocations" from Mike Rapoport teaches x86 to use large pages for read-only-execute module text.

 - The series "page allocation tag compression" from Suren Baghdasaryan is follow-on maintenance work for the new page allocation profiling feature.

 - The series "page->index removals in mm" from Matthew Wilcox removes most references to page->index in mm/. A slow march towards shrinking struct page.

 - The series "damon/{self,kunit}tests: minor fixups for DAMON debugfs interface tests" from Andrew Paniakin performs maintenance work for DAMON's self-testing code.

 - The series "mm: zswap swap-out of large folios" from Kanchana Sridhar improves zswap's batching of compression and decompression. It is a step along the way towards using Intel IAA hardware acceleration for this zswap operation.

 - The series "kasan: migrate the last module test to kunit" from Sabyrzhan Tasbolatov completes the migration of the KASAN built-in tests over to the KUnit framework.

 - The series "implement lightweight guard pages" from Lorenzo Stoakes permits userspace to place fault-generating guard pages within a single VMA, rather than requiring that multiple VMAs be created for this. Improved efficiencies for userspace memory allocators are expected (a usage sketch follows this summary).

 - The series "memcg: tracepoint for flushing stats" from JP Kobryn uses tracepoints to provide increased visibility into memcg stats flushing activity.

 - The series "zram: IDLE flag handling fixes" from Sergey Senozhatsky fixes a zram buglet which potentially affected performance.

 - The series "mm: add more kernel parameters to control mTHP" from Maíra Canal enhances our ability to control/configure multisize THP from the kernel boot command line.

 - The series "kasan: few improvements on kunit tests" from Sabyrzhan Tasbolatov has a couple of fixups for the KASAN KUnit tests.

 - The series "mm/list_lru: Split list_lru lock into per-cgroup scope" from Kairui Song optimizes list_lru memory utilization when lockdep is enabled.

* tag 'mm-stable-2024-11-18-19-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (215 commits)
  cma: enforce non-zero pageblock_order during cma_init_reserved_mem()
  mm/kfence: add a new kunit test test_use_after_free_read_nofault()
  zram: fix NULL pointer in comp_algorithm_show()
  memcg/hugetlb: add hugeTLB counters to memcg
  vmstat: call fold_vm_zone_numa_events() before show per zone NUMA event
  mm: mmap_lock: check trace_mmap_lock_$type_enabled() instead of regcount
  zram: ZRAM_DEF_COMP should depend on ZRAM
  MAINTAINERS/MEMORY MANAGEMENT: add document files for mm
  Docs/mm/damon: recommend academic papers to read and/or cite
  mm: define general function pXd_init()
  kmemleak: iommu/iova: fix transient kmemleak false positive
  mm/list_lru: simplify the list_lru walk callback function
  mm/list_lru: split the lock to per-cgroup scope
  mm/list_lru: simplify reparenting and initial allocation
  mm/list_lru: code clean up for reparenting
  mm/list_lru: don't export list_lru_add
  mm/list_lru: don't pass unnecessary key parameters
  kasan: add kunit tests for kmalloc_track_caller, kmalloc_node_track_caller
  kasan: change kasan_atomics kunit test as KUNIT_CASE_SLOW
  kasan: use EXPORT_SYMBOL_IF_KUNIT to export symbols
  ...
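The lightweight guard pages item above is the one userspace-visible change summarized here; the sketch below is an illustration only, assuming the MADV_GUARD_INSTALL/MADV_GUARD_REMOVE madvise advice values that series introduces (the numeric fallbacks are assumptions, check <asm-generic/mman-common.h>). It is not part of the mm/mremap.c diff shown further down.

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#ifndef MADV_GUARD_INSTALL
#define MADV_GUARD_INSTALL 102	/* assumed value; check <asm-generic/mman-common.h> */
#endif
#ifndef MADV_GUARD_REMOVE
#define MADV_GUARD_REMOVE 103	/* assumed value */
#endif

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	size_t len = 16 * (size_t)page;

	/* One anonymous VMA for a whole allocator arena. */
	char *arena = mmap(NULL, len, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (arena == MAP_FAILED)
		return 1;

	/* Make one page in the middle fault-generating without splitting the VMA. */
	if (madvise(arena + 8 * page, page, MADV_GUARD_INSTALL))
		perror("madvise(MADV_GUARD_INSTALL)");	/* EINVAL on kernels without the feature */

	arena[0] = 'x';			/* ordinary access: fine */
	/* arena[8 * page] = 'x';	   would fault (SIGSEGV) while the guard is installed */

	madvise(arena + 8 * page, page, MADV_GUARD_REMOVE);	/* lift the guard again */
	munmap(arena, len);
	return 0;
}

The point of the feature is exactly this: the guard region lives inside the existing VMA, so an allocator does not have to create extra PROT_NONE mappings between its sub-allocations.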
Diffstat (limited to 'mm/mremap.c')
-rw-r--r--  mm/mremap.c  104
1 file changed, 65 insertions, 39 deletions
diff --git a/mm/mremap.c b/mm/mremap.c
index dee98ff2bbd6..60473413836b 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -140,6 +140,7 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
{
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
+ pmd_t dummy_pmdval;
spinlock_t *old_ptl, *new_ptl;
bool force_flush = false;
unsigned long len = old_end - old_addr;
@@ -175,7 +176,15 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
err = -EAGAIN;
goto out;
}
- new_pte = pte_offset_map_nolock(mm, new_pmd, new_addr, &new_ptl);
+ /*
+ * Now new_pte is none, so the hpage_collapse_scan_file() path cannot find
+ * this PTE page by traversing file->f_mapping, hence there is no concurrency
+ * with retract_page_tables(). In addition, we already hold the exclusive
+ * mmap_lock, so this new_pte page is stable and there is no need to get
+ * pmdval and do a pmd_same() check.
+ */
+ new_pte = pte_offset_map_rw_nolock(mm, new_pmd, new_addr, &dummy_pmdval,
+ &new_ptl);
if (!new_pte) {
pte_unmap_unlock(old_pte, old_ptl);
err = -EAGAIN;
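For context on the hunk above: a caller of pte_offset_map_rw_nolock() normally does use the returned pmdval, revalidating it under the PTE lock before touching the page table; move_ptes() can pass a dummy because of the mmap_lock and empty-destination guarantees described in the comment. A minimal sketch of the usual pattern follows (example_write_pte() and its calling context are assumptions for illustration, not code from this patch):

#include <linux/mm.h>
#include <linux/pgtable.h>

static int example_write_pte(struct mm_struct *mm, pmd_t *pmd,
			     unsigned long addr, pte_t newval)
{
	spinlock_t *ptl;
	pmd_t pmdval;
	pte_t *pte;

	pte = pte_offset_map_rw_nolock(mm, pmd, addr, &pmdval, &ptl);
	if (!pte)
		return -EAGAIN;

	spin_lock(ptl);
	/*
	 * Without the guarantees move_ptes() relies on, the PTE page could
	 * have been freed or replaced underneath us, e.g. by
	 * retract_page_tables(), so recheck the pmd before writing.
	 */
	if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) {
		pte_unmap_unlock(pte, ptl);
		return -EAGAIN;
	}

	set_pte_at(mm, addr, pte, newval);
	pte_unmap_unlock(pte, ptl);
	return 0;
}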
@@ -817,17 +826,24 @@ static unsigned long move_vma(struct vm_area_struct *vma,
return new_addr;
}
-static struct vm_area_struct *vma_to_resize(unsigned long addr,
+/*
+ * resize_is_valid() - Ensure the vma can be resized to the new length at the given
+ * address.
+ *
+ * @vma: The vma to resize
+ * @addr: The old address
+ * @old_len: The current size
+ * @new_len: The desired size
+ * @flags: The vma flags
+ *
+ * Return 0 on success, error otherwise.
+ */
+static int resize_is_valid(struct vm_area_struct *vma, unsigned long addr,
unsigned long old_len, unsigned long new_len, unsigned long flags)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
unsigned long pgoff;
- vma = vma_lookup(mm, addr);
- if (!vma)
- return ERR_PTR(-EFAULT);
-
/*
* !old_len is a special case where an attempt is made to 'duplicate'
* a mapping. This makes no sense for private mappings as it will
@@ -838,39 +854,53 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
*/
if (!old_len && !(vma->vm_flags & (VM_SHARED | VM_MAYSHARE))) {
pr_warn_once("%s (%d): attempted to duplicate a private mapping with mremap. This is not supported.\n", current->comm, current->pid);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
if ((flags & MREMAP_DONTUNMAP) &&
(vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
/* We can't remap across vm area boundaries */
if (old_len > vma->vm_end - addr)
- return ERR_PTR(-EFAULT);
+ return -EFAULT;
if (new_len == old_len)
- return vma;
+ return 0;
/* Need to be careful about a growing mapping */
pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
pgoff += vma->vm_pgoff;
if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
- return ERR_PTR(-EFAULT);
+ return -EFAULT;
if (!mlock_future_ok(mm, vma->vm_flags, new_len - old_len))
- return ERR_PTR(-EAGAIN);
+ return -EAGAIN;
if (!may_expand_vm(mm, vma->vm_flags,
(new_len - old_len) >> PAGE_SHIFT))
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
- return vma;
+ return 0;
}
+/*
+ * mremap_to() - remap a vma to a new location
+ * @addr: The old address
+ * @old_len: The old size
+ * @new_addr: The target address
+ * @new_len: The new size
+ * @locked: If the returned vma is locked (VM_LOCKED)
+ * @flags: the mremap flags
+ * @uf: The mremap userfaultfd context
+ * @uf_unmap_early: The userfaultfd unmap early context
+ * @uf_unmap: The userfaultfd unmap context
+ *
+ * Returns: The new address of the vma or an error.
+ */
static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
unsigned long new_addr, unsigned long new_len, bool *locked,
unsigned long flags, struct vm_userfaultfd_ctx *uf,
@@ -879,18 +909,18 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- unsigned long ret = -EINVAL;
+ unsigned long ret;
unsigned long map_flags = 0;
if (offset_in_page(new_addr))
- goto out;
+ return -EINVAL;
if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
- goto out;
+ return -EINVAL;
/* Ensure the old/new locations do not overlap */
if (addr + old_len > new_addr && new_addr + new_len > addr)
- goto out;
+ return -EINVAL;
/*
* move_vma() need us to stay 4 maps below the threshold, otherwise
@@ -917,27 +947,28 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
*/
ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
if (ret)
- goto out;
+ return ret;
}
if (old_len > new_len) {
ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap);
if (ret)
- goto out;
+ return ret;
old_len = new_len;
}
- vma = vma_to_resize(addr, old_len, new_len, flags);
- if (IS_ERR(vma)) {
- ret = PTR_ERR(vma);
- goto out;
- }
+ vma = vma_lookup(mm, addr);
+ if (!vma)
+ return -EFAULT;
+
+ ret = resize_is_valid(vma, addr, old_len, new_len, flags);
+ if (ret)
+ return ret;
/* MREMAP_DONTUNMAP expands by old_len since old_len == new_len */
if (flags & MREMAP_DONTUNMAP &&
!may_expand_vm(mm, vma->vm_flags, old_len >> PAGE_SHIFT)) {
- ret = -ENOMEM;
- goto out;
+ return -ENOMEM;
}
if (flags & MREMAP_FIXED)
@@ -950,17 +981,14 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
((addr - vma->vm_start) >> PAGE_SHIFT),
map_flags);
if (IS_ERR_VALUE(ret))
- goto out;
+ return ret;
/* We got a new mapping */
if (!(flags & MREMAP_FIXED))
new_addr = ret;
- ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf,
- uf_unmap);
-
-out:
- return ret;
+ return move_vma(vma, addr, old_len, new_len, new_addr, locked, flags,
+ uf, uf_unmap);
}
static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
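The vma_to_resize() to resize_is_valid() conversion shown above also switches the return convention: instead of encoding the error in the returned pointer with ERR_PTR(), the helper now returns 0 or a negative errno and the caller does the vma_lookup() itself. A hedged side-by-side sketch of the two conventions (lookup_old() and validate_new() are hypothetical helpers; the <linux/err.h> macros are the real kernel ones):

#include <linux/err.h>
#include <linux/mm.h>

/* Old convention: the error is encoded in the returned pointer. */
static struct vm_area_struct *lookup_old(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma = vma_lookup(mm, addr);

	if (!vma)
		return ERR_PTR(-EFAULT);	/* caller tests IS_ERR(), unpacks PTR_ERR() */
	return vma;
}

/* New convention: the caller already holds the vma; validation returns 0 or -errno. */
static int validate_new(struct vm_area_struct *vma, unsigned long addr, unsigned long len)
{
	if (len > vma->vm_end - addr)
		return -EFAULT;
	return 0;
}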
@@ -1105,11 +1133,9 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
/*
* Ok, we need to grow..
*/
- vma = vma_to_resize(addr, old_len, new_len, flags);
- if (IS_ERR(vma)) {
- ret = PTR_ERR(vma);
+ ret = resize_is_valid(vma, addr, old_len, new_len, flags);
+ if (ret)
goto out;
- }
/* old_len exactly to the end of the area..
*/