Diffstat (limited to 'mm')
-rw-r--r-- | mm/damon/modules-common.c |  2
-rw-r--r-- | mm/damon/modules-common.h |  2
-rw-r--r-- | mm/damon/ops-common.c     |  2
-rw-r--r-- | mm/damon/ops-common.h     |  2
-rw-r--r-- | mm/damon/paddr.c          |  2
-rw-r--r-- | mm/damon/sysfs-common.c   |  2
-rw-r--r-- | mm/damon/sysfs-common.h   |  2
-rw-r--r-- | mm/damon/vaddr.c          |  2
-rw-r--r-- | mm/hugetlb.c              | 67
-rw-r--r-- | mm/kmsan/kmsan_test.c     |  1
-rw-r--r-- | mm/madvise.c              |  5
-rw-r--r-- | mm/mempolicy.c            |  4
-rw-r--r-- | mm/mremap.c               |  2
-rw-r--r-- | mm/vma.c                  | 27
-rw-r--r-- | mm/vma.h                  |  7
-rw-r--r-- | mm/vmstat.c               |  1
16 files changed, 98 insertions, 32 deletions
diff --git a/mm/damon/modules-common.c b/mm/damon/modules-common.c
index 7cf96574cde7..86d58f8c4f63 100644
--- a/mm/damon/modules-common.c
+++ b/mm/damon/modules-common.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Common Primitives for DAMON Modules
+ * Common Code for DAMON Modules
  *
  * Author: SeongJae Park <sj@kernel.org>
  */
diff --git a/mm/damon/modules-common.h b/mm/damon/modules-common.h
index f49cdb417005..f103ad556368 100644
--- a/mm/damon/modules-common.h
+++ b/mm/damon/modules-common.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Common Primitives for DAMON Modules
+ * Common Code for DAMON Modules
  *
  * Author: SeongJae Park <sj@kernel.org>
  */
diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c
index 0db1fc70c84d..b43620fee6bb 100644
--- a/mm/damon/ops-common.c
+++ b/mm/damon/ops-common.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Common Primitives for Data Access Monitoring
+ * Common Code for Data Access Monitoring
  *
  * Author: SeongJae Park <sj@kernel.org>
  */
diff --git a/mm/damon/ops-common.h b/mm/damon/ops-common.h
index 18d837d11bce..cc9f5da9c012 100644
--- a/mm/damon/ops-common.h
+++ b/mm/damon/ops-common.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Common Primitives for Data Access Monitoring
+ * Common Code for Data Access Monitoring
  *
  * Author: SeongJae Park <sj@kernel.org>
  */
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index e8464f7e0014..4102a8c5f992 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * DAMON Primitives for The Physical Address Space
+ * DAMON Code for The Physical Address Space
  *
  * Author: SeongJae Park <sj@kernel.org>
  */
diff --git a/mm/damon/sysfs-common.c b/mm/damon/sysfs-common.c
index 70edf45c2174..ffaf285e241a 100644
--- a/mm/damon/sysfs-common.c
+++ b/mm/damon/sysfs-common.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Common Primitives for DAMON Sysfs Interface
+ * Common Code for DAMON Sysfs Interface
  *
  * Author: SeongJae Park <sj@kernel.org>
  */
diff --git a/mm/damon/sysfs-common.h b/mm/damon/sysfs-common.h
index 70d84bdc9f5f..2099adee11d0 100644
--- a/mm/damon/sysfs-common.h
+++ b/mm/damon/sysfs-common.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Common Primitives for DAMON Sysfs Interface
+ * Common Code for DAMON Sysfs Interface
  *
  * Author: SeongJae Park <sj@kernel.org>
  */
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index e6d99106a7f9..46554e49a478 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * DAMON Primitives for Virtual Address Spaces
+ * DAMON Code for Virtual Address Spaces
  *
  * Author: SeongJae Park <sj@kernel.org>
  */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f0b1d53079f9..8746ed2fec13 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -121,7 +121,7 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
 static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
 static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
 static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
-		unsigned long start, unsigned long end);
+		unsigned long start, unsigned long end, bool take_locks);
 static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
 
 static void hugetlb_free_folio(struct folio *folio)
@@ -5426,26 +5426,40 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
 {
 	if (addr & ~(huge_page_mask(hstate_vma(vma))))
 		return -EINVAL;
+	return 0;
+}
 
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr)
+{
 	/*
 	 * PMD sharing is only possible for PUD_SIZE-aligned address ranges
 	 * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
 	 * split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
+	 * This function is called in the middle of a VMA split operation, with
+	 * MM, VMA and rmap all write-locked to prevent concurrent page table
+	 * walks (except hardware and gup_fast()).
 	 */
+	vma_assert_write_locked(vma);
+	i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+
 	if (addr & ~PUD_MASK) {
-		/*
-		 * hugetlb_vm_op_split is called right before we attempt to
-		 * split the VMA. We will need to unshare PMDs in the old and
-		 * new VMAs, so let's unshare before we split.
-		 */
 		unsigned long floor = addr & PUD_MASK;
 		unsigned long ceil = floor + PUD_SIZE;
 
-		if (floor >= vma->vm_start && ceil <= vma->vm_end)
-			hugetlb_unshare_pmds(vma, floor, ceil);
+		if (floor >= vma->vm_start && ceil <= vma->vm_end) {
+			/*
+			 * Locking:
+			 * Use take_locks=false here.
+			 * The file rmap lock is already held.
+			 * The hugetlb VMA lock can't be taken when we already
+			 * hold the file rmap lock, and we don't need it because
+			 * its purpose is to synchronize against concurrent page
+			 * table walks, which are not possible thanks to the
+			 * locks held by our caller.
+			 */
+			hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false);
+		}
 	}
-
-	return 0;
 }
 
 static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
@@ -7615,6 +7629,13 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
 		return 0;
 	pud_clear(pud);
+	/*
+	 * Once our caller drops the rmap lock, some other process might be
+	 * using this page table as a normal, non-hugetlb page table.
+	 * Wait for pending gup_fast() in other threads to finish before letting
+	 * that happen.
+	 */
+	tlb_remove_table_sync_one();
 	ptdesc_pmd_pts_dec(virt_to_ptdesc(ptep));
 	mm_dec_nr_pmds(mm);
 	return 1;
 }
@@ -7885,9 +7906,16 @@ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason)
 		spin_unlock_irq(&hugetlb_lock);
 }
 
+/*
+ * If @take_locks is false, the caller must ensure that no concurrent page table
+ * access can happen (except for gup_fast() and hardware page walks).
+ * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like
+ * concurrent page fault handling) and the file rmap lock.
+ */
 static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 				unsigned long start,
-				unsigned long end)
+				unsigned long end,
+				bool take_locks)
 {
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
@@ -7911,8 +7939,12 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm,
 				start, end);
 	mmu_notifier_invalidate_range_start(&range);
-	hugetlb_vma_lock_write(vma);
-	i_mmap_lock_write(vma->vm_file->f_mapping);
+	if (take_locks) {
+		hugetlb_vma_lock_write(vma);
+		i_mmap_lock_write(vma->vm_file->f_mapping);
+	} else {
+		i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+	}
 	for (address = start; address < end; address += PUD_SIZE) {
 		ptep = hugetlb_walk(vma, address, sz);
 		if (!ptep)
@@ -7922,8 +7954,10 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 		spin_unlock(ptl);
 	}
 	flush_hugetlb_tlb_range(vma, start, end);
-	i_mmap_unlock_write(vma->vm_file->f_mapping);
-	hugetlb_vma_unlock_write(vma);
+	if (take_locks) {
+		i_mmap_unlock_write(vma->vm_file->f_mapping);
+		hugetlb_vma_unlock_write(vma);
+	}
 	/*
 	 * No need to call mmu_notifier_arch_invalidate_secondary_tlbs(), see
 	 * Documentation/mm/mmu_notifier.rst.
@@ -7938,7 +7972,8 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
 {
 	hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
-			ALIGN_DOWN(vma->vm_end, PUD_SIZE));
+			ALIGN_DOWN(vma->vm_end, PUD_SIZE),
+			/* take_locks = */ true);
 }
 
 /*
diff --git a/mm/kmsan/kmsan_test.c b/mm/kmsan/kmsan_test.c
index 9733a22c46c1..c6c5b2bbede0 100644
--- a/mm/kmsan/kmsan_test.c
+++ b/mm/kmsan/kmsan_test.c
@@ -732,3 +732,4 @@ kunit_test_suites(&kmsan_test_suite);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Alexander Potapenko <glider@google.com>");
+MODULE_DESCRIPTION("Test cases for KMSAN");
diff --git a/mm/madvise.c b/mm/madvise.c
index 8433ac9b27e0..5f7a66a1617e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1881,7 +1881,9 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
 			/* Drop and reacquire lock to unwind race. */
 			madvise_finish_tlb(&madv_behavior);
 			madvise_unlock(mm, behavior);
-			madvise_lock(mm, behavior);
+			ret = madvise_lock(mm, behavior);
+			if (ret)
+				goto out;
 			madvise_init_tlb(&madv_behavior, mm);
 			continue;
 		}
@@ -1892,6 +1894,7 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
 	madvise_finish_tlb(&madv_behavior);
 	madvise_unlock(mm, behavior);
 
+out:
 	ret = (total_len - iov_iter_count(iter)) ? : ret;
 
 	return ret;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 72fd72e156b1..3b1dfd08338b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -3708,15 +3708,13 @@ static void wi_state_free(void)
 			lockdep_is_held(&wi_state_lock));
 	if (!old_wi_state) {
 		mutex_unlock(&wi_state_lock);
-		goto out;
+		return;
 	}
 
 	rcu_assign_pointer(wi_state, NULL);
 	mutex_unlock(&wi_state_lock);
 	synchronize_rcu();
 	kfree(old_wi_state);
-out:
-	kfree(&wi_group->wi_kobj);
 }
 
 static struct kobj_attribute wi_auto_attr =
diff --git a/mm/mremap.c b/mm/mremap.c
index 83e359754961..60f6b8d0d5f0 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -237,6 +237,8 @@ static int move_ptes(struct pagetable_move_control *pmc,
 
 	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
 				   new_pte++, new_addr += PAGE_SIZE) {
+		VM_WARN_ON_ONCE(!pte_none(*new_pte));
+
 		if (pte_none(ptep_get(old_pte)))
 			continue;
 
diff --git a/mm/vma.c b/mm/vma.c
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -169,6 +169,9 @@ static void init_multi_vma_prep(struct vma_prepare *vp,
 	vp->file = vma->vm_file;
 	if (vp->file)
 		vp->mapping = vma->vm_file->f_mapping;
+
+	if (vmg && vmg->skip_vma_uprobe)
+		vp->skip_vma_uprobe = true;
 }
 
 /*
@@ -358,10 +361,13 @@ static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi,
 
 	if (vp->file) {
 		i_mmap_unlock_write(vp->mapping);
-		uprobe_mmap(vp->vma);
 
-		if (vp->adj_next)
-			uprobe_mmap(vp->adj_next);
+		if (!vp->skip_vma_uprobe) {
+			uprobe_mmap(vp->vma);
+
+			if (vp->adj_next)
+				uprobe_mmap(vp->adj_next);
+		}
 	}
 
 	if (vp->remove) {
@@ -539,7 +545,14 @@ __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	init_vma_prep(&vp, vma);
 	vp.insert = new;
 	vma_prepare(&vp);
+
+	/*
+	 * Get rid of huge pages and shared page tables straddling the split
+	 * boundary.
+	 */
 	vma_adjust_trans_huge(vma, vma->vm_start, addr, NULL);
+	if (is_vm_hugetlb_page(vma))
+		hugetlb_split(vma, addr);
 
 	if (new_below) {
 		vma->vm_start = addr;
@@ -1823,6 +1836,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		faulted_in_anon_vma = false;
 	}
 
+	/*
+	 * If the VMA we are copying might contain a uprobe PTE, ensure
+	 * that we do not establish one upon merge. Otherwise, when mremap()
+	 * moves page tables, it will orphan the newly created PTE.
+	 */
+	if (vma->vm_file)
+		vmg.skip_vma_uprobe = true;
+
 	new_vma = find_vma_prev(mm, addr, &vmg.prev);
 	if (new_vma && new_vma->vm_start < addr + len)
 		return NULL;	/* should never get here */
diff --git a/mm/vma.h b/mm/vma.h
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -19,6 +19,8 @@ struct vma_prepare {
 	struct vm_area_struct *insert;
 	struct vm_area_struct *remove;
 	struct vm_area_struct *remove2;
+
+	bool skip_vma_uprobe :1;
 };
 
 struct unlink_vma_file_batch {
@@ -120,6 +122,11 @@ struct vma_merge_struct {
 	 */
 	bool give_up_on_oom :1;
 
+	/*
+	 * If set, skip uprobe_mmap upon merged vma.
+	 */
+	bool skip_vma_uprobe :1;
+
 	/* Internal flags set during merge process: */
 
 	/*
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6f740f070b3d..429ae5339bfe 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1201,7 +1201,6 @@ const char * const vmstat_text[] = {
 	"nr_zone_unevictable",
 	"nr_zone_write_pending",
 	"nr_mlock",
-	"nr_bounce",
 #if IS_ENABLED(CONFIG_ZSMALLOC)
 	"nr_zspages",
 #endif