author		Kiryl Shutsemau <kas@kernel.org>	2025-09-23 12:07:07 +0100
committer	Andrew Morton <akpm@linux-foundation.org>	2025-09-28 11:51:30 -0700
commit		a2880202767daded2898f62265f6cdf4cfb53bc4 (patch)
tree		ea9d0ce6d52ac9b4a681c83670809ba1d263179c
parent		2db579838296239545554443234fafb8f485cca0 (diff)
mm/rmap: fix a mlock race condition in folio_referenced_one()
The mlock_vma_folio() function requires the page table lock to be held
in order to safely mlock the folio. However, folio_referenced_one()
mlocks large folios outside of the page_vma_mapped_walk() loop, where
the page table lock has already been dropped.

Rework the mlock logic to use the same code path inside the loop for
both large and small folios. Use PVMW_PGTABLE_CROSSED to detect when
the folio is mapped across a page table boundary.

[akpm@linux-foundation.org: s/CROSSSED/CROSSED/]
Link: https://lkml.kernel.org/r/20250923110711.690639-3-kirill@shutemov.name
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
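For orientation, here is a condensed sketch of the pre-patch control flow
(illustrative only, not verbatim kernel code; the identifiers are the ones
used in mm/rmap.c). page_vma_mapped_walk() drops the page table lock once
the walk completes, so the deferred mlock of a fully mapped large folio ran
with no PTL held:

	while (page_vma_mapped_walk(&pvmw)) {
		/* PTL held here; references are counted, and the mlock of a
		 * fully mapped large folio is deferred until after the loop. */
	}
	/* page_vma_mapped_walk() has dropped the PTL by this point. */
	if ((vma->vm_flags & VM_LOCKED) && folio_test_large(folio) &&
	    folio_within_vma(folio, vma)) {
		mlock_vma_folio(folio, vma);	/* racy: requires the PTL */
	}

The fix below moves the mlock back under the walk's page table lock and
simply bails out, leaving the folio to a later pass, whenever the lock held
at that point cannot cover every PTE of the folio.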
-rw-r--r--	mm/rmap.c	57
1 file changed, 20 insertions(+), 37 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
index 34333ae3bd80..d174168b8f93 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -850,34 +850,34 @@ static bool folio_referenced_one(struct folio *folio,
 {
 	struct folio_referenced_arg *pra = arg;
 	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
-	int referenced = 0;
-	unsigned long start = address, ptes = 0;
+	int ptes = 0, referenced = 0;
 
 	while (page_vma_mapped_walk(&pvmw)) {
 		address = pvmw.address;
 
 		if (vma->vm_flags & VM_LOCKED) {
-			if (!folio_test_large(folio) || !pvmw.pte) {
-				/* Restore the mlock which got missed */
-				mlock_vma_folio(folio, vma);
-				page_vma_mapped_walk_done(&pvmw);
-				pra->vm_flags |= VM_LOCKED;
-				return false; /* To break the loop */
-			}
+			ptes++;
+			pra->mapcount--;
+
+			/* Only mlock fully mapped pages */
+			if (pvmw.pte && ptes != pvmw.nr_pages)
+				continue;
+
 			/*
-			 * For large folio fully mapped to VMA, will
-			 * be handled after the pvmw loop.
+			 * All PTEs must be protected by the page table lock
+			 * in order to mlock the page.
 			 *
-			 * For large folio cross VMA boundaries, it's
-			 * expected to be picked by page reclaim. But
-			 * should skip reference of pages which are in
-			 * the range of VM_LOCKED vma. As page reclaim
-			 * should just count the reference of pages out
-			 * the range of VM_LOCKED vma.
+			 * If a page table boundary has been crossed, the
+			 * current ptl only protects part of the PTEs.
 			 */
-			ptes++;
-			pra->mapcount--;
-			continue;
+			if (pvmw.flags & PVMW_PGTABLE_CROSSED)
+				continue;
+
+			/* Restore the mlock which got missed */
+			mlock_vma_folio(folio, vma);
+			page_vma_mapped_walk_done(&pvmw);
+			pra->vm_flags |= VM_LOCKED;
+			return false; /* To break the loop */
 		}
 
 		/*
@@ -913,23 +913,6 @@ static bool folio_referenced_one(struct folio *folio,
 		pra->mapcount--;
 	}
 
-	if ((vma->vm_flags & VM_LOCKED) &&
-	    folio_test_large(folio) &&
-	    folio_within_vma(folio, vma)) {
-		unsigned long s_align, e_align;
-
-		s_align = ALIGN_DOWN(start, PMD_SIZE);
-		e_align = ALIGN_DOWN(start + folio_size(folio) - 1, PMD_SIZE);
-
-		/* folio doesn't cross page table boundary and fully mapped */
-		if ((s_align == e_align) && (ptes == folio_nr_pages(folio))) {
-			/* Restore the mlock which got missed */
-			mlock_vma_folio(folio, vma);
-			pra->vm_flags |= VM_LOCKED;
-			return false; /* To break the loop */
-		}
-	}
-
 	if (referenced)
 		folio_clear_idle(folio);
 	if (folio_test_clear_young(folio))
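The deleted s_align/e_align arithmetic open-coded the condition that
page_vma_mapped_walk() now reports via PVMW_PGTABLE_CROSSED: a mapping spans
two page tables when its first and last bytes round down to different
PMD-sized regions. A minimal standalone sketch of that predicate, assuming a
2 MiB PMD_SIZE as on x86-64 (the addresses in main() are illustrative):

	#include <stdbool.h>
	#include <stdio.h>

	#define PMD_SIZE		(1UL << 21)	/* assumed: 2 MiB, as on x86-64 */
	#define ALIGN_DOWN(x, a)	((x) & ~((unsigned long)(a) - 1))

	/* True when [start, start + size) spans more than one page table. */
	static bool crosses_pgtable(unsigned long start, unsigned long size)
	{
		unsigned long s_align = ALIGN_DOWN(start, PMD_SIZE);
		unsigned long e_align = ALIGN_DOWN(start + size - 1, PMD_SIZE);

		return s_align != e_align;
	}

	int main(void)
	{
		/* A 64 KiB folio mapped 32 KiB below a 2 MiB boundary crosses it. */
		printf("%d\n", crosses_pgtable(0x200000 - 0x8000, 0x10000));	/* 1 */
		/* The same folio mapped exactly at the boundary does not. */
		printf("%d\n", crosses_pgtable(0x200000, 0x10000));		/* 0 */
		return 0;
	}

Letting the walker flag the crossing is the tidier design: the page table
lock it holds only ever covers one page table, so "crossed a boundary" is
exactly "the current ptl does not protect all of the folio's PTEs".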