summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBarry Song <v-songbaohua@oppo.com>2025-05-09 10:09:12 +1200
committerAndrew Morton <akpm@linux-foundation.org>2025-05-11 17:29:55 -0700
commit75cb1cca2c880179a11c7dd9380b6f14e41a06a4 (patch)
tree2321e15327932be58750b91f008ccc42104f3147
parent02f5bf89f0b0a50f821425932a3590eeb9f193ac (diff)
mm: userfaultfd: correct dirty flags set for both present and swap pte
As David pointed out, what truly matters for mremap and userfaultfd move operations is the soft dirty bit. The current comment and implementation—which always sets the dirty bit for present PTEs and fails to set the soft dirty bit for swap PTEs—are incorrect. This could break features like Checkpoint-Restore in Userspace (CRIU). This patch updates the behavior to correctly set the soft dirty bit for both present and swap PTEs in accordance with mremap. Link: https://lkml.kernel.org/r/20250508220912.7275-1-21cnbao@gmail.com Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Signed-off-by: Barry Song <v-songbaohua@oppo.com> Reported-by: David Hildenbrand <david@redhat.com> Closes: https://lore.kernel.org/linux-mm/02f14ee1-923f-47e3-a994-4950afb9afcc@redhat.com/ Acked-by: Peter Xu <peterx@redhat.com> Reviewed-by: Suren Baghdasaryan <surenb@google.com> Cc: Lokesh Gidra <lokeshgidra@google.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-rw-r--r--mm/userfaultfd.c12
1 files changed, 10 insertions, 2 deletions
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 7d5d709cc838..e0db855c89b4 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1064,8 +1064,13 @@ static int move_present_pte(struct mm_struct *mm,
src_folio->index = linear_page_index(dst_vma, dst_addr);
orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot);
- /* Follow mremap() behavior and treat the entry dirty after the move */
- orig_dst_pte = pte_mkwrite(pte_mkdirty(orig_dst_pte), dst_vma);
+ /* Set soft dirty bit so userspace can notice the pte was moved */
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ orig_dst_pte = pte_mksoft_dirty(orig_dst_pte);
+#endif
+ if (pte_dirty(orig_src_pte))
+ orig_dst_pte = pte_mkdirty(orig_dst_pte);
+ orig_dst_pte = pte_mkwrite(orig_dst_pte, dst_vma);
set_pte_at(mm, dst_addr, dst_pte, orig_dst_pte);
out:
@@ -1100,6 +1105,9 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma,
}
orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte);
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte);
+#endif
set_pte_at(mm, dst_addr, dst_pte, orig_src_pte);
double_pt_unlock(dst_ptl, src_ptl);