summaryrefslogtreecommitdiff
path: root/mm/huge_memory.c
diff options
context:
space:
mode:
authorAlistair Popple <apopple@nvidia.com>2025-02-28 14:31:11 +1100
committerAndrew Morton <akpm@linux-foundation.org>2025-03-17 22:06:40 -0700
commit6c88f72691f88fd1fc493a3c2eed97f91b82b3f0 (patch)
treea54f9b1ae5b6aa5169d90a103c5f5f0728a27d7c /mm/huge_memory.c
parentdbe54153296d0931144a63295fc9367794bbe797 (diff)
mm/huge_memory: add vmf_insert_folio_pmd()
Currently DAX folio/page reference counts are managed differently to normal pages. To allow these to be managed the same as normal pages introduce vmf_insert_folio_pmd. This will map the entire PMD-sized folio and take references as it would for a normally mapped page. This is distinct from the current mechanism, vmf_insert_pfn_pmd, which simply inserts a special devmap PMD entry into the page table without holding a reference to the page for the mapping. It is not currently useful to implement a more generic vmf_insert_folio() which selects the correct behaviour based on folio_order(). This is because PTE faults require only a subpage of the folio to be PTE mapped rather than the entire folio. It would be possible to add this context somewhere but callers already need to handle PTE faults and PMD faults separately so a more generic function is not useful. Link: https://lkml.kernel.org/r/7bf92a2e68225d13ea368d53bbfee327314d1c40.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple <apopple@nvidia.com> Acked-by: David Hildenbrand <david@redhat.com> Tested-by: Alison Schofield <alison.schofield@intel.com> Cc: Alexander Gordeev <agordeev@linux.ibm.com> Cc: Asahi Lina <lina@asahilina.net> Cc: Balbir Singh <balbirs@nvidia.com> Cc: Bjorn Helgaas <bhelgaas@google.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christian Borntraeger <borntraeger@linux.ibm.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Chunyan Zhang <zhang.lyra@gmail.com> Cc: Dan Wiliams <dan.j.williams@intel.com> Cc: "Darrick J. Wong" <djwong@kernel.org> Cc: Dave Chinner <david@fromorbit.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Dave Jiang <dave.jiang@intel.com> Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Ira Weiny <ira.weiny@intel.com> Cc: Jan Kara <jack@suse.cz> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: John Hubbard <jhubbard@nvidia.com> Cc: linmiaohe <linmiaohe@huawei.com> Cc: Logan Gunthorpe <logang@deltatee.com> Cc: Matthew Wilcow (Oracle) <willy@infradead.org> Cc: Michael "Camp Drill Sergeant" Ellerman <mpe@ellerman.id.au> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Peter Xu <peterx@redhat.com> Cc: Sven Schnelle <svens@linux.ibm.com> Cc: Ted Ts'o <tytso@mit.edu> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Vishal Verma <vishal.l.verma@intel.com> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: WANG Xuerui <kernel@xen0n.name> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--mm/huge_memory.c65
1 files changed, 53 insertions, 12 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4cd79784f841..e6f4189c1c94 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1375,20 +1375,20 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
return __do_huge_pmd_anonymous_page(vmf);
}
-static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+static int insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd, pfn_t pfn, pgprot_t prot, bool write,
pgtable_t pgtable)
{
struct mm_struct *mm = vma->vm_mm;
pmd_t entry;
- spinlock_t *ptl;
- ptl = pmd_lock(mm, pmd);
+ lockdep_assert_held(pmd_lockptr(mm, pmd));
+
if (!pmd_none(*pmd)) {
if (write) {
if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) {
WARN_ON_ONCE(!is_huge_zero_pmd(*pmd));
- goto out_unlock;
+ return -EEXIST;
}
entry = pmd_mkyoung(*pmd);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
@@ -1396,7 +1396,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
update_mmu_cache_pmd(vma, addr, pmd);
}
- goto out_unlock;
+ return -EEXIST;
}
entry = pmd_mkhuge(pfn_t_pmd(pfn, prot));
@@ -1412,16 +1412,11 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
if (pgtable) {
pgtable_trans_huge_deposit(mm, pmd, pgtable);
mm_inc_nr_ptes(mm);
- pgtable = NULL;
}
set_pmd_at(mm, addr, pmd, entry);
update_mmu_cache_pmd(vma, addr, pmd);
-
-out_unlock:
- spin_unlock(ptl);
- if (pgtable)
- pte_free(mm, pgtable);
+ return 0;
}
/**
@@ -1440,6 +1435,8 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write)
struct vm_area_struct *vma = vmf->vma;
pgprot_t pgprot = vma->vm_page_prot;
pgtable_t pgtable = NULL;
+ spinlock_t *ptl;
+ int error;
/*
* If we had pmd_special, we could avoid all these restrictions,
@@ -1462,12 +1459,56 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write)
}
track_pfn_insert(vma, &pgprot, pfn);
+ ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+ error = insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write,
+ pgtable);
+ spin_unlock(ptl);
+ if (error && pgtable)
+ pte_free(vma->vm_mm, pgtable);
- insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
return VM_FAULT_NOPAGE;
}
EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
+vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio,
+ bool write)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ unsigned long addr = vmf->address & PMD_MASK;
+ struct mm_struct *mm = vma->vm_mm;
+ spinlock_t *ptl;
+ pgtable_t pgtable = NULL;
+ int error;
+
+ if (addr < vma->vm_start || addr >= vma->vm_end)
+ return VM_FAULT_SIGBUS;
+
+ if (WARN_ON_ONCE(folio_order(folio) != PMD_ORDER))
+ return VM_FAULT_SIGBUS;
+
+ if (arch_needs_pgtable_deposit()) {
+ pgtable = pte_alloc_one(vma->vm_mm);
+ if (!pgtable)
+ return VM_FAULT_OOM;
+ }
+
+ ptl = pmd_lock(mm, vmf->pmd);
+ if (pmd_none(*vmf->pmd)) {
+ folio_get(folio);
+ folio_add_file_rmap_pmd(folio, &folio->page, vma);
+ add_mm_counter(mm, mm_counter_file(folio), HPAGE_PMD_NR);
+ }
+ error = insert_pfn_pmd(vma, addr, vmf->pmd,
+ pfn_to_pfn_t(folio_pfn(folio)), vma->vm_page_prot,
+ write, pgtable);
+ spin_unlock(ptl);
+ if (error && pgtable)
+ pte_free(mm, pgtable);
+
+ return VM_FAULT_NOPAGE;
+}
+EXPORT_SYMBOL_GPL(vmf_insert_folio_pmd);
+
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
{