summary | refs | log | tree | commit | diff
path: root/arch/x86/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- arch/x86/mm/dump_pagetables.c | 71
-rw-r--r-- arch/x86/mm/init_64.c | 15
-rw-r--r-- arch/x86/mm/ioremap.c | 7
-rw-r--r-- arch/x86/mm/pat/memtype.c | 194
-rw-r--r-- arch/x86/mm/pat/memtype_interval.c | 63
-rw-r--r-- arch/x86/mm/pat/set_memory.c | 13
-rw-r--r-- arch/x86/mm/pgtable.c | 9
7 files changed, 130 insertions, 242 deletions
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 89079ea73e65..a4700ef6eb64 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -266,6 +266,32 @@ static void effective_prot(struct ptdump_state *pt_st, int level, u64 val)
st->prot_levels[level] = effective;
}
+static void effective_prot_pte(struct ptdump_state *st, pte_t pte)
+{
+ effective_prot(st, 4, pte_val(pte));
+}
+
+static void effective_prot_pmd(struct ptdump_state *st, pmd_t pmd)
+{
+ effective_prot(st, 3, pmd_val(pmd));
+}
+
+static void effective_prot_pud(struct ptdump_state *st, pud_t pud)
+{
+ effective_prot(st, 2, pud_val(pud));
+}
+
+static void effective_prot_p4d(struct ptdump_state *st, p4d_t p4d)
+{
+ effective_prot(st, 1, p4d_val(p4d));
+}
+
+static void effective_prot_pgd(struct ptdump_state *st, pgd_t pgd)
+{
+ effective_prot(st, 0, pgd_val(pgd));
+}
+
+
/*
* This function gets called on a break in a continuous series
* of PTE entries; the next one is different so we need to
@@ -362,6 +388,38 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
}
}
+static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte)
+{
+ note_page(pt_st, addr, 4, pte_val(pte));
+}
+
+static void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd)
+{
+ note_page(pt_st, addr, 3, pmd_val(pmd));
+}
+
+static void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud)
+{
+ note_page(pt_st, addr, 2, pud_val(pud));
+}
+
+static void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d)
+{
+ note_page(pt_st, addr, 1, p4d_val(p4d));
+}
+
+static void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd)
+{
+ note_page(pt_st, addr, 0, pgd_val(pgd));
+}
+
+static void note_page_flush(struct ptdump_state *pt_st)
+{
+ pte_t pte_zero = {0};
+
+ note_page(pt_st, 0, -1, pte_val(pte_zero));
+}
+
bool ptdump_walk_pgd_level_core(struct seq_file *m,
struct mm_struct *mm, pgd_t *pgd,
bool checkwx, bool dmesg)
@@ -378,8 +436,17 @@ bool ptdump_walk_pgd_level_core(struct seq_file *m,
struct pg_state st = {
.ptdump = {
- .note_page = note_page,
- .effective_prot = effective_prot,
+ .note_page_pte = note_page_pte,
+ .note_page_pmd = note_page_pmd,
+ .note_page_pud = note_page_pud,
+ .note_page_p4d = note_page_p4d,
+ .note_page_pgd = note_page_pgd,
+ .note_page_flush = note_page_flush,
+ .effective_prot_pte = effective_prot_pte,
+ .effective_prot_pmd = effective_prot_pmd,
+ .effective_prot_pud = effective_prot_pud,
+ .effective_prot_p4d = effective_prot_p4d,
+ .effective_prot_pgd = effective_prot_pgd,
.range = ptdump_ranges
},
.level = -1,
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 66330fe4e18c..ee66fae9ebcc 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1467,16 +1467,21 @@ static unsigned long probe_memory_block_size(void)
}
/*
- * Use max block size to minimize overhead on bare metal, where
- * alignment for memory hotplug isn't a concern.
+ * When hotplug alignment is not a concern, maximize the block size
+ * to minimize overhead. Otherwise, align to the lesser of the advised
+ * alignment and the end-of-memory alignment.
*/
- if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ bz = memory_block_advised_max_size();
+ if (!bz) {
bz = MAX_BLOCK_SIZE;
- goto done;
+ if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+ goto done;
+ } else {
+ bz = max(min(bz, MAX_BLOCK_SIZE), MIN_MEMORY_BLOCK_SIZE);
}
/* Find the largest allowed block size that aligns to memory end */
- for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) {
+ for (; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) {
if (IS_ALIGNED(boot_mem_end, bz))
break;
}
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 331e101bf801..12c8180ca1ba 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -71,7 +71,7 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,
static unsigned int __ioremap_check_ram(struct resource *res)
{
unsigned long start_pfn, stop_pfn;
- unsigned long i;
+ unsigned long pfn;
if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM)
return 0;
@@ -79,9 +79,8 @@ static unsigned int __ioremap_check_ram(struct resource *res)
start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT;
stop_pfn = (res->end + 1) >> PAGE_SHIFT;
if (stop_pfn > start_pfn) {
- for (i = 0; i < (stop_pfn - start_pfn); ++i)
- if (pfn_valid(start_pfn + i) &&
- !PageReserved(pfn_to_page(start_pfn + i)))
+ for_each_valid_pfn(pfn, start_pfn, stop_pfn)
+ if (!PageReserved(pfn_to_page(pfn)))
return IORES_MAP_SYSTEM_RAM;
}
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index c97b527c66fe..2e7923844afe 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -775,6 +775,12 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
return vma_prot;
}
+static inline void pgprot_set_cachemode(pgprot_t *prot, enum page_cache_mode pcm)
+{
+ *prot = __pgprot((pgprot_val(*prot) & ~_PAGE_CACHE_MASK) |
+ cachemode2protval(pcm));
+}
+
int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t *vma_prot)
{
@@ -789,8 +795,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
if (file->f_flags & O_DSYNC)
pcm = _PAGE_CACHE_MODE_UC_MINUS;
- *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
- cachemode2protval(pcm));
+ pgprot_set_cachemode(vma_prot, pcm);
return 1;
}
@@ -831,8 +836,7 @@ int memtype_kernel_map_sync(u64 base, unsigned long size,
* Reserved non RAM regions only and after successful memtype_reserve,
* this func also keeps identity mapping (if any) in sync with this new prot.
*/
-static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
- int strict_prot)
+static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot)
{
int is_ram = 0;
int ret;
@@ -858,9 +862,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
(unsigned long long)paddr,
(unsigned long long)(paddr + size - 1),
cattr_name(pcm));
- *vma_prot = __pgprot((pgprot_val(*vma_prot) &
- (~_PAGE_CACHE_MASK)) |
- cachemode2protval(pcm));
+ pgprot_set_cachemode(vma_prot, pcm);
}
return 0;
}
@@ -870,8 +872,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
return ret;
if (pcm != want_pcm) {
- if (strict_prot ||
- !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
+ if (!is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
memtype_free(paddr, paddr + size);
pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n",
current->comm, current->pid,
@@ -881,13 +882,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
cattr_name(pcm));
return -EINVAL;
}
- /*
- * We allow returning different type than the one requested in
- * non strict case.
- */
- *vma_prot = __pgprot((pgprot_val(*vma_prot) &
- (~_PAGE_CACHE_MASK)) |
- cachemode2protval(pcm));
+ pgprot_set_cachemode(vma_prot, pcm);
}
if (memtype_kernel_map_sync(paddr, size, pcm) < 0) {
@@ -910,124 +905,14 @@ static void free_pfn_range(u64 paddr, unsigned long size)
memtype_free(paddr, paddr + size);
}
-static int follow_phys(struct vm_area_struct *vma, unsigned long *prot,
- resource_size_t *phys)
-{
- struct follow_pfnmap_args args = { .vma = vma, .address = vma->vm_start };
-
- if (follow_pfnmap_start(&args))
- return -EINVAL;
-
- /* Never return PFNs of anon folios in COW mappings. */
- if (!args.special) {
- follow_pfnmap_end(&args);
- return -EINVAL;
- }
-
- *prot = pgprot_val(args.pgprot);
- *phys = (resource_size_t)args.pfn << PAGE_SHIFT;
- follow_pfnmap_end(&args);
- return 0;
-}
-
-static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
- pgprot_t *pgprot)
-{
- unsigned long prot;
-
- VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT));
-
- /*
- * We need the starting PFN and cachemode used for track_pfn_remap()
- * that covered the whole VMA. For most mappings, we can obtain that
- * information from the page tables. For COW mappings, we might now
- * suddenly have anon folios mapped and follow_phys() will fail.
- *
- * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to
- * detect the PFN. If we need the cachemode as well, we're out of luck
- * for now and have to fail fork().
- */
- if (!follow_phys(vma, &prot, paddr)) {
- if (pgprot)
- *pgprot = __pgprot(prot);
- return 0;
- }
- if (is_cow_mapping(vma->vm_flags)) {
- if (pgprot)
- return -EINVAL;
- *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
- return 0;
- }
- WARN_ON_ONCE(1);
- return -EINVAL;
-}
-
-int track_pfn_copy(struct vm_area_struct *dst_vma,
- struct vm_area_struct *src_vma, unsigned long *pfn)
-{
- const unsigned long vma_size = src_vma->vm_end - src_vma->vm_start;
- resource_size_t paddr;
- pgprot_t pgprot;
- int rc;
-
- if (!(src_vma->vm_flags & VM_PAT))
- return 0;
-
- /*
- * Duplicate the PAT information for the dst VMA based on the src
- * VMA.
- */
- if (get_pat_info(src_vma, &paddr, &pgprot))
- return -EINVAL;
- rc = reserve_pfn_range(paddr, vma_size, &pgprot, 1);
- if (rc)
- return rc;
-
- /* Reservation for the destination VMA succeeded. */
- vm_flags_set(dst_vma, VM_PAT);
- *pfn = PHYS_PFN(paddr);
- return 0;
-}
-
-void untrack_pfn_copy(struct vm_area_struct *dst_vma, unsigned long pfn)
-{
- untrack_pfn(dst_vma, pfn, dst_vma->vm_end - dst_vma->vm_start, true);
- /*
- * Reservation was freed, any copied page tables will get cleaned
- * up later, but without getting PAT involved again.
- */
-}
-
-/*
- * prot is passed in as a parameter for the new mapping. If the vma has
- * a linear pfn mapping for the entire range, or no vma is provided,
- * reserve the entire pfn + size range with single reserve_pfn_range
- * call.
- */
-int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
- unsigned long pfn, unsigned long addr, unsigned long size)
+int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, pgprot_t *prot)
{
resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
enum page_cache_mode pcm;
- /* reserve the whole chunk starting from paddr */
- if (!vma || (addr == vma->vm_start
- && size == (vma->vm_end - vma->vm_start))) {
- int ret;
-
- ret = reserve_pfn_range(paddr, size, prot, 0);
- if (ret == 0 && vma)
- vm_flags_set(vma, VM_PAT);
- return ret;
- }
-
if (!pat_enabled())
return 0;
- /*
- * For anything smaller than the vma size we set prot based on the
- * lookup.
- */
pcm = lookup_memtype(paddr);
/* Check memtype for the remaining pages */
@@ -1038,70 +923,35 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
return -EINVAL;
}
- *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
- cachemode2protval(pcm));
-
+ pgprot_set_cachemode(prot, pcm);
return 0;
}
-void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn)
+int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot)
{
- enum page_cache_mode pcm;
+ const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
- if (!pat_enabled())
- return;
-
- /* Set prot based on lookup */
- pcm = lookup_memtype(pfn_t_to_phys(pfn));
- *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
- cachemode2protval(pcm));
+ return reserve_pfn_range(paddr, size, prot);
}
-/*
- * untrack_pfn is called while unmapping a pfnmap for a region.
- * untrack can be called for a specific region indicated by pfn and size or
- * can be for the entire vma (in which case pfn, size are zero).
- */
-void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
- unsigned long size, bool mm_wr_locked)
+void pfnmap_untrack(unsigned long pfn, unsigned long size)
{
- resource_size_t paddr;
-
- if (vma && !(vma->vm_flags & VM_PAT))
- return;
+ const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
- /* free the chunk starting from pfn or the whole chunk */
- paddr = (resource_size_t)pfn << PAGE_SHIFT;
- if (!paddr && !size) {
- if (get_pat_info(vma, &paddr, NULL))
- return;
- size = vma->vm_end - vma->vm_start;
- }
free_pfn_range(paddr, size);
- if (vma) {
- if (mm_wr_locked)
- vm_flags_clear(vma, VM_PAT);
- else
- __vm_flags_mod(vma, 0, VM_PAT);
- }
-}
-
-void untrack_pfn_clear(struct vm_area_struct *vma)
-{
- vm_flags_clear(vma, VM_PAT);
}
pgprot_t pgprot_writecombine(pgprot_t prot)
{
- return __pgprot(pgprot_val(prot) |
- cachemode2protval(_PAGE_CACHE_MODE_WC));
+ pgprot_set_cachemode(&prot, _PAGE_CACHE_MODE_WC);
+ return prot;
}
EXPORT_SYMBOL_GPL(pgprot_writecombine);
pgprot_t pgprot_writethrough(pgprot_t prot)
{
- return __pgprot(pgprot_val(prot) |
- cachemode2protval(_PAGE_CACHE_MODE_WT));
+ pgprot_set_cachemode(&prot, _PAGE_CACHE_MODE_WT);
+ return prot;
}
EXPORT_SYMBOL_GPL(pgprot_writethrough);
diff --git a/arch/x86/mm/pat/memtype_interval.c b/arch/x86/mm/pat/memtype_interval.c
index 645613d59942..e5844ed1311e 100644
--- a/arch/x86/mm/pat/memtype_interval.c
+++ b/arch/x86/mm/pat/memtype_interval.c
@@ -49,32 +49,6 @@ INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;
-enum {
- MEMTYPE_EXACT_MATCH = 0,
- MEMTYPE_END_MATCH = 1
-};
-
-static struct memtype *memtype_match(u64 start, u64 end, int match_type)
-{
- struct memtype *entry_match;
-
- entry_match = interval_iter_first(&memtype_rbroot, start, end-1);
-
- while (entry_match != NULL && entry_match->start < end) {
- if ((match_type == MEMTYPE_EXACT_MATCH) &&
- (entry_match->start == start) && (entry_match->end == end))
- return entry_match;
-
- if ((match_type == MEMTYPE_END_MATCH) &&
- (entry_match->start < start) && (entry_match->end == end))
- return entry_match;
-
- entry_match = interval_iter_next(entry_match, start, end-1);
- }
-
- return NULL; /* Returns NULL if there is no match */
-}
-
static int memtype_check_conflict(u64 start, u64 end,
enum page_cache_mode reqtype,
enum page_cache_mode *newtype)
@@ -130,35 +104,16 @@ int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *ret_ty
struct memtype *memtype_erase(u64 start, u64 end)
{
- struct memtype *entry_old;
-
- /*
- * Since the memtype_rbroot tree allows overlapping ranges,
- * memtype_erase() checks with EXACT_MATCH first, i.e. free
- * a whole node for the munmap case. If no such entry is found,
- * it then checks with END_MATCH, i.e. shrink the size of a node
- * from the end for the mremap case.
- */
- entry_old = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
- if (!entry_old) {
- entry_old = memtype_match(start, end, MEMTYPE_END_MATCH);
- if (!entry_old)
- return ERR_PTR(-EINVAL);
+ struct memtype *entry = interval_iter_first(&memtype_rbroot, start, end - 1);
+
+ while (entry && entry->start < end) {
+ if (entry->start == start && entry->end == end) {
+ interval_remove(entry, &memtype_rbroot);
+ return entry;
+ }
+ entry = interval_iter_next(entry, start, end - 1);
}
-
- if (entry_old->start == start) {
- /* munmap: erase this node */
- interval_remove(entry_old, &memtype_rbroot);
- } else {
- /* mremap: update the end value of this node */
- interval_remove(entry_old, &memtype_rbroot);
- entry_old->end = start;
- interval_insert(entry_old, &memtype_rbroot);
-
- return NULL;
- }
-
- return entry_old;
+ return ERR_PTR(-EINVAL);
}
struct memtype *memtype_lookup(u64 addr)
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 30ab4aced761..46edc11726b7 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2148,6 +2148,19 @@ static inline int cpa_clear_pages_array(struct page **pages, int numpages,
CPA_PAGES_ARRAY, pages);
}
+/*
+ * __set_memory_prot is an internal helper for callers that have been passed
+ * a pgprot_t value from upper layers, where a reservation has already been
+ * taken. If you want to set the pgprot to a specific page protection, use
+ * the set_memory_xx() functions.
+ */
+int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot)
+{
+ return change_page_attr_set_clr(&addr, numpages, prot,
+ __pgprot(~pgprot_val(prot)), 0, 0,
+ NULL);
+}
+
int _set_memory_uc(unsigned long addr, int numpages)
{
/*
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 62777ba4de1a..ddf248c3ee7d 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -189,7 +189,7 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
if (!ptdesc)
failed = true;
- if (ptdesc && !pagetable_pmd_ctor(ptdesc)) {
+ if (ptdesc && !pagetable_pmd_ctor(mm, ptdesc)) {
pagetable_free(ptdesc);
ptdesc = NULL;
failed = true;
@@ -751,14 +751,13 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
for (i = 0; i < PTRS_PER_PMD; i++) {
if (!pmd_none(pmd_sv[i])) {
pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]);
- free_page((unsigned long)pte);
+ pte_free_kernel(&init_mm, pte);
}
}
free_page((unsigned long)pmd_sv);
- pagetable_dtor(virt_to_ptdesc(pmd));
- free_page((unsigned long)pmd);
+ pmd_free(&init_mm, pmd);
return 1;
}
@@ -781,7 +780,7 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
/* INVLPG to clear all paging-structure caches */
flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
- free_page((unsigned long)pte);
+ pte_free_kernel(&init_mm, pte);
return 1;
}