diff options
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/dump_pagetables.c | 71 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 15 | ||||
-rw-r--r-- | arch/x86/mm/ioremap.c | 7 | ||||
-rw-r--r-- | arch/x86/mm/pat/memtype.c | 194 | ||||
-rw-r--r-- | arch/x86/mm/pat/memtype_interval.c | 63 | ||||
-rw-r--r-- | arch/x86/mm/pat/set_memory.c | 13 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 9 |
7 files changed, 130 insertions, 242 deletions
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 89079ea73e65..a4700ef6eb64 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -266,6 +266,32 @@ static void effective_prot(struct ptdump_state *pt_st, int level, u64 val) st->prot_levels[level] = effective; } +static void effective_prot_pte(struct ptdump_state *st, pte_t pte) +{ + effective_prot(st, 4, pte_val(pte)); +} + +static void effective_prot_pmd(struct ptdump_state *st, pmd_t pmd) +{ + effective_prot(st, 3, pmd_val(pmd)); +} + +static void effective_prot_pud(struct ptdump_state *st, pud_t pud) +{ + effective_prot(st, 2, pud_val(pud)); +} + +static void effective_prot_p4d(struct ptdump_state *st, p4d_t p4d) +{ + effective_prot(st, 1, p4d_val(p4d)); +} + +static void effective_prot_pgd(struct ptdump_state *st, pgd_t pgd) +{ + effective_prot(st, 0, pgd_val(pgd)); +} + + /* * This function gets called on a break in a continuous series * of PTE entries; the next one is different so we need to @@ -362,6 +388,38 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, } } +static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte) +{ + note_page(pt_st, addr, 4, pte_val(pte)); +} + +static void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd) +{ + note_page(pt_st, addr, 3, pmd_val(pmd)); +} + +static void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud) +{ + note_page(pt_st, addr, 2, pud_val(pud)); +} + +static void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d) +{ + note_page(pt_st, addr, 1, p4d_val(p4d)); +} + +static void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd) +{ + note_page(pt_st, addr, 0, pgd_val(pgd)); +} + +static void note_page_flush(struct ptdump_state *pt_st) +{ + pte_t pte_zero = {0}; + + note_page(pt_st, 0, -1, pte_val(pte_zero)); +} + bool ptdump_walk_pgd_level_core(struct seq_file *m, struct mm_struct *mm, pgd_t *pgd, bool checkwx, bool dmesg) @@ -378,8 +436,17 @@ bool ptdump_walk_pgd_level_core(struct seq_file *m, struct pg_state st = { .ptdump = { - .note_page = note_page, - .effective_prot = effective_prot, + .note_page_pte = note_page_pte, + .note_page_pmd = note_page_pmd, + .note_page_pud = note_page_pud, + .note_page_p4d = note_page_p4d, + .note_page_pgd = note_page_pgd, + .note_page_flush = note_page_flush, + .effective_prot_pte = effective_prot_pte, + .effective_prot_pmd = effective_prot_pmd, + .effective_prot_pud = effective_prot_pud, + .effective_prot_p4d = effective_prot_p4d, + .effective_prot_pgd = effective_prot_pgd, .range = ptdump_ranges }, .level = -1, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 66330fe4e18c..ee66fae9ebcc 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1467,16 +1467,21 @@ static unsigned long probe_memory_block_size(void) } /* - * Use max block size to minimize overhead on bare metal, where - * alignment for memory hotplug isn't a concern. + * When hotplug alignment is not a concern, maximize blocksize + * to minimize overhead. Otherwise, align to the lesser of advice + * alignment and end of memory alignment. */ - if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + bz = memory_block_advised_max_size(); + if (!bz) { bz = MAX_BLOCK_SIZE; - goto done; + if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + goto done; + } else { + bz = max(min(bz, MAX_BLOCK_SIZE), MIN_MEMORY_BLOCK_SIZE); } /* Find the largest allowed block size that aligns to memory end */ - for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) { + for (; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) { if (IS_ALIGNED(boot_mem_end, bz)) break; } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 331e101bf801..12c8180ca1ba 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -71,7 +71,7 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, static unsigned int __ioremap_check_ram(struct resource *res) { unsigned long start_pfn, stop_pfn; - unsigned long i; + unsigned long pfn; if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM) return 0; @@ -79,9 +79,8 @@ static unsigned int __ioremap_check_ram(struct resource *res) start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT; stop_pfn = (res->end + 1) >> PAGE_SHIFT; if (stop_pfn > start_pfn) { - for (i = 0; i < (stop_pfn - start_pfn); ++i) - if (pfn_valid(start_pfn + i) && - !PageReserved(pfn_to_page(start_pfn + i))) + for_each_valid_pfn(pfn, start_pfn, stop_pfn) + if (!PageReserved(pfn_to_page(pfn))) return IORES_MAP_SYSTEM_RAM; } diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index c97b527c66fe..2e7923844afe 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -775,6 +775,12 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, return vma_prot; } +static inline void pgprot_set_cachemode(pgprot_t *prot, enum page_cache_mode pcm) +{ + *prot = __pgprot((pgprot_val(*prot) & ~_PAGE_CACHE_MASK) | + cachemode2protval(pcm)); +} + int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot) { @@ -789,8 +795,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, if (file->f_flags & O_DSYNC) pcm = _PAGE_CACHE_MODE_UC_MINUS; - *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) | - cachemode2protval(pcm)); + pgprot_set_cachemode(vma_prot, pcm); return 1; } @@ -831,8 +836,7 @@ int memtype_kernel_map_sync(u64 base, unsigned long size, * Reserved non RAM regions only and after successful memtype_reserve, * this func also keeps identity mapping (if any) in sync with this new prot. */ -static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, - int strict_prot) +static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot) { int is_ram = 0; int ret; @@ -858,9 +862,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, (unsigned long long)paddr, (unsigned long long)(paddr + size - 1), cattr_name(pcm)); - *vma_prot = __pgprot((pgprot_val(*vma_prot) & - (~_PAGE_CACHE_MASK)) | - cachemode2protval(pcm)); + pgprot_set_cachemode(vma_prot, pcm); } return 0; } @@ -870,8 +872,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, return ret; if (pcm != want_pcm) { - if (strict_prot || - !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) { + if (!is_new_memtype_allowed(paddr, size, want_pcm, pcm)) { memtype_free(paddr, paddr + size); pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n", current->comm, current->pid, @@ -881,13 +882,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, cattr_name(pcm)); return -EINVAL; } - /* - * We allow returning different type than the one requested in - * non strict case. - */ - *vma_prot = __pgprot((pgprot_val(*vma_prot) & - (~_PAGE_CACHE_MASK)) | - cachemode2protval(pcm)); + pgprot_set_cachemode(vma_prot, pcm); } if (memtype_kernel_map_sync(paddr, size, pcm) < 0) { @@ -910,124 +905,14 @@ static void free_pfn_range(u64 paddr, unsigned long size) memtype_free(paddr, paddr + size); } -static int follow_phys(struct vm_area_struct *vma, unsigned long *prot, - resource_size_t *phys) -{ - struct follow_pfnmap_args args = { .vma = vma, .address = vma->vm_start }; - - if (follow_pfnmap_start(&args)) - return -EINVAL; - - /* Never return PFNs of anon folios in COW mappings. */ - if (!args.special) { - follow_pfnmap_end(&args); - return -EINVAL; - } - - *prot = pgprot_val(args.pgprot); - *phys = (resource_size_t)args.pfn << PAGE_SHIFT; - follow_pfnmap_end(&args); - return 0; -} - -static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, - pgprot_t *pgprot) -{ - unsigned long prot; - - VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT)); - - /* - * We need the starting PFN and cachemode used for track_pfn_remap() - * that covered the whole VMA. For most mappings, we can obtain that - * information from the page tables. For COW mappings, we might now - * suddenly have anon folios mapped and follow_phys() will fail. - * - * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to - * detect the PFN. If we need the cachemode as well, we're out of luck - * for now and have to fail fork(). - */ - if (!follow_phys(vma, &prot, paddr)) { - if (pgprot) - *pgprot = __pgprot(prot); - return 0; - } - if (is_cow_mapping(vma->vm_flags)) { - if (pgprot) - return -EINVAL; - *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; - return 0; - } - WARN_ON_ONCE(1); - return -EINVAL; -} - -int track_pfn_copy(struct vm_area_struct *dst_vma, - struct vm_area_struct *src_vma, unsigned long *pfn) -{ - const unsigned long vma_size = src_vma->vm_end - src_vma->vm_start; - resource_size_t paddr; - pgprot_t pgprot; - int rc; - - if (!(src_vma->vm_flags & VM_PAT)) - return 0; - - /* - * Duplicate the PAT information for the dst VMA based on the src - * VMA. - */ - if (get_pat_info(src_vma, &paddr, &pgprot)) - return -EINVAL; - rc = reserve_pfn_range(paddr, vma_size, &pgprot, 1); - if (rc) - return rc; - - /* Reservation for the destination VMA succeeded. */ - vm_flags_set(dst_vma, VM_PAT); - *pfn = PHYS_PFN(paddr); - return 0; -} - -void untrack_pfn_copy(struct vm_area_struct *dst_vma, unsigned long pfn) -{ - untrack_pfn(dst_vma, pfn, dst_vma->vm_end - dst_vma->vm_start, true); - /* - * Reservation was freed, any copied page tables will get cleaned - * up later, but without getting PAT involved again. - */ -} - -/* - * prot is passed in as a parameter for the new mapping. If the vma has - * a linear pfn mapping for the entire range, or no vma is provided, - * reserve the entire pfn + size range with single reserve_pfn_range - * call. - */ -int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, - unsigned long pfn, unsigned long addr, unsigned long size) +int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, pgprot_t *prot) { resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT; enum page_cache_mode pcm; - /* reserve the whole chunk starting from paddr */ - if (!vma || (addr == vma->vm_start - && size == (vma->vm_end - vma->vm_start))) { - int ret; - - ret = reserve_pfn_range(paddr, size, prot, 0); - if (ret == 0 && vma) - vm_flags_set(vma, VM_PAT); - return ret; - } - if (!pat_enabled()) return 0; - /* - * For anything smaller than the vma size we set prot based on the - * lookup. - */ pcm = lookup_memtype(paddr); /* Check memtype for the remaining pages */ @@ -1038,70 +923,35 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, return -EINVAL; } - *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) | - cachemode2protval(pcm)); - + pgprot_set_cachemode(prot, pcm); return 0; } -void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn) +int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot) { - enum page_cache_mode pcm; + const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT; - if (!pat_enabled()) - return; - - /* Set prot based on lookup */ - pcm = lookup_memtype(pfn_t_to_phys(pfn)); - *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) | - cachemode2protval(pcm)); + return reserve_pfn_range(paddr, size, prot); } -/* - * untrack_pfn is called while unmapping a pfnmap for a region. - * untrack can be called for a specific region indicated by pfn and size or - * can be for the entire vma (in which case pfn, size are zero). - */ -void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, - unsigned long size, bool mm_wr_locked) +void pfnmap_untrack(unsigned long pfn, unsigned long size) { - resource_size_t paddr; - - if (vma && !(vma->vm_flags & VM_PAT)) - return; + const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT; - /* free the chunk starting from pfn or the whole chunk */ - paddr = (resource_size_t)pfn << PAGE_SHIFT; - if (!paddr && !size) { - if (get_pat_info(vma, &paddr, NULL)) - return; - size = vma->vm_end - vma->vm_start; - } free_pfn_range(paddr, size); - if (vma) { - if (mm_wr_locked) - vm_flags_clear(vma, VM_PAT); - else - __vm_flags_mod(vma, 0, VM_PAT); - } -} - -void untrack_pfn_clear(struct vm_area_struct *vma) -{ - vm_flags_clear(vma, VM_PAT); } pgprot_t pgprot_writecombine(pgprot_t prot) { - return __pgprot(pgprot_val(prot) | - cachemode2protval(_PAGE_CACHE_MODE_WC)); + pgprot_set_cachemode(&prot, _PAGE_CACHE_MODE_WC); + return prot; } EXPORT_SYMBOL_GPL(pgprot_writecombine); pgprot_t pgprot_writethrough(pgprot_t prot) { - return __pgprot(pgprot_val(prot) | - cachemode2protval(_PAGE_CACHE_MODE_WT)); + pgprot_set_cachemode(&prot, _PAGE_CACHE_MODE_WT); + return prot; } EXPORT_SYMBOL_GPL(pgprot_writethrough); diff --git a/arch/x86/mm/pat/memtype_interval.c b/arch/x86/mm/pat/memtype_interval.c index 645613d59942..e5844ed1311e 100644 --- a/arch/x86/mm/pat/memtype_interval.c +++ b/arch/x86/mm/pat/memtype_interval.c @@ -49,32 +49,6 @@ INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end, static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED; -enum { - MEMTYPE_EXACT_MATCH = 0, - MEMTYPE_END_MATCH = 1 -}; - -static struct memtype *memtype_match(u64 start, u64 end, int match_type) -{ - struct memtype *entry_match; - - entry_match = interval_iter_first(&memtype_rbroot, start, end-1); - - while (entry_match != NULL && entry_match->start < end) { - if ((match_type == MEMTYPE_EXACT_MATCH) && - (entry_match->start == start) && (entry_match->end == end)) - return entry_match; - - if ((match_type == MEMTYPE_END_MATCH) && - (entry_match->start < start) && (entry_match->end == end)) - return entry_match; - - entry_match = interval_iter_next(entry_match, start, end-1); - } - - return NULL; /* Returns NULL if there is no match */ -} - static int memtype_check_conflict(u64 start, u64 end, enum page_cache_mode reqtype, enum page_cache_mode *newtype) @@ -130,35 +104,16 @@ int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *ret_ty struct memtype *memtype_erase(u64 start, u64 end) { - struct memtype *entry_old; - - /* - * Since the memtype_rbroot tree allows overlapping ranges, - * memtype_erase() checks with EXACT_MATCH first, i.e. free - * a whole node for the munmap case. If no such entry is found, - * it then checks with END_MATCH, i.e. shrink the size of a node - * from the end for the mremap case. - */ - entry_old = memtype_match(start, end, MEMTYPE_EXACT_MATCH); - if (!entry_old) { - entry_old = memtype_match(start, end, MEMTYPE_END_MATCH); - if (!entry_old) - return ERR_PTR(-EINVAL); + struct memtype *entry = interval_iter_first(&memtype_rbroot, start, end - 1); + + while (entry && entry->start < end) { + if (entry->start == start && entry->end == end) { + interval_remove(entry, &memtype_rbroot); + return entry; + } + entry = interval_iter_next(entry, start, end - 1); } - - if (entry_old->start == start) { - /* munmap: erase this node */ - interval_remove(entry_old, &memtype_rbroot); - } else { - /* mremap: update the end value of this node */ - interval_remove(entry_old, &memtype_rbroot); - entry_old->end = start; - interval_insert(entry_old, &memtype_rbroot); - - return NULL; - } - - return entry_old; + return ERR_PTR(-EINVAL); } struct memtype *memtype_lookup(u64 addr) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 30ab4aced761..46edc11726b7 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2148,6 +2148,19 @@ static inline int cpa_clear_pages_array(struct page **pages, int numpages, CPA_PAGES_ARRAY, pages); } +/* + * __set_memory_prot is an internal helper for callers that have been passed + * a pgprot_t value from upper layers and a reservation has already been taken. + * If you want to set the pgprot to a specific page protocol, use the + * set_memory_xx() functions. + */ +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot) +{ + return change_page_attr_set_clr(&addr, numpages, prot, + __pgprot(~pgprot_val(prot)), 0, 0, + NULL); +} + int _set_memory_uc(unsigned long addr, int numpages) { /* diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 62777ba4de1a..ddf248c3ee7d 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -189,7 +189,7 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) if (!ptdesc) failed = true; - if (ptdesc && !pagetable_pmd_ctor(ptdesc)) { + if (ptdesc && !pagetable_pmd_ctor(mm, ptdesc)) { pagetable_free(ptdesc); ptdesc = NULL; failed = true; @@ -751,14 +751,13 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr) for (i = 0; i < PTRS_PER_PMD; i++) { if (!pmd_none(pmd_sv[i])) { pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]); - free_page((unsigned long)pte); + pte_free_kernel(&init_mm, pte); } } free_page((unsigned long)pmd_sv); - pagetable_dtor(virt_to_ptdesc(pmd)); - free_page((unsigned long)pmd); + pmd_free(&init_mm, pmd); return 1; } @@ -781,7 +780,7 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) /* INVLPG to clear all paging-structure caches */ flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1); - free_page((unsigned long)pte); + pte_free_kernel(&init_mm, pte); return 1; } |