Diffstat (limited to 'arch/arm64')
27 files changed, 377 insertions, 211 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index bcc1773fec77..55fc331af337 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1616,6 +1616,9 @@ config ARCH_SUPPORTS_KEXEC_IMAGE_VERIFY_SIG config ARCH_DEFAULT_KEXEC_IMAGE_VERIFY_SIG def_bool y +config ARCH_SUPPORTS_KEXEC_HANDOVER + def_bool y + config ARCH_SUPPORTS_CRASH_DUMP def_bool y diff --git a/arch/arm64/boot/dts/qcom/qcm6490-shift-otter.dts b/arch/arm64/boot/dts/qcom/qcm6490-shift-otter.dts index 712f29fbe85e..b9a0f7ac4d9c 100644 --- a/arch/arm64/boot/dts/qcom/qcm6490-shift-otter.dts +++ b/arch/arm64/boot/dts/qcom/qcm6490-shift-otter.dts @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause /* * Copyright (c) 2023, Luca Weiss <luca.weiss@fairphone.com> - * Copyright (c) 2024, Caleb Connolly <caleb@postmarketos.org> + * Copyright (c) 2024, Casey Connolly <casey.connolly@linaro.org> */ /dts-v1/; diff --git a/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts b/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts index e5da58d11064..2cf7b5e1243c 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-shift-axolotl.dts @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2022, Alexander Martinz <amartinz@shiftphones.com> - * Copyright (c) 2022, Caleb Connolly <caleb@connolly.tech> + * Copyright (c) 2022, Casey Connolly <casey.connolly@linaro.org> * Copyright (c) 2022, Dylan Van Assche <me@dylanvanassche.be> */ diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 1e99db100607..897fc686e6a9 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -774,7 +774,7 @@ CONFIG_MFD_MT6397=y CONFIG_MFD_SPMI_PMIC=y CONFIG_MFD_RK8XX_I2C=y CONFIG_MFD_RK8XX_SPI=y -CONFIG_MFD_SEC_CORE=y +CONFIG_MFD_SEC_I2C=y CONFIG_MFD_SL28CPLD=y CONFIG_RZ_MTU3=y CONFIG_MFD_TI_AM335X_TSCADC=m diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c index 4e27cc29c79e..4fdc26ade1d7 100644 --- a/arch/arm64/hyperv/mshyperv.c +++ b/arch/arm64/hyperv/mshyperv.c @@ -28,6 +28,48 @@ int hv_get_hypervisor_version(union hv_hypervisor_version_info *info) } EXPORT_SYMBOL_GPL(hv_get_hypervisor_version); +#ifdef CONFIG_ACPI + +static bool __init hyperv_detect_via_acpi(void) +{ + if (acpi_disabled) + return false; + /* + * Hypervisor ID is only available in ACPI v6+, and the + * structure layout was extended in v6 to accommodate that + * new field. + * + * At the very minimum, this check makes sure not to read + * past the FADT structure. + * + * It is also needed to catch running in some unknown + * non-Hyper-V environment that has ACPI 5.x or less. + * In such a case, it can't be Hyper-V. + */ + if (acpi_gbl_FADT.header.revision < 6) + return false; + return strncmp((char *)&acpi_gbl_FADT.hypervisor_id, "MsHyperV", 8) == 0; +} + +#else + +static bool __init hyperv_detect_via_acpi(void) +{ + return false; +} + +#endif + +static bool __init hyperv_detect_via_smccc(void) +{ + uuid_t hyperv_uuid = UUID_INIT( + 0x58ba324d, 0x6447, 0x24cd, + 0x75, 0x6c, 0xef, 0x8e, + 0x24, 0x70, 0x59, 0x16); + + return arm_smccc_hypervisor_has_uuid(&hyperv_uuid); +} + static int __init hyperv_init(void) { struct hv_get_vp_registers_output result; @@ -36,13 +78,11 @@ static int __init hyperv_init(void) /* * Allow for a kernel built with CONFIG_HYPERV to be running in - * a non-Hyper-V environment, including on DT instead of ACPI. + * a non-Hyper-V environment. + * * In such cases, do nothing and return success. 
*/ - if (acpi_disabled) - return 0; - - if (strncmp((char *)&acpi_gbl_FADT.hypervisor_id, "MsHyperV", 8)) + if (!hyperv_detect_via_acpi() && !hyperv_detect_via_smccc()) return 0; /* Setup the guest ID */ @@ -77,6 +117,9 @@ static int __init hyperv_init(void) if (ms_hyperv.priv_high & HV_ACCESS_PARTITION_ID) hv_get_partition_id(); + ms_hyperv.vtl = get_vtl(); + if (ms_hyperv.vtl > 0) /* non default VTL */ + pr_info("Linux runs in Hyper-V Virtual Trust Level %d\n", ms_hyperv.vtl); ms_hyperv_late_init(); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d941abc6b5ee..6ce2c5173482 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1320,9 +1320,6 @@ int __init populate_sysreg_config(const struct sys_reg_desc *sr, unsigned int idx); int __init populate_nv_trap_config(void); -bool lock_all_vcpus(struct kvm *kvm); -void unlock_all_vcpus(struct kvm *kvm); - void kvm_calculate_traps(struct kvm_vcpu *vcpu); /* MMIO helpers */ diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h index 6d6d4065b0cb..265e8301d7ba 100644 --- a/arch/arm64/include/asm/pgtable-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -11,11 +11,19 @@ #include <asm/types.h> -typedef u64 pteval_t; -typedef u64 pmdval_t; -typedef u64 pudval_t; -typedef u64 p4dval_t; -typedef u64 pgdval_t; +/* + * Page Table Descriptor + * + * Generic page table descriptor format from which + * all level specific descriptors can be derived. + */ +typedef u64 ptdesc_t; + +typedef ptdesc_t pteval_t; +typedef ptdesc_t pmdval_t; +typedef ptdesc_t pudval_t; +typedef ptdesc_t p4dval_t; +typedef ptdesc_t pgdval_t; /* * These are used to make use of C type-checking.. @@ -46,7 +54,7 @@ typedef struct { pgdval_t pgd; } pgd_t; #define pgd_val(x) ((x).pgd) #define __pgd(x) ((pgd_t) { (x) } ) -typedef struct { pteval_t pgprot; } pgprot_t; +typedef struct { ptdesc_t pgprot; } pgprot_t; #define pgprot_val(x) ((x).pgprot) #define __pgprot(x) ((pgprot_t) { (x) } ) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 5285757ee0c1..88db8a0c0b37 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -673,7 +673,6 @@ static inline pmd_t pmd_mkspecial(pmd_t pmd) #define __phys_to_pmd_val(phys) __phys_to_pte_val(phys) #define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT) #define pfn_pmd(pfn,prot) __pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) -#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) #define pud_young(pud) pte_young(pud_pte(pud)) #define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud))) @@ -906,12 +905,6 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) /* use ONLY for statically allocated translation tables */ #define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr)))) -/* - * Conversion functions: convert a page and protection to a page entry, - * and a page entry and page directory to the page they refer to. 
- */ -#define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot) - #if CONFIG_PGTABLE_LEVELS > 2 #define pmd_ERROR(e) \ diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index b2931d1ae0fb..fded5358641f 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -24,8 +24,8 @@ struct ptdump_info { }; struct ptdump_prot_bits { - u64 mask; - u64 val; + ptdesc_t mask; + ptdesc_t val; const char *set; const char *clear; }; @@ -34,7 +34,7 @@ struct ptdump_pg_level { const struct ptdump_prot_bits *bits; char name[4]; int num; - u64 mask; + ptdesc_t mask; }; /* @@ -51,7 +51,7 @@ struct ptdump_pg_state { const struct mm_struct *mm; unsigned long start_address; int level; - u64 current_prot; + ptdesc_t current_prot; bool check_wx; unsigned long wx_pages; unsigned long uxn_pages; @@ -59,7 +59,13 @@ struct ptdump_pg_state { void ptdump_walk(struct seq_file *s, struct ptdump_info *info); void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, - u64 val); + pteval_t val); +void note_page_pte(struct ptdump_state *st, unsigned long addr, pte_t pte); +void note_page_pmd(struct ptdump_state *st, unsigned long addr, pmd_t pmd); +void note_page_pud(struct ptdump_state *st, unsigned long addr, pud_t pud); +void note_page_p4d(struct ptdump_state *st, unsigned long addr, p4d_t p4d); +void note_page_pgd(struct ptdump_state *st, unsigned long addr, pgd_t pgd); +void note_page_flush(struct ptdump_state *st); #ifdef CONFIG_PTDUMP_DEBUGFS #define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64 void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name); @@ -69,7 +75,13 @@ static inline void ptdump_debugfs_register(struct ptdump_info *info, #endif /* CONFIG_PTDUMP_DEBUGFS */ #else static inline void note_page(struct ptdump_state *pt_st, unsigned long addr, - int level, u64 val) { } + int level, pteval_t val) { } +static inline void note_page_pte(struct ptdump_state *st, unsigned long addr, pte_t pte) { } +static inline void note_page_pmd(struct ptdump_state *st, unsigned long addr, pmd_t pmd) { } +static inline void note_page_pud(struct ptdump_state *st, unsigned long addr, pud_t pud) { } +static inline void note_page_p4d(struct ptdump_state *st, unsigned long addr, p4d_t p4d) { } +static inline void note_page_pgd(struct ptdump_state *st, unsigned long addr, pgd_t pgd) { } +static inline void note_page_flush(struct ptdump_state *st) { } #endif /* CONFIG_PTDUMP */ #endif /* __ASM_PTDUMP_H */ diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index ab8e14b96f68..712daa90e643 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -61,6 +61,22 @@ static inline void syscall_set_return_value(struct task_struct *task, regs->regs[0] = val; } +static inline void syscall_set_nr(struct task_struct *task, + struct pt_regs *regs, + int nr) +{ + regs->syscallno = nr; + if (nr == -1) { + /* + * When the syscall number is set to -1, the syscall will be + * skipped. In this case the syscall return value has to be + * set explicitly, otherwise the first syscall argument is + * returned as the syscall return value. 
+ */ + syscall_set_return_value(task, regs, -ENOSYS, 0); + } +} + #define SYSCALL_MAX_ARGS 6 static inline void syscall_get_arguments(struct task_struct *task, @@ -73,6 +89,19 @@ static inline void syscall_get_arguments(struct task_struct *task, memcpy(args, &regs->regs[1], 5 * sizeof(args[0])); } +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + const unsigned long *args) +{ + memcpy(&regs->regs[0], args, 6 * sizeof(args[0])); + /* + * Also copy the first argument into orig_x0 + * so that syscall_get_arguments() would return it + * instead of the previous value. + */ + regs->orig_x0 = regs->regs[0]; +} + /* * We don't care about endianness (__AUDIT_ARCH_LE bit) here because * AArch64 has the same system calls both on little- and big- endian. diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index cd853801a8f7..f1bb0d10c39a 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -12,6 +12,7 @@ #include <linux/bits.h> #include <linux/stringify.h> #include <linux/kasan-tags.h> +#include <linux/kconfig.h> #include <asm/gpr-num.h> diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 250e9d7c08a7..3857fd7ee8d4 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -29,7 +29,7 @@ static bool region_is_misaligned(const efi_memory_desc_t *md) * executable, everything else can be mapped with the XN bits * set. Also take the new (optional) RO/XP bits into account. */ -static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) +static __init ptdesc_t create_mapping_protection(efi_memory_desc_t *md) { u64 attr = md->attribute; u32 type = md->type; @@ -83,7 +83,7 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { - pteval_t prot_val = create_mapping_protection(md); + ptdesc_t prot_val = create_mapping_protection(md); bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE || md->type == EFI_RUNTIME_SERVICES_DATA); diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c index c6650cfe706c..0f4bd7771859 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -159,7 +159,7 @@ static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(u64 ttbr) static void __init remap_idmap_for_lpa2(void) { /* clear the bits that change meaning once LPA2 is turned on */ - pteval_t mask = PTE_SHARED; + ptdesc_t mask = PTE_SHARED; /* * We have to clear bits [9:8] in all block or page descriptors in the diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c index 81345f68f9fc..7982788e7b9a 100644 --- a/arch/arm64/kernel/pi/map_range.c +++ b/arch/arm64/kernel/pi/map_range.c @@ -30,7 +30,7 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot, int level, pte_t *tbl, bool may_use_cont, u64 va_offset) { u64 cmask = (level == 3) ?
CONT_PTE_SIZE - 1 : U64_MAX; - pteval_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK; + ptdesc_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK; int lshift = (3 - level) * PTDESC_TABLE_SHIFT; u64 lmask = (PAGE_SIZE << lshift) - 1; @@ -87,7 +87,7 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot, } } -asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask) +asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, ptdesc_t clrmask) { u64 ptep = (u64)pg_dir + PAGE_SIZE; pgprot_t text_prot = PAGE_KERNEL_ROX; diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h index 1f4731a4e17e..46cafee7829f 100644 --- a/arch/arm64/kernel/pi/pi.h +++ b/arch/arm64/kernel/pi/pi.h @@ -34,4 +34,4 @@ void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot, asmlinkage void early_map_kernel(u64 boot_status, void *fdt); -asmlinkage u64 create_init_idmap(pgd_t *pgd, pteval_t clrmask); +asmlinkage u64 create_init_idmap(pgd_t *pgd, ptdesc_t clrmask); diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 5133dcbfe9f7..fdbc8beec930 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1766,7 +1766,7 @@ int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, mutex_lock(&kvm->lock); - if (lock_all_vcpus(kvm)) { + if (!kvm_trylock_all_vcpus(kvm)) { set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags); /* @@ -1778,7 +1778,7 @@ int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, kvm->arch.timer_data.voffset = offset->counter_offset; kvm->arch.timer_data.poffset = offset->counter_offset; - unlock_all_vcpus(kvm); + kvm_unlock_all_vcpus(kvm); } else { ret = -EBUSY; } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 36cfcffb40d8..de2b4e9c9f9f 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1924,49 +1924,6 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) } } -/* unlocks vcpus from @vcpu_lock_idx and smaller */ -static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) -{ - struct kvm_vcpu *tmp_vcpu; - - for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { - tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); - mutex_unlock(&tmp_vcpu->mutex); - } -} - -void unlock_all_vcpus(struct kvm *kvm) -{ - lockdep_assert_held(&kvm->lock); - - unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); -} - -/* Returns true if all vcpus were locked, false otherwise */ -bool lock_all_vcpus(struct kvm *kvm) -{ - struct kvm_vcpu *tmp_vcpu; - unsigned long c; - - lockdep_assert_held(&kvm->lock); - - /* - * Any time a vcpu is in an ioctl (including running), the - * core KVM code tries to grab the vcpu->mutex. - * - * By grabbing the vcpu->mutex of all VCPUs we ensure that no - * other VCPUs can fiddle with the state while we access it. 
- */ - kvm_for_each_vcpu(c, tmp_vcpu, kvm) { - if (!mutex_trylock(&tmp_vcpu->mutex)) { - unlock_vcpus(kvm, c - 1); - return false; - } - } - - return true; -} - static unsigned long nvhe_percpu_size(void) { return (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end) - @@ -2790,6 +2747,7 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq, &irqfd->irq_entry); } + void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, struct irq_bypass_producer *prod) { @@ -2800,8 +2758,29 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, if (irq_entry->type != KVM_IRQ_ROUTING_MSI) return; - kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq, - &irqfd->irq_entry); + kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq); +} + +bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old, + struct kvm_kernel_irq_routing_entry *new) +{ + if (new->type != KVM_IRQ_ROUTING_MSI) + return true; + + return memcmp(&old->msi, &new->msi, sizeof(new->msi)); +} + +int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set) +{ + /* + * Remapping the vLPI requires taking the its_lock mutex to resolve + * the new translation. We're in spinlock land at this point, so no + * chance of resolving the translation. + * + * Unmap the vLPI and fall back to software LPI injection. + */ + return kvm_vgic_v4_unset_forwarding(kvm, host_irq); } void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons) diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 569941eeb3fe..58c5fe7d7572 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -270,6 +270,7 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) u32 feature; u8 action; gpa_t gpa; + uuid_t uuid; action = kvm_smccc_get_action(vcpu, func_id); switch (action) { @@ -355,10 +356,11 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) val[0] = gpa; break; case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID: - val[0] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0; - val[1] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1; - val[2] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2; - val[3] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3; + uuid = ARM_SMCCC_VENDOR_HYP_UID_KVM; + val[0] = smccc_uuid_to_reg(&uuid, 0); + val[1] = smccc_uuid_to_reg(&uuid, 1); + val[2] = smccc_uuid_to_reg(&uuid, 2); + val[3] = smccc_uuid_to_reg(&uuid, 3); break; case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID: val[0] = smccc_feat->vendor_hyp_bmap; diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 291dbe38eb5c..4a53e4147fb0 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -918,6 +918,8 @@ static void invalidate_vncr_va(struct kvm *kvm, } } +#define tlbi_va_s1_to_va(v) (u64)sign_extend64((v) << 12, 48) + static void compute_s1_tlbi_range(struct kvm_vcpu *vcpu, u32 inst, u64 val, struct s1e2_tlbi_scope *scope) { @@ -964,7 +966,7 @@ static void compute_s1_tlbi_range(struct kvm_vcpu *vcpu, u32 inst, u64 val, scope->size = ttl_to_size(FIELD_GET(TLBI_TTL_MASK, val)); if (!scope->size) scope->size = SZ_1G; - scope->va = (val << 12) & ~(scope->size - 1); + scope->va = tlbi_va_s1_to_va(val) & ~(scope->size - 1); scope->asid = FIELD_GET(TLBIR_ASID_MASK, val); break; case OP_TLBI_ASIDE1: @@ -992,7 +994,7 @@ static void compute_s1_tlbi_range(struct kvm_vcpu *vcpu, u32 inst, u64 val, scope->size = ttl_to_size(FIELD_GET(TLBI_TTL_MASK, val)); if (!scope->size) scope->size = SZ_1G; - scope->va = (val << 12) & ~(scope->size - 
1); + scope->va = tlbi_va_s1_to_va(val) & ~(scope->size - 1); break; case OP_TLBI_RVAE2: case OP_TLBI_RVAE2IS: diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index f8425f381de9..2684f273d9e1 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -490,6 +490,9 @@ static int vgic_its_debug_show(struct seq_file *s, void *v) struct its_device *dev = iter->dev; struct its_ite *ite = iter->ite; + if (!ite) + return 0; + if (list_is_first(&ite->ite_list, &dev->itt_head)) { seq_printf(s, "\n"); seq_printf(s, "Device ID: 0x%x, Event ID Range: [0 - %llu]\n", @@ -498,7 +501,7 @@ static int vgic_its_debug_show(struct seq_file *s, void *v) seq_printf(s, "-----------------------------------------------\n"); } - if (ite && ite->irq && ite->collection) { + if (ite->irq && ite->collection) { seq_printf(s, "%8u %8u %8u %8u %8u %2d\n", ite->event_id, ite->irq->intid, ite->irq->hwintid, ite->collection->target_addr, diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 1f33e71c2a73..eb1205654ac8 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -84,15 +84,40 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) !kvm_vgic_global_state.can_emulate_gicv2) return -ENODEV; - /* Must be held to avoid race with vCPU creation */ + /* + * Ensure mutual exclusion with vCPU creation and any vCPU ioctls by: + * + * - Holding kvm->lock to prevent KVM_CREATE_VCPU from reaching + * kvm_arch_vcpu_precreate() and ensuring created_vcpus is stable. + * This alone is insufficient, as kvm_vm_ioctl_create_vcpu() drops + * the kvm->lock before completing the vCPU creation. + */ lockdep_assert_held(&kvm->lock); + /* + * - Acquiring the vCPU mutex for every *online* vCPU to prevent + * concurrent vCPU ioctls for vCPUs already visible to userspace. + */ ret = -EBUSY; - if (!lock_all_vcpus(kvm)) + if (kvm_trylock_all_vcpus(kvm)) return ret; + /* + * - Taking the config_lock which protects VGIC data structures such + * as the per-vCPU arrays of private IRQs (SGIs, PPIs). + */ mutex_lock(&kvm->arch.config_lock); + /* + * - Bailing on the entire thing if a vCPU is in the middle of creation, + * dropped the kvm->lock, but hasn't reached kvm_arch_vcpu_create(). + * + * The whole combination of this guarantees that no vCPU can get into + * KVM with a VGIC configuration inconsistent with the VM's VGIC. 
+ */ + if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus)) + goto out_unlock; + if (irqchip_in_kernel(kvm)) { ret = -EEXIST; goto out_unlock; @@ -142,7 +167,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) out_unlock: mutex_unlock(&kvm->arch.config_lock); - unlock_all_vcpus(kvm); + kvm_unlock_all_vcpus(kvm); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 569f9da9049f..534049c7c94b 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -306,39 +306,34 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, } } - raw_spin_unlock_irqrestore(&irq->irq_lock, flags); - if (irq->hw) - return its_prop_update_vlpi(irq->host_irq, prop, needs_inv); + ret = its_prop_update_vlpi(irq->host_irq, prop, needs_inv); - return 0; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + return ret; } static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu) { - int ret = 0; - unsigned long flags; + struct its_vlpi_map map; + int ret; - raw_spin_lock_irqsave(&irq->irq_lock, flags); + guard(raw_spinlock_irqsave)(&irq->irq_lock); irq->target_vcpu = vcpu; - raw_spin_unlock_irqrestore(&irq->irq_lock, flags); - if (irq->hw) { - struct its_vlpi_map map; - - ret = its_get_vlpi(irq->host_irq, &map); - if (ret) - return ret; + if (!irq->hw) + return 0; - if (map.vpe) - atomic_dec(&map.vpe->vlpi_count); - map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; - atomic_inc(&map.vpe->vlpi_count); + ret = its_get_vlpi(irq->host_irq, &map); + if (ret) + return ret; - ret = its_map_vlpi(irq->host_irq, &map); - } + if (map.vpe) + atomic_dec(&map.vpe->vlpi_count); - return ret; + map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + atomic_inc(&map.vpe->vlpi_count); + return its_map_vlpi(irq->host_irq, &map); } static struct kvm_vcpu *collection_to_vcpu(struct kvm *kvm, @@ -756,12 +751,17 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) /* Requires the its_lock to be held. */ static void its_free_ite(struct kvm *kvm, struct its_ite *ite) { + struct vgic_irq *irq = ite->irq; list_del(&ite->ite_list); /* This put matches the get in vgic_add_lpi. 
*/ - if (ite->irq) { - if (ite->irq->hw) - WARN_ON(its_unmap_vlpi(ite->irq->host_irq)); + if (irq) { + scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) { + if (irq->hw) + WARN_ON(its_unmap_vlpi(ite->irq->host_irq)); + + irq->hw = false; + } vgic_put_irq(kvm, ite->irq); } @@ -1971,7 +1971,7 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); - if (!lock_all_vcpus(dev->kvm)) { + if (kvm_trylock_all_vcpus(dev->kvm)) { mutex_unlock(&dev->kvm->lock); return -EBUSY; } @@ -2006,7 +2006,7 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev, } out: mutex_unlock(&dev->kvm->arch.config_lock); - unlock_all_vcpus(dev->kvm); + kvm_unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); return ret; } @@ -2676,7 +2676,7 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) mutex_lock(&kvm->lock); - if (!lock_all_vcpus(kvm)) { + if (kvm_trylock_all_vcpus(kvm)) { mutex_unlock(&kvm->lock); return -EBUSY; } @@ -2698,7 +2698,7 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) mutex_unlock(&its->its_lock); mutex_unlock(&kvm->arch.config_lock); - unlock_all_vcpus(kvm); + kvm_unlock_all_vcpus(kvm); mutex_unlock(&kvm->lock); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 359094f68c23..f9ae790163fb 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -268,7 +268,7 @@ static int vgic_set_common_attr(struct kvm_device *dev, return -ENXIO; mutex_lock(&dev->kvm->lock); - if (!lock_all_vcpus(dev->kvm)) { + if (kvm_trylock_all_vcpus(dev->kvm)) { mutex_unlock(&dev->kvm->lock); return -EBUSY; } @@ -276,7 +276,7 @@ static int vgic_set_common_attr(struct kvm_device *dev, mutex_lock(&dev->kvm->arch.config_lock); r = vgic_v3_save_pending_tables(dev->kvm); mutex_unlock(&dev->kvm->arch.config_lock); - unlock_all_vcpus(dev->kvm); + kvm_unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); return r; } @@ -390,7 +390,7 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); - if (!lock_all_vcpus(dev->kvm)) { + if (kvm_trylock_all_vcpus(dev->kvm)) { mutex_unlock(&dev->kvm->lock); return -EBUSY; } @@ -415,7 +415,7 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev, out: mutex_unlock(&dev->kvm->arch.config_lock); - unlock_all_vcpus(dev->kvm); + kvm_unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); if (!ret && !is_write) @@ -554,7 +554,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); - if (!lock_all_vcpus(dev->kvm)) { + if (kvm_trylock_all_vcpus(dev->kvm)) { mutex_unlock(&dev->kvm->lock); return -EBUSY; } @@ -611,7 +611,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, out: mutex_unlock(&dev->kvm->arch.config_lock); - unlock_all_vcpus(dev->kvm); + kvm_unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); if (!ret && uaccess && !is_write) { diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index c7de6154627c..193946108192 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -444,7 +444,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, if (IS_ERR(its)) return 0; - mutex_lock(&its->its_lock); + guard(mutex)(&its->its_lock); /* * Perform the actual DevID/EventID -> LPI translation. 
@@ -455,11 +455,13 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, */ if (vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, irq_entry->msi.data, &irq)) - goto out; + return 0; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); /* Silently exit if the vLPI is already mapped */ if (irq->hw) - goto out; + goto out_unlock_irq; /* * Emit the mapping request. If it fails, the ITS probably @@ -479,68 +481,74 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, ret = its_map_vlpi(virq, &map); if (ret) - goto out; + goto out_unlock_irq; irq->hw = true; irq->host_irq = virq; atomic_inc(&map.vpe->vlpi_count); /* Transfer pending state */ - raw_spin_lock_irqsave(&irq->irq_lock, flags); - if (irq->pending_latch) { - ret = irq_set_irqchip_state(irq->host_irq, - IRQCHIP_STATE_PENDING, - irq->pending_latch); - WARN_RATELIMIT(ret, "IRQ %d", irq->host_irq); + if (!irq->pending_latch) + goto out_unlock_irq; - /* - * Clear pending_latch and communicate this state - * change via vgic_queue_irq_unlock. - */ - irq->pending_latch = false; - vgic_queue_irq_unlock(kvm, irq, flags); - } else { - raw_spin_unlock_irqrestore(&irq->irq_lock, flags); - } + ret = irq_set_irqchip_state(irq->host_irq, IRQCHIP_STATE_PENDING, + irq->pending_latch); + WARN_RATELIMIT(ret, "IRQ %d", irq->host_irq); -out: - mutex_unlock(&its->its_lock); + /* + * Clear pending_latch and communicate this state + * change via vgic_queue_irq_unlock. + */ + irq->pending_latch = false; + vgic_queue_irq_unlock(kvm, irq, flags); + return ret; + +out_unlock_irq: + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); return ret; } -int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq, - struct kvm_kernel_irq_routing_entry *irq_entry) +static struct vgic_irq *__vgic_host_irq_get_vlpi(struct kvm *kvm, int host_irq) { - struct vgic_its *its; struct vgic_irq *irq; - int ret; + unsigned long idx; + + guard(rcu)(); + xa_for_each(&kvm->arch.vgic.lpi_xa, idx, irq) { + if (!irq->hw || irq->host_irq != host_irq) + continue; + + if (!vgic_try_get_irq_kref(irq)) + return NULL; + + return irq; + } + + return NULL; +} + +int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq) +{ + struct vgic_irq *irq; + unsigned long flags; + int ret = 0; if (!vgic_supports_direct_msis(kvm)) return 0; - /* - * Get the ITS, and escape early on error (not a valid - * doorbell for any of our vITSs). - */ - its = vgic_get_its(kvm, irq_entry); - if (IS_ERR(its)) + irq = __vgic_host_irq_get_vlpi(kvm, host_irq); + if (!irq) return 0; - mutex_lock(&its->its_lock); - - ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, - irq_entry->msi.data, &irq); - if (ret) - goto out; - - WARN_ON(irq->hw && irq->host_irq != virq); + raw_spin_lock_irqsave(&irq->irq_lock, flags); + WARN_ON(irq->hw && irq->host_irq != host_irq); if (irq->hw) { atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count); irq->hw = false; - ret = its_unmap_vlpi(virq); + ret = its_unmap_vlpi(host_irq); } -out: - mutex_unlock(&its->its_lock); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(kvm, irq); return ret; } diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 07aeab8a7606..c86c348857c4 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -83,7 +83,7 @@ arch_initcall(adjust_protection_map); pgprot_t vm_get_page_prot(unsigned long vm_flags) { - pteval_t prot; + ptdesc_t prot; /* Short circuit GCS to avoid bloating the table. 
*/ if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) { diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ea6695d53fb9..8fcf59ba39db 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -46,6 +46,13 @@ #define NO_CONT_MAPPINGS BIT(1) #define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */ +enum pgtable_type { + TABLE_PTE, + TABLE_PMD, + TABLE_PUD, + TABLE_P4D, +}; + u64 kimage_voffset __ro_after_init; EXPORT_SYMBOL(kimage_voffset); @@ -107,7 +114,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, } EXPORT_SYMBOL(phys_mem_access_prot); -static phys_addr_t __init early_pgtable_alloc(int shift) +static phys_addr_t __init early_pgtable_alloc(enum pgtable_type pgtable_type) { phys_addr_t phys; @@ -192,7 +199,7 @@ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), + phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags) { unsigned long next; @@ -207,7 +214,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, if (flags & NO_EXEC_MAPPINGS) pmdval |= PMD_TABLE_PXN; BUG_ON(!pgtable_alloc); - pte_phys = pgtable_alloc(PAGE_SHIFT); + pte_phys = pgtable_alloc(TABLE_PTE); ptep = pte_set_fixmap(pte_phys); init_clear_pgtable(ptep); ptep += pte_index(addr); @@ -243,7 +250,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), int flags) + phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags) { unsigned long next; @@ -277,7 +284,8 @@ static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), int flags) + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags) { unsigned long next; pud_t pud = READ_ONCE(*pudp); @@ -294,7 +302,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, if (flags & NO_EXEC_MAPPINGS) pudval |= PUD_TABLE_PXN; BUG_ON(!pgtable_alloc); - pmd_phys = pgtable_alloc(PMD_SHIFT); + pmd_phys = pgtable_alloc(TABLE_PMD); pmdp = pmd_set_fixmap(pmd_phys); init_clear_pgtable(pmdp); pmdp += pmd_index(addr); @@ -325,7 +333,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), + phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags) { unsigned long next; @@ -339,7 +347,7 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, if (flags & NO_EXEC_MAPPINGS) p4dval |= P4D_TABLE_PXN; BUG_ON(!pgtable_alloc); - pud_phys = pgtable_alloc(PUD_SHIFT); + pud_phys = pgtable_alloc(TABLE_PUD); pudp = pud_set_fixmap(pud_phys); init_clear_pgtable(pudp); pudp += pud_index(addr); @@ -383,7 +391,7 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), + phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags) { unsigned long next; @@ -397,7 +405,7 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, if (flags & 
NO_EXEC_MAPPINGS) pgdval |= PGD_TABLE_PXN; BUG_ON(!pgtable_alloc); - p4d_phys = pgtable_alloc(P4D_SHIFT); + p4d_phys = pgtable_alloc(TABLE_P4D); p4dp = p4d_set_fixmap(p4d_phys); init_clear_pgtable(p4dp); p4dp += p4d_index(addr); @@ -427,7 +435,7 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), + phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags) { unsigned long addr, end, next; @@ -455,7 +463,7 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), + phys_addr_t (*pgtable_alloc)(enum pgtable_type), int flags) { mutex_lock(&fixmap_lock); @@ -468,37 +476,48 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, extern __alias(__create_pgd_mapping_locked) void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), int flags); + phys_addr_t (*pgtable_alloc)(enum pgtable_type), + int flags); #endif -static phys_addr_t __pgd_pgtable_alloc(int shift) +static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm, + enum pgtable_type pgtable_type) { /* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */ - void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL & ~__GFP_ZERO); + struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & ~__GFP_ZERO, 0); + phys_addr_t pa; + + BUG_ON(!ptdesc); + pa = page_to_phys(ptdesc_page(ptdesc)); + + switch (pgtable_type) { + case TABLE_PTE: + BUG_ON(!pagetable_pte_ctor(mm, ptdesc)); + break; + case TABLE_PMD: + BUG_ON(!pagetable_pmd_ctor(mm, ptdesc)); + break; + case TABLE_PUD: + pagetable_pud_ctor(ptdesc); + break; + case TABLE_P4D: + pagetable_p4d_ctor(ptdesc); + break; + } - BUG_ON(!ptr); - return __pa(ptr); + return pa; } -static phys_addr_t pgd_pgtable_alloc(int shift) +static phys_addr_t __maybe_unused +pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type) { - phys_addr_t pa = __pgd_pgtable_alloc(shift); - struct ptdesc *ptdesc = page_ptdesc(phys_to_page(pa)); - - /* - * Call proper page table ctor in case later we need to - * call core mm functions like apply_to_page_range() on - * this pre-allocated page table. - * - * We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is - * folded, and if so pagetable_pte_ctor() becomes nop. 
- */ - if (shift == PAGE_SHIFT) - BUG_ON(!pagetable_pte_ctor(ptdesc)); - else if (shift == PMD_SHIFT) - BUG_ON(!pagetable_pmd_ctor(ptdesc)); + return __pgd_pgtable_alloc(&init_mm, pgtable_type); +} - return pa; +static phys_addr_t +pgd_pgtable_alloc_special_mm(enum pgtable_type pgtable_type) +{ + return __pgd_pgtable_alloc(NULL, pgtable_type); } /* @@ -530,7 +549,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(mm->pgd, phys, virt, size, prot, - pgd_pgtable_alloc, flags); + pgd_pgtable_alloc_special_mm, flags); } static void update_mapping_prot(phys_addr_t phys, unsigned long virt, @@ -744,7 +763,7 @@ static int __init map_entry_trampoline(void) memset(tramp_pg_dir, 0, PGD_SIZE); __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, entry_tramp_text_size(), prot, - __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS); + pgd_pgtable_alloc_init_mm, NO_BLOCK_MAPPINGS); /* Map both the text and data into the kernel page table */ for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++) @@ -1350,7 +1369,7 @@ int arch_add_memory(int nid, u64 start, u64 size, flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), - size, params->pgprot, __pgd_pgtable_alloc, + size, params->pgprot, pgd_pgtable_alloc_init_mm, flags); memblock_clear_nomap(start, size); diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index 8cec0da4cff2..421a5de806c6 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -189,12 +189,12 @@ static void note_prot_wx(struct ptdump_pg_state *st, unsigned long addr) } void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, - u64 val) + pteval_t val) { struct ptdump_pg_state *st = container_of(pt_st, struct ptdump_pg_state, ptdump); struct ptdump_pg_level *pg_level = st->pg_level; static const char units[] = "KMGTPE"; - u64 prot = 0; + ptdesc_t prot = 0; /* check if the current level has been folded dynamically */ if (st->mm && ((level == 1 && mm_p4d_folded(st->mm)) || @@ -251,6 +251,38 @@ void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, } +void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte) +{ + note_page(pt_st, addr, 4, pte_val(pte)); +} + +void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd) +{ + note_page(pt_st, addr, 3, pmd_val(pmd)); +} + +void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud) +{ + note_page(pt_st, addr, 2, pud_val(pud)); +} + +void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d) +{ + note_page(pt_st, addr, 1, p4d_val(p4d)); +} + +void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd) +{ + note_page(pt_st, addr, 0, pgd_val(pgd)); +} + +void note_page_flush(struct ptdump_state *pt_st) +{ + pte_t pte_zero = {0}; + + note_page(pt_st, 0, -1, pte_val(pte_zero)); +} + void ptdump_walk(struct seq_file *s, struct ptdump_info *info) { unsigned long end = ~0UL; @@ -266,7 +298,12 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info) .pg_level = &kernel_pg_levels[0], .level = -1, .ptdump = { - .note_page = note_page, + .note_page_pte = note_page_pte, + .note_page_pmd = note_page_pmd, + .note_page_pud = note_page_pud, + .note_page_p4d = note_page_p4d, + .note_page_pgd = note_page_pgd, + .note_page_flush = note_page_flush, .range = (struct ptdump_range[]){ {info->base_addr, end}, {0, 0} @@ -303,7 +340,12 @@ bool 
ptdump_check_wx(void) .level = -1, .check_wx = true, .ptdump = { - .note_page = note_page, + .note_page_pte = note_page_pte, + .note_page_pmd = note_page_pmd, + .note_page_pud = note_page_pud, + .note_page_p4d = note_page_p4d, + .note_page_pgd = note_page_pgd, + .note_page_flush = note_page_flush, .range = (struct ptdump_range[]) { {_PAGE_OFFSET(vabits_actual), ~0UL}, {0, 0}