From 0cbc2659123e6946ba926c5ec3871efe310123e8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 22 Jun 2020 13:28:34 +0100 Subject: arm64: vdso32: Remove a bunch of #ifdef CONFIG_COMPAT_VDSO guards Most of the compat vDSO code can be built and guarded using IS_ENABLED, so drop the unnecessary #ifdefs. Reviewed-by: Vincenzo Frascino Reviewed-by: Ard Biesheuvel Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso.c | 44 +++++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index d4202a32abc9..1501b22a3cf6 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -30,15 +30,11 @@ #include extern char vdso_start[], vdso_end[]; -#ifdef CONFIG_COMPAT_VDSO extern char vdso32_start[], vdso32_end[]; -#endif /* CONFIG_COMPAT_VDSO */ enum vdso_abi { VDSO_ABI_AA64, -#ifdef CONFIG_COMPAT_VDSO VDSO_ABI_AA32, -#endif /* CONFIG_COMPAT_VDSO */ }; enum vvar_pages { @@ -284,21 +280,17 @@ up_fail: /* * Create and map the vectors page for AArch32 tasks. */ -#ifdef CONFIG_COMPAT_VDSO static int aarch32_vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { return __vdso_remap(VDSO_ABI_AA32, sm, new_vma); } -#endif /* CONFIG_COMPAT_VDSO */ enum aarch32_map { AA32_MAP_VECTORS, /* kuser helpers */ -#ifdef CONFIG_COMPAT_VDSO + AA32_MAP_SIGPAGE, AA32_MAP_VVAR, AA32_MAP_VDSO, -#endif - AA32_MAP_SIGPAGE }; static struct page *aarch32_vectors_page __ro_after_init; @@ -309,7 +301,10 @@ static struct vm_special_mapping aarch32_vdso_maps[] = { .name = "[vectors]", /* ABI */ .pages = &aarch32_vectors_page, }, -#ifdef CONFIG_COMPAT_VDSO + [AA32_MAP_SIGPAGE] = { + .name = "[sigpage]", /* ABI */ + .pages = &aarch32_sig_page, + }, [AA32_MAP_VVAR] = { .name = "[vvar]", .fault = vvar_fault, @@ -319,11 +314,6 @@ static struct vm_special_mapping aarch32_vdso_maps[] = { .name = "[vdso]", .mremap = aarch32_vdso_mremap, }, -#endif /* CONFIG_COMPAT_VDSO */ - [AA32_MAP_SIGPAGE] = { - .name = "[sigpage]", /* ABI */ - .pages = &aarch32_sig_page, - }, }; static int aarch32_alloc_kuser_vdso_page(void) @@ -362,25 +352,25 @@ static int aarch32_alloc_sigpage(void) return 0; } -#ifdef CONFIG_COMPAT_VDSO static int __aarch32_alloc_vdso_pages(void) { + + if (!IS_ENABLED(CONFIG_COMPAT_VDSO)) + return 0; + vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR]; vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO]; return __vdso_init(VDSO_ABI_AA32); } -#endif /* CONFIG_COMPAT_VDSO */ static int __init aarch32_alloc_vdso_pages(void) { int ret; -#ifdef CONFIG_COMPAT_VDSO ret = __aarch32_alloc_vdso_pages(); if (ret) return ret; -#endif ret = aarch32_alloc_sigpage(); if (ret) @@ -449,14 +439,14 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (ret) goto out; -#ifdef CONFIG_COMPAT_VDSO - ret = __setup_additional_pages(VDSO_ABI_AA32, - mm, - bprm, - uses_interp); - if (ret) - goto out; -#endif /* CONFIG_COMPAT_VDSO */ + if (IS_ENABLED(CONFIG_COMPAT_VDSO)) { + ret = __setup_additional_pages(VDSO_ABI_AA32, + mm, + bprm, + uses_interp); + if (ret) + goto out; + } ret = aarch32_sigreturn_setup(mm); out: -- cgit From 2a30aca81a72d71e128eb692cc4755f33e317670 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 22 Jun 2020 13:37:09 +0100 Subject: arm64: vdso: Fix unusual formatting in *setup_additional_pages() There's really no need to put every parameter on a new line when calling a function with a long name, so reformat the 
*setup_additional_pages() functions in the vDSO setup code to follow the usual conventions. Acked-by: Mark Rutland Reviewed-by: Vincenzo Frascino Reviewed-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 1501b22a3cf6..debb8995d57f 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -440,9 +440,7 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) goto out; if (IS_ENABLED(CONFIG_COMPAT_VDSO)) { - ret = __setup_additional_pages(VDSO_ABI_AA32, - mm, - bprm, + ret = __setup_additional_pages(VDSO_ABI_AA32, mm, bprm, uses_interp); if (ret) goto out; @@ -487,8 +485,7 @@ static int __init vdso_init(void) } arch_initcall(vdso_init); -int arch_setup_additional_pages(struct linux_binprm *bprm, - int uses_interp) +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; int ret; @@ -496,11 +493,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, if (mmap_write_lock_killable(mm)) return -EINTR; - ret = __setup_additional_pages(VDSO_ABI_AA64, - mm, - bprm, - uses_interp); - + ret = __setup_additional_pages(VDSO_ABI_AA64, mm, bprm, uses_interp); mmap_write_unlock(mm); return ret; -- cgit From b4c971245925db1a6e0898878ad410f8f17ec59a Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Tue, 4 Aug 2020 16:53:47 +0800 Subject: arm64: traps: Add str of description to panic() in die() Currently, there are different description strings in die() such as die("Oops",,), die("Oops - BUG",,). And panic() called by die() will always show "Fatal exception" or "Fatal exception in interrupt". Note that panic() will run any panic handler via panic_notifier_list. And the string above will be formatted and placed in static buf[] which will be passed to handler. So panic handler can not distinguish which Oops it is from the buf if we want to do some things like reserve the string in memory or panic statistics. It's not benefit to debug. We need to add more codes to troubleshoot. Let's utilize existing resource to make debug much simpler. Signed-off-by: Yue Hu Link: https://lore.kernel.org/r/20200804085347.10720-1-zbestahu@gmail.com Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 13ebd5ca2070..af25cedf54e9 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -200,9 +200,9 @@ void die(const char *str, struct pt_regs *regs, int err) oops_exit(); if (in_interrupt()) - panic("Fatal exception in interrupt"); + panic("%s: Fatal exception in interrupt", str); if (panic_on_oops) - panic("Fatal exception"); + panic("%s: Fatal exception", str); raw_spin_unlock_irqrestore(&die_lock, flags); -- cgit From ffdbd3d83553beb0fda00081293d5640cba477a2 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 11 Aug 2020 13:35:05 +0800 Subject: arm64: perf: Add general hardware LLC events for PMUv3 This patch is to add the general hardware last level cache (LLC) events for PMUv3: one event is for LLC access and another is for LLC miss. With this change, perf tool can support last level cache profiling, below is an example to demonstrate the usage on Arm64: $ perf stat -e LLC-load-misses -e LLC-loads -- \ perf bench mem memcpy -s 1024MB -l 100 -f default [...] 
Performance counter stats for 'perf bench mem memcpy -s 1024MB -l 100 -f default': 35,534,262 LLC-load-misses # 2.16% of all LL-cache hits 1,643,946,443 LLC-loads [...] Signed-off-by: Leo Yan Link: https://lore.kernel.org/r/20200811053505.21223-1-leo.yan@linaro.org Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 462f9a9cc44b..86e2328b0c2e 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -69,6 +69,9 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD, + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_RD, + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, }; -- cgit From d51eb416fa111c1129c46ea3320faab36bf4c99c Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Fri, 4 Sep 2020 10:21:37 +0800 Subject: drivers/perf: hisi: Add missing include of linux/module.h MODULE_*** is used in HiSilicon uncore PMU drivers and is provided by linux/module.h, but the header file is not directly included. Add the missing include. Signed-off-by: Shaokun Zhang Cc: Will Deacon Cc: Mark Rutland Link: https://lore.kernel.org/r/1599186097-18599-1-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_pmu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 25b0c97b3eb0..b59ec22169ab 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include -- cgit From ad14c19242b5a5f87aa86c4c930e668121de2809 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 28 Aug 2020 11:18:22 +0800 Subject: arm64: fix some spelling mistakes in the comments by codespell arch/arm64/include/asm/cpu_ops.h:24: necesary ==> necessary arch/arm64/include/asm/kvm_arm.h:69: maintainance ==> maintenance arch/arm64/include/asm/cpufeature.h:361: capabilties ==> capabilities arch/arm64/kernel/perf_regs.c:19: compatability ==> compatibility arch/arm64/kernel/smp_spin_table.c:86: endianess ==> endianness arch/arm64/kernel/smp_spin_table.c:88: endianess ==> endianness arch/arm64/kvm/vgic/vgic-mmio-v3.c:1004: targetting ==> targeting arch/arm64/kvm/vgic/vgic-mmio-v3.c:1005: targetting ==> targeting Signed-off-by: Xiaoming Ni Link: https://lore.kernel.org/r/20200828031822.35928-1-nixiaoming@huawei.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpu_ops.h | 2 +- arch/arm64/include/asm/cpufeature.h | 2 +- arch/arm64/include/asm/kvm_arm.h | 2 +- arch/arm64/kernel/perf_regs.c | 2 +- arch/arm64/kernel/smp_spin_table.c | 4 ++-- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index d28e8f37d3b4..e95c4df83911 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -21,7 +21,7 @@ * mechanism for doing so, tests whether it is possible to boot * the given CPU. * @cpu_boot: Boots a cpu into the kernel. 
- * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary + * @cpu_postboot: Optionally, perform any post-boot cleanup or necessary * synchronisation. Called from the cpu being booted. * @cpu_can_disable: Determines whether a CPU can be disabled based on * mechanism-specific information. diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 89b4f0142c28..3a42dc8e697c 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -358,7 +358,7 @@ static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap) } /* - * Generic helper for handling capabilties with multiple (match,enable) pairs + * Generic helper for handling capabilities with multiple (match,enable) pairs * of call backs, sharing the same capability bit. * Iterate over each entry to see if at least one matches. */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 1da8e3dc4455..32acf95d49c7 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -66,7 +66,7 @@ * TWI: Trap WFI * TIDCP: Trap L2CTLR/L2ECTLR * BSU_IS: Upgrade barriers to the inner shareable domain - * FB: Force broadcast of all maintainance operations + * FB: Force broadcast of all maintenance operations * AMO: Override CPSR.A and enable signaling with VA * IMO: Override CPSR.I and enable signaling with VI * FMO: Override CPSR.F and enable signaling with VF diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 666b225aeb3a..94e8718e7229 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -16,7 +16,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) /* * Our handling of compat tasks (PERF_SAMPLE_REGS_ABI_32) is weird, but - * we're stuck with it for ABI compatability reasons. + * we're stuck with it for ABI compatibility reasons. * * For a 32-bit consumer inspecting a 32-bit task, then it will look at * the first 16 registers (see arch/arm/include/uapi/asm/perf_regs.h). diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index c8a3fee00c11..5892e79fa429 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -83,9 +83,9 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) /* * We write the release address as LE regardless of the native - * endianess of the kernel. Therefore, any boot-loaders that + * endianness of the kernel. Therefore, any boot-loaders that * read this address need to convert this address to the - * boot-loader's endianess before jumping. This is mandated by + * boot-loader's endianness before jumping. This is mandated by * the boot protocol. */ writeq_relaxed(__pa_symbol(secondary_holding_pen), release_addr); diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 5c786b915cd3..52d6f24f65dc 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -1001,8 +1001,8 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1) raw_spin_lock_irqsave(&irq->irq_lock, flags); /* - * An access targetting Group0 SGIs can only generate - * those, while an access targetting Group1 SGIs can + * An access targeting Group0 SGIs can only generate + * those, while an access targeting Group1 SGIs can * generate interrupts of either group. 
*/ if (!irq->group || allow_group1) { -- cgit From 9a747c91e8d69d7283c850031d2462f4d27bf25b Mon Sep 17 00:00:00 2001 From: Yanfei Xu Date: Tue, 1 Sep 2020 17:11:54 +0800 Subject: arm64/numa: Fix a typo in comment of arm64_numa_init Fix a typo in comment of arm64_numa_init. 'encomapssing' should be 'encompassing'. Signed-off-by: Yanfei Xu Link: https://lore.kernel.org/r/20200901091154.10112-1-yanfei.xu@windriver.com Signed-off-by: Will Deacon --- arch/arm64/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 73f8b49d485c..d402da26cdca 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -448,7 +448,7 @@ static int __init dummy_numa_init(void) * arm64_numa_init() - Initialize NUMA * * Try each configured NUMA initialization method until one succeeds. The - * last fallback is dummy single node config encomapssing whole memory. + * last fallback is dummy single node config encompassing whole memory. */ void __init arm64_numa_init(void) { -- cgit From 1ab64cf81489e871bcb94fb2dea35c7fac561fff Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 18 Aug 2020 14:36:24 +0800 Subject: ACPI/IORT: Drop the unused @ops of iort_add_device_replay() Since commit d2e1a003af56 ("ACPI/IORT: Don't call iommu_ops->add_device directly"), we use the IOMMU core API to replace a direct invoke of the specified callback. The parameter @ops has therefore became unused. Let's drop it. Signed-off-by: Zenghui Yu Acked-by: Lorenzo Pieralisi Acked-by: Hanjun Guo Link: https://lore.kernel.org/r/20200818063625.980-2-yuzenghui@huawei.com Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index ec782e4a0fe4..a0ece0e201b2 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -811,8 +811,7 @@ static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev) return (fwspec && fwspec->ops) ? fwspec->ops : NULL; } -static inline int iort_add_device_replay(const struct iommu_ops *ops, - struct device *dev) +static inline int iort_add_device_replay(struct device *dev) { int err = 0; @@ -1072,7 +1071,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev, */ if (!err) { ops = iort_fwspec_iommu_ops(dev); - err = iort_add_device_replay(ops, dev); + err = iort_add_device_replay(dev); } /* Ignore all other errors apart from EPROBE_DEFER */ @@ -1089,8 +1088,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev, #else static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev) { return NULL; } -static inline int iort_add_device_replay(const struct iommu_ops *ops, - struct device *dev) +static inline int iort_add_device_replay(struct device *dev) { return 0; } int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) { return 0; } -- cgit From c2bea7a1a1c0c4c5c970696ff556a73f55f998eb Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 18 Aug 2020 14:36:25 +0800 Subject: ACPI/IORT: Remove the unused inline functions Since commit 8212688600ed ("ACPI/IORT: Fix build error when IOMMU_SUPPORT is disabled"), iort_fwspec_iommu_ops() and iort_add_device_replay() are not needed anymore when CONFIG_IOMMU_API is not selected. Let's remove them. 
Signed-off-by: Zenghui Yu Acked-by: Lorenzo Pieralisi Acked-by: Hanjun Guo Link: https://lore.kernel.org/r/20200818063625.980-3-yuzenghui@huawei.com Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index a0ece0e201b2..e670785a6201 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1086,10 +1086,6 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev, } #else -static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev) -{ return NULL; } -static inline int iort_add_device_replay(struct device *dev) -{ return 0; } int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) { return 0; } const struct iommu_ops *iort_iommu_configure_id(struct device *dev, -- cgit From 120dc60d0bdbadcad7460222f74c9ed15cdeb73e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 25 Aug 2020 15:54:40 +0200 Subject: arm64: get rid of TEXT_OFFSET TEXT_OFFSET serves no purpose, and for this reason, it was redefined as 0x0 in the v5.8 timeframe. Since this does not appear to have caused any issues that require us to revisit that decision, let's get rid of the macro entirely, along with any references to it. Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20200825135440.11288-1-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Makefile | 6 ------ arch/arm64/include/asm/boot.h | 3 +-- arch/arm64/include/asm/kernel-pgtable.h | 2 +- arch/arm64/include/asm/memory.h | 2 +- arch/arm64/kernel/Makefile | 2 -- arch/arm64/kernel/head.S | 16 ++++++---------- arch/arm64/kernel/image.h | 1 - arch/arm64/kernel/vmlinux.lds.S | 4 ++-- drivers/firmware/efi/libstub/Makefile | 1 - drivers/firmware/efi/libstub/arm64-stub.c | 6 +++--- 10 files changed, 14 insertions(+), 29 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 130569f90c54..0fd4c1be4f64 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -11,7 +11,6 @@ # Copyright (C) 1995-2001 by Russell King LDFLAGS_vmlinux :=--no-undefined -X -CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour @@ -132,9 +131,6 @@ endif # Default value head-y := arch/arm64/kernel/head.o -# The byte offset of the kernel image in RAM from the start of RAM. 
-TEXT_OFFSET := 0x0 - ifeq ($(CONFIG_KASAN_SW_TAGS), y) KASAN_SHADOW_SCALE_SHIFT := 4 else @@ -145,8 +141,6 @@ KBUILD_CFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) KBUILD_CPPFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) KBUILD_AFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) -export TEXT_OFFSET - core-y += arch/arm64/ libs-y := arch/arm64/lib/ $(libs-y) libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h index c7f67da13cd9..3e7943fd17a4 100644 --- a/arch/arm64/include/asm/boot.h +++ b/arch/arm64/include/asm/boot.h @@ -13,8 +13,7 @@ #define MAX_FDT_SIZE SZ_2M /* - * arm64 requires the kernel image to placed - * TEXT_OFFSET bytes beyond a 2 MB aligned base + * arm64 requires the kernel image to placed at a 2 MB aligned base address */ #define MIN_KIMG_ALIGN SZ_2M diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 329fb15f6bac..19ca76ea60d9 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -86,7 +86,7 @@ + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \ + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ -#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) +#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end)) #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) #ifdef CONFIG_ARM64_SW_TTBR0_PAN diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index afa722504bfd..20a9b322d342 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -169,7 +169,7 @@ extern s64 memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) -/* the virtual base of the kernel image (minus TEXT_OFFSET) */ +/* the virtual base of the kernel image */ extern u64 kimage_vaddr; /* the offset between the kernel virtual and physical mappings */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index a561cbb91d4d..4197ef2fb22e 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -3,8 +3,6 @@ # Makefile for the linux kernel. # -CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) -AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_armv8_deprecated.o := -I$(src) CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 037421c66b14..d8d9caf02834 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -36,14 +36,10 @@ #include "efi-header.S" -#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) +#define __PHYS_OFFSET KERNEL_START -#if (TEXT_OFFSET & 0xfff) != 0 -#error TEXT_OFFSET must be at least 4KB aligned -#elif (PAGE_OFFSET & 0x1fffff) != 0 +#if (PAGE_OFFSET & 0x1fffff) != 0 #error PAGE_OFFSET must be at least 2MB aligned -#elif TEXT_OFFSET > 0x1fffff -#error TEXT_OFFSET must be less than 2MB #endif /* @@ -55,7 +51,7 @@ * x0 = physical address to the FDT blob. * * This code is mostly position independent so you call this at - * __pa(PAGE_OFFSET + TEXT_OFFSET). + * __pa(PAGE_OFFSET). * * Note that the callee-saved registers are used for storing variables * that are useful before the MMU is enabled. 
The allocations are described @@ -77,7 +73,7 @@ _head: b primary_entry // branch to kernel start, magic .long 0 // reserved #endif - le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian + .quad 0 // Image load offset from start of RAM, little-endian le64sym _kernel_size_le // Effective size of kernel image, little-endian le64sym _kernel_flags_le // Informative flags, little-endian .quad 0 // reserved @@ -382,7 +378,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) * Map the kernel image (starting with PHYS_OFFSET). */ adrp x0, init_pg_dir - mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) + mov_q x5, KIMAGE_VADDR // compile time __va(_text) add x5, x5, x23 // add KASLR displacement mov x4, PTRS_PER_PGD adrp x6, _end // runtime __pa(_end) @@ -474,7 +470,7 @@ SYM_FUNC_END(__primary_switched) .pushsection ".rodata", "a" SYM_DATA_START(kimage_vaddr) - .quad _text - TEXT_OFFSET + .quad _text SYM_DATA_END(kimage_vaddr) EXPORT_SYMBOL(kimage_vaddr) .popsection diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index c7d38c660372..7bc3ba897901 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -62,7 +62,6 @@ */ #define HEAD_SYMBOLS \ DEFINE_IMAGE_LE64(_kernel_size_le, _end - _text); \ - DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET); \ DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS); #endif /* __ARM64_KERNEL_IMAGE_H */ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 7cba7623fcec..82801d98a2b7 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -105,7 +105,7 @@ SECTIONS *(.eh_frame) } - . = KIMAGE_VADDR + TEXT_OFFSET; + . = KIMAGE_VADDR; .head.text : { _text = .; @@ -274,4 +274,4 @@ ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, /* * If padding is applied before .head.text, virt<->phys conversions will fail. */ -ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned") +ASSERT(_text == KIMAGE_VADDR, "HEAD is misaligned") diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 296b18fbd7a2..5a80cf6bd606 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -64,7 +64,6 @@ lib-$(CONFIG_ARM) += arm32-stub.o lib-$(CONFIG_ARM64) += arm64-stub.o lib-$(CONFIG_X86) += x86-stub.o CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) -CFLAGS_arm64-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) # # For x86, bootloaders like systemd-boot or grub-efi do not zero-initialize the diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index e5bfac79e5ac..fc178398f1ac 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -77,7 +77,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, kernel_size = _edata - _text; kernel_memsize = kernel_size + (_end - _edata); - *reserve_size = kernel_memsize + TEXT_OFFSET % min_kimg_align(); + *reserve_size = kernel_memsize; if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) { /* @@ -91,7 +91,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } if (status != EFI_SUCCESS) { - if (IS_ALIGNED((u64)_text - TEXT_OFFSET, min_kimg_align())) { + if (IS_ALIGNED((u64)_text, min_kimg_align())) { /* * Just execute from wherever we were loaded by the * UEFI PE/COFF loader if the alignment is suitable. 
@@ -111,7 +111,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } } - *image_addr = *reserve_addr + TEXT_OFFSET % min_kimg_align(); + *image_addr = *reserve_addr; memcpy((void *)*image_addr, _text, kernel_size); return EFI_SUCCESS; -- cgit From 44fdf4ed2693eb05dbfa83beaa6fe5fbd0c2f6d0 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Fri, 4 Sep 2020 17:57:38 +0800 Subject: arm64: perf: Remove unnecessary event_idx check event_idx is obtained from armv8pmu_get_event_idx(), and this idx must be between ARMV8_IDX_CYCLE_COUNTER and cpu_pmu->num_events. So it's unnecessary to do this check. Let's remove it. Signed-off-by: Qi Liu Link: https://lore.kernel.org/r/1599213458-28394-1-git-send-email-liuqi115@huawei.com Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 86e2328b0c2e..04d0ceb72a67 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -310,8 +310,6 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { */ #define ARMV8_IDX_CYCLE_COUNTER 0 #define ARMV8_IDX_COUNTER0 1 -#define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \ - (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) /* @@ -368,12 +366,6 @@ static inline int armv8pmu_has_overflowed(u32 pmovsr) return pmovsr & ARMV8_PMU_OVERFLOWED_MASK; } -static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx) -{ - return idx >= ARMV8_IDX_CYCLE_COUNTER && - idx <= ARMV8_IDX_COUNTER_LAST(cpu_pmu); -} - static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) { return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); @@ -443,15 +435,11 @@ static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value) static u64 armv8pmu_read_counter(struct perf_event *event) { - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; u64 value = 0; - if (!armv8pmu_counter_valid(cpu_pmu, idx)) - pr_err("CPU%u reading wrong counter %d\n", - smp_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) + if (idx == ARMV8_IDX_CYCLE_COUNTER) value = read_sysreg(pmccntr_el0); else value = armv8pmu_read_hw_counter(event); @@ -480,16 +468,12 @@ static inline void armv8pmu_write_hw_counter(struct perf_event *event, static void armv8pmu_write_counter(struct perf_event *event, u64 value) { - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; value = armv8pmu_bias_long_counter(event, value); - if (!armv8pmu_counter_valid(cpu_pmu, idx)) - pr_err("CPU%u writing wrong counter %d\n", - smp_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) + if (idx == ARMV8_IDX_CYCLE_COUNTER) write_sysreg(value, pmccntr_el0); else armv8pmu_write_hw_counter(event, value); -- cgit From c048ddf86cdd066db283ce838a0049d2bbefb9e5 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 4 Sep 2020 14:00:59 +0530 Subject: arm64/mm/ptdump: Add address markers for BPF regions Kernel virtual region [BPF_JIT_REGION_START..BPF_JIT_REGION_END] is missing from address_markers[], hence relevant page table entries are not displayed with /sys/kernel/debug/kernel_page_tables. This adds those missing markers. While here, also rename arch/arm64/mm/dump.c which sounds bit ambiguous, as arch/arm64/mm/ptdump.c instead. 
Signed-off-by: Anshuman Khandual Reviewed-by: Steven Price Cc: Catalin Marinas Cc: Will Deacon Cc: Ard Biesheuvel Cc: Steven Price Cc: Andrew Morton Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/1599208259-11191-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/Makefile | 2 +- arch/arm64/mm/dump.c | 387 ------------------------------------------------ arch/arm64/mm/ptdump.c | 389 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 390 insertions(+), 388 deletions(-) delete mode 100644 arch/arm64/mm/dump.c create mode 100644 arch/arm64/mm/ptdump.c diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index d91030f0ffee..2a1d275cd4d7 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -4,7 +4,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_CORE) += dump.o +obj-$(CONFIG_PTDUMP_CORE) += ptdump.o obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c deleted file mode 100644 index 0b8da1cc1c07..000000000000 --- a/arch/arm64/mm/dump.c +++ /dev/null @@ -1,387 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) 2014, The Linux Foundation. All rights reserved. - * Debug helper to dump the current kernel pagetables of the system - * so that we can see what the various memory ranges are set to. - * - * Derived from x86 and arm implementation: - * (C) Copyright 2008 Intel Corporation - * - * Author: Arjan van de Ven - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - - -enum address_markers_idx { - PAGE_OFFSET_NR = 0, - PAGE_END_NR, -#ifdef CONFIG_KASAN - KASAN_START_NR, -#endif -}; - -static struct addr_marker address_markers[] = { - { PAGE_OFFSET, "Linear Mapping start" }, - { 0 /* PAGE_END */, "Linear Mapping end" }, -#ifdef CONFIG_KASAN - { 0 /* KASAN_SHADOW_START */, "Kasan shadow start" }, - { KASAN_SHADOW_END, "Kasan shadow end" }, -#endif - { MODULES_VADDR, "Modules start" }, - { MODULES_END, "Modules end" }, - { VMALLOC_START, "vmalloc() area" }, - { VMALLOC_END, "vmalloc() end" }, - { FIXADDR_START, "Fixmap start" }, - { FIXADDR_TOP, "Fixmap end" }, - { PCI_IO_START, "PCI I/O start" }, - { PCI_IO_END, "PCI I/O end" }, -#ifdef CONFIG_SPARSEMEM_VMEMMAP - { VMEMMAP_START, "vmemmap start" }, - { VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" }, -#endif - { -1, NULL }, -}; - -#define pt_dump_seq_printf(m, fmt, args...) \ -({ \ - if (m) \ - seq_printf(m, fmt, ##args); \ -}) - -#define pt_dump_seq_puts(m, fmt) \ -({ \ - if (m) \ - seq_printf(m, fmt); \ -}) - -/* - * The page dumper groups page table entries of the same type into a single - * description. It uses pg_state to track the range information while - * iterating over the pte entries. When the continuity is broken it then - * dumps out a description of the range. 
- */ -struct pg_state { - struct ptdump_state ptdump; - struct seq_file *seq; - const struct addr_marker *marker; - unsigned long start_address; - int level; - u64 current_prot; - bool check_wx; - unsigned long wx_pages; - unsigned long uxn_pages; -}; - -struct prot_bits { - u64 mask; - u64 val; - const char *set; - const char *clear; -}; - -static const struct prot_bits pte_bits[] = { - { - .mask = PTE_VALID, - .val = PTE_VALID, - .set = " ", - .clear = "F", - }, { - .mask = PTE_USER, - .val = PTE_USER, - .set = "USR", - .clear = " ", - }, { - .mask = PTE_RDONLY, - .val = PTE_RDONLY, - .set = "ro", - .clear = "RW", - }, { - .mask = PTE_PXN, - .val = PTE_PXN, - .set = "NX", - .clear = "x ", - }, { - .mask = PTE_SHARED, - .val = PTE_SHARED, - .set = "SHD", - .clear = " ", - }, { - .mask = PTE_AF, - .val = PTE_AF, - .set = "AF", - .clear = " ", - }, { - .mask = PTE_NG, - .val = PTE_NG, - .set = "NG", - .clear = " ", - }, { - .mask = PTE_CONT, - .val = PTE_CONT, - .set = "CON", - .clear = " ", - }, { - .mask = PTE_TABLE_BIT, - .val = PTE_TABLE_BIT, - .set = " ", - .clear = "BLK", - }, { - .mask = PTE_UXN, - .val = PTE_UXN, - .set = "UXN", - .clear = " ", - }, { - .mask = PTE_GP, - .val = PTE_GP, - .set = "GP", - .clear = " ", - }, { - .mask = PTE_ATTRINDX_MASK, - .val = PTE_ATTRINDX(MT_DEVICE_nGnRnE), - .set = "DEVICE/nGnRnE", - }, { - .mask = PTE_ATTRINDX_MASK, - .val = PTE_ATTRINDX(MT_DEVICE_nGnRE), - .set = "DEVICE/nGnRE", - }, { - .mask = PTE_ATTRINDX_MASK, - .val = PTE_ATTRINDX(MT_DEVICE_GRE), - .set = "DEVICE/GRE", - }, { - .mask = PTE_ATTRINDX_MASK, - .val = PTE_ATTRINDX(MT_NORMAL_NC), - .set = "MEM/NORMAL-NC", - }, { - .mask = PTE_ATTRINDX_MASK, - .val = PTE_ATTRINDX(MT_NORMAL), - .set = "MEM/NORMAL", - } -}; - -struct pg_level { - const struct prot_bits *bits; - const char *name; - size_t num; - u64 mask; -}; - -static struct pg_level pg_level[] = { - { /* pgd */ - .name = "PGD", - .bits = pte_bits, - .num = ARRAY_SIZE(pte_bits), - }, { /* p4d */ - .name = "P4D", - .bits = pte_bits, - .num = ARRAY_SIZE(pte_bits), - }, { /* pud */ - .name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD", - .bits = pte_bits, - .num = ARRAY_SIZE(pte_bits), - }, { /* pmd */ - .name = (CONFIG_PGTABLE_LEVELS > 2) ? 
"PMD" : "PGD", - .bits = pte_bits, - .num = ARRAY_SIZE(pte_bits), - }, { /* pte */ - .name = "PTE", - .bits = pte_bits, - .num = ARRAY_SIZE(pte_bits), - }, -}; - -static void dump_prot(struct pg_state *st, const struct prot_bits *bits, - size_t num) -{ - unsigned i; - - for (i = 0; i < num; i++, bits++) { - const char *s; - - if ((st->current_prot & bits->mask) == bits->val) - s = bits->set; - else - s = bits->clear; - - if (s) - pt_dump_seq_printf(st->seq, " %s", s); - } -} - -static void note_prot_uxn(struct pg_state *st, unsigned long addr) -{ - if (!st->check_wx) - return; - - if ((st->current_prot & PTE_UXN) == PTE_UXN) - return; - - WARN_ONCE(1, "arm64/mm: Found non-UXN mapping at address %p/%pS\n", - (void *)st->start_address, (void *)st->start_address); - - st->uxn_pages += (addr - st->start_address) / PAGE_SIZE; -} - -static void note_prot_wx(struct pg_state *st, unsigned long addr) -{ - if (!st->check_wx) - return; - if ((st->current_prot & PTE_RDONLY) == PTE_RDONLY) - return; - if ((st->current_prot & PTE_PXN) == PTE_PXN) - return; - - WARN_ONCE(1, "arm64/mm: Found insecure W+X mapping at address %p/%pS\n", - (void *)st->start_address, (void *)st->start_address); - - st->wx_pages += (addr - st->start_address) / PAGE_SIZE; -} - -static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, - u64 val) -{ - struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); - static const char units[] = "KMGTPE"; - u64 prot = 0; - - if (level >= 0) - prot = val & pg_level[level].mask; - - if (st->level == -1) { - st->level = level; - st->current_prot = prot; - st->start_address = addr; - pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); - } else if (prot != st->current_prot || level != st->level || - addr >= st->marker[1].start_address) { - const char *unit = units; - unsigned long delta; - - if (st->current_prot) { - note_prot_uxn(st, addr); - note_prot_wx(st, addr); - } - - pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ", - st->start_address, addr); - - delta = (addr - st->start_address) >> 10; - while (!(delta & 1023) && unit[1]) { - delta >>= 10; - unit++; - } - pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit, - pg_level[st->level].name); - if (st->current_prot && pg_level[st->level].bits) - dump_prot(st, pg_level[st->level].bits, - pg_level[st->level].num); - pt_dump_seq_puts(st->seq, "\n"); - - if (addr >= st->marker[1].start_address) { - st->marker++; - pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); - } - - st->start_address = addr; - st->current_prot = prot; - st->level = level; - } - - if (addr >= st->marker[1].start_address) { - st->marker++; - pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); - } - -} - -void ptdump_walk(struct seq_file *s, struct ptdump_info *info) -{ - unsigned long end = ~0UL; - struct pg_state st; - - if (info->base_addr < TASK_SIZE_64) - end = TASK_SIZE_64; - - st = (struct pg_state){ - .seq = s, - .marker = info->markers, - .ptdump = { - .note_page = note_page, - .range = (struct ptdump_range[]){ - {info->base_addr, end}, - {0, 0} - } - } - }; - - ptdump_walk_pgd(&st.ptdump, info->mm, NULL); -} - -static void ptdump_initialize(void) -{ - unsigned i, j; - - for (i = 0; i < ARRAY_SIZE(pg_level); i++) - if (pg_level[i].bits) - for (j = 0; j < pg_level[i].num; j++) - pg_level[i].mask |= pg_level[i].bits[j].mask; -} - -static struct ptdump_info kernel_ptdump_info = { - .mm = &init_mm, - .markers = address_markers, - .base_addr = PAGE_OFFSET, -}; - -void ptdump_check_wx(void) -{ - 
struct pg_state st = { - .seq = NULL, - .marker = (struct addr_marker[]) { - { 0, NULL}, - { -1, NULL}, - }, - .level = -1, - .check_wx = true, - .ptdump = { - .note_page = note_page, - .range = (struct ptdump_range[]) { - {PAGE_OFFSET, ~0UL}, - {0, 0} - } - } - }; - - ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); - - if (st.wx_pages || st.uxn_pages) - pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n", - st.wx_pages, st.uxn_pages); - else - pr_info("Checked W+X mappings: passed, no W+X pages found\n"); -} - -static int ptdump_init(void) -{ - address_markers[PAGE_END_NR].start_address = PAGE_END; -#ifdef CONFIG_KASAN - address_markers[KASAN_START_NR].start_address = KASAN_SHADOW_START; -#endif - ptdump_initialize(); - ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables"); - return 0; -} -device_initcall(ptdump_init); diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c new file mode 100644 index 000000000000..265284dc942d --- /dev/null +++ b/arch/arm64/mm/ptdump.c @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2014, The Linux Foundation. All rights reserved. + * Debug helper to dump the current kernel pagetables of the system + * so that we can see what the various memory ranges are set to. + * + * Derived from x86 and arm implementation: + * (C) Copyright 2008 Intel Corporation + * + * Author: Arjan van de Ven + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +enum address_markers_idx { + PAGE_OFFSET_NR = 0, + PAGE_END_NR, +#ifdef CONFIG_KASAN + KASAN_START_NR, +#endif +}; + +static struct addr_marker address_markers[] = { + { PAGE_OFFSET, "Linear Mapping start" }, + { 0 /* PAGE_END */, "Linear Mapping end" }, +#ifdef CONFIG_KASAN + { 0 /* KASAN_SHADOW_START */, "Kasan shadow start" }, + { KASAN_SHADOW_END, "Kasan shadow end" }, +#endif + { BPF_JIT_REGION_START, "BPF start" }, + { BPF_JIT_REGION_END, "BPF end" }, + { MODULES_VADDR, "Modules start" }, + { MODULES_END, "Modules end" }, + { VMALLOC_START, "vmalloc() area" }, + { VMALLOC_END, "vmalloc() end" }, + { FIXADDR_START, "Fixmap start" }, + { FIXADDR_TOP, "Fixmap end" }, + { PCI_IO_START, "PCI I/O start" }, + { PCI_IO_END, "PCI I/O end" }, +#ifdef CONFIG_SPARSEMEM_VMEMMAP + { VMEMMAP_START, "vmemmap start" }, + { VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" }, +#endif + { -1, NULL }, +}; + +#define pt_dump_seq_printf(m, fmt, args...) \ +({ \ + if (m) \ + seq_printf(m, fmt, ##args); \ +}) + +#define pt_dump_seq_puts(m, fmt) \ +({ \ + if (m) \ + seq_printf(m, fmt); \ +}) + +/* + * The page dumper groups page table entries of the same type into a single + * description. It uses pg_state to track the range information while + * iterating over the pte entries. When the continuity is broken it then + * dumps out a description of the range. 
+ */ +struct pg_state { + struct ptdump_state ptdump; + struct seq_file *seq; + const struct addr_marker *marker; + unsigned long start_address; + int level; + u64 current_prot; + bool check_wx; + unsigned long wx_pages; + unsigned long uxn_pages; +}; + +struct prot_bits { + u64 mask; + u64 val; + const char *set; + const char *clear; +}; + +static const struct prot_bits pte_bits[] = { + { + .mask = PTE_VALID, + .val = PTE_VALID, + .set = " ", + .clear = "F", + }, { + .mask = PTE_USER, + .val = PTE_USER, + .set = "USR", + .clear = " ", + }, { + .mask = PTE_RDONLY, + .val = PTE_RDONLY, + .set = "ro", + .clear = "RW", + }, { + .mask = PTE_PXN, + .val = PTE_PXN, + .set = "NX", + .clear = "x ", + }, { + .mask = PTE_SHARED, + .val = PTE_SHARED, + .set = "SHD", + .clear = " ", + }, { + .mask = PTE_AF, + .val = PTE_AF, + .set = "AF", + .clear = " ", + }, { + .mask = PTE_NG, + .val = PTE_NG, + .set = "NG", + .clear = " ", + }, { + .mask = PTE_CONT, + .val = PTE_CONT, + .set = "CON", + .clear = " ", + }, { + .mask = PTE_TABLE_BIT, + .val = PTE_TABLE_BIT, + .set = " ", + .clear = "BLK", + }, { + .mask = PTE_UXN, + .val = PTE_UXN, + .set = "UXN", + .clear = " ", + }, { + .mask = PTE_GP, + .val = PTE_GP, + .set = "GP", + .clear = " ", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_nGnRnE), + .set = "DEVICE/nGnRnE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_nGnRE), + .set = "DEVICE/nGnRE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_GRE), + .set = "DEVICE/GRE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_NORMAL_NC), + .set = "MEM/NORMAL-NC", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_NORMAL), + .set = "MEM/NORMAL", + } +}; + +struct pg_level { + const struct prot_bits *bits; + const char *name; + size_t num; + u64 mask; +}; + +static struct pg_level pg_level[] = { + { /* pgd */ + .name = "PGD", + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* p4d */ + .name = "P4D", + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pud */ + .name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD", + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pmd */ + .name = (CONFIG_PGTABLE_LEVELS > 2) ? 
"PMD" : "PGD", + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pte */ + .name = "PTE", + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, +}; + +static void dump_prot(struct pg_state *st, const struct prot_bits *bits, + size_t num) +{ + unsigned i; + + for (i = 0; i < num; i++, bits++) { + const char *s; + + if ((st->current_prot & bits->mask) == bits->val) + s = bits->set; + else + s = bits->clear; + + if (s) + pt_dump_seq_printf(st->seq, " %s", s); + } +} + +static void note_prot_uxn(struct pg_state *st, unsigned long addr) +{ + if (!st->check_wx) + return; + + if ((st->current_prot & PTE_UXN) == PTE_UXN) + return; + + WARN_ONCE(1, "arm64/mm: Found non-UXN mapping at address %p/%pS\n", + (void *)st->start_address, (void *)st->start_address); + + st->uxn_pages += (addr - st->start_address) / PAGE_SIZE; +} + +static void note_prot_wx(struct pg_state *st, unsigned long addr) +{ + if (!st->check_wx) + return; + if ((st->current_prot & PTE_RDONLY) == PTE_RDONLY) + return; + if ((st->current_prot & PTE_PXN) == PTE_PXN) + return; + + WARN_ONCE(1, "arm64/mm: Found insecure W+X mapping at address %p/%pS\n", + (void *)st->start_address, (void *)st->start_address); + + st->wx_pages += (addr - st->start_address) / PAGE_SIZE; +} + +static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, + u64 val) +{ + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); + static const char units[] = "KMGTPE"; + u64 prot = 0; + + if (level >= 0) + prot = val & pg_level[level].mask; + + if (st->level == -1) { + st->level = level; + st->current_prot = prot; + st->start_address = addr; + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } else if (prot != st->current_prot || level != st->level || + addr >= st->marker[1].start_address) { + const char *unit = units; + unsigned long delta; + + if (st->current_prot) { + note_prot_uxn(st, addr); + note_prot_wx(st, addr); + } + + pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ", + st->start_address, addr); + + delta = (addr - st->start_address) >> 10; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit, + pg_level[st->level].name); + if (st->current_prot && pg_level[st->level].bits) + dump_prot(st, pg_level[st->level].bits, + pg_level[st->level].num); + pt_dump_seq_puts(st->seq, "\n"); + + if (addr >= st->marker[1].start_address) { + st->marker++; + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + + st->start_address = addr; + st->current_prot = prot; + st->level = level; + } + + if (addr >= st->marker[1].start_address) { + st->marker++; + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + +} + +void ptdump_walk(struct seq_file *s, struct ptdump_info *info) +{ + unsigned long end = ~0UL; + struct pg_state st; + + if (info->base_addr < TASK_SIZE_64) + end = TASK_SIZE_64; + + st = (struct pg_state){ + .seq = s, + .marker = info->markers, + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]){ + {info->base_addr, end}, + {0, 0} + } + } + }; + + ptdump_walk_pgd(&st.ptdump, info->mm, NULL); +} + +static void ptdump_initialize(void) +{ + unsigned i, j; + + for (i = 0; i < ARRAY_SIZE(pg_level); i++) + if (pg_level[i].bits) + for (j = 0; j < pg_level[i].num; j++) + pg_level[i].mask |= pg_level[i].bits[j].mask; +} + +static struct ptdump_info kernel_ptdump_info = { + .mm = &init_mm, + .markers = address_markers, + .base_addr = PAGE_OFFSET, +}; + +void ptdump_check_wx(void) +{ + 
struct pg_state st = { + .seq = NULL, + .marker = (struct addr_marker[]) { + { 0, NULL}, + { -1, NULL}, + }, + .level = -1, + .check_wx = true, + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]) { + {PAGE_OFFSET, ~0UL}, + {0, 0} + } + } + }; + + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); + + if (st.wx_pages || st.uxn_pages) + pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n", + st.wx_pages, st.uxn_pages); + else + pr_info("Checked W+X mappings: passed, no W+X pages found\n"); +} + +static int ptdump_init(void) +{ + address_markers[PAGE_END_NR].start_address = PAGE_END; +#ifdef CONFIG_KASAN + address_markers[KASAN_START_NR].start_address = KASAN_SHADOW_START; +#endif + ptdump_initialize(); + ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables"); + return 0; +} +device_initcall(ptdump_init); -- cgit From 3102bc0e6ac752cc5df896acb557d779af4d82a1 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Sat, 29 Aug 2020 14:00:16 +0100 Subject: arm64: topology: Stop using MPIDR for topology information In the absence of ACPI or DT topology data, we fallback to haphazardly decoding *something* out of MPIDR. Sadly, the contents of that register are mostly unusable due to the implementation leniancy and things like Aff0 having to be capped to 15 (despite being encoded on 8 bits). Consider a simple system with a single package of 32 cores, all under the same LLC. We ought to be shoving them in the same core_sibling mask, but MPIDR is going to look like: | CPU | 0 | ... | 15 | 16 | ... | 31 | |------+---+-----+----+----+-----+----+ | Aff0 | 0 | ... | 15 | 0 | ... | 15 | | Aff1 | 0 | ... | 0 | 1 | ... | 1 | | Aff2 | 0 | ... | 0 | 0 | ... | 0 | Which will eventually yield core_sibling(0-15) == 0-15 core_sibling(16-31) == 16-31 NUMA woes ========= If we try to play games with this and set up NUMA boundaries within those groups of 16 cores via e.g. QEMU: # Node0: 0-9; Node1: 10-19 $ qemu-system-aarch64 \ -smp 20 -numa node,cpus=0-9,nodeid=0 -numa node,cpus=10-19,nodeid=1 The scheduler's MC domain (all CPUs with same LLC) is going to be built via arch_topology.c::cpu_coregroup_mask() In there we try to figure out a sensible mask out of the topology information we have. In short, here we'll pick the smallest of NUMA or core sibling mask. node_mask(CPU9) == 0-9 core_sibling(CPU9) == 0-15 MC mask for CPU9 will thus be 0-9, not a problem. node_mask(CPU10) == 10-19 core_sibling(CPU10) == 0-15 MC mask for CPU10 will thus be 10-19, not a problem. node_mask(CPU16) == 10-19 core_sibling(CPU16) == 16-19 MC mask for CPU16 will thus be 16-19... Uh oh. CPUs 16-19 are in two different unique MC spans, and the scheduler has no idea what to make of that. That triggers the WARN_ON() added by commit ccf74128d66c ("sched/topology: Assert non-NUMA topology masks don't (partially) overlap") Fixing MPIDR-derived topology ============================= We could try to come up with some cleverer scheme to figure out which of the available masks to pick, but really if one of those masks resulted from MPIDR then it should be discarded because it's bound to be bogus. I was hoping to give MPIDR a chance for SMT, to figure out which threads are in the same core using Aff1-3 as core ID, but Sudeep and Robin pointed out to me that there are systems out there where *all* cores have non-zero values in their higher affinity fields (e.g. RK3288 has "5" in all of its cores' MPIDR.Aff1), which would expose a bogus core ID to userspace. 
Stop using MPIDR for topology information. When no other source of topology information is available, mark each CPU as its own core and its NUMA node as its LLC domain. Signed-off-by: Valentin Schneider Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20200829130016.26106-1-valentin.schneider@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/topology.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 0801a0f3c156..ff1dd1dbfe64 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -36,21 +36,23 @@ void store_cpu_topology(unsigned int cpuid) if (mpidr & MPIDR_UP_BITMASK) return; - /* Create cpu topology mapping based on MPIDR. */ - if (mpidr & MPIDR_MT_BITMASK) { - /* Multiprocessor system : Multi-threads per core */ - cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); - cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) | - MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8; - } else { - /* Multiprocessor system : Single-thread per core */ - cpuid_topo->thread_id = -1; - cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) | - MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 | - MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16; - } + /* + * This would be the place to create cpu topology based on MPIDR. + * + * However, it cannot be trusted to depict the actual topology; some + * pieces of the architecture enforce an artificial cap on Aff0 values + * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an + * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up + * having absolutely no relationship to the actual underlying system + * topology, and cannot be reasonably used as core / package ID. + * + * If the MT bit is set, Aff0 *could* be used to define a thread ID, but + * we still wouldn't be able to obtain a sane core ID. This means we + * need to entirely ignore MPIDR for any topology deduction. + */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = cpuid; + cpuid_topo->package_id = cpu_to_node(cpuid); pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", cpuid, cpuid_topo->package_id, cpuid_topo->core_id, -- cgit From b65399f6111b03df870d8daa75b8d140c0de93f4 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 9 Sep 2020 10:23:02 +0530 Subject: arm64/mm: Change THP helpers to comply with generic MM semantics pmd_present() and pmd_trans_huge() are expected to behave in the following manner during various phases of a given PMD. It is derived from a previous detailed discussion on this topic [1] and present THP documentation [2]. 
pmd_present(pmd): - Returns true if pmd refers to system RAM with a valid pmd_page(pmd) - Returns false if pmd refers to a migration or swap entry pmd_trans_huge(pmd): - Returns true if pmd refers to system RAM and is a trans huge mapping ------------------------------------------------------------------------- | PMD states | pmd_present | pmd_trans_huge | ------------------------------------------------------------------------- | Mapped | Yes | Yes | ------------------------------------------------------------------------- | Splitting | Yes | Yes | ------------------------------------------------------------------------- | Migration/Swap | No | No | ------------------------------------------------------------------------- The problem: PMD is first invalidated with pmdp_invalidate() before it's splitting. This invalidation clears PMD_SECT_VALID as below. PMD Split -> pmdp_invalidate() -> pmd_mkinvalid -> Clears PMD_SECT_VALID Once PMD_SECT_VALID gets cleared, it results in pmd_present() return false on the PMD entry. It will need another bit apart from PMD_SECT_VALID to re- affirm pmd_present() as true during the THP split process. To comply with above mentioned semantics, pmd_trans_huge() should also check pmd_present() first before testing presence of an actual transparent huge mapping. The solution: Ideally PMD_TYPE_SECT should have been used here instead. But it shares the bit position with PMD_SECT_VALID which is used for THP invalidation. Hence it will not be there for pmd_present() check after pmdp_invalidate(). A new software defined PMD_PRESENT_INVALID (bit 59) can be set on the PMD entry during invalidation which can help pmd_present() return true and in recognizing the fact that it still points to memory. This bit is transient. During the split process it will be overridden by a page table page representing normal pages in place of erstwhile huge page. Other pmdp_invalidate() callers always write a fresh PMD value on the entry overriding this transient PMD_PRESENT_INVALID bit, which makes it safe. [1]: https://lkml.org/lkml/2018/10/17/231 [2]: https://www.kernel.org/doc/Documentation/vm/transhuge.txt Signed-off-by: Anshuman Khandual Reviewed-by: Catalin Marinas Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Marc Zyngier Cc: Suzuki Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/1599627183-14453-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-prot.h | 7 +++++++ arch/arm64/include/asm/pgtable.h | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 4d867c6446c4..2df4b75fce3c 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -19,6 +19,13 @@ #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ +/* + * This bit indicates that the entry is present i.e. pmd_page() + * still points to a valid huge page in memory even if the pmd + * has been invalidated. 
+ */ +#define PMD_PRESENT_INVALID (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */ + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d5d3fbe73953..d8258ae8fce0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -145,6 +145,18 @@ static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot) return pte; } +static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot) +{ + pmd_val(pmd) &= ~pgprot_val(prot); + return pmd; +} + +static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) +{ + pmd_val(pmd) |= pgprot_val(prot); + return pmd; +} + static inline pte_t pte_wrprotect(pte_t pte) { pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); @@ -363,15 +375,24 @@ static inline int pmd_protnone(pmd_t pmd) } #endif +#define pmd_present_invalid(pmd) (!!(pmd_val(pmd) & PMD_PRESENT_INVALID)) + +static inline int pmd_present(pmd_t pmd) +{ + return pte_present(pmd_pte(pmd)) || pmd_present_invalid(pmd); +} + /* * THP definitions. */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) +static inline int pmd_trans_huge(pmd_t pmd) +{ + return pmd_val(pmd) && pmd_present(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -#define pmd_present(pmd) pte_present(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) #define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) @@ -381,7 +402,14 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) -#define pmd_mkinvalid(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID)) + +static inline pmd_t pmd_mkinvalid(pmd_t pmd) +{ + pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID)); + pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID)); + + return pmd; +} #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) -- cgit From 53fa117bb33c9ebc4c0746b3830e273f5e9b97b7 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 9 Sep 2020 10:23:03 +0530 Subject: arm64/mm: Enable THP migration In certain page migration situations, a THP page can be migrated without being split into it's constituent subpages. This saves time required to split a THP and put it back together when required. But it also saves an wider address range translation covered by a single TLB entry, reducing future page fault costs. A previous patch changed platform THP helpers per generic memory semantics, clearing the path for THP migration support. This adds two more THP helpers required to create PMD migration swap entries. Now enable THP migration via ARCH_ENABLE_THP_MIGRATION. 
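As an aside, and purely for illustration (a stand-alone user-space model, not kernel code), the pmd_present()/pmd_mkinvalid() interplay from the previous patch, which THP migration depends on, can be sketched as follows. The bit positions mirror the arm64 definitions (PMD_SECT_VALID at bit 0, PMD_PRESENT_INVALID at bit 59); everything else is made up for the example:

  #include <assert.h>
  #include <stdint.h>

  #define SECT_VALID       (UINT64_C(1) << 0)   /* PMD_SECT_VALID */
  #define PRESENT_INVALID  (UINT64_C(1) << 59)  /* PMD_PRESENT_INVALID */

  static int model_pmd_present(uint64_t pmd)
  {
          return (pmd & SECT_VALID) || (pmd & PRESENT_INVALID);
  }

  static uint64_t model_pmd_mkinvalid(uint64_t pmd)
  {
          /* what pmdp_invalidate() ends up doing to the entry */
          pmd |= PRESENT_INVALID;
          pmd &= ~SECT_VALID;
          return pmd;
  }

  int main(void)
  {
          uint64_t pmd = UINT64_C(0x40000000) | SECT_VALID;  /* mapped huge page */

          assert(model_pmd_present(pmd));
          pmd = model_pmd_mkinvalid(pmd);
          assert(model_pmd_present(pmd));   /* still present to generic MM */
          assert(!(pmd & SECT_VALID));      /* but no longer walkable by hardware */
          return 0;
  }

The only point being modelled is that an invalidated huge PMD is no longer walkable by hardware yet still reports as present to generic MM code.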
Signed-off-by: Anshuman Khandual Reviewed-by: Catalin Marinas Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Marc Zyngier Cc: Suzuki Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/1599627183-14453-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 4 ++++ arch/arm64/include/asm/pgtable.h | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6d232837cbee..e21b94061780 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1876,6 +1876,10 @@ config ARCH_ENABLE_HUGEPAGE_MIGRATION def_bool y depends on HUGETLB_PAGE && MIGRATION +config ARCH_ENABLE_THP_MIGRATION + def_bool y + depends on TRANSPARENT_HUGEPAGE + menu "Power management options" source "kernel/power/Kconfig" diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d8258ae8fce0..bc68da9f5706 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -875,6 +875,11 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(swp) ((pte_t) { (swp).val }) +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) }) +#define __swp_entry_to_pmd(swp) __pmd((swp).val) +#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ + /* * Ensure that there are not more swap files than can be encoded in the kernel * PTEs. -- cgit From 4e56de82d4ec9d98ffdc9e0387d8fdecbf496226 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 9 Sep 2020 11:18:55 +0530 Subject: arm64/cpuinfo: Define HWCAP name arrays per their actual bit definitions HWCAP name arrays (hwcap_str, compat_hwcap_str, compat_hwcap2_str) that are scanned for /proc/cpuinfo are detached from their bit definitions making it vulnerable and difficult to correlate. It is also bit problematic because during /proc/cpuinfo dump these arrays get traversed sequentially assuming they reflect and match actual HWCAP bit sequence, to test various features for a given CPU. This redefines name arrays per their HWCAP bit definitions . It also warns after detecting any feature which is not expected on arm64. 
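Purely as an illustration of the mechanism (generic C, not the kernel code), index-keyed designated initializers are what keep each string tied to its bit definition, and any bit without a name is implicitly NULL, which the /proc/cpuinfo loop can then skip and warn about. All names and bit values below are made up:

  #include <stdio.h>

  #define FEAT_A  0
  #define FEAT_B  2          /* bit 1 deliberately has no name */

  static const char *const names[] = {
          [FEAT_A] = "feat_a",
          [FEAT_B] = "feat_b",
  };

  int main(void)
  {
          unsigned long hwcaps = (1UL << FEAT_A) | (1UL << 1) | (1UL << FEAT_B);
          unsigned int i;

          for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
                  if (!(hwcaps & (1UL << i)))
                          continue;
                  if (!names[i]) {          /* unexpected feature: warn */
                          fprintf(stderr, "unknown hwcap bit %u\n", i);
                          continue;
                  }
                  printf(" %s", names[i]);
          }
          printf("\n");
          return 0;
  }

The reworked c_show() loops follow the same shape: iterate up to ARRAY_SIZE() instead of stopping at the first NULL entry, so a hole in the table can no longer silently truncate the Features line.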
Signed-off-by: Anshuman Khandual Acked-by: Catalin Marinas Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Brown Cc: Dave Martin Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Suzuki K Poulose Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/1599630535-29337-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/hwcap.h | 9 +++ arch/arm64/kernel/cpuinfo.c | 177 +++++++++++++++++++++-------------------- 2 files changed, 101 insertions(+), 85 deletions(-) diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 22f73fe09030..6493a4c63a2f 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -8,18 +8,27 @@ #include #include +#define COMPAT_HWCAP_SWP (1 << 0) #define COMPAT_HWCAP_HALF (1 << 1) #define COMPAT_HWCAP_THUMB (1 << 2) +#define COMPAT_HWCAP_26BIT (1 << 3) #define COMPAT_HWCAP_FAST_MULT (1 << 4) +#define COMPAT_HWCAP_FPA (1 << 5) #define COMPAT_HWCAP_VFP (1 << 6) #define COMPAT_HWCAP_EDSP (1 << 7) +#define COMPAT_HWCAP_JAVA (1 << 8) +#define COMPAT_HWCAP_IWMMXT (1 << 9) +#define COMPAT_HWCAP_CRUNCH (1 << 10) +#define COMPAT_HWCAP_THUMBEE (1 << 11) #define COMPAT_HWCAP_NEON (1 << 12) #define COMPAT_HWCAP_VFPv3 (1 << 13) +#define COMPAT_HWCAP_VFPV3D16 (1 << 14) #define COMPAT_HWCAP_TLS (1 << 15) #define COMPAT_HWCAP_VFPv4 (1 << 16) #define COMPAT_HWCAP_IDIVA (1 << 17) #define COMPAT_HWCAP_IDIVT (1 << 18) #define COMPAT_HWCAP_IDIV (COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT) +#define COMPAT_HWCAP_VFPD32 (1 << 19) #define COMPAT_HWCAP_LPAE (1 << 20) #define COMPAT_HWCAP_EVTSTRM (1 << 21) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index d0076c2159e6..25113245825c 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -43,94 +43,92 @@ static const char *icache_policy_str[] = { unsigned long __icache_flags; static const char *const hwcap_str[] = { - "fp", - "asimd", - "evtstrm", - "aes", - "pmull", - "sha1", - "sha2", - "crc32", - "atomics", - "fphp", - "asimdhp", - "cpuid", - "asimdrdm", - "jscvt", - "fcma", - "lrcpc", - "dcpop", - "sha3", - "sm3", - "sm4", - "asimddp", - "sha512", - "sve", - "asimdfhm", - "dit", - "uscat", - "ilrcpc", - "flagm", - "ssbs", - "sb", - "paca", - "pacg", - "dcpodp", - "sve2", - "sveaes", - "svepmull", - "svebitperm", - "svesha3", - "svesm4", - "flagm2", - "frint", - "svei8mm", - "svef32mm", - "svef64mm", - "svebf16", - "i8mm", - "bf16", - "dgh", - "rng", - "bti", - /* reserved for "mte" */ - NULL + [KERNEL_HWCAP_FP] = "fp", + [KERNEL_HWCAP_ASIMD] = "asimd", + [KERNEL_HWCAP_EVTSTRM] = "evtstrm", + [KERNEL_HWCAP_AES] = "aes", + [KERNEL_HWCAP_PMULL] = "pmull", + [KERNEL_HWCAP_SHA1] = "sha1", + [KERNEL_HWCAP_SHA2] = "sha2", + [KERNEL_HWCAP_CRC32] = "crc32", + [KERNEL_HWCAP_ATOMICS] = "atomics", + [KERNEL_HWCAP_FPHP] = "fphp", + [KERNEL_HWCAP_ASIMDHP] = "asimdhp", + [KERNEL_HWCAP_CPUID] = "cpuid", + [KERNEL_HWCAP_ASIMDRDM] = "asimdrdm", + [KERNEL_HWCAP_JSCVT] = "jscvt", + [KERNEL_HWCAP_FCMA] = "fcma", + [KERNEL_HWCAP_LRCPC] = "lrcpc", + [KERNEL_HWCAP_DCPOP] = "dcpop", + [KERNEL_HWCAP_SHA3] = "sha3", + [KERNEL_HWCAP_SM3] = "sm3", + [KERNEL_HWCAP_SM4] = "sm4", + [KERNEL_HWCAP_ASIMDDP] = "asimddp", + [KERNEL_HWCAP_SHA512] = "sha512", + [KERNEL_HWCAP_SVE] = "sve", + [KERNEL_HWCAP_ASIMDFHM] = "asimdfhm", + [KERNEL_HWCAP_DIT] = "dit", + [KERNEL_HWCAP_USCAT] = "uscat", + [KERNEL_HWCAP_ILRCPC] = "ilrcpc", + [KERNEL_HWCAP_FLAGM] = "flagm", + [KERNEL_HWCAP_SSBS] = "ssbs", + 
[KERNEL_HWCAP_SB] = "sb", + [KERNEL_HWCAP_PACA] = "paca", + [KERNEL_HWCAP_PACG] = "pacg", + [KERNEL_HWCAP_DCPODP] = "dcpodp", + [KERNEL_HWCAP_SVE2] = "sve2", + [KERNEL_HWCAP_SVEAES] = "sveaes", + [KERNEL_HWCAP_SVEPMULL] = "svepmull", + [KERNEL_HWCAP_SVEBITPERM] = "svebitperm", + [KERNEL_HWCAP_SVESHA3] = "svesha3", + [KERNEL_HWCAP_SVESM4] = "svesm4", + [KERNEL_HWCAP_FLAGM2] = "flagm2", + [KERNEL_HWCAP_FRINT] = "frint", + [KERNEL_HWCAP_SVEI8MM] = "svei8mm", + [KERNEL_HWCAP_SVEF32MM] = "svef32mm", + [KERNEL_HWCAP_SVEF64MM] = "svef64mm", + [KERNEL_HWCAP_SVEBF16] = "svebf16", + [KERNEL_HWCAP_I8MM] = "i8mm", + [KERNEL_HWCAP_BF16] = "bf16", + [KERNEL_HWCAP_DGH] = "dgh", + [KERNEL_HWCAP_RNG] = "rng", + [KERNEL_HWCAP_BTI] = "bti", }; #ifdef CONFIG_COMPAT +#define COMPAT_KERNEL_HWCAP(x) const_ilog2(COMPAT_HWCAP_ ## x) static const char *const compat_hwcap_str[] = { - "swp", - "half", - "thumb", - "26bit", - "fastmult", - "fpa", - "vfp", - "edsp", - "java", - "iwmmxt", - "crunch", - "thumbee", - "neon", - "vfpv3", - "vfpv3d16", - "tls", - "vfpv4", - "idiva", - "idivt", - "vfpd32", - "lpae", - "evtstrm", - NULL + [COMPAT_KERNEL_HWCAP(SWP)] = "swp", + [COMPAT_KERNEL_HWCAP(HALF)] = "half", + [COMPAT_KERNEL_HWCAP(THUMB)] = "thumb", + [COMPAT_KERNEL_HWCAP(26BIT)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(FAST_MULT)] = "fastmult", + [COMPAT_KERNEL_HWCAP(FPA)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(VFP)] = "vfp", + [COMPAT_KERNEL_HWCAP(EDSP)] = "edsp", + [COMPAT_KERNEL_HWCAP(JAVA)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(IWMMXT)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(CRUNCH)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(THUMBEE)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(NEON)] = "neon", + [COMPAT_KERNEL_HWCAP(VFPv3)] = "vfpv3", + [COMPAT_KERNEL_HWCAP(VFPV3D16)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(TLS)] = "tls", + [COMPAT_KERNEL_HWCAP(VFPv4)] = "vfpv4", + [COMPAT_KERNEL_HWCAP(IDIVA)] = "idiva", + [COMPAT_KERNEL_HWCAP(IDIVT)] = "idivt", + [COMPAT_KERNEL_HWCAP(VFPD32)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(LPAE)] = "lpae", + [COMPAT_KERNEL_HWCAP(EVTSTRM)] = "evtstrm", }; +#define COMPAT_KERNEL_HWCAP2(x) const_ilog2(COMPAT_HWCAP2_ ## x) static const char *const compat_hwcap2_str[] = { - "aes", - "pmull", - "sha1", - "sha2", - "crc32", - NULL + [COMPAT_KERNEL_HWCAP2(AES)] = "aes", + [COMPAT_KERNEL_HWCAP2(PMULL)] = "pmull", + [COMPAT_KERNEL_HWCAP2(SHA1)] = "sha1", + [COMPAT_KERNEL_HWCAP2(SHA2)] = "sha2", + [COMPAT_KERNEL_HWCAP2(CRC32)] = "crc32", }; #endif /* CONFIG_COMPAT */ @@ -166,16 +164,25 @@ static int c_show(struct seq_file *m, void *v) seq_puts(m, "Features\t:"); if (compat) { #ifdef CONFIG_COMPAT - for (j = 0; compat_hwcap_str[j]; j++) - if (compat_elf_hwcap & (1 << j)) + for (j = 0; j < ARRAY_SIZE(compat_hwcap_str); j++) { + if (compat_elf_hwcap & (1 << j)) { + /* + * Warn once if any feature should not + * have been present on arm64 platform. 
+ */ + if (WARN_ON_ONCE(!compat_hwcap_str[j])) + continue; + seq_printf(m, " %s", compat_hwcap_str[j]); + } + } - for (j = 0; compat_hwcap2_str[j]; j++) + for (j = 0; j < ARRAY_SIZE(compat_hwcap2_str); j++) if (compat_elf_hwcap2 & (1 << j)) seq_printf(m, " %s", compat_hwcap2_str[j]); #endif /* CONFIG_COMPAT */ } else { - for (j = 0; hwcap_str[j]; j++) + for (j = 0; j < ARRAY_SIZE(hwcap_str); j++) if (cpu_have_feature(j)) seq_printf(m, " %s", hwcap_str[j]); } -- cgit From 11e339d53a739e4cf885c1e2a843a4004204d271 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Sep 2020 19:59:34 +1000 Subject: arm64/mm: Remove CONT_RANGE_OFFSET The macro was introduced by commit ("arm64: PTE/PMD contiguous bit definition") at the beginning. It's only used by commit <348a65cdcbbf> ("arm64: Mark kernel page ranges contiguous"), which was reverted later by commit <667c27597ca8>. This makes the macro unused. This removes the unused macro (CONT_RANGE_OFFSET). Signed-off-by: Gavin Shan Reviewed-by: Anshuman Khandual Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20200910095936.20307-1-gshan@redhat.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-hwdef.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index d400a4d9aee2..8a399e666837 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -98,8 +98,6 @@ #define CONT_PMDS (1 << (CONT_PMD_SHIFT - PMD_SHIFT)) #define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE) #define CONT_PMD_MASK (~(CONT_PMD_SIZE - 1)) -/* the numerical offset of the PTE within a range of CONT_PTES */ -#define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1)) /* * Hardware page table definitions. -- cgit From c0d6de327f18d39ef759aa883bffc3c32bc69015 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Sep 2020 19:59:35 +1000 Subject: arm64/mm: Unify CONT_PTE_SHIFT CONT_PTE_SHIFT actually depends on CONFIG_ARM64_CONT_SHIFT. It's reasonable to reflect the dependency: * This renames CONFIG_ARM64_CONT_SHIFT to CONFIG_ARM64_CONT_PTE_SHIFT, so that we can introduce CONFIG_ARM64_CONT_PMD_SHIFT later. * CONT_{SHIFT, SIZE, MASK}, defined in page-def.h are removed as they are not used by anyone. * CONT_PTE_SHIFT is determined by CONFIG_ARM64_CONT_PTE_SHIFT. 
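For reference, working the new definition through the three page sizes gives the following values (derived from the Kconfig defaults above, not part of the patch):

  CONT_PTE_SHIFT = CONFIG_ARM64_CONT_PTE_SHIFT + PAGE_SHIFT

  4K  pages: 4 + 12 = 16  ->  CONT_PTES = 16,  CONT_PTE_SIZE = 64KB
  16K pages: 7 + 14 = 21  ->  CONT_PTES = 128, CONT_PTE_SIZE = 2MB
  64K pages: 5 + 16 = 21  ->  CONT_PTES = 32,  CONT_PTE_SIZE = 2MB

These match what the old hard-coded #ifdef ladder produced, so no functional change is intended.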
Signed-off-by: Gavin Shan Reviewed-by: Anshuman Khandual Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20200910095936.20307-2-gshan@redhat.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 +- arch/arm64/include/asm/page-def.h | 5 ----- arch/arm64/include/asm/pgtable-hwdef.h | 4 +--- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e21b94061780..4f3b7b24b5e5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -211,7 +211,7 @@ config ARM64_PAGE_SHIFT default 14 if ARM64_16K_PAGES default 12 -config ARM64_CONT_SHIFT +config ARM64_CONT_PTE_SHIFT int default 5 if ARM64_64K_PAGES default 7 if ARM64_16K_PAGES diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h index f99d48ecbeef..2403f7b4cdbf 100644 --- a/arch/arm64/include/asm/page-def.h +++ b/arch/arm64/include/asm/page-def.h @@ -11,13 +11,8 @@ #include /* PAGE_SHIFT determines the page size */ -/* CONT_SHIFT determines the number of pages which can be tracked together */ #define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT -#define CONT_SHIFT CONFIG_ARM64_CONT_SHIFT #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT)) -#define CONT_MASK (~(CONT_SIZE-1)) - #endif /* __ASM_PAGE_DEF_H */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 8a399e666837..6c9c67f62551 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -81,14 +81,12 @@ /* * Contiguous page definitions. */ +#define CONT_PTE_SHIFT (CONFIG_ARM64_CONT_PTE_SHIFT + PAGE_SHIFT) #ifdef CONFIG_ARM64_64K_PAGES -#define CONT_PTE_SHIFT (5 + PAGE_SHIFT) #define CONT_PMD_SHIFT (5 + PMD_SHIFT) #elif defined(CONFIG_ARM64_16K_PAGES) -#define CONT_PTE_SHIFT (7 + PAGE_SHIFT) #define CONT_PMD_SHIFT (5 + PMD_SHIFT) #else -#define CONT_PTE_SHIFT (4 + PAGE_SHIFT) #define CONT_PMD_SHIFT (4 + PMD_SHIFT) #endif -- cgit From e676594115f0bcc12d4791f9ee919e20d8d750ee Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Sep 2020 19:59:36 +1000 Subject: arm64/mm: Unify CONT_PMD_SHIFT Similar to how CONT_PTE_SHIFT is determined, this introduces a new kernel option (CONFIG_CONT_PMD_SHIFT) to determine CONT_PMD_SHIFT. Signed-off-by: Gavin Shan Reviewed-by: Anshuman Khandual Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20200910095936.20307-3-gshan@redhat.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 6 ++++++ arch/arm64/include/asm/pgtable-hwdef.h | 10 ++-------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4f3b7b24b5e5..609629e36779 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -217,6 +217,12 @@ config ARM64_CONT_PTE_SHIFT default 7 if ARM64_16K_PAGES default 4 +config ARM64_CONT_PMD_SHIFT + int + default 5 if ARM64_64K_PAGES + default 5 if ARM64_16K_PAGES + default 4 + config ARCH_MMAP_RND_BITS_MIN default 14 if ARM64_64K_PAGES default 16 if ARM64_16K_PAGES diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 6c9c67f62551..94b3f2ac2e9d 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -82,17 +82,11 @@ * Contiguous page definitions. 
*/ #define CONT_PTE_SHIFT (CONFIG_ARM64_CONT_PTE_SHIFT + PAGE_SHIFT) -#ifdef CONFIG_ARM64_64K_PAGES -#define CONT_PMD_SHIFT (5 + PMD_SHIFT) -#elif defined(CONFIG_ARM64_16K_PAGES) -#define CONT_PMD_SHIFT (5 + PMD_SHIFT) -#else -#define CONT_PMD_SHIFT (4 + PMD_SHIFT) -#endif - #define CONT_PTES (1 << (CONT_PTE_SHIFT - PAGE_SHIFT)) #define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE) #define CONT_PTE_MASK (~(CONT_PTE_SIZE - 1)) + +#define CONT_PMD_SHIFT (CONFIG_ARM64_CONT_PMD_SHIFT + PMD_SHIFT) #define CONT_PMDS (1 << (CONT_PMD_SHIFT - PMD_SHIFT)) #define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE) #define CONT_PMD_MASK (~(CONT_PMD_SIZE - 1)) -- cgit From 2a493132146152c21587414d96ba9026e43acc3d Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Wed, 9 Sep 2020 17:28:02 +0800 Subject: arm64: Remove the unused include statements linux/arm-smccc.h is included more than once, Remove the one that isn't necessary. Signed-off-by: Tian Tao Reviewed-by: Steven Price Link: https://lore.kernel.org/r/1599643682-10404-1-git-send-email-tiantao6@hisilicon.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index c332d49780dc..9df3f9bb7950 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -169,8 +169,6 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn, } #endif /* CONFIG_KVM_INDIRECT_VECTORS */ -#include - static void __maybe_unused call_smc_arch_workaround_1(void) { arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); -- cgit From 72789a4a6a914601912a8992b0d43bd5abc05128 Mon Sep 17 00:00:00 2001 From: Pingfan Liu Date: Fri, 28 Aug 2020 21:39:57 +0800 Subject: arm64/relocate_kernel: remove redundant code Kernel startup entry point requires disabling MMU and D-cache. As for kexec-reboot, taking a close look at "msr sctlr_el1, x12" in __cpu_soft_restart as the following: -1. booted at EL1 The instruction is enough to disable MMU and I/D cache for EL1 regime. -2. booted at EL2, using VHE Access to SCTLR_EL1 is redirected to SCTLR_EL2 in EL2. So the instruction is enough to disable MMU and clear I+C bits for EL2 regime. -3. booted at EL2, not using VHE The instruction itself can not affect EL2 regime. But The hyp-stub doesn't enable the MMU and I/D cache for EL2 regime. And KVM also disable them for EL2 regime when its unloaded, or execute a HVC_SOFT_RESTART call. So when kexec-reboot, the code in KVM has prepare the requirement. 
As a conclusion, disabling MMU and clearing I+C bits in SYM_CODE_START(arm64_relocate_new_kernel) is redundant, and can be removed Signed-off-by: Pingfan Liu Cc: James Morse Cc: Geoff Levand Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Marc Zyngier Cc: Mark Brown Cc: Kees Cook Cc: Remi Denis-Courmont Cc: Ard Biesheuvel Cc: kvmarm@lists.cs.columbia.edu Link: https://lore.kernel.org/r/1598621998-20563-1-git-send-email-kernelfans@gmail.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu-reset.S | 4 ++++ arch/arm64/kernel/relocate_kernel.S | 12 ------------ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index 4a18055b2ff9..37721eb6f9a1 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -35,6 +35,10 @@ SYM_CODE_START(__cpu_soft_restart) mov_q x13, SCTLR_ELx_FLAGS bic x12, x12, x13 pre_disable_mmu_workaround + /* + * either disable EL1&0 translation regime or disable EL2&0 translation + * regime if HCR_EL2.E2H == 1 + */ msr sctlr_el1, x12 isb diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index 542d6edc6806..84eec95ec06c 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -36,18 +36,6 @@ SYM_CODE_START(arm64_relocate_new_kernel) mov x14, xzr /* x14 = entry ptr */ mov x13, xzr /* x13 = copy dest */ - /* Clear the sctlr_el2 flags. */ - mrs x0, CurrentEL - cmp x0, #CurrentEL_EL2 - b.ne 1f - mrs x0, sctlr_el2 - mov_q x1, SCTLR_ELx_FLAGS - bic x0, x0, x1 - pre_disable_mmu_workaround - msr sctlr_el2, x0 - isb -1: - /* Check if the new image needs relocation. */ tbnz x16, IND_DONE_BIT, .Ldone -- cgit From 3a1793066fdfbbd657d9591cc03d6e1b6634a587 Mon Sep 17 00:00:00 2001 From: Pingfan Liu Date: Fri, 28 Aug 2020 21:39:58 +0800 Subject: Documentation/kvm/arm: improve description of HVC_SOFT_RESTART Besides disabling MMU, HVC_SOFT_RESTART also clears I+D bits. These behaviors are what kexec-reboot expects, so describe it more precisely. Signed-off-by: Pingfan Liu Cc: James Morse Cc: Marc Zyngier Cc: Geoff Levand Cc: Catalin Marinas Cc: Will Deacon Cc: Julien Thierry Cc: Suzuki K Poulose Cc: linux-doc@vger.kernel.org Cc: kvmarm@lists.cs.columbia.edu Link: https://lore.kernel.org/r/1598621998-20563-2-git-send-email-kernelfans@gmail.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Will Deacon --- Documentation/virt/kvm/arm/hyp-abi.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/virt/kvm/arm/hyp-abi.rst b/Documentation/virt/kvm/arm/hyp-abi.rst index d9eba93aa364..83cadd8186fa 100644 --- a/Documentation/virt/kvm/arm/hyp-abi.rst +++ b/Documentation/virt/kvm/arm/hyp-abi.rst @@ -54,9 +54,9 @@ these functions (see arch/arm{,64}/include/asm/virt.h): x3 = x1's value when entering the next payload (arm64) x4 = x2's value when entering the next payload (arm64) - Mask all exceptions, disable the MMU, move the arguments into place - (arm64 only), and jump to the restart address while at HYP/EL2. This - hypercall is not expected to return to its caller. + Mask all exceptions, disable the MMU, clear I+D bits, move the arguments + into place (arm64 only), and jump to the restart address while at HYP/EL2. + This hypercall is not expected to return to its caller. Any other value of r0/x0 triggers a hypervisor-specific handling, which is not documented here. 
-- cgit From 93396936ed0ce2c6f44140bd14728611d0bb065e Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Mon, 14 Sep 2020 14:06:51 +0530 Subject: arm64: kprobe: add checks for ARMv8.3-PAuth combined instructions Currently the ARMv8.3-PAuth combined branch instructions (braa, retaa etc.) are not simulated for out-of-line execution with a handler. Hence the uprobe of such instructions leads to kernel warnings in a loop as they are not explicitly checked and fall into INSN_GOOD categories. Other combined instructions like LDRAA and LDRBB can be probed. The issue of the combined branch instructions is fixed by adding group definitions of all such instructions and rejecting their probes. The instruction groups added are br_auth(braa, brab, braaz and brabz), blr_auth(blraa, blrab, blraaz and blrabz), ret_auth(retaa and retab) and eret_auth(eretaa and eretab). Warning log: WARNING: CPU: 0 PID: 156 at arch/arm64/kernel/probes/uprobes.c:182 uprobe_single_step_handler+0x34/0x50 Modules linked in: CPU: 0 PID: 156 Comm: func Not tainted 5.9.0-rc3 #188 Hardware name: Foundation-v8A (DT) pstate: 804003c9 (Nzcv DAIF +PAN -UAO BTYPE=--) pc : uprobe_single_step_handler+0x34/0x50 lr : single_step_handler+0x70/0xf8 sp : ffff800012af3e30 x29: ffff800012af3e30 x28: ffff000878723b00 x27: 0000000000000000 x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 x23: 0000000060001000 x22: 00000000cb000022 x21: ffff800012065ce8 x20: ffff800012af3ec0 x19: ffff800012068d50 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : ffff800010085c90 x8 : 0000000000000000 x7 : 0000000000000000 x6 : ffff80001205a9c8 x5 : ffff80001205a000 x4 : ffff80001233db80 x3 : ffff8000100a7a60 x2 : 0020000000000003 x1 : 0000fffffffff008 x0 : ffff800012af3ec0 Call trace: uprobe_single_step_handler+0x34/0x50 single_step_handler+0x70/0xf8 do_debug_exception+0xb8/0x130 el0_sync_handler+0x138/0x1b8 el0_sync+0x158/0x180 Fixes: 74afda4016a7 ("arm64: compile the kernel with ptrauth return address signing") Fixes: 04ca3204fa09 ("arm64: enable pointer authentication") Signed-off-by: Amit Daniel Kachhap Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200914083656.21428-2-amit.kachhap@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/insn.h | 4 ++++ arch/arm64/kernel/insn.c | 5 ++++- arch/arm64/kernel/probes/decode-insn.c | 3 ++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 0bc46149e491..4b39293d0f72 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -359,9 +359,13 @@ __AARCH64_INSN_FUNCS(brk, 0xFFE0001F, 0xD4200000) __AARCH64_INSN_FUNCS(exception, 0xFF000000, 0xD4000000) __AARCH64_INSN_FUNCS(hint, 0xFFFFF01F, 0xD503201F) __AARCH64_INSN_FUNCS(br, 0xFFFFFC1F, 0xD61F0000) +__AARCH64_INSN_FUNCS(br_auth, 0xFEFFF800, 0xD61F0800) __AARCH64_INSN_FUNCS(blr, 0xFFFFFC1F, 0xD63F0000) +__AARCH64_INSN_FUNCS(blr_auth, 0xFEFFF800, 0xD63F0800) __AARCH64_INSN_FUNCS(ret, 0xFFFFFC1F, 0xD65F0000) +__AARCH64_INSN_FUNCS(ret_auth, 0xFFFFFBFF, 0xD65F0BFF) __AARCH64_INSN_FUNCS(eret, 0xFFFFFFFF, 0xD69F03E0) +__AARCH64_INSN_FUNCS(eret_auth, 0xFFFFFBFF, 0xD69F0BFF) __AARCH64_INSN_FUNCS(mrs, 0xFFF00000, 0xD5300000) __AARCH64_INSN_FUNCS(msr_imm, 0xFFF8F01F, 0xD500401F) __AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000) diff --git a/arch/arm64/kernel/insn.c 
b/arch/arm64/kernel/insn.c index a107375005bc..ccc8c9e22b25 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -176,7 +176,7 @@ bool __kprobes aarch64_insn_uses_literal(u32 insn) bool __kprobes aarch64_insn_is_branch(u32 insn) { - /* b, bl, cb*, tb*, b.cond, br, blr */ + /* b, bl, cb*, tb*, ret*, b.cond, br*, blr* */ return aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn) || @@ -185,8 +185,11 @@ bool __kprobes aarch64_insn_is_branch(u32 insn) aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn) || aarch64_insn_is_ret(insn) || + aarch64_insn_is_ret_auth(insn) || aarch64_insn_is_br(insn) || + aarch64_insn_is_br_auth(insn) || aarch64_insn_is_blr(insn) || + aarch64_insn_is_blr_auth(insn) || aarch64_insn_is_bcond(insn); } diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c index 263d5fba4c8a..c541fb48886e 100644 --- a/arch/arm64/kernel/probes/decode-insn.c +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -29,7 +29,8 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn) aarch64_insn_is_msr_imm(insn) || aarch64_insn_is_msr_reg(insn) || aarch64_insn_is_exception(insn) || - aarch64_insn_is_eret(insn)) + aarch64_insn_is_eret(insn) || + aarch64_insn_is_eret_auth(insn)) return false; /* -- cgit From 4ef333b2d10680b5d966a733ed7171f72164fcd5 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Mon, 14 Sep 2020 14:06:52 +0530 Subject: arm64: traps: Allow force_signal_inject to pass esr error code Some error signal need to pass proper ARM esr error code to userspace to better identify the cause of the signal. So the function force_signal_inject is extended to pass this as a parameter. The existing code is not affected by this change. Signed-off-by: Amit Daniel Kachhap Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200914083656.21428-3-amit.kachhap@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/traps.h | 2 +- arch/arm64/kernel/fpsimd.c | 4 ++-- arch/arm64/kernel/traps.c | 14 +++++++------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index cee5928e1b7d..d96dc2c7c09d 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -24,7 +24,7 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); -void force_signal_inject(int signal, int code, unsigned long address); +void force_signal_inject(int signal, int code, unsigned long address, unsigned int err); void arm64_notify_segfault(unsigned long addr); void arm64_force_sig_fault(int signo, int code, void __user *addr, const char *str); void arm64_force_sig_mceerr(int code, void __user *addr, short lsb, const char *str); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 55c8f3ec6705..77484359d44a 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -312,7 +312,7 @@ static void fpsimd_save(void) * re-enter user with corrupt state. 
* There's no way to recover, so kill it: */ - force_signal_inject(SIGKILL, SI_KERNEL, 0); + force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); return; } @@ -936,7 +936,7 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) { /* Even if we chose not to use SVE, the hardware could still trap: */ if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) { - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); return; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 13ebd5ca2070..29fd00fe94f2 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -412,7 +412,7 @@ exit: return fn ? fn(regs, instr) : 1; } -void force_signal_inject(int signal, int code, unsigned long address) +void force_signal_inject(int signal, int code, unsigned long address, unsigned int err) { const char *desc; struct pt_regs *regs = current_pt_regs(); @@ -438,7 +438,7 @@ void force_signal_inject(int signal, int code, unsigned long address) signal = SIGKILL; } - arm64_notify_die(desc, regs, signal, code, (void __user *)address, 0); + arm64_notify_die(desc, regs, signal, code, (void __user *)address, err); } /* @@ -455,7 +455,7 @@ void arm64_notify_segfault(unsigned long addr) code = SEGV_ACCERR; mmap_read_unlock(current->mm); - force_signal_inject(SIGSEGV, code, addr); + force_signal_inject(SIGSEGV, code, addr, 0); } void do_undefinstr(struct pt_regs *regs) @@ -468,14 +468,14 @@ void do_undefinstr(struct pt_regs *regs) return; BUG_ON(!user_mode(regs)); - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); } NOKPROBE_SYMBOL(do_undefinstr); void do_bti(struct pt_regs *regs) { BUG_ON(!user_mode(regs)); - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); } NOKPROBE_SYMBOL(do_bti); @@ -528,7 +528,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) __user_cache_maint("ic ivau", address, ret); break; default: - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); return; } @@ -581,7 +581,7 @@ static void mrs_handler(unsigned int esr, struct pt_regs *regs) sysreg = esr_sys64_to_sysreg(esr); if (do_emulate_mrs(regs, sysreg, rt) != 0) - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); } static void wfi_handler(unsigned int esr, struct pt_regs *regs) -- cgit From e16aeb072682d3dcdbdad452c974baa0d2b0c6db Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Mon, 14 Sep 2020 14:06:53 +0530 Subject: arm64: ptrauth: Introduce Armv8.3 pointer authentication enhancements Some Armv8.3 Pointer Authentication enhancements have been introduced which are mandatory for Armv8.6 and optional for Armv8.3. These features are, * ARMv8.3-PAuth2 - An enhanced PAC generation logic is added which hardens finding the correct PAC value of the authenticated pointer. * ARMv8.3-FPAC - Fault is generated now when the ptrauth authentication instruction fails in authenticating the PAC present in the address. This is different from earlier case when such failures just adds an error code in the top byte and waits for subsequent load/store to abort. The ptrauth instructions which may cause this fault are autiasp, retaa etc. The above features are now represented by additional configurations for the Address Authentication cpufeature and a new ESR exception class. 
The userspace fault received in the kernel due to ARMv8.3-FPAC is treated as Illegal instruction and hence signal SIGILL is injected with ILL_ILLOPN as the signal code. Note that this is different from earlier ARMv8.3 ptrauth where signal SIGSEGV is issued due to Pointer authentication failures. The in-kernel PAC fault causes kernel to crash. Signed-off-by: Amit Daniel Kachhap Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200914083656.21428-4-amit.kachhap@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/esr.h | 4 +++- arch/arm64/include/asm/exception.h | 1 + arch/arm64/include/asm/sysreg.h | 24 ++++++++++++++++-------- arch/arm64/kernel/entry-common.c | 21 +++++++++++++++++++++ arch/arm64/kernel/traps.c | 12 ++++++++++++ 5 files changed, 53 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 035003acfa87..22c81f1edda2 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -35,7 +35,9 @@ #define ESR_ELx_EC_SYS64 (0x18) #define ESR_ELx_EC_SVE (0x19) #define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ -/* Unallocated EC: 0x1b - 0x1E */ +/* Unallocated EC: 0x1B */ +#define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */ +/* Unallocated EC: 0x1D - 0x1E */ #define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ #define ESR_ELx_EC_IABT_LOW (0x20) #define ESR_ELx_EC_IABT_CUR (0x21) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 7577a754d443..99b9383cd036 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -47,4 +47,5 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr); void do_cp15instr(unsigned int esr, struct pt_regs *regs); void do_el0_svc(struct pt_regs *regs); void do_el0_svc_compat(struct pt_regs *regs); +void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr); #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 554a7e8ecb07..b738bc793369 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -636,14 +636,22 @@ #define ID_AA64ISAR1_APA_SHIFT 4 #define ID_AA64ISAR1_DPB_SHIFT 0 -#define ID_AA64ISAR1_APA_NI 0x0 -#define ID_AA64ISAR1_APA_ARCHITECTED 0x1 -#define ID_AA64ISAR1_API_NI 0x0 -#define ID_AA64ISAR1_API_IMP_DEF 0x1 -#define ID_AA64ISAR1_GPA_NI 0x0 -#define ID_AA64ISAR1_GPA_ARCHITECTED 0x1 -#define ID_AA64ISAR1_GPI_NI 0x0 -#define ID_AA64ISAR1_GPI_IMP_DEF 0x1 +#define ID_AA64ISAR1_APA_NI 0x0 +#define ID_AA64ISAR1_APA_ARCHITECTED 0x1 +#define ID_AA64ISAR1_APA_ARCH_EPAC 0x2 +#define ID_AA64ISAR1_APA_ARCH_EPAC2 0x3 +#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC 0x4 +#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC_CMB 0x5 +#define ID_AA64ISAR1_API_NI 0x0 +#define ID_AA64ISAR1_API_IMP_DEF 0x1 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC 0x2 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2 0x3 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC 0x4 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC_CMB 0x5 +#define ID_AA64ISAR1_GPA_NI 0x0 +#define ID_AA64ISAR1_GPA_ARCHITECTED 0x1 +#define ID_AA64ISAR1_GPI_NI 0x0 +#define ID_AA64ISAR1_GPI_IMP_DEF 0x1 /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index d3be9dbf5490..43d4c329775f 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -66,6 +66,13 @@ static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) } NOKPROBE_SYMBOL(el1_dbg); +static void notrace 
el1_fpac(struct pt_regs *regs, unsigned long esr) +{ + local_daif_inherit(regs); + do_ptrauth_fault(regs, esr); +} +NOKPROBE_SYMBOL(el1_fpac); + asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -92,6 +99,9 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_BRK64: el1_dbg(regs, esr); break; + case ESR_ELx_EC_FPAC: + el1_fpac(regs, esr); + break; default: el1_inv(regs, esr); } @@ -227,6 +237,14 @@ static void notrace el0_svc(struct pt_regs *regs) } NOKPROBE_SYMBOL(el0_svc); +static void notrace el0_fpac(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_ptrauth_fault(regs, esr); +} +NOKPROBE_SYMBOL(el0_fpac); + asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -272,6 +290,9 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_BRK64: el0_dbg(regs, esr); break; + case ESR_ELx_EC_FPAC: + el0_fpac(regs, esr); + break; default: el0_inv(regs, esr); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 29fd00fe94f2..b24f81197a68 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -479,6 +479,17 @@ void do_bti(struct pt_regs *regs) } NOKPROBE_SYMBOL(do_bti); +void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr) +{ + /* + * Unexpected FPAC exception or pointer authentication failure in + * the kernel: kill the task before it does any more harm. + */ + BUG_ON(!user_mode(regs)); + force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr); +} +NOKPROBE_SYMBOL(do_ptrauth_fault); + #define __user_cache_maint(insn, address, res) \ if (address >= user_addr_max()) { \ res = -EFAULT; \ @@ -775,6 +786,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_SYS64] = "MSR/MRS (AArch64)", [ESR_ELx_EC_SVE] = "SVE", [ESR_ELx_EC_ERET] = "ERET/ERETAA/ERETAB", + [ESR_ELx_EC_FPAC] = "FPAC", [ESR_ELx_EC_IMP_DEF] = "EL3 IMP DEF", [ESR_ELx_EC_IABT_LOW] = "IABT (lower EL)", [ESR_ELx_EC_IABT_CUR] = "IABT (current EL)", -- cgit From ba9d1d3e3e7c34826b62498f7d6563b73c22ac13 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Mon, 14 Sep 2020 14:06:54 +0530 Subject: arm64: cpufeature: Modify address authentication cpufeature to exact The current address authentication cpufeature levels are set as LOWER_SAFE which is not compatible with the different configurations added for Armv8.3 ptrauth enhancements as the different levels have different behaviour and there is no tunable to enable the lower safe versions. This is rectified by setting those cpufeature type as EXACT. The current cpufeature framework also does not interfere in the booting of non-exact secondary cpus but rather marks them as tainted. As a workaround this is fixed by replacing the generic match handler with a new handler specific to ptrauth. After this change, if there is any variation in ptrauth configurations in secondary cpus from boot cpu then those mismatched cpus are parked in an infinite loop. Following ptrauth crash log is observed in Arm fastmodel with simulated mismatched cpus without this fix, CPU features: SANITY CHECK: Unexpected variation in SYS_ID_AA64ISAR1_EL1. Boot CPU: 0x11111110211402, CPU4: 0x11111110211102 CPU features: Unsupported CPU feature variation detected. 
GICv3: CPU4: found redistributor 100 region 0:0x000000002f180000 CPU4: Booted secondary processor 0x0000000100 [0x410fd0f0] Unable to handle kernel paging request at virtual address bfff800010dadf3c Mem abort info: ESR = 0x86000004 EC = 0x21: IABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 [bfff800010dadf3c] address between user and kernel address ranges Internal error: Oops: 86000004 [#1] PREEMPT SMP Modules linked in: CPU: 4 PID: 29 Comm: migration/4 Tainted: G S 5.8.0-rc4-00005-ge658591d66d1-dirty #158 Hardware name: Foundation-v8A (DT) pstate: 60000089 (nZCv daIf -PAN -UAO BTYPE=--) pc : 0xbfff800010dadf3c lr : __schedule+0x2b4/0x5a8 sp : ffff800012043d70 x29: ffff800012043d70 x28: 0080000000000000 x27: ffff800011cbe000 x26: ffff00087ad37580 x25: ffff00087ad37000 x24: ffff800010de7d50 x23: ffff800011674018 x22: 0784800010dae2a8 x21: ffff00087ad37000 x20: ffff00087acb8000 x19: ffff00087f742100 x18: 0000000000000030 x17: 0000000000000000 x16: 0000000000000000 x15: ffff800011ac1000 x14: 00000000000001bd x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 71519a147ddfeb82 x9 : 825d5ec0fb246314 x8 : ffff00087ad37dd8 x7 : 0000000000000000 x6 : 00000000fffedb0e x5 : 00000000ffffffff x4 : 0000000000000000 x3 : 0000000000000028 x2 : ffff80086e11e000 x1 : ffff00087ad37000 x0 : ffff00087acdc600 Call trace: 0xbfff800010dadf3c schedule+0x78/0x110 schedule_preempt_disabled+0x24/0x40 __kthread_parkme+0x68/0xd0 kthread+0x138/0x160 ret_from_fork+0x10/0x34 Code: bad PC value After this fix, the mismatched CPU4 is parked as, CPU features: CPU4: Detected conflict for capability 39 (Address authentication (IMP DEF algorithm)), System: 1, CPU: 0 CPU4: will not boot CPU4: failed to come online CPU4: died during early boot [Suzuki: Introduce new matching function for address authentication] Suggested-by: Suzuki K Poulose Signed-off-by: Amit Daniel Kachhap Reviewed-by: Suzuki K Poulose Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20200914083656.21428-5-amit.kachhap@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 44 +++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6424584be01e..fdbc26c0d712 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -197,9 +197,9 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_API_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_API_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_APA_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_APA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -1648,11 +1648,37 @@ static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) #endif /* CONFIG_ARM64_RAS_EXTN */ #ifdef CONFIG_ARM64_PTR_AUTH -static bool has_address_auth(const struct arm64_cpu_capabilities *entry, - int __unused) +static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, int scope) { - return __system_matches_cap(ARM64_HAS_ADDRESS_AUTH_ARCH) || - 
__system_matches_cap(ARM64_HAS_ADDRESS_AUTH_IMP_DEF); + int boot_val, sec_val; + + /* We don't expect to be called with SCOPE_SYSTEM */ + WARN_ON(scope == SCOPE_SYSTEM); + /* + * The ptr-auth feature levels are not intercompatible with lower + * levels. Hence we must match ptr-auth feature level of the secondary + * CPUs with that of the boot CPU. The level of boot cpu is fetched + * from the sanitised register whereas direct register read is done for + * the secondary CPUs. + * The sanitised feature state is guaranteed to match that of the + * boot CPU as a mismatched secondary CPU is parked before it gets + * a chance to update the state, with the capability. + */ + boot_val = cpuid_feature_extract_field(read_sanitised_ftr_reg(entry->sys_reg), + entry->field_pos, entry->sign); + if (scope & SCOPE_BOOT_CPU) + return boot_val >= entry->min_field_value; + /* Now check for the secondary CPUs with SCOPE_LOCAL_CPU scope */ + sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg), + entry->field_pos, entry->sign); + return sec_val == boot_val; +} + +static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry, + int scope) +{ + return has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH], scope) || + has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); } static bool has_generic_auth(const struct arm64_cpu_capabilities *entry, @@ -2021,7 +2047,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_APA_SHIFT, .min_field_value = ID_AA64ISAR1_APA_ARCHITECTED, - .matches = has_cpuid_feature, + .matches = has_address_auth_cpucap, }, { .desc = "Address authentication (IMP DEF algorithm)", @@ -2031,12 +2057,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_API_SHIFT, .min_field_value = ID_AA64ISAR1_API_IMP_DEF, - .matches = has_cpuid_feature, + .matches = has_address_auth_cpucap, }, { .capability = ARM64_HAS_ADDRESS_AUTH, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, - .matches = has_address_auth, + .matches = has_address_auth_metacap, }, { .desc = "Generic authentication (architected algorithm)", -- cgit From 6560edca515e53bb2e7c637ab324313680a133f4 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Mon, 14 Sep 2020 14:06:55 +0530 Subject: arm64: kprobe: disable probe of fault prone ptrauth instruction With the addition of ARMv8.3-FPAC feature, the probe of authenticate ptrauth instructions (AUT*) may cause ptrauth fault exception in case of authenticate failure so they cannot be safely single stepped. Hence the probe of authenticate instructions is disallowed but the corresponding pac ptrauth instruction (PAC*) is not affected and they can still be probed. Also AUTH* instructions do not make sense at function entry points so most realistic probes would be unaffected by this change. 
Signed-off-by: Amit Daniel Kachhap Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200914083656.21428-6-amit.kachhap@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/insn.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index ccc8c9e22b25..6c0de2f60ea9 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -60,16 +60,10 @@ bool __kprobes aarch64_insn_is_steppable_hint(u32 insn) case AARCH64_INSN_HINT_XPACLRI: case AARCH64_INSN_HINT_PACIA_1716: case AARCH64_INSN_HINT_PACIB_1716: - case AARCH64_INSN_HINT_AUTIA_1716: - case AARCH64_INSN_HINT_AUTIB_1716: case AARCH64_INSN_HINT_PACIAZ: case AARCH64_INSN_HINT_PACIASP: case AARCH64_INSN_HINT_PACIBZ: case AARCH64_INSN_HINT_PACIBSP: - case AARCH64_INSN_HINT_AUTIAZ: - case AARCH64_INSN_HINT_AUTIASP: - case AARCH64_INSN_HINT_AUTIBZ: - case AARCH64_INSN_HINT_AUTIBSP: case AARCH64_INSN_HINT_BTI: case AARCH64_INSN_HINT_BTIC: case AARCH64_INSN_HINT_BTIJ: -- cgit From 03c9c8fad6cb5e8fdfb40287fa1cdf8ee2db0b67 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Mon, 14 Sep 2020 14:06:56 +0530 Subject: arm64: kprobe: clarify the comment of steppable hint instructions The existing comment about steppable hint instruction is not complete and only describes NOP instructions as steppable. As the function aarch64_insn_is_steppable_hint allows all white-listed instruction to be probed so the comment is updated to reflect this. Signed-off-by: Amit Daniel Kachhap Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200914083656.21428-7-amit.kachhap@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/probes/decode-insn.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c index c541fb48886e..104101f633b1 100644 --- a/arch/arm64/kernel/probes/decode-insn.c +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -43,8 +43,10 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn) != AARCH64_INSN_SPCLREG_DAIF; /* - * The HINT instruction is is problematic when single-stepping, - * except for the NOP case. + * The HINT instruction is steppable only if it is in whitelist + * and the rest of other such instructions are blocked for + * single stepping as they may cause exception or other + * unintended behaviour. */ if (aarch64_insn_is_hint(insn)) return aarch64_insn_is_steppable_hint(insn); -- cgit From 2cf660eb81e93f58ae31215af67fee8499901dd9 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 14 Sep 2020 09:47:30 +1000 Subject: arm64/mm: Refactor {pgd, pud, pmd, pte}_ERROR() The function __{pgd, pud, pmd, pte}_error() are introduced so that they can be called by {pgd, pud, pmd, pte}_ERROR(). However, some of the functions could never be called when the corresponding page table level isn't enabled. For example, __{pud, pmd}_error() are unused when PUD and PMD are folded to PGD. This removes __{pgd, pud, pmd, pte}_error() and call pr_err() from {pgd, pud, pmd, pte}_ERROR() directly, similar to what x86/powerpc are doing. With this, the code looks a bit simplified either. 
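For illustration only, the shape shared by the new definitions can be reproduced stand-alone (the helper name below is made up):

  #include <stdio.h>
  #include <inttypes.h>

  /* same pattern as the new pte_ERROR()/pmd_ERROR()/pud_ERROR()/pgd_ERROR() */
  #define bad_entry(kind, val) \
          fprintf(stderr, "%s:%d: bad " kind " %016" PRIx64 ".\n", \
                  __FILE__, __LINE__, (uint64_t)(val))

  int main(void)
  {
          bad_entry("pmd", 0x68000012345f53ULL);  /* e.g. "demo.c:12: bad pmd 0068000012345f53." */
          return 0;
  }

Because the message is generated directly at the macro site, page table levels that are folded away simply never reference the corresponding macro, and no unused helper function is left behind.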
Signed-off-by: Gavin Shan Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20200913234730.23145-1-gshan@redhat.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 17 ++++++++--------- arch/arm64/kernel/traps.c | 20 -------------------- 2 files changed, 8 insertions(+), 29 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d5d3fbe73953..e0ab81923c30 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -35,11 +35,6 @@ extern struct page *vmemmap; -extern void __pte_error(const char *file, int line, unsigned long val); -extern void __pmd_error(const char *file, int line, unsigned long val); -extern void __pud_error(const char *file, int line, unsigned long val); -extern void __pgd_error(const char *file, int line, unsigned long val); - #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE @@ -57,7 +52,8 @@ extern void __pgd_error(const char *file, int line, unsigned long val); extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page)) -#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) +#define pte_ERROR(e) \ + pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e)) /* * Macros to convert between a physical address and its placement in a @@ -541,7 +537,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #if CONFIG_PGTABLE_LEVELS > 2 -#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) +#define pmd_ERROR(e) \ + pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e)) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) @@ -608,7 +605,8 @@ static inline unsigned long pud_page_vaddr(pud_t pud) #if CONFIG_PGTABLE_LEVELS > 3 -#define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud)) +#define pud_ERROR(e) \ + pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e)) #define p4d_none(p4d) (!p4d_val(p4d)) #define p4d_bad(p4d) (!(p4d_val(p4d) & 2)) @@ -667,7 +665,8 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d) #endif /* CONFIG_PGTABLE_LEVELS > 3 */ -#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) +#define pgd_ERROR(e) \ + pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e)) #define pgd_set_fixmap(addr) ((pgd_t *)set_fixmap_offset(FIX_PGD, addr)) #define pgd_clear_fixmap() clear_fixmap(FIX_PGD) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index af25cedf54e9..1e48b869be21 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -935,26 +935,6 @@ asmlinkage void enter_from_user_mode(void) } NOKPROBE_SYMBOL(enter_from_user_mode); -void __pte_error(const char *file, int line, unsigned long val) -{ - pr_err("%s:%d: bad pte %016lx.\n", file, line, val); -} - -void __pmd_error(const char *file, int line, unsigned long val) -{ - pr_err("%s:%d: bad pmd %016lx.\n", file, line, val); -} - -void __pud_error(const char *file, int line, unsigned long val) -{ - pr_err("%s:%d: bad pud %016lx.\n", file, line, val); -} - -void __pgd_error(const char *file, int line, unsigned long val) -{ - pr_err("%s:%d: bad pgd %016lx.\n", file, line, val); -} - /* GENERIC_BUG traps */ int is_valid_bugaddr(unsigned long addr) -- cgit From 118bb62f271a2b3e2f6ab058b6f43d2601f85480 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Mon, 14 Sep 2020 15:28:42 +0800 Subject: arm64: hibernate: Remove unused including Remove 
including that don't need it. Signed-off-by: Tian Tao Link: https://lore.kernel.org/r/1600068522-54499-1-git-send-email-tiantao6@hisilicon.com Signed-off-by: Will Deacon --- arch/arm64/kernel/hibernate.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 68e14152d6e9..735cfd8a744f 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include -- cgit From 2b694fc92a34e8c8b774a17266656d72b8cd4429 Mon Sep 17 00:00:00 2001 From: Tuan Phan Date: Mon, 14 Sep 2020 11:04:16 -0700 Subject: perf: arm_dsu: Support DSU ACPI devices Add support for probing device from ACPI node. Each DSU ACPI node and its associated cpus are inside a cluster node. Signed-off-by: Tuan Phan Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/1600106656-9542-1-git-send-email-tuanphan@os.amperecomputing.com Signed-off-by: Will Deacon --- drivers/perf/arm_dsu_pmu.c | 63 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 6 deletions(-) diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c index 96ed93cc78e6..98e68ed7db85 100644 --- a/drivers/perf/arm_dsu_pmu.c +++ b/drivers/perf/arm_dsu_pmu.c @@ -11,6 +11,7 @@ #define DRVNAME PMUNAME "_pmu" #define pr_fmt(fmt) DRVNAME ": " fmt +#include #include #include #include @@ -603,18 +604,19 @@ static struct dsu_pmu *dsu_pmu_alloc(struct platform_device *pdev) } /** - * dsu_pmu_dt_get_cpus: Get the list of CPUs in the cluster. + * dsu_pmu_dt_get_cpus: Get the list of CPUs in the cluster + * from device tree. */ -static int dsu_pmu_dt_get_cpus(struct device_node *dev, cpumask_t *mask) +static int dsu_pmu_dt_get_cpus(struct device *dev, cpumask_t *mask) { int i = 0, n, cpu; struct device_node *cpu_node; - n = of_count_phandle_with_args(dev, "cpus", NULL); + n = of_count_phandle_with_args(dev->of_node, "cpus", NULL); if (n <= 0) return -ENODEV; for (; i < n; i++) { - cpu_node = of_parse_phandle(dev, "cpus", i); + cpu_node = of_parse_phandle(dev->of_node, "cpus", i); if (!cpu_node) break; cpu = of_cpu_node_to_id(cpu_node); @@ -631,6 +633,36 @@ static int dsu_pmu_dt_get_cpus(struct device_node *dev, cpumask_t *mask) return 0; } +/** + * dsu_pmu_acpi_get_cpus: Get the list of CPUs in the cluster + * from ACPI. + */ +static int dsu_pmu_acpi_get_cpus(struct device *dev, cpumask_t *mask) +{ +#ifdef CONFIG_ACPI + int cpu; + + /* + * A dsu pmu node is inside a cluster parent node along with cpu nodes. + * We need to find out all cpus that have the same parent with this pmu. + */ + for_each_possible_cpu(cpu) { + struct acpi_device *acpi_dev; + struct device *cpu_dev = get_cpu_device(cpu); + + if (!cpu_dev) + continue; + + acpi_dev = ACPI_COMPANION(cpu_dev); + if (acpi_dev && + acpi_dev->parent == ACPI_COMPANION(dev)->parent) + cpumask_set_cpu(cpu, mask); + } +#endif + + return 0; +} + /* * dsu_pmu_probe_pmu: Probe the PMU details on a CPU in the cluster. 
*/ @@ -676,6 +708,7 @@ static int dsu_pmu_device_probe(struct platform_device *pdev) { int irq, rc; struct dsu_pmu *dsu_pmu; + struct fwnode_handle *fwnode = dev_fwnode(&pdev->dev); char *name; static atomic_t pmu_idx = ATOMIC_INIT(-1); @@ -683,7 +716,16 @@ static int dsu_pmu_device_probe(struct platform_device *pdev) if (IS_ERR(dsu_pmu)) return PTR_ERR(dsu_pmu); - rc = dsu_pmu_dt_get_cpus(pdev->dev.of_node, &dsu_pmu->associated_cpus); + if (IS_ERR_OR_NULL(fwnode)) + return -ENOENT; + + if (is_of_node(fwnode)) + rc = dsu_pmu_dt_get_cpus(&pdev->dev, &dsu_pmu->associated_cpus); + else if (is_acpi_device_node(fwnode)) + rc = dsu_pmu_acpi_get_cpus(&pdev->dev, &dsu_pmu->associated_cpus); + else + return -ENOENT; + if (rc) { dev_warn(&pdev->dev, "Failed to parse the CPUs\n"); return rc; @@ -752,11 +794,21 @@ static const struct of_device_id dsu_pmu_of_match[] = { { .compatible = "arm,dsu-pmu", }, {}, }; +MODULE_DEVICE_TABLE(of, dsu_pmu_of_match); + +#ifdef CONFIG_ACPI +static const struct acpi_device_id dsu_pmu_acpi_match[] = { + { "ARMHD500", 0}, + {}, +}; +MODULE_DEVICE_TABLE(acpi, dsu_pmu_acpi_match); +#endif static struct platform_driver dsu_pmu_driver = { .driver = { .name = DRVNAME, .of_match_table = of_match_ptr(dsu_pmu_of_match), + .acpi_match_table = ACPI_PTR(dsu_pmu_acpi_match), .suppress_bind_attrs = true, }, .probe = dsu_pmu_device_probe, @@ -826,7 +878,6 @@ static void __exit dsu_pmu_exit(void) module_init(dsu_pmu_init); module_exit(dsu_pmu_exit); -MODULE_DEVICE_TABLE(of, dsu_pmu_of_match); MODULE_DESCRIPTION("Perf driver for ARM DynamIQ Shared Unit"); MODULE_AUTHOR("Suzuki K Poulose "); MODULE_LICENSE("GPL v2"); -- cgit From 5fd39dc220279108131edbc85832a5fef821a826 Mon Sep 17 00:00:00 2001 From: Clint Sbisa Date: Fri, 18 Sep 2020 03:33:12 +0000 Subject: arm64: Enable PCI write-combine resources under sysfs This change exposes write-combine mappings under sysfs for prefetchable PCI resources on arm64. Originally, the usage of "write combine" here was driven by the x86 definition of write combine. This definition is specific to x86 and does not generalize to other architectures. However, the usage of WC has mutated to "write combine" semantics, which is implemented differently on each arch. Generally, prefetchable BARs are accepted to allow speculative accesses, write combining, and re-ordering-- from the PCI perspective, this means there are no read side effects. (This contradicts the PCI spec which allows prefetchable BARs to have read side effects, but this definition is ill-advised as it is impossible to meet.) On x86, prefetchable BARs are mapped as WC as originally defined (with some conditionals on arch features). On arm64, WC is taken to mean normal non-cacheable memory. In practice, write combine semantics are used to minimize write operations. A common usage of this is minimizing PCI TLPs which can significantly improve performance with PCI devices. In order to provide the same benefits to userspace, we need to allow userspace to map prefetchable BARs with write combine semantics. The resourceX_wc mapping is used today by userspace programs and libraries. While this model is flawed as "write combine" is very ill-defined, it is already used by multiple non-x86 archs to expose write combine semantics to user space. We enable this on arm64 to give userspace on arm64 an equivalent mechanism for utilizing write combining with PCI devices. 
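To make the user-space side concrete, the pattern this enables looks roughly like the sketch below; the device address, BAR index and register offset are hypothetical and error handling is minimal:

  #include <fcntl.h>
  #include <stdio.h>
  #include <sys/mman.h>
  #include <unistd.h>

  int main(void)
  {
          /* hypothetical device: prefetchable BAR 0 exposed as resource0_wc */
          const char *path = "/sys/bus/pci/devices/0000:01:00.0/resource0_wc";
          size_t len = 4096;
          volatile unsigned int *bar;
          int fd = open(path, O_RDWR);

          if (fd < 0) {
                  perror("open");
                  return 1;
          }

          bar = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
          if (bar == MAP_FAILED) {
                  perror("mmap");
                  close(fd);
                  return 1;
          }

          bar[0] = 0x1;   /* illustrative store; may be combined/reordered by the CPU */

          munmap((void *)bar, len);
          close(fd);
          return 0;
  }

The plain resourceX node keeps its existing mapping attributes, so software that depends on strongly ordered device accesses is unaffected.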
Signed-off-by: Clint Sbisa Acked-by: Catalin Marinas Acked-by: Lorenzo Pieralisi Acked-by: Ard Biesheuvel Cc: Benjamin Herrenschmidt Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Jason Gunthorpe Cc: Lorenzo Pieralisi Cc: Will Deacon Link: https://lore.kernel.org/r/20200918033312.ddfpibgfylfjpex2@amazon.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pci.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h index 70b323cf8300..b33ca260e3c9 100644 --- a/arch/arm64/include/asm/pci.h +++ b/arch/arm64/include/asm/pci.h @@ -17,6 +17,7 @@ #define pcibios_assign_all_busses() \ (pci_has_flag(PCI_REASSIGN_ALL_BUS)) +#define arch_can_pci_mmap_wc() 1 #define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 extern int isa_dma_bridge_buggy; -- cgit From e74e1d55728509b352e4eec4283dd5b2781b2070 Mon Sep 17 00:00:00 2001 From: Boyan Karatotev Date: Fri, 18 Sep 2020 11:47:12 +0100 Subject: kselftests/arm64: add a basic Pointer Authentication test PAuth signs and verifies return addresses on the stack. It does so by inserting a Pointer Authentication code (PAC) into some of the unused top bits of an address. This is achieved by adding paciasp/autiasp instructions at the beginning and end of a function. This feature is partially backwards compatible with earlier versions of the ARM architecture. To coerce the compiler into emitting fully backwards compatible code the main file is compiled to target an earlier ARM version. This allows the tests to check for the feature and print meaningful error messages instead of crashing. Add a test to verify that corrupting the return address results in a SIGSEGV on return. Signed-off-by: Boyan Karatotev Reviewed-by: Vincenzo Frascino Reviewed-by: Amit Daniel Kachhap Acked-by: Shuah Khan Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20200918104715.182310-2-boian4o1@gmail.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/Makefile | 2 +- tools/testing/selftests/arm64/pauth/.gitignore | 1 + tools/testing/selftests/arm64/pauth/Makefile | 32 ++++++++++++++++ tools/testing/selftests/arm64/pauth/helper.h | 9 +++++ tools/testing/selftests/arm64/pauth/pac.c | 44 ++++++++++++++++++++++ .../testing/selftests/arm64/pauth/pac_corruptor.S | 19 ++++++++++ 6 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/pauth/.gitignore create mode 100644 tools/testing/selftests/arm64/pauth/Makefile create mode 100644 tools/testing/selftests/arm64/pauth/helper.h create mode 100644 tools/testing/selftests/arm64/pauth/pac.c create mode 100644 tools/testing/selftests/arm64/pauth/pac_corruptor.S diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 93b567d23c8b..525506fd97b9 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal +ARM64_SUBTARGETS ?= tags signal pauth else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/pauth/.gitignore b/tools/testing/selftests/arm64/pauth/.gitignore new file mode 100644 index 000000000000..b557c916720a --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/.gitignore @@ -0,0 +1 @@ +pac diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile new file mode 100644 index 000000000000..01d35aaa610a --- /dev/null +++ 
b/tools/testing/selftests/arm64/pauth/Makefile @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020 ARM Limited + +# preserve CC value from top level Makefile +ifeq ($(CC),cc) +CC := $(CROSS_COMPILE)gcc +endif + +CFLAGS += -mbranch-protection=pac-ret +# check if the compiler supports ARMv8.3 and branch protection with PAuth +pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) + +ifeq ($(pauth_cc_support),1) +TEST_GEN_PROGS := pac +TEST_GEN_FILES := pac_corruptor.o +endif + +include ../../lib.mk + +ifeq ($(pauth_cc_support),1) +# pac* and aut* instructions are not available on architectures berfore +# ARMv8.3. Therefore target ARMv8.3 wherever they are used directly +$(OUTPUT)/pac_corruptor.o: pac_corruptor.S + $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a + +# when -mbranch-protection is enabled and the target architecture is ARMv8.3 or +# greater, gcc emits pac* instructions which are not in HINT NOP space, +# preventing the tests from occurring at all. Compile for ARMv8.2 so tests can +# run on earlier targets and print a meaningful error messages +$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o + $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a +endif diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h new file mode 100644 index 000000000000..3e0a2a404bf4 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/helper.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +#ifndef _HELPER_H_ +#define _HELPER_H_ + +void pac_corruptor(void); + +#endif diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c new file mode 100644 index 000000000000..0293310ba70a --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/pac.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#include +#include +#include + +#include "../../kselftest_harness.h" +#include "helper.h" + +#define ASSERT_PAUTH_ENABLED() \ +do { \ + unsigned long hwcaps = getauxval(AT_HWCAP); \ + /* data key instructions are not in NOP space. This prevents a SIGILL */ \ + ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \ +} while (0) + +sigjmp_buf jmpbuf; +void pac_signal_handler(int signum, siginfo_t *si, void *uc) +{ + if (signum == SIGSEGV || signum == SIGILL) + siglongjmp(jmpbuf, 1); +} + +/* check that a corrupted PAC results in SIGSEGV or SIGILL */ +TEST(corrupt_pac) +{ + struct sigaction sa; + + ASSERT_PAUTH_ENABLED(); + if (sigsetjmp(jmpbuf, 1) == 0) { + sa.sa_sigaction = pac_signal_handler; + sa.sa_flags = SA_SIGINFO | SA_RESETHAND; + sigemptyset(&sa.sa_mask); + + sigaction(SIGSEGV, &sa, NULL); + sigaction(SIGILL, &sa, NULL); + + pac_corruptor(); + ASSERT_TRUE(0) TH_LOG("SIGSEGV/SIGILL signal did not occur"); + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/arm64/pauth/pac_corruptor.S b/tools/testing/selftests/arm64/pauth/pac_corruptor.S new file mode 100644 index 000000000000..aa6588050752 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/pac_corruptor.S @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +.global pac_corruptor + +.text +/* + * Corrupting a single bit of the PAC ensures the authentication will fail. It + * also guarantees no possible collision. 
TCR_EL1.TBI0 is set by default so no + * top byte PAC is tested + */ + pac_corruptor: + paciasp + + /* corrupt the top bit of the PAC */ + eor lr, lr, #1 << 53 + + autiasp + ret -- cgit From 766d95b1ed93ebdd07ac87490e60e442342f5dc4 Mon Sep 17 00:00:00 2001 From: Boyan Karatotev Date: Fri, 18 Sep 2020 11:47:13 +0100 Subject: kselftests/arm64: add nop checks for PAuth tests PAuth adds sign/verify controls to enable and disable groups of instructions in hardware for compatibility with libraries that do not implement PAuth. The kernel always enables them if it detects PAuth. Add a test that checks that each group of instructions is enabled, if the kernel reports PAuth as detected. Note: For groups, for the purpose of this patch, we intend instructions that use a certain key. Signed-off-by: Boyan Karatotev Reviewed-by: Vincenzo Frascino Reviewed-by: Amit Daniel Kachhap Acked-by: Shuah Khan Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20200918104715.182310-3-boian4o1@gmail.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/pauth/.gitignore | 1 + tools/testing/selftests/arm64/pauth/Makefile | 7 +++- tools/testing/selftests/arm64/pauth/helper.c | 39 ++++++++++++++++++++ tools/testing/selftests/arm64/pauth/helper.h | 9 +++++ tools/testing/selftests/arm64/pauth/pac.c | 51 ++++++++++++++++++++++++++ 5 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/arm64/pauth/helper.c diff --git a/tools/testing/selftests/arm64/pauth/.gitignore b/tools/testing/selftests/arm64/pauth/.gitignore index b557c916720a..155137d92722 100644 --- a/tools/testing/selftests/arm64/pauth/.gitignore +++ b/tools/testing/selftests/arm64/pauth/.gitignore @@ -1 +1,2 @@ +exec_target pac diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile index 01d35aaa610a..5c0dd129562f 100644 --- a/tools/testing/selftests/arm64/pauth/Makefile +++ b/tools/testing/selftests/arm64/pauth/Makefile @@ -12,7 +12,7 @@ pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/nu ifeq ($(pauth_cc_support),1) TEST_GEN_PROGS := pac -TEST_GEN_FILES := pac_corruptor.o +TEST_GEN_FILES := pac_corruptor.o helper.o endif include ../../lib.mk @@ -23,10 +23,13 @@ ifeq ($(pauth_cc_support),1) $(OUTPUT)/pac_corruptor.o: pac_corruptor.S $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a +$(OUTPUT)/helper.o: helper.c + $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a + # when -mbranch-protection is enabled and the target architecture is ARMv8.3 or # greater, gcc emits pac* instructions which are not in HINT NOP space, # preventing the tests from occurring at all. 
Compile for ARMv8.2 so tests can # run on earlier targets and print a meaningful error messages -$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o +$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o $(OUTPUT)/helper.o $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a endif diff --git a/tools/testing/selftests/arm64/pauth/helper.c b/tools/testing/selftests/arm64/pauth/helper.c new file mode 100644 index 000000000000..2c201e7d0d50 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/helper.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#include "helper.h" + +size_t keyia_sign(size_t ptr) +{ + asm volatile("paciza %0" : "+r" (ptr)); + return ptr; +} + +size_t keyib_sign(size_t ptr) +{ + asm volatile("pacizb %0" : "+r" (ptr)); + return ptr; +} + +size_t keyda_sign(size_t ptr) +{ + asm volatile("pacdza %0" : "+r" (ptr)); + return ptr; +} + +size_t keydb_sign(size_t ptr) +{ + asm volatile("pacdzb %0" : "+r" (ptr)); + return ptr; +} + +size_t keyg_sign(size_t ptr) +{ + /* output is encoded in the upper 32 bits */ + size_t dest = 0; + size_t modifier = 0; + + asm volatile("pacga %0, %1, %2" : "=r" (dest) : "r" (ptr), "r" (modifier)); + + return dest; +} diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h index 3e0a2a404bf4..35c4f3357ae3 100644 --- a/tools/testing/selftests/arm64/pauth/helper.h +++ b/tools/testing/selftests/arm64/pauth/helper.h @@ -4,6 +4,15 @@ #ifndef _HELPER_H_ #define _HELPER_H_ +#include + void pac_corruptor(void); +/* PAuth sign a value with key ia and modifier value 0 */ +size_t keyia_sign(size_t val); +size_t keyib_sign(size_t val); +size_t keyda_sign(size_t val); +size_t keydb_sign(size_t val); +size_t keyg_sign(size_t val); + #endif diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c index 0293310ba70a..bd3d4c0eca9d 100644 --- a/tools/testing/selftests/arm64/pauth/pac.c +++ b/tools/testing/selftests/arm64/pauth/pac.c @@ -8,12 +8,25 @@ #include "../../kselftest_harness.h" #include "helper.h" +#define PAC_COLLISION_ATTEMPTS 10 +/* + * The kernel sets TBID by default. So bits 55 and above should remain + * untouched no matter what. + * The VA space size is 48 bits. Bigger is opt-in. + */ +#define PAC_MASK (~0xff80ffffffffffff) #define ASSERT_PAUTH_ENABLED() \ do { \ unsigned long hwcaps = getauxval(AT_HWCAP); \ /* data key instructions are not in NOP space. This prevents a SIGILL */ \ ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \ } while (0) +#define ASSERT_GENERIC_PAUTH_ENABLED() \ +do { \ + unsigned long hwcaps = getauxval(AT_HWCAP); \ + /* generic key instructions are not in NOP space. 
This prevents a SIGILL */ \ + ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \ +} while (0) sigjmp_buf jmpbuf; void pac_signal_handler(int signum, siginfo_t *si, void *uc) @@ -41,4 +54,42 @@ TEST(corrupt_pac) } } +/* + * There are no separate pac* and aut* controls so checking only the pac* + * instructions is sufficient + */ +TEST(pac_instructions_not_nop) +{ + size_t keyia = 0; + size_t keyib = 0; + size_t keyda = 0; + size_t keydb = 0; + + ASSERT_PAUTH_ENABLED(); + + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) { + keyia |= keyia_sign(i) & PAC_MASK; + keyib |= keyib_sign(i) & PAC_MASK; + keyda |= keyda_sign(i) & PAC_MASK; + keydb |= keydb_sign(i) & PAC_MASK; + } + + ASSERT_NE(0, keyia) TH_LOG("keyia instructions did nothing"); + ASSERT_NE(0, keyib) TH_LOG("keyib instructions did nothing"); + ASSERT_NE(0, keyda) TH_LOG("keyda instructions did nothing"); + ASSERT_NE(0, keydb) TH_LOG("keydb instructions did nothing"); +} + +TEST(pac_instructions_not_nop_generic) +{ + size_t keyg = 0; + + ASSERT_GENERIC_PAUTH_ENABLED(); + + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) + keyg |= keyg_sign(i) & PAC_MASK; + + ASSERT_NE(0, keyg) TH_LOG("keyg instructions did nothing"); +} + TEST_HARNESS_MAIN -- cgit From 806a15b2545e6b5949f14ea8401ce295bd38a018 Mon Sep 17 00:00:00 2001 From: Boyan Karatotev Date: Fri, 18 Sep 2020 11:47:14 +0100 Subject: kselftests/arm64: add PAuth test for whether exec() changes keys Kernel documentation states that it will change PAuth keys on exec() calls. Verify that all keys are correctly switched to new ones. Signed-off-by: Boyan Karatotev Reviewed-by: Vincenzo Frascino Reviewed-by: Amit Daniel Kachhap Acked-by: Shuah Khan Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20200918104715.182310-4-boian4o1@gmail.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/pauth/Makefile | 4 + tools/testing/selftests/arm64/pauth/exec_target.c | 34 +++++ tools/testing/selftests/arm64/pauth/helper.h | 10 ++ tools/testing/selftests/arm64/pauth/pac.c | 150 ++++++++++++++++++++++ 4 files changed, 198 insertions(+) create mode 100644 tools/testing/selftests/arm64/pauth/exec_target.c diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile index 5c0dd129562f..72e290b0b10c 100644 --- a/tools/testing/selftests/arm64/pauth/Makefile +++ b/tools/testing/selftests/arm64/pauth/Makefile @@ -13,6 +13,7 @@ pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/nu ifeq ($(pauth_cc_support),1) TEST_GEN_PROGS := pac TEST_GEN_FILES := pac_corruptor.o helper.o +TEST_GEN_PROGS_EXTENDED := exec_target endif include ../../lib.mk @@ -30,6 +31,9 @@ $(OUTPUT)/helper.o: helper.c # greater, gcc emits pac* instructions which are not in HINT NOP space, # preventing the tests from occurring at all. 
Compile for ARMv8.2 so tests can # run on earlier targets and print a meaningful error messages +$(OUTPUT)/exec_target: exec_target.c $(OUTPUT)/helper.o + $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a + $(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o $(OUTPUT)/helper.o $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a endif diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c new file mode 100644 index 000000000000..4435600ca400 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/exec_target.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#include +#include +#include + +#include "helper.h" + +int main(void) +{ + struct signatures signed_vals; + unsigned long hwcaps; + size_t val; + + fread(&val, sizeof(size_t), 1, stdin); + + /* don't try to execute illegal (unimplemented) instructions) caller + * should have checked this and keep worker simple + */ + hwcaps = getauxval(AT_HWCAP); + + if (hwcaps & HWCAP_PACA) { + signed_vals.keyia = keyia_sign(val); + signed_vals.keyib = keyib_sign(val); + signed_vals.keyda = keyda_sign(val); + signed_vals.keydb = keydb_sign(val); + } + signed_vals.keyg = (hwcaps & HWCAP_PACG) ? keyg_sign(val) : 0; + + fwrite(&signed_vals, sizeof(struct signatures), 1, stdout); + + return 0; +} diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h index 35c4f3357ae3..652496c7b411 100644 --- a/tools/testing/selftests/arm64/pauth/helper.h +++ b/tools/testing/selftests/arm64/pauth/helper.h @@ -6,6 +6,16 @@ #include +#define NKEYS 5 + +struct signatures { + size_t keyia; + size_t keyib; + size_t keyda; + size_t keydb; + size_t keyg; +}; + void pac_corruptor(void); /* PAuth sign a value with key ia and modifier value 0 */ diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c index bd3d4c0eca9d..b363ad6a0b50 100644 --- a/tools/testing/selftests/arm64/pauth/pac.c +++ b/tools/testing/selftests/arm64/pauth/pac.c @@ -2,6 +2,8 @@ // Copyright (C) 2020 ARM Limited #include +#include +#include #include #include @@ -28,6 +30,117 @@ do { \ ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \ } while (0) +void sign_specific(struct signatures *sign, size_t val) +{ + sign->keyia = keyia_sign(val); + sign->keyib = keyib_sign(val); + sign->keyda = keyda_sign(val); + sign->keydb = keydb_sign(val); +} + +void sign_all(struct signatures *sign, size_t val) +{ + sign->keyia = keyia_sign(val); + sign->keyib = keyib_sign(val); + sign->keyda = keyda_sign(val); + sign->keydb = keydb_sign(val); + sign->keyg = keyg_sign(val); +} + +int n_same(struct signatures *old, struct signatures *new, int nkeys) +{ + int res = 0; + + res += old->keyia == new->keyia; + res += old->keyib == new->keyib; + res += old->keyda == new->keyda; + res += old->keydb == new->keydb; + if (nkeys == NKEYS) + res += old->keyg == new->keyg; + + return res; +} + +int exec_sign_all(struct signatures *signed_vals, size_t val) +{ + int new_stdin[2]; + int new_stdout[2]; + int status; + ssize_t ret; + pid_t pid; + + ret = pipe(new_stdin); + if (ret == -1) { + perror("pipe returned error"); + return -1; + } + + ret = pipe(new_stdout); + if (ret == -1) { + perror("pipe returned error"); + return -1; + } + + pid = fork(); + // child + if (pid == 0) { + dup2(new_stdin[0], STDIN_FILENO); + if (ret == -1) { + perror("dup2 returned error"); + exit(1); + } + + dup2(new_stdout[1], STDOUT_FILENO); + if (ret == -1) { + perror("dup2 
returned error"); + exit(1); + } + + close(new_stdin[0]); + close(new_stdin[1]); + close(new_stdout[0]); + close(new_stdout[1]); + + ret = execl("exec_target", "exec_target", (char *)NULL); + if (ret == -1) { + perror("exec returned error"); + exit(1); + } + } + + close(new_stdin[0]); + close(new_stdout[1]); + + ret = write(new_stdin[1], &val, sizeof(size_t)); + if (ret == -1) { + perror("write returned error"); + return -1; + } + + /* + * wait for the worker to finish, so that read() reads all data + * will also context switch with worker so that this function can be used + * for context switch tests + */ + waitpid(pid, &status, 0); + if (WIFEXITED(status) == 0) { + fprintf(stderr, "worker exited unexpectedly\n"); + return -1; + } + if (WEXITSTATUS(status) != 0) { + fprintf(stderr, "worker exited with error\n"); + return -1; + } + + ret = read(new_stdout[0], signed_vals, sizeof(struct signatures)); + if (ret == -1) { + perror("read returned error"); + return -1; + } + + return 0; +} + sigjmp_buf jmpbuf; void pac_signal_handler(int signum, siginfo_t *si, void *uc) { @@ -92,4 +205,41 @@ TEST(pac_instructions_not_nop_generic) ASSERT_NE(0, keyg) TH_LOG("keyg instructions did nothing"); } +/* + * fork() does not change keys. Only exec() does so call a worker program. + * Its only job is to sign a value and report back the resutls + */ +TEST(exec_changed_keys) +{ + struct signatures new_keys; + struct signatures old_keys; + int ret; + int same = 10; + int nkeys = NKEYS; + unsigned long hwcaps = getauxval(AT_HWCAP); + + /* generic and data key instructions are not in NOP space. This prevents a SIGILL */ + ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); + if (!(hwcaps & HWCAP_PACG)) { + TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks"); + nkeys = NKEYS - 1; + } + + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) { + ret = exec_sign_all(&new_keys, i); + ASSERT_EQ(0, ret) TH_LOG("failed to run worker"); + + if (nkeys == NKEYS) + sign_all(&old_keys, i); + else + sign_specific(&old_keys, i); + + ret = n_same(&old_keys, &new_keys, nkeys); + if (ret < same) + same = ret; + } + + ASSERT_EQ(0, same) TH_LOG("exec() did not change %d keys", same); +} + TEST_HARNESS_MAIN -- cgit From d21435e9670b11a02bcb1b5683dddd50da61966d Mon Sep 17 00:00:00 2001 From: Boyan Karatotev Date: Fri, 18 Sep 2020 11:47:15 +0100 Subject: kselftests/arm64: add PAuth tests for single threaded consistency and differently initialized keys PAuth adds 5 different keys that can be used to sign addresses. Add a test that verifies that the kernel initializes them to different values and preserves them across context switches. 
Signed-off-by: Boyan Karatotev Reviewed-by: Vincenzo Frascino Reviewed-by: Amit Daniel Kachhap Acked-by: Shuah Khan Cc: Shuah Khan Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20200918104715.182310-5-boian4o1@gmail.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/pauth/pac.c | 123 ++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c index b363ad6a0b50..592fe538506e 100644 --- a/tools/testing/selftests/arm64/pauth/pac.c +++ b/tools/testing/selftests/arm64/pauth/pac.c @@ -1,11 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (C) 2020 ARM Limited +#define _GNU_SOURCE + #include #include #include #include #include +#include #include "../../kselftest_harness.h" #include "helper.h" @@ -17,6 +20,7 @@ * The VA space size is 48 bits. Bigger is opt-in. */ #define PAC_MASK (~0xff80ffffffffffff) +#define ARBITRARY_VALUE (0x1234) #define ASSERT_PAUTH_ENABLED() \ do { \ unsigned long hwcaps = getauxval(AT_HWCAP); \ @@ -61,13 +65,37 @@ int n_same(struct signatures *old, struct signatures *new, int nkeys) return res; } +int n_same_single_set(struct signatures *sign, int nkeys) +{ + size_t vals[nkeys]; + int same = 0; + + vals[0] = sign->keyia & PAC_MASK; + vals[1] = sign->keyib & PAC_MASK; + vals[2] = sign->keyda & PAC_MASK; + vals[3] = sign->keydb & PAC_MASK; + + if (nkeys >= 4) + vals[4] = sign->keyg & PAC_MASK; + + for (int i = 0; i < nkeys - 1; i++) { + for (int j = i + 1; j < nkeys; j++) { + if (vals[i] == vals[j]) + same += 1; + } + } + return same; +} + int exec_sign_all(struct signatures *signed_vals, size_t val) { int new_stdin[2]; int new_stdout[2]; int status; + int i; ssize_t ret; pid_t pid; + cpu_set_t mask; ret = pipe(new_stdin); if (ret == -1) { @@ -81,6 +109,20 @@ int exec_sign_all(struct signatures *signed_vals, size_t val) return -1; } + /* + * pin this process and all its children to a single CPU, so it can also + * guarantee a context switch with its child + */ + sched_getaffinity(0, sizeof(mask), &mask); + + for (i = 0; i < sizeof(cpu_set_t); i++) + if (CPU_ISSET(i, &mask)) + break; + + CPU_ZERO(&mask); + CPU_SET(i, &mask); + sched_setaffinity(0, sizeof(mask), &mask); + pid = fork(); // child if (pid == 0) { @@ -205,6 +247,44 @@ TEST(pac_instructions_not_nop_generic) ASSERT_NE(0, keyg) TH_LOG("keyg instructions did nothing"); } +TEST(single_thread_different_keys) +{ + int same = 10; + int nkeys = NKEYS; + int tmp; + struct signatures signed_vals; + unsigned long hwcaps = getauxval(AT_HWCAP); + + /* generic and data key instructions are not in NOP space. This prevents a SIGILL */ + ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); + if (!(hwcaps & HWCAP_PACG)) { + TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks"); + nkeys = NKEYS - 1; + } + + /* + * In Linux the PAC field can be up to 7 bits wide. Even if keys are + * different, there is about 5% chance for PACs to collide with + * different addresses. This chance rapidly increases with fewer bits + * allocated for the PAC (e.g. wider address). A comparison of the keys + * directly will be more reliable. 
+ * All signed values need to be different at least once out of n + * attempts to be certain that the keys are different + */ + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) { + if (nkeys == NKEYS) + sign_all(&signed_vals, i); + else + sign_specific(&signed_vals, i); + + tmp = n_same_single_set(&signed_vals, nkeys); + if (tmp < same) + same = tmp; + } + + ASSERT_EQ(0, same) TH_LOG("%d keys clashed every time", same); +} + /* * fork() does not change keys. Only exec() does so call a worker program. * Its only job is to sign a value and report back the resutls @@ -242,4 +322,47 @@ TEST(exec_changed_keys) ASSERT_EQ(0, same) TH_LOG("exec() did not change %d keys", same); } +TEST(context_switch_keep_keys) +{ + int ret; + struct signatures trash; + struct signatures before; + struct signatures after; + + ASSERT_PAUTH_ENABLED(); + + sign_specific(&before, ARBITRARY_VALUE); + + /* will context switch with a process with different keys at least once */ + ret = exec_sign_all(&trash, ARBITRARY_VALUE); + ASSERT_EQ(0, ret) TH_LOG("failed to run worker"); + + sign_specific(&after, ARBITRARY_VALUE); + + ASSERT_EQ(before.keyia, after.keyia) TH_LOG("keyia changed after context switching"); + ASSERT_EQ(before.keyib, after.keyib) TH_LOG("keyib changed after context switching"); + ASSERT_EQ(before.keyda, after.keyda) TH_LOG("keyda changed after context switching"); + ASSERT_EQ(before.keydb, after.keydb) TH_LOG("keydb changed after context switching"); +} + +TEST(context_switch_keep_keys_generic) +{ + int ret; + struct signatures trash; + size_t before; + size_t after; + + ASSERT_GENERIC_PAUTH_ENABLED(); + + before = keyg_sign(ARBITRARY_VALUE); + + /* will context switch with a process with different keys at least once */ + ret = exec_sign_all(&trash, ARBITRARY_VALUE); + ASSERT_EQ(0, ret) TH_LOG("failed to run worker"); + + after = keyg_sign(ARBITRARY_VALUE); + + ASSERT_EQ(before, after) TH_LOG("keyg changed after context switching"); +} + TEST_HARNESS_MAIN -- cgit From ca765153eb90577e5fda281485048427b80a9a77 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 19 Aug 2020 12:48:32 +0100 Subject: selftests: arm64: Test case for enumeration of SVE vector lengths Add a test case that verifies that we can enumerate the SVE vector lengths on systems where we detect SVE, and that those SVE vector lengths are valid. This program was written by Dave Martin and adapted to kselftest by me. Signed-off-by: Mark Brown Acked-by: Dave Martin Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20200819114837.51466-2-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/sve-probe-vls.c | 58 ++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 tools/testing/selftests/arm64/fp/sve-probe-vls.c diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c new file mode 100644 index 000000000000..b29cbc642c57 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2020 ARM Limited. 
+ * Original author: Dave Martin + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../kselftest.h" + +int main(int argc, char **argv) +{ + unsigned int vq; + int vl; + static unsigned int vqs[SVE_VQ_MAX]; + unsigned int nvqs = 0; + + ksft_print_header(); + ksft_set_plan(2); + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + ksft_exit_skip("SVE not available"); + + /* + * Enumerate up to SVE_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(PR_SVE_SET_VL, vq * 16); + if (vl == -1) + ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n", + strerror(errno), errno); + + vl &= PR_SVE_VL_LEN_MASK; + + if (!sve_vl_valid(vl)) + ksft_exit_fail_msg("VL %d invalid\n", vl); + vq = sve_vq_from_vl(vl); + + if (!(nvqs < SVE_VQ_MAX)) + ksft_exit_fail_msg("Too many VLs %u >= SVE_VQ_MAX\n", + nvqs); + vqs[nvqs++] = vq; + } + ksft_test_result_pass("Enumerated %d vector lengths\n", nvqs); + ksft_test_result_pass("All vector lengths valid\n"); + + /* Print out the vector lengths in ascending order: */ + while (nvqs--) + ksft_print_msg("%u\n", 16 * vqs[nvqs]); + + ksft_exit_pass(); +} -- cgit From 0dca276ac4d20d4071b3d3095b1ad33269ea5272 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 19 Aug 2020 12:48:33 +0100 Subject: selftests: arm64: Add test for the SVE ptrace interface Add a test case that does some basic verification of the SVE ptrace interface, forking off a child with known values in the registers and then using ptrace to inspect and manipulate the SVE registers of the child, including in FPSIMD mode to account for sharing between the SVE and FPSIMD registers. This program was written by Dave Martin and modified for kselftest by me. Signed-off-by: Mark Brown Acked-by: Dave Martin Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20200819114837.51466-3-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/sve-ptrace-asm.S | 33 +++ tools/testing/selftests/arm64/fp/sve-ptrace.c | 336 ++++++++++++++++++++++ 2 files changed, 369 insertions(+) create mode 100644 tools/testing/selftests/arm64/fp/sve-ptrace-asm.S create mode 100644 tools/testing/selftests/arm64/fp/sve-ptrace.c diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S new file mode 100644 index 000000000000..3e81f9fab574 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin +#include + +.arch_extension sve + +.globl sve_store_patterns + +sve_store_patterns: + mov x1, x0 + + index z0.b, #0, #1 + str q0, [x1] + + mov w8, #__NR_getpid + svc #0 + str q0, [x1, #0x10] + + mov z1.d, z0.d + str q0, [x1, #0x20] + + mov w8, #__NR_getpid + svc #0 + str q0, [x1, #0x30] + + mov z1.d, z0.d + str q0, [x1, #0x40] + + ret + +.size sve_store_patterns, . - sve_store_patterns +.type sve_store_patterns, @function diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c new file mode 100644 index 000000000000..b2282be6f938 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2020 ARM Limited. 
+ * Original author: Dave Martin + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../kselftest.h" + +/* and don't like each other, so: */ +#ifndef NT_ARM_SVE +#define NT_ARM_SVE 0x405 +#endif + +/* Number of registers filled in by sve_store_patterns */ +#define NR_VREGS 5 + +void sve_store_patterns(__uint128_t v[NR_VREGS]); + +static void dump(const void *buf, size_t size) +{ + size_t i; + const unsigned char *p = buf; + + for (i = 0; i < size; ++i) + printf(" %.2x", *p++); +} + +static int check_vregs(const __uint128_t vregs[NR_VREGS]) +{ + int i; + int ok = 1; + + for (i = 0; i < NR_VREGS; ++i) { + printf("# v[%d]:", i); + dump(&vregs[i], sizeof vregs[i]); + putchar('\n'); + + if (vregs[i] != vregs[0]) + ok = 0; + } + + return ok; +} + +static int do_child(void) +{ + if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) + ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + + if (raise(SIGSTOP)) + ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + + return EXIT_SUCCESS; +} + +static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size) +{ + struct user_sve_header *sve; + void *p; + size_t sz = sizeof *sve; + struct iovec iov; + + while (1) { + if (*size < sz) { + p = realloc(*buf, sz); + if (!p) { + errno = ENOMEM; + goto error; + } + + *buf = p; + *size = sz; + } + + iov.iov_base = *buf; + iov.iov_len = sz; + if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov)) + goto error; + + sve = *buf; + if (sve->size <= sz) + break; + + sz = sve->size; + } + + return sve; + +error: + return NULL; +} + +static int set_sve(pid_t pid, const struct user_sve_header *sve) +{ + struct iovec iov; + + iov.iov_base = (void *)sve; + iov.iov_len = sve->size; + return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov); +} + +static void dump_sve_regs(const struct user_sve_header *sve, unsigned int num, + unsigned int vlmax) +{ + unsigned int vq; + unsigned int i; + + if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE) + ksft_exit_fail_msg("Dumping non-SVE register\n"); + + if (vlmax > sve->vl) + vlmax = sve->vl; + + vq = sve_vq_from_vl(sve->vl); + for (i = 0; i < num; ++i) { + printf("# z%u:", i); + dump((const char *)sve + SVE_PT_SVE_ZREG_OFFSET(vq, i), + vlmax); + printf("%s\n", vlmax == sve->vl ? "" : " ..."); + } +} + +static int do_parent(pid_t child) +{ + int ret = EXIT_FAILURE; + pid_t pid; + int status; + siginfo_t si; + void *svebuf = NULL, *newsvebuf; + size_t svebufsz = 0, newsvebufsz; + struct user_sve_header *sve, *new_sve; + struct user_fpsimd_state *fpsimd; + unsigned int i, j; + unsigned char *p; + unsigned int vq; + + /* Attach to the child */ + while (1) { + int sig; + + pid = wait(&status); + if (pid == -1) { + perror("wait"); + goto error; + } + + /* + * This should never happen but it's hard to flag in + * the framework. 
+ */ + if (pid != child) + continue; + + if (WIFEXITED(status) || WIFSIGNALED(status)) + ksft_exit_fail_msg("Child died unexpectedly\n"); + + ksft_test_result(WIFSTOPPED(status), "WIFSTOPPED(%d)\n", + status); + if (!WIFSTOPPED(status)) + goto error; + + sig = WSTOPSIG(status); + + if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) { + if (errno == ESRCH) + goto disappeared; + + if (errno == EINVAL) { + sig = 0; /* bust group-stop */ + goto cont; + } + + ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n", + strerror(errno)); + goto error; + } + + if (sig == SIGSTOP && si.si_code == SI_TKILL && + si.si_pid == pid) + break; + + cont: + if (ptrace(PTRACE_CONT, pid, NULL, sig)) { + if (errno == ESRCH) + goto disappeared; + + ksft_test_result_fail("PTRACE_CONT: %s\n", + strerror(errno)); + goto error; + } + } + + sve = get_sve(pid, &svebuf, &svebufsz); + if (!sve) { + int e = errno; + + ksft_test_result_fail("get_sve: %s\n", strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } else { + ksft_test_result_pass("get_sve\n"); + } + + ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, + "FPSIMD registers\n"); + if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD) + goto error; + + fpsimd = (struct user_fpsimd_state *)((char *)sve + + SVE_PT_FPSIMD_OFFSET); + for (i = 0; i < 32; ++i) { + p = (unsigned char *)&fpsimd->vregs[i]; + + for (j = 0; j < sizeof fpsimd->vregs[i]; ++j) + p[j] = j; + } + + if (set_sve(pid, sve)) { + int e = errno; + + ksft_test_result_fail("set_sve(FPSIMD): %s\n", + strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + + vq = sve_vq_from_vl(sve->vl); + + newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1); + new_sve = newsvebuf = malloc(newsvebufsz); + if (!new_sve) { + errno = ENOMEM; + perror(NULL); + goto error; + } + + *new_sve = *sve; + new_sve->flags &= ~SVE_PT_REGS_MASK; + new_sve->flags |= SVE_PT_REGS_SVE; + memset((char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 0), + 0, SVE_PT_SVE_ZREG_SIZE(vq)); + new_sve->size = SVE_PT_SVE_ZREG_OFFSET(vq, 1); + if (set_sve(pid, new_sve)) { + int e = errno; + + ksft_test_result_fail("set_sve(ZREG): %s\n", strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + + new_sve = get_sve(pid, &newsvebuf, &newsvebufsz); + if (!new_sve) { + int e = errno; + + ksft_test_result_fail("get_sve(ZREG): %s\n", strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + + ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE, + "SVE registers\n"); + if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE) + goto error; + + dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]); + + p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1); + for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) { + unsigned char expected = i; + + if (__BYTE_ORDER == __BIG_ENDIAN) + expected = sizeof fpsimd->vregs[0] - 1 - expected; + + ksft_test_result(p[i] == expected, "p[%d] == expected\n", i); + if (p[i] != expected) + goto error; + } + + ret = EXIT_SUCCESS; + +error: + kill(child, SIGKILL); + +disappeared: + return ret; +} + +int main(void) +{ + int ret = EXIT_SUCCESS; + __uint128_t v[NR_VREGS]; + pid_t child; + + ksft_print_header(); + ksft_set_plan(20); + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + ksft_exit_skip("SVE not available\n"); + + sve_store_patterns(v); + + if (!check_vregs(v)) + ksft_exit_fail_msg("Initial check_vregs() failed\n"); + + child = fork(); + if (!child) + return do_child(); + + if (do_parent(child)) + ret = EXIT_FAILURE; + + 
ksft_print_cnts(); + + return 0; +} -- cgit From 5e992c638ea55d928e43d3c1aab1bd8e13835e6e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 19 Aug 2020 12:48:34 +0100 Subject: selftests: arm64: Add stress tests for FPSMID and SVE context switching Add programs sve-test and fpsimd-test which spin reading and writing to the SVE and FPSIMD registers, verifying the operations they perform. The intended use is to leave them running to stress the context switch code's handling of these registers which isn't compatible with what kselftest does so they're not integrated into the framework but there's no other obvious testsuite where they fit so let's store them here. These tests were written by Dave Martin and lightly adapted by me. Signed-off-by: Mark Brown Acked-by: Dave Martin Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20200819114837.51466-4-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/asm-offsets.h | 11 + tools/testing/selftests/arm64/fp/assembler.h | 57 +++ tools/testing/selftests/arm64/fp/fpsimd-test.S | 482 ++++++++++++++++++ tools/testing/selftests/arm64/fp/sve-test.S | 672 +++++++++++++++++++++++++ 4 files changed, 1222 insertions(+) create mode 100644 tools/testing/selftests/arm64/fp/asm-offsets.h create mode 100644 tools/testing/selftests/arm64/fp/assembler.h create mode 100644 tools/testing/selftests/arm64/fp/fpsimd-test.S create mode 100644 tools/testing/selftests/arm64/fp/sve-test.S diff --git a/tools/testing/selftests/arm64/fp/asm-offsets.h b/tools/testing/selftests/arm64/fp/asm-offsets.h new file mode 100644 index 000000000000..a180851496ec --- /dev/null +++ b/tools/testing/selftests/arm64/fp/asm-offsets.h @@ -0,0 +1,11 @@ +#define sa_sz 32 +#define sa_flags 8 +#define sa_handler 0 +#define sa_mask_sz 8 +#define SIGUSR1 10 +#define SIGTERM 15 +#define SIGINT 2 +#define SIGABRT 6 +#define SA_NODEFER 1073741824 +#define SA_SIGINFO 4 +#define ucontext_regs 184 diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h new file mode 100644 index 000000000000..8944f2189206 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/assembler.h @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin + +#ifndef ASSEMBLER_H +#define ASSEMBLER_H + +.macro __for from:req, to:req + .if (\from) == (\to) + _for__body %\from + .else + __for \from, %(\from) + ((\to) - (\from)) / 2 + __for %(\from) + ((\to) - (\from)) / 2 + 1, \to + .endif +.endm + +.macro _for var:req, from:req, to:req, insn:vararg + .macro _for__body \var:req + .noaltmacro + \insn + .altmacro + .endm + + .altmacro + __for \from, \to + .noaltmacro + + .purgem _for__body +.endm + +.macro function name + .macro endfunction + .type \name, @function + .purgem endfunction + .endm +\name: +.endm + +.macro define_accessor name, num, insn + .macro \name\()_entry n + \insn \n, 1 + ret + .endm + +function \name + adr x2, .L__accessor_tbl\@ + add x2, x2, x0, lsl #3 + br x2 + +.L__accessor_tbl\@: + _for x, 0, (\num) - 1, \name\()_entry \x +endfunction + + .purgem \name\()_entry +.endm + +#endif /* ! ASSEMBLER_H */ diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S new file mode 100644 index 000000000000..1c5556bdd11d --- /dev/null +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -0,0 +1,482 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. 
+// Original author: Dave Martin +// +// Simple FPSIMD context switch test +// Repeatedly writes unique test patterns into each FPSIMD register +// and reads them back to verify integrity. +// +// for x in `seq 1 NR_CPUS`; do fpsimd-test & pids=$pids\ $! ; done +// (leave it running for as long as you want...) +// kill $pids + +#include +#include "assembler.h" +#include "asm-offsets.h" + +#define NVR 32 +#define MAXVL_B (128 / 8) + +.macro _vldr Vn:req, Xt:req + ld1 {v\Vn\().2d}, [x\Xt] +.endm + +.macro _vstr Vn:req, Xt:req + st1 {v\Vn\().2d}, [x\Xt] +.endm + +// Generate accessor functions to read/write programmatically selected +// FPSIMD registers. +// x0 is the register index to access +// x1 is the memory address to read from (getv,setp) or store to (setv,setp) +// All clobber x0-x2 +define_accessor setv, NVR, _vldr +define_accessor getv, NVR, _vstr + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction + + +// Clobbers x0-x3,x8 +function puthexb + str x30, [sp, #-0x10]! + + mov w3, w0 + lsr w0, w0, #4 + bl puthexnibble + mov w0, w3 + + ldr x30, [sp], #0x10 + // fall through to puthexnibble +endfunction +// Clobbers x0-x2,x8 +function puthexnibble + and w0, w0, #0xf + cmp w0, #10 + blo 1f + add w0, w0, #'a' - ('9' + 1) +1: add w0, w0, #'0' + b putc +endfunction + +// x0=data in, x1=size in, clobbers x0-x5,x8 +function dumphex + str x30, [sp, #-0x10]! + + mov x4, x0 + mov x5, x1 + +0: subs x5, x5, #1 + b.lo 1f + ldrb w0, [x4], #1 + bl puthexb + b 0b + +1: ldr x30, [sp], #0x10 + ret +endfunction + +// Declare some storate space to shadow the SVE register contents: +.pushsection .text +.data +.align 4 +vref: + .space MAXVL_B * NVR +scratch: + .space MAXVL_B +.popsection + +// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. 
+// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction + +// Generate a test pattern for storage in SVE registers +// x0: pid (16 bits) +// x1: register number (6 bits) +// x2: generation (4 bits) +function pattern + orr w1, w0, w1, lsl #16 + orr w2, w1, w2, lsl #28 + + ldr x0, =scratch + mov w1, #MAXVL_B / 4 + +0: str w2, [x0], #4 + add w2, w2, #(1 << 22) + subs w1, w1, #1 + bne 0b + + ret +endfunction + +// Get the address of shadow data for FPSIMD V-register V +.macro _adrv xd, xn, nrtmp + ldr \xd, =vref + mov x\nrtmp, #16 + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Set up test pattern in a FPSIMD V-register +// x0: pid +// x1: register number +// x2: generation +function setup_vreg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrv x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setv + + ret x4 +endfunction + +// Fill x1 bytes starting at x0 with 0xae (for canary purposes) +// Clobbers x1, x2. +function memfill_ae + mov w2, #0xae + b memfill +endfunction + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 +endfunction + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. +// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 1f + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne barf + subs x2, x2, #1 + b.ne 0b + +1: ret +endfunction + +// Verify that a FPSIMD V-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x5. +function check_vreg + mov x3, x30 + + _adrv x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getv + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Any SVE register modified here can cause corruption in the main +// thread -- but *only* the registers modified here. +function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random V-regs + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #7 + movi v9.16b, #9 + movi v31.8b, #31 + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! 
+ + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start +_start: + // Sanity-check and report the vector length + + mov x19, #128 + cmp x19, #128 + b.lo 1f + cmp x19, #2048 + b.hi 1f + tst x19, #(8 - 1) + b.eq 2f + +1: puts "Bad vector length: " + mov x0, x19 + bl putdecn + b .Labort + +2: puts "Vector length:\t" + mov x0, x19 + bl putdec + puts " bits\n" + + // Obtain our PID, to ensure test pattern uniqueness between processes + + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + + mov x23, #0 // Irritation signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + + mov x21, #0 // Set up V-regs & shadow with test pattern +0: mov x0, x20 + mov x1, x21 + and x2, x22, #0xf + bl setup_vreg + add x21, x21, #1 + cmp x21, #NVR + b.lo 0b + +// Can't do this when SVE state is volatile across SVC: + mov x8, #__NR_sched_yield // Encourage preemption + svc #0 + + mov x21, #0 +0: mov x0, x21 + bl check_vreg + add x21, x21, #1 + cmp x21, #NVR + b.lo 0b + + add x22, x22, #1 + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mistatch: PID=" + mov x0, x20 + bl putdec + puts ", iteration=" + mov x0, x22 + bl putdec + puts ", reg=" + mov x0, x21 + bl putdecn + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S new file mode 100644 index 000000000000..f95074c9b48b --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -0,0 +1,672 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin +// +// Simple Scalable Vector Extension context switch test +// Repeatedly writes unique test patterns into each SVE register +// and reads them back to verify integrity. +// +// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done +// (leave it running for as long as you want...) +// kill $pids + +#include +#include "assembler.h" +#include "asm-offsets.h" + +#define NZR 32 +#define NPR 16 +#define MAXVL_B (2048 / 8) + +.arch_extension sve + +.macro _sve_ldr_v zt, xn + ldr z\zt, [x\xn] +.endm + +.macro _sve_str_v zt, xn + str z\zt, [x\xn] +.endm + +.macro _sve_ldr_p pt, xn + ldr p\pt, [x\xn] +.endm + +.macro _sve_str_p pt, xn + str p\pt, [x\xn] +.endm + +// Generate accessor functions to read/write programmatically selected +// SVE registers. 
+// x0 is the register index to access +// x1 is the memory address to read from (getz,setp) or store to (setz,setp) +// All clobber x0-x2 +define_accessor setz, NZR, _sve_ldr_v +define_accessor getz, NZR, _sve_str_v +define_accessor setp, NPR, _sve_ldr_p +define_accessor getp, NPR, _sve_str_p + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction + +// Clobbers x0-x3,x8 +function puthexb + str x30, [sp, #-0x10]! + + mov w3, w0 + lsr w0, w0, #4 + bl puthexnibble + mov w0, w3 + + ldr x30, [sp], #0x10 + // fall through to puthexnibble +endfunction +// Clobbers x0-x2,x8 +function puthexnibble + and w0, w0, #0xf + cmp w0, #10 + blo 1f + add w0, w0, #'a' - ('9' + 1) +1: add w0, w0, #'0' + b putc +endfunction + +// x0=data in, x1=size in, clobbers x0-x5,x8 +function dumphex + str x30, [sp, #-0x10]! + + mov x4, x0 + mov x5, x1 + +0: subs x5, x5, #1 + b.lo 1f + ldrb w0, [x4], #1 + bl puthexb + b 0b + +1: ldr x30, [sp], #0x10 + ret +endfunction + +// Declare some storate space to shadow the SVE register contents: +.pushsection .text +.data +.align 4 +zref: + .space MAXVL_B * NZR +pref: + .space MAXVL_B / 8 * NPR +ffrref: + .space MAXVL_B / 8 +scratch: + .space MAXVL_B +.popsection + +// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. 
+// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction + +// Generate a test pattern for storage in SVE registers +// x0: pid (16 bits) +// x1: register number (6 bits) +// x2: generation (4 bits) + +// These values are used to constuct a 32-bit pattern that is repeated in the +// scratch buffer as many times as will fit: +// bits 31:28 generation number (increments once per test_loop) +// bits 27:22 32-bit lane index +// bits 21:16 register number +// bits 15: 0 pid + +function pattern + orr w1, w0, w1, lsl #16 + orr w2, w1, w2, lsl #28 + + ldr x0, =scratch + mov w1, #MAXVL_B / 4 + +0: str w2, [x0], #4 + add w2, w2, #(1 << 22) + subs w1, w1, #1 + bne 0b + + ret +endfunction + +// Get the address of shadow data for SVE Z-register Z +.macro _adrz xd, xn, nrtmp + ldr \xd, =zref + rdvl x\nrtmp, #1 + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Get the address of shadow data for SVE P-register P +.macro _adrp xd, xn, nrtmp + ldr \xd, =pref + rdvl x\nrtmp, #1 + lsr x\nrtmp, x\nrtmp, #3 + sub \xn, \xn, #NZR + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Set up test pattern in a SVE Z-register +// x0: pid +// x1: register number +// x2: generation +function setup_zreg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrz x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setz + + ret x4 +endfunction + +// Set up test pattern in a SVE P-register +// x0: pid +// x1: register number +// x2: generation +function setup_preg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrp x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setp + + ret x4 +endfunction + +// Set up test pattern in the FFR +// x0: pid +// x2: generation +// Beware: corrupts P0. +function setup_ffr + mov x4, x30 + + bl pattern + ldr x0, =ffrref + ldr x1, =scratch + rdvl x2, #1 + lsr x2, x2, #3 + bl memcpy + + mov x0, #0 + ldr x1, =ffrref + bl setp + + wrffr p0.b + + ret x4 +endfunction + +// Fill x1 bytes starting at x0 with 0xae (for canary purposes) +// Clobbers x1, x2. +function memfill_ae + mov w2, #0xae + b memfill +endfunction + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 +endfunction + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. +// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 2f + + stp x0, x1, [sp, #-0x20]! + str x2, [sp, #0x10] + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne 1f + subs x2, x2, #1 + b.ne 0b + +1: ldr x2, [sp, #0x10] + ldp x0, x1, [sp], #0x20 + b.ne barf + +2: ret +endfunction + +// Verify that a SVE Z-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x7. +function check_zreg + mov x3, x30 + + _adrz x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getz + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Verify that a SVE P-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x7. 
+function check_preg + mov x3, x30 + + _adrp x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getp + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Verify that the FFR matches its shadow in memory, else abort +// Beware -- corrupts P0. +// Clobbers x0-x5. +function check_ffr + mov x3, x30 + + ldr x4, =scratch + rdvl x5, #1 + lsr x5, x5, #3 + + mov x0, x4 + mov x1, x5 + bl memfill_ae + + rdffr p0.b + mov x0, #0 + mov x1, x4 + bl getp + + ldr x0, =ffrref + mov x1, x4 + mov x2, x5 + mov x30, x3 + b memcmp +endfunction + +// Any SVE register modified here can cause corruption in the main +// thread -- but *only* the registers modified here. +function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random Z-regs + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #1 + movi v9.16b, #2 + movi v31.8b, #3 + // And P0 + rdffr p0.b + // And FFR + wrffr p15.b + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! + + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start +_start: + // Sanity-check and report the vector length + + rdvl x19, #8 + cmp x19, #128 + b.lo 1f + cmp x19, #2048 + b.hi 1f + tst x19, #(8 - 1) + b.eq 2f + +1: puts "Bad vector length: " + mov x0, x19 + bl putdecn + b .Labort + +2: puts "Vector length:\t" + mov x0, x19 + bl putdec + puts " bits\n" + + // Obtain our PID, to ensure test pattern uniqueness between processes + + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + + mov x23, #0 // Irritation signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + rdvl x0, #8 + cmp x0, x19 + b.ne vl_barf + + mov x21, #0 // Set up Z-regs & shadow with test pattern +0: mov x0, x20 + mov x1, x21 + and x2, x22, #0xf + bl setup_zreg + add x21, x21, #1 + cmp x21, #NZR + b.lo 0b + + mov x0, x20 // Set up FFR & shadow with test pattern + mov x1, #NZR + NPR + and x2, x22, #0xf + bl setup_ffr + +0: mov x0, x20 // Set up P-regs & shadow with test pattern + mov x1, x21 + and x2, x22, #0xf + bl setup_preg + add x21, x21, #1 + cmp x21, #NZR + NPR + b.lo 0b + +// Can't do this when SVE state is volatile across SVC: +// mov x8, #__NR_sched_yield // Encourage preemption +// svc 
#0 + + mov x21, #0 +0: mov x0, x21 + bl check_zreg + add x21, x21, #1 + cmp x21, #NZR + b.lo 0b + +0: mov x0, x21 + bl check_preg + add x21, x21, #1 + cmp x21, #NZR + NPR + b.lo 0b + + bl check_ffr + + add x22, x22, #1 + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// end hack + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mistatch: PID=" + mov x0, x20 + bl putdec + puts ", iteration=" + mov x0, x22 + bl putdec + puts ", reg=" + mov x0, x21 + bl putdecn + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_getpid + svc #0 +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// ^ end of hack + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +// mov x8, #__NR_exit +// mov x1, #1 +// svc #0 +endfunction + +function vl_barf + mov x10, x0 + + puts "Bad active VL: " + mov x0, x10 + bl putdecn + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction -- cgit From fc7e611f9f38de7166c5f8951df93f7351542448 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 19 Aug 2020 12:48:35 +0100 Subject: selftests: arm64: Add utility to set SVE vector lengths vlset is a small utility for use in conjunction with tests like the sve-test stress test which allows another executable to be invoked with a configured SVE vector length. Signed-off-by: Mark Brown Acked-by: Dave Martin Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20200819114837.51466-5-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/vlset.c | 155 +++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 tools/testing/selftests/arm64/fp/vlset.c diff --git a/tools/testing/selftests/arm64/fp/vlset.c b/tools/testing/selftests/arm64/fp/vlset.c new file mode 100644 index 000000000000..308d27a68226 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/vlset.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2019 ARM Limited. + * Original author: Dave Martin + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int inherit = 0; +static int no_inherit = 0; +static int force = 0; +static unsigned long vl; + +static const struct option options[] = { + { "force", no_argument, NULL, 'f' }, + { "inherit", no_argument, NULL, 'i' }, + { "max", no_argument, NULL, 'M' }, + { "no-inherit", no_argument, &no_inherit, 1 }, + { "help", no_argument, NULL, '?' 
}, + {} +}; + +static char const *program_name; + +static int parse_options(int argc, char **argv) +{ + int c; + char *rest; + + program_name = strrchr(argv[0], '/'); + if (program_name) + ++program_name; + else + program_name = argv[0]; + + while ((c = getopt_long(argc, argv, "Mfhi", options, NULL)) != -1) + switch (c) { + case 'M': vl = SVE_VL_MAX; break; + case 'f': force = 1; break; + case 'i': inherit = 1; break; + case 0: break; + default: goto error; + } + + if (inherit && no_inherit) + goto error; + + if (!vl) { + /* vector length */ + if (optind >= argc) + goto error; + + errno = 0; + vl = strtoul(argv[optind], &rest, 0); + if (*rest) { + vl = ULONG_MAX; + errno = EINVAL; + } + if (vl == ULONG_MAX && errno) { + fprintf(stderr, "%s: %s: %s\n", + program_name, argv[optind], strerror(errno)); + goto error; + } + + ++optind; + } + + /* command */ + if (optind >= argc) + goto error; + + return 0; + +error: + fprintf(stderr, + "Usage: %s [-f | --force] " + "[-i | --inherit | --no-inherit] " + "{-M | --max | } " + " [ ...]\n", + program_name); + return -1; +} + +int main(int argc, char **argv) +{ + int ret = 126; /* same as sh(1) command-not-executable error */ + long flags; + char *path; + int t, e; + + if (parse_options(argc, argv)) + return 2; /* same as sh(1) builtin incorrect-usage */ + + if (vl & ~(vl & PR_SVE_VL_LEN_MASK)) { + fprintf(stderr, "%s: Invalid vector length %lu\n", + program_name, vl); + return 2; /* same as sh(1) builtin incorrect-usage */ + } + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) { + fprintf(stderr, "%s: Scalable Vector Extension not present\n", + program_name); + + if (!force) + goto error; + + fputs("Going ahead anyway (--force): " + "This is a debug option. Don't rely on it.\n", + stderr); + } + + flags = PR_SVE_SET_VL_ONEXEC; + if (inherit) + flags |= PR_SVE_VL_INHERIT; + + t = prctl(PR_SVE_SET_VL, vl | flags); + if (t < 0) { + fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n", + program_name, strerror(errno)); + goto error; + } + + t = prctl(PR_SVE_GET_VL); + if (t == -1) { + fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n", + program_name, strerror(errno)); + goto error; + } + flags = PR_SVE_VL_LEN_MASK; + flags = t & ~flags; + + assert(optind < argc); + path = argv[optind]; + + execvp(path, &argv[optind]); + e = errno; + if (errno == ENOENT) + ret = 127; /* same as sh(1) not-found error */ + fprintf(stderr, "%s: %s: %s\n", program_name, path, strerror(e)); + +error: + return ret; /* same as sh(1) not-executable error */ +} -- cgit From 25f47e3eb66e6ee31b3ed3f8c0b7bdce098106fe Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 19 Aug 2020 12:48:36 +0100 Subject: selftests: arm64: Add wrapper scripts for stress tests Add wrapper scripts which invoke fpsimd-test and sve-test with several copies per CPU such that the context switch code will be appropriately exercised. 
Signed-off-by: Mark Brown Acked-by: Dave Martin Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20200819114837.51466-6-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/fpsimd-stress | 60 ++++++++++++++++++++++++++ tools/testing/selftests/arm64/fp/sve-stress | 59 +++++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100755 tools/testing/selftests/arm64/fp/fpsimd-stress create mode 100755 tools/testing/selftests/arm64/fp/sve-stress diff --git a/tools/testing/selftests/arm64/fp/fpsimd-stress b/tools/testing/selftests/arm64/fp/fpsimd-stress new file mode 100755 index 000000000000..781b5b022eaf --- /dev/null +++ b/tools/testing/selftests/arm64/fp/fpsimd-stress @@ -0,0 +1,60 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. +# Original author: Dave Martin + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT +trap child_died CHLD + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./fpsimd-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! + +wait + +exit 1 diff --git a/tools/testing/selftests/arm64/fp/sve-stress b/tools/testing/selftests/arm64/fp/sve-stress new file mode 100755 index 000000000000..24dd0922cc02 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-stress @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. +# Original author: Dave Martin + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./sve-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! + +wait + +exit 1 -- cgit From e093256d14fbf9e8aaf02ed0ac69087eef6792bd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 19 Aug 2020 12:48:37 +0100 Subject: selftests: arm64: Add build and documentation for FP tests Integrate the FP tests with the build system and add some documentation for the ones run outside the kselftest infrastructure. The content in the README was largely written by Dave Martin with edits by me. 
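For reference, the kselftest-managed tests added here (sve-probe-vls and sve-ptrace) are built and run through the standard kselftest entry point, while the stress tests described in the README are run by hand from tools/testing/selftests/arm64/fp/. A typical invocation is the usual kselftest one (shown for illustration only; nothing below is added by this patch):

  $ make -C tools/testing/selftests TARGETS=arm64 run_tests
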
Signed-off-by: Mark Brown Acked-by: Dave Martin Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20200819114837.51466-7-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/Makefile | 2 +- tools/testing/selftests/arm64/fp/.gitignore | 5 ++ tools/testing/selftests/arm64/fp/Makefile | 17 +++++ tools/testing/selftests/arm64/fp/README | 100 ++++++++++++++++++++++++++++ 4 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/fp/.gitignore create mode 100644 tools/testing/selftests/arm64/fp/Makefile create mode 100644 tools/testing/selftests/arm64/fp/README diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 525506fd97b9..463d56278f8f 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal pauth +ARM64_SUBTARGETS ?= tags signal pauth fp else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore new file mode 100644 index 000000000000..d66f76d2a650 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/.gitignore @@ -0,0 +1,5 @@ +fpsimd-test +sve-probe-vls +sve-ptrace +sve-test +vlset diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile new file mode 100644 index 000000000000..a57009d3a0dc --- /dev/null +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -I../../../../../usr/include/ +TEST_GEN_PROGS := sve-ptrace sve-probe-vls +TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress sve-test sve-stress vlset + +all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED) + +fpsimd-test: fpsimd-test.o + $(CC) -nostdlib $^ -o $@ +sve-ptrace: sve-ptrace.o sve-ptrace-asm.o +sve-probe-vls: sve-probe-vls.o +sve-test: sve-test.o + $(CC) -nostdlib $^ -o $@ +vlset: vlset.o + +include ../../lib.mk diff --git a/tools/testing/selftests/arm64/fp/README b/tools/testing/selftests/arm64/fp/README new file mode 100644 index 000000000000..03e3dad865d8 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/README @@ -0,0 +1,100 @@ +This directory contains a mix of tests integrated with kselftest and +standalone stress tests. + +kselftest tests +=============== + +sve-probe-vls - Checks the SVE vector length enumeration interface +sve-ptrace - Checks the SVE ptrace interface + +Running the non-kselftest tests +=============================== + +sve-stress performs an SVE context switch stress test, as described +below. + +(The fpsimd-stress test works the same way; just substitute "fpsimd" for +"sve" in the following commands.) + + +The test runs until killed by the user. 
+ +If no context switch error was detected, you will see output such as +the following: + +$ ./sve-stress +(wait for some time) +^C +Vector length: 512 bits +PID: 1573 +Terminated by signal 15, no error, iterations=9467, signals=1014 +Vector length: 512 bits +PID: 1575 +Terminated by signal 15, no error, iterations=9448, signals=1028 +Vector length: 512 bits +PID: 1577 +Terminated by signal 15, no error, iterations=9436, signals=1039 +Vector length: 512 bits +PID: 1579 +Terminated by signal 15, no error, iterations=9421, signals=1039 +Vector length: 512 bits +PID: 1581 +Terminated by signal 15, no error, iterations=9403, signals=1039 +Vector length: 512 bits +PID: 1583 +Terminated by signal 15, no error, iterations=9385, signals=1036 +Vector length: 512 bits +PID: 1585 +Terminated by signal 15, no error, iterations=9376, signals=1039 +Vector length: 512 bits +PID: 1587 +Terminated by signal 15, no error, iterations=9361, signals=1039 +Vector length: 512 bits +PID: 1589 +Terminated by signal 15, no error, iterations=9350, signals=1039 + + +If an error was detected, details of the mismatch will be printed +instead of "no error". + +Ideally, the test should be allowed to run for many minutes or hours +to maximise test coverage. + + +KVM stress testing +================== + +To try to reproduce the bugs that we have been observing, sve-stress +should be run in parallel in two KVM guests, while simultaneously +running on the host. + +1) Start 2 guests, using the following command for each: + +$ lkvm run --console=virtio -pconsole=hvc0 --sve Image + +(Depending on the hardware GIC implementation, you may also need +--irqchip=gicv3. New kvmtool defaults to that if appropriate, but I +can't remember whether my branch is new enough for that. Try without +the option first.) + +Kvmtool occupies the terminal until you kill it (Ctrl+A x), +or until the guest terminates. It is therefore recommended to run +each instance in separate terminal (use screen or ssh etc.) This +allows multiple guests to be run in parallel while running other +commands on the host. + +Within the guest, the host filesystem is accessible, mounted on /host. + +2) Run the sve-stress on *each* guest with the Vector-Length set to 32: +guest$ ./vlset --inherit 32 ./sve-stress + +3) Run the sve-stress on the host with the maximum Vector-Length: +host$ ./vlset --inherit --max ./sve-stress + + +Again, the test should be allowed to run for many minutes or hours to +maximise test coverage. + +If no error is detected, you will see output from each sve-stress +instance similar to that illustrated above; otherwise details of the +observed mismatches will be printed. -- cgit From 264c03a245de7c5b1cc3836db45de6b991f877ca Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 14 Sep 2020 16:34:07 +0100 Subject: stacktrace: Remove reliable argument from arch_stack_walk() callback Currently the callback passed to arch_stack_walk() has an argument called reliable passed to it to indicate if the stack entry is reliable, a comment says that this is used by some printk() consumers. However in the current kernel none of the arch_stack_walk() implementations ever set this flag to true and the only callback implementation we have is in the generic stacktrace code which ignores the flag. It therefore appears that this flag is redundant so we can simplify and clarify things by removing it. 
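For illustration, a consumer of arch_stack_walk() under the simplified callback signature looks roughly like the sketch below; the context structure and function names are made up rather than taken from an in-tree user:

  struct dump_ctx {
          unsigned long entries[16];
          unsigned int nr;
  };

  /* Return true to continue the walk, false to stop it -- no reliable flag. */
  static bool dump_entry(void *cookie, unsigned long addr)
  {
          struct dump_ctx *ctx = cookie;

          if (ctx->nr >= ARRAY_SIZE(ctx->entries))
                  return false;
          ctx->entries[ctx->nr++] = addr;
          return true;
  }

  /* Walk the current task: arch_stack_walk(dump_entry, &ctx, current, NULL); */
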
Signed-off-by: Mark Brown Reviewed-by: Miroslav Benes Link: https://lore.kernel.org/r/20200914153409.25097-2-broonie@kernel.org Signed-off-by: Will Deacon --- arch/s390/kernel/stacktrace.c | 4 ++-- arch/x86/kernel/stacktrace.c | 10 +++++----- include/linux/stacktrace.h | 5 +---- kernel/stacktrace.c | 8 +++----- 4 files changed, 11 insertions(+), 16 deletions(-) diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index fc5419ac64c8..7f1266c24f6b 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -19,7 +19,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, unwind_for_each_frame(&state, task, regs, 0) { addr = unwind_get_return_address(&state); - if (!addr || !consume_entry(cookie, addr, false)) + if (!addr || !consume_entry(cookie, addr)) break; } } @@ -56,7 +56,7 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, return -EINVAL; #endif - if (!consume_entry(cookie, addr, false)) + if (!consume_entry(cookie, addr)) return -EINVAL; } diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 2fd698e28e4d..8627fda8d993 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -18,13 +18,13 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct unwind_state state; unsigned long addr; - if (regs && !consume_entry(cookie, regs->ip, false)) + if (regs && !consume_entry(cookie, regs->ip)) return; for (unwind_start(&state, task, regs, NULL); !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); - if (!addr || !consume_entry(cookie, addr, false)) + if (!addr || !consume_entry(cookie, addr)) break; } } @@ -72,7 +72,7 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, if (!addr) return -EINVAL; - if (!consume_entry(cookie, addr, false)) + if (!consume_entry(cookie, addr)) return -EINVAL; } @@ -114,7 +114,7 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, { const void __user *fp = (const void __user *)regs->bp; - if (!consume_entry(cookie, regs->ip, false)) + if (!consume_entry(cookie, regs->ip)) return; while (1) { @@ -128,7 +128,7 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, break; if (!frame.ret_addr) break; - if (!consume_entry(cookie, frame.ret_addr, false)) + if (!consume_entry(cookie, frame.ret_addr)) break; fp = frame.next_fp; } diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index b7af8cc13eda..50e2df30b0aa 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -29,14 +29,11 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size); * stack_trace_consume_fn - Callback for arch_stack_walk() * @cookie: Caller supplied pointer handed back by arch_stack_walk() * @addr: The stack entry address to consume - * @reliable: True when the stack entry is reliable. Required by - * some printk based consumers. 
* * Return: True, if the entry was consumed or skipped * False, if there is no space left to store */ -typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr, - bool reliable); +typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr); /** * arch_stack_walk - Architecture specific function to walk the stack * @consume_entry: Callback which is invoked by the architecture code for diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index 946f44a9e86a..9f8117c7cfdd 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -78,8 +78,7 @@ struct stacktrace_cookie { unsigned int len; }; -static bool stack_trace_consume_entry(void *cookie, unsigned long addr, - bool reliable) +static bool stack_trace_consume_entry(void *cookie, unsigned long addr) { struct stacktrace_cookie *c = cookie; @@ -94,12 +93,11 @@ static bool stack_trace_consume_entry(void *cookie, unsigned long addr, return c->len < c->size; } -static bool stack_trace_consume_entry_nosched(void *cookie, unsigned long addr, - bool reliable) +static bool stack_trace_consume_entry_nosched(void *cookie, unsigned long addr) { if (in_sched_functions(addr)) return true; - return stack_trace_consume_entry(cookie, addr, reliable); + return stack_trace_consume_entry(cookie, addr); } /** -- cgit From baa2cd417053cb674deb90ee66b88afea0517335 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 14 Sep 2020 16:34:08 +0100 Subject: arm64: stacktrace: Make stack walk callback consistent with generic code As with the generic arch_stack_walk() code the arm64 stack walk code takes a callback that is called per stack frame. Currently the arm64 code always passes a struct stackframe to the callback and the generic code just passes the pc, however none of the users ever reference anything in the struct other than the pc value. The arm64 code also uses a return type of int while the generic code uses a return type of bool though in both cases the return value is a boolean value and the sense is inverted between the two. In order to reduce code duplication when arm64 is converted to use arch_stack_walk() change the signature and return sense of the arm64 specific callback to match that of the generic code. 
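To make the change concrete: a callback that previously returned non-zero to terminate the walk now returns false to terminate it, and is handed the pc directly rather than a struct stackframe. Roughly (hypothetical callback, shown only to illustrate the conversion):

  /* Old arm64 form: a non-zero return stops walk_stackframe() */
  static int old_cb(struct stackframe *frame, void *data)
  {
          return record_entry(data, frame->pc) ? 0 : 1;
  }

  /* New form, matching the generic code: a false return stops the walk */
  static bool new_cb(void *data, unsigned long pc)
  {
          return record_entry(data, pc);
  }

where record_entry() stands in for whatever the consumer does with the entry and returns true for as long as it wants more.
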
Signed-off-by: Mark Brown Reviewed-by: Miroslav Benes Link: https://lore.kernel.org/r/20200914153409.25097-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/stacktrace.h | 2 +- arch/arm64/kernel/perf_callchain.c | 6 +++--- arch/arm64/kernel/return_address.c | 8 ++++---- arch/arm64/kernel/stacktrace.c | 11 +++++------ 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index fc7613023c19..eb29b1fe8255 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -63,7 +63,7 @@ struct stackframe { extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, - int (*fn)(struct stackframe *, void *), void *data); + bool (*fn)(void *, unsigned long), void *data); extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, const char *loglvl); diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index b0e03e052dd1..88ff471b0bce 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -137,11 +137,11 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, * whist unwinding the stackframe and is like a subroutine return so we use * the PC. */ -static int callchain_trace(struct stackframe *frame, void *data) +static bool callchain_trace(void *data, unsigned long pc) { struct perf_callchain_entry_ctx *entry = data; - perf_callchain_store(entry, frame->pc); - return 0; + perf_callchain_store(entry, pc); + return true; } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c index a5e8b3b9d798..a6d18755652f 100644 --- a/arch/arm64/kernel/return_address.c +++ b/arch/arm64/kernel/return_address.c @@ -18,16 +18,16 @@ struct return_address_data { void *addr; }; -static int save_return_addr(struct stackframe *frame, void *d) +static bool save_return_addr(void *d, unsigned long pc) { struct return_address_data *data = d; if (!data->level) { - data->addr = (void *)frame->pc; - return 1; + data->addr = (void *)pc; + return false; } else { --data->level; - return 0; + return true; } } NOKPROBE_SYMBOL(save_return_addr); diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 2dd8e3b8b94b..05eaba21fd46 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -118,12 +118,12 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) NOKPROBE_SYMBOL(unwind_frame); void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, - int (*fn)(struct stackframe *, void *), void *data) + bool (*fn)(void *, unsigned long), void *data) { while (1) { int ret; - if (fn(frame, data)) + if (!fn(data, frame->pc)) break; ret = unwind_frame(tsk, frame); if (ret < 0) @@ -139,17 +139,16 @@ struct stack_trace_data { unsigned int skip; }; -static int save_trace(struct stackframe *frame, void *d) +static bool save_trace(void *d, unsigned long addr) { struct stack_trace_data *data = d; struct stack_trace *trace = data->trace; - unsigned long addr = frame->pc; if (data->no_sched_functions && in_sched_functions(addr)) - return 0; + return false; if (data->skip) { data->skip--; - return 0; + return false; } trace->entries[trace->nr_entries++] = addr; -- cgit From 5fc57df2f6fdc18b9f9273f98318ff7919968c2c Mon Sep 17 00:00:00 2001 
From: Mark Brown Date: Mon, 14 Sep 2020 16:34:09 +0100 Subject: arm64: stacktrace: Convert to ARCH_STACKWALK Historically architectures have had duplicated code in their stack trace implementations for filtering what gets traced. In order to avoid this duplication some generic code has been provided using a new interface arch_stack_walk(), enabled by selecting ARCH_STACKWALK in Kconfig, which factors all this out into the generic stack trace code. Convert arm64 to use this common infrastructure. Signed-off-by: Mark Brown Reviewed-by: Miroslav Benes Link: https://lore.kernel.org/r/20200914153409.25097-4-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + arch/arm64/kernel/stacktrace.c | 79 ++++++------------------------------------ 2 files changed, 11 insertions(+), 69 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6d232837cbee..d1ba52e4b976 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -29,6 +29,7 @@ config ARM64 select ARCH_HAS_SETUP_DMA_OPS select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_SET_MEMORY + select ARCH_STACKWALK select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_SYNC_DMA_FOR_DEVICE diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 05eaba21fd46..804d076b02cb 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -133,82 +133,23 @@ void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, NOKPROBE_SYMBOL(walk_stackframe); #ifdef CONFIG_STACKTRACE -struct stack_trace_data { - struct stack_trace *trace; - unsigned int no_sched_functions; - unsigned int skip; -}; -static bool save_trace(void *d, unsigned long addr) +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) { - struct stack_trace_data *data = d; - struct stack_trace *trace = data->trace; - - if (data->no_sched_functions && in_sched_functions(addr)) - return false; - if (data->skip) { - data->skip--; - return false; - } - - trace->entries[trace->nr_entries++] = addr; - - return trace->nr_entries >= trace->max_entries; -} - -void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) -{ - struct stack_trace_data data; - struct stackframe frame; - - data.trace = trace; - data.skip = trace->skip; - data.no_sched_functions = 0; - - start_backtrace(&frame, regs->regs[29], regs->pc); - walk_stackframe(current, &frame, save_trace, &data); -} -EXPORT_SYMBOL_GPL(save_stack_trace_regs); - -static noinline void __save_stack_trace(struct task_struct *tsk, - struct stack_trace *trace, unsigned int nosched) -{ - struct stack_trace_data data; struct stackframe frame; - if (!try_get_task_stack(tsk)) - return; - - data.trace = trace; - data.skip = trace->skip; - data.no_sched_functions = nosched; - - if (tsk != current) { - start_backtrace(&frame, thread_saved_fp(tsk), - thread_saved_pc(tsk)); - } else { - /* We don't want this function nor the caller */ - data.skip += 2; + if (regs) + start_backtrace(&frame, regs->regs[29], regs->pc); + else if (task == current) start_backtrace(&frame, (unsigned long)__builtin_frame_address(0), - (unsigned long)__save_stack_trace); - } - - walk_stackframe(tsk, &frame, save_trace, &data); + (unsigned long)arch_stack_walk); + else + start_backtrace(&frame, thread_saved_fp(task), + thread_saved_pc(task)); - put_task_stack(tsk); -} - -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) -{ - __save_stack_trace(tsk, trace, 1); 
-} -EXPORT_SYMBOL_GPL(save_stack_trace_tsk); - -void save_stack_trace(struct stack_trace *trace) -{ - __save_stack_trace(current, trace, 0); + walk_stackframe(task, &frame, consume_entry, cookie); } -EXPORT_SYMBOL_GPL(save_stack_trace); #endif -- cgit From c6b90d5cf637ac1186ca25ec1f9e410455a0ca7d Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Tue, 15 Sep 2020 16:19:59 +0800 Subject: arm64/fpsimd: Fix missing-prototypes in fpsimd.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following warnings. arch/arm64/kernel/fpsimd.c:935:6: warning: no previous prototype for ‘do_sve_acc’ [-Wmissing-prototypes] arch/arm64/kernel/fpsimd.c:962:6: warning: no previous prototype for ‘do_fpsimd_acc’ [-Wmissing-prototypes] arch/arm64/kernel/fpsimd.c:971:6: warning: no previous prototype for ‘do_fpsimd_exc’ [-Wmissing-prototypes] arch/arm64/kernel/fpsimd.c:1266:6: warning: no previous prototype for ‘kernel_neon_begin’ [-Wmissing-prototypes] arch/arm64/kernel/fpsimd.c:1292:6: warning: no previous prototype for ‘kernel_neon_end’ [-Wmissing-prototypes] Signed-off-by: Tian Tao Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/1600157999-14802-1-git-send-email-tiantao6@hisilicon.com Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 55c8f3ec6705..0a637e373226 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -32,9 +32,11 @@ #include #include +#include #include #include #include +#include #include #include #include -- cgit From 152d75d66428afc636ec34bcef8c90a4aa2a7848 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Wed, 16 Sep 2020 10:20:47 +0800 Subject: arm64: mm: Fix missing-prototypes in pageattr.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following warnings. ‘set_memory_valid’ [-Wmissing-prototypes] int set_memory_valid(unsigned long addr, int numpages, int enable) ^ ‘set_direct_map_invalid_noflush’ [-Wmissing-prototypes] int set_direct_map_invalid_noflush(struct page *page) ^ ‘set_direct_map_default_noflush’ [-Wmissing-prototypes] int set_direct_map_default_noflush(struct page *page) ^ Signed-off-by: Tian Tao Reviewed-by: Gavin Shan Link: https://lore.kernel.org/r/1600222847-56792-1-git-send-email-tiantao6@hisilicon.com arch/arm64/mm/pageattr.c:138:5: warning: no previous prototype for arch/arm64/mm/pageattr.c:150:5: warning: no previous prototype for arch/arm64/mm/pageattr.c:165:5: warning: no previous prototype for Signed-off-by: Will Deacon --- arch/arm64/mm/pageattr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 23f648c2a199..1b94f5b82654 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -8,6 +8,7 @@ #include #include +#include #include #include -- cgit From a76b8236edcf5b785d044b930f9e14ad02b4a484 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Tue, 15 Sep 2020 16:41:09 -0400 Subject: drivers/perf: xgene_pmu: Fix uninitialized resource struct This splat was reported on newer Fedora kernels booting on certain X-gene based machines: xgene-pmu APMC0D83:00: X-Gene PMU version 3 Unable to handle kernel read from unreadable memory at virtual \ address 0000000000004006 ... 
Call trace: string+0x50/0x100 vsnprintf+0x160/0x750 devm_kvasprintf+0x5c/0xb4 devm_kasprintf+0x54/0x60 __devm_ioremap_resource+0xdc/0x1a0 devm_ioremap_resource+0x14/0x20 acpi_get_pmu_hw_inf.isra.0+0x84/0x15c acpi_pmu_dev_add+0xbc/0x21c acpi_ns_walk_namespace+0x16c/0x1e4 acpi_walk_namespace+0xb4/0xfc xgene_pmu_probe_pmu_dev+0x7c/0xe0 xgene_pmu_probe.part.0+0x2c0/0x310 xgene_pmu_probe+0x54/0x64 platform_drv_probe+0x60/0xb4 really_probe+0xe8/0x4a0 driver_probe_device+0xe4/0x100 device_driver_attach+0xcc/0xd4 __driver_attach+0xb0/0x17c bus_for_each_dev+0x6c/0xb0 driver_attach+0x30/0x40 bus_add_driver+0x154/0x250 driver_register+0x84/0x140 __platform_driver_register+0x54/0x60 xgene_pmu_driver_init+0x28/0x34 do_one_initcall+0x40/0x204 do_initcalls+0x104/0x144 kernel_init_freeable+0x198/0x210 kernel_init+0x20/0x12c ret_from_fork+0x10/0x18 Code: 91000400 110004e1 eb08009f 540000c0 (38646846) ---[ end trace f08c10566496a703 ]--- This is due to use of an uninitialized local resource struct in the xgene pmu driver. The thunderx2_pmu driver avoids this by using the resource list constructed by acpi_dev_get_resources() rather than using a callback from that function. The callback in the xgene driver didn't fully initialize the resource. So get rid of the callback and search the resource list as done by thunderx2. Fixes: 832c927d119b ("perf: xgene: Add APM X-Gene SoC Performance Monitoring Unit driver") Signed-off-by: Mark Salter Link: https://lore.kernel.org/r/20200915204110.326138-1-msalter@redhat.com Signed-off-by: Will Deacon --- drivers/perf/xgene_pmu.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index edac28cd25dd..633cf07ba672 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -1453,17 +1453,6 @@ static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id) } #if defined(CONFIG_ACPI) -static int acpi_pmu_dev_add_resource(struct acpi_resource *ares, void *data) -{ - struct resource *res = data; - - if (ares->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) - acpi_dev_resource_memory(ares, res); - - /* Always tell the ACPI core to skip this resource */ - return 1; -} - static struct xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu, struct acpi_device *adev, u32 type) @@ -1475,6 +1464,7 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu, struct hw_pmu_info *inf; void __iomem *dev_csr; struct resource res; + struct resource_entry *rentry; int enable_bit; int rc; @@ -1483,11 +1473,23 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu, return NULL; INIT_LIST_HEAD(&resource_list); - rc = acpi_dev_get_resources(adev, &resource_list, - acpi_pmu_dev_add_resource, &res); + rc = acpi_dev_get_resources(adev, &resource_list, NULL, NULL); + if (rc <= 0) { + dev_err(dev, "PMU type %d: No resources found\n", type); + return NULL; + } + + list_for_each_entry(rentry, &resource_list, node) { + if (resource_type(rentry->res) == IORESOURCE_MEM) { + res = *rentry->res; + rentry = NULL; + break; + } + } acpi_dev_free_resource_list(&resource_list); - if (rc < 0) { - dev_err(dev, "PMU type %d: No resource address found\n", type); + + if (rentry) { + dev_err(dev, "PMU type %d: No memory resource found\n", type); return NULL; } -- cgit From 688494a407d1419a6b158c644b262c61cde39f48 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Tue, 15 Sep 2020 16:41:10 -0400 Subject: drivers/perf: thunderx2_pmu: Fix memory resource error handling In 
tx2_uncore_pmu_init_dev(), a call to acpi_dev_get_resources() is used to create a list _CRS resources which is searched for the device base address. There is an error check following this: if (!rentry->res) return NULL In no case, will rentry->res be NULL, so the test is useless. Even if the test worked, it comes before the resource list memory is freed. None of this really matters as long as the ACPI table has the memory resource. Let's clean it up so that it makes sense and will give a meaningful error should firmware leave out the memory resource. Fixes: 69c32972d593 ("drivers/perf: Add Cavium ThunderX2 SoC UNCORE PMU driver") Signed-off-by: Mark Salter Link: https://lore.kernel.org/r/20200915204110.326138-2-msalter@redhat.com Signed-off-by: Will Deacon --- drivers/perf/thunderx2_pmu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c index aac9823b0c6b..e116815fa809 100644 --- a/drivers/perf/thunderx2_pmu.c +++ b/drivers/perf/thunderx2_pmu.c @@ -805,14 +805,17 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev, list_for_each_entry(rentry, &list, node) { if (resource_type(rentry->res) == IORESOURCE_MEM) { res = *rentry->res; + rentry = NULL; break; } } + acpi_dev_free_resource_list(&list); - if (!rentry->res) + if (rentry) { + dev_err(dev, "PMU type %d: Fail to find resource\n", type); return NULL; + } - acpi_dev_free_resource_list(&list); base = devm_ioremap_resource(dev, &res); if (IS_ERR(base)) { dev_err(dev, "PMU type %d: Fail to map resource\n", type); -- cgit From 0fdb64c2a303e4d2562245501920241c0e507951 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 15 Sep 2020 15:48:09 +0100 Subject: arm64: Improve diagnostics when trapping BRK with FAULT_BRK_IMM When generating instructions at runtime, for example due to kernel text patching or the BPF JIT, we can emit a trapping BRK instruction if we are asked to encode an invalid instruction such as an out-of-range] branch. This is indicative of a bug in the caller, and will result in a crash on executing the generated code. Unfortunately, the message from the crash is really unhelpful, and mumbles something about ptrace: | Unexpected kernel BRK exception at EL1 | Internal error: ptrace BRK handler: f2000100 [#1] SMP We can do better than this. Install a break handler for FAULT_BRK_IMM, which is the immediate used to encode the "I've been asked to generate an invalid instruction" error, and triage the faulting PC to determine whether or not the failure occurred in the BPF JIT. 
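With the new hook installed the report names the culprit up front; going by the format string added below, it reads along the lines of:

| BPF JIT generated an invalid instruction at <symbol+offset>!

or "Kernel text patching generated an invalid instruction at ..." when the faulting PC lies outside the BPF JIT region.
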
Link: https://lore.kernel.org/r/20200915141707.GB26439@willie-the-truck Reported-by: Ilias Apalodimas Signed-off-by: Will Deacon --- arch/arm64/include/asm/extable.h | 9 +++++++++ arch/arm64/kernel/debug-monitors.c | 2 +- arch/arm64/kernel/traps.c | 17 +++++++++++++++++ arch/arm64/mm/extable.c | 4 +--- 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h index 840a35ed92ec..b15eb4a3e6b2 100644 --- a/arch/arm64/include/asm/extable.h +++ b/arch/arm64/include/asm/extable.h @@ -22,6 +22,15 @@ struct exception_table_entry #define ARCH_HAS_RELATIVE_EXTABLE +static inline bool in_bpf_jit(struct pt_regs *regs) +{ + if (!IS_ENABLED(CONFIG_BPF_JIT)) + return false; + + return regs->pc >= BPF_JIT_REGION_START && + regs->pc < BPF_JIT_REGION_END; +} + #ifdef CONFIG_BPF_JIT int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, struct pt_regs *regs); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 7310a4f7f993..fa76151de6ff 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -384,7 +384,7 @@ void __init debug_traps_init(void) hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, TRAP_TRACE, "single-step handler"); hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, - TRAP_BRKPT, "ptrace BRK handler"); + TRAP_BRKPT, "BRK handler"); } /* Re-enable single step for syscall restarting. */ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 13ebd5ca2070..00e92c9f338c 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -994,6 +995,21 @@ static struct break_hook bug_break_hook = { .imm = BUG_BRK_IMM, }; +static int reserved_fault_handler(struct pt_regs *regs, unsigned int esr) +{ + pr_err("%s generated an invalid instruction at %pS!\n", + in_bpf_jit(regs) ? "BPF JIT" : "Kernel text patching", + (void *)instruction_pointer(regs)); + + /* We cannot handle this */ + return DBG_HOOK_ERROR; +} + +static struct break_hook fault_break_hook = { + .fn = reserved_fault_handler, + .imm = FAULT_BRK_IMM, +}; + #ifdef CONFIG_KASAN_SW_TAGS #define KASAN_ESR_RECOVER 0x20 @@ -1059,6 +1075,7 @@ int __init early_brk64(unsigned long addr, unsigned int esr, void __init trap_init(void) { register_kernel_break_hook(&bug_break_hook); + register_kernel_break_hook(&fault_break_hook); #ifdef CONFIG_KASAN_SW_TAGS register_kernel_break_hook(&kasan_break_hook); #endif diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index eee1732ab6cd..aa0060178343 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -14,9 +14,7 @@ int fixup_exception(struct pt_regs *regs) if (!fixup) return 0; - if (IS_ENABLED(CONFIG_BPF_JIT) && - regs->pc >= BPF_JIT_REGION_START && - regs->pc < BPF_JIT_REGION_END) + if (in_bpf_jit(regs)) return arm64_bpf_fixup_exception(fixup, regs); regs->pc = (unsigned long)&fixup->fixup + fixup->fixup; -- cgit From f186a84d8abecc7dfbd5bbd0a8ad0358078e4767 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 28 Aug 2020 19:11:48 +0100 Subject: arm64/fpsimd: Update documentation of do_sve_acc fpsimd_restore_current_state() enables and disables the SVE access trap based on TIF_SVE, not task_fpsimd_load(). Update the documentation of do_sve_acc to reflect this behavior. 
Signed-off-by: Julien Grall Signed-off-by: Mark Brown Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200828181155.17745-2-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 55c8f3ec6705..1c6a82083d5c 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -928,7 +928,7 @@ void fpsimd_release_task(struct task_struct *dead_task) * the SVE access trap will be disabled the next time this task * reaches ret_to_user. * - * TIF_SVE should be clear on entry: otherwise, task_fpsimd_load() + * TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state() * would have disabled the SVE access trap for userspace during * ret_to_user, making an SVE access trap impossible in that case. */ -- cgit From 68a4c52e55e024ad946779f20d50ab31860eb0cc Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 28 Aug 2020 19:11:49 +0100 Subject: arm64/signal: Update the comment in preserve_sve_context The SVE state is saved by fpsimd_signal_preserve_current_state() and not preserve_fpsimd_context(). Update the comment in preserve_sve_context to reflect the current behavior. Signed-off-by: Julien Grall Signed-off-by: Mark Brown Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200828181155.17745-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 3b4f31f35e45..f3af68dc1cf8 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -244,7 +244,8 @@ static int preserve_sve_context(struct sve_context __user *ctx) if (vq) { /* * This assumes that the SVE state has already been saved to - * the task struct by calling preserve_fpsimd_context(). + * the task struct by calling the function + * fpsimd_signal_preserve_current_state(). */ err |= __copy_to_user((char __user *)ctx + SVE_SIG_REGS_OFFSET, current->thread.sve_state, -- cgit From 315cf047d230a363515bedce177cfbf460cbb2d6 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 28 Aug 2020 19:11:51 +0100 Subject: arm64/fpsimdmacros: Introduce a macro to update ZCR_EL1.LEN A follow-up patch will need to update ZCR_EL1.LEN. Add a macro that could be re-used in the current and new places to avoid code duplication. 
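In C-like terms the logic being factored out is a read-modify-write of ZCR_EL1 that skips the (self-synchronising) write when the requested vector length is already in place; purely as a restatement of the assembly macro below, not code added by this patch:

  u64 zcr, new;

  zcr = read_sysreg_s(SYS_ZCR_EL1);
  new = (zcr & ~ZCR_ELx_LEN_MASK) | vq_minus_1;
  if (new != zcr)
          write_sysreg_s(new, SYS_ZCR_EL1);
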
Signed-off-by: Julien Grall Signed-off-by: Mark Brown Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200828181155.17745-5-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimdmacros.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 636e9d9c7929..60e29a3e41c5 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -183,6 +183,17 @@ .purgem _for__body .endm +/* Update ZCR_EL1.LEN with the new VQ */ +.macro sve_load_vq xvqminus1, xtmp, xtmp2 + mrs_s \xtmp, SYS_ZCR_EL1 + bic \xtmp2, \xtmp, ZCR_ELx_LEN_MASK + orr \xtmp2, \xtmp2, \xvqminus1 + cmp \xtmp2, \xtmp + b.eq 921f + msr_s SYS_ZCR_EL1, \xtmp2 //self-synchronising +921: +.endm + .macro sve_save nxbase, xpfpsr, nxtmp _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34 _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16 @@ -197,13 +208,7 @@ .endm .macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2 - mrs_s x\nxtmp, SYS_ZCR_EL1 - bic \xtmp2, x\nxtmp, ZCR_ELx_LEN_MASK - orr \xtmp2, \xtmp2, \xvqminus1 - cmp \xtmp2, x\nxtmp - b.eq 921f - msr_s SYS_ZCR_EL1, \xtmp2 // self-synchronising -921: + sve_load_vq \xvqminus1, x\nxtmp, \xtmp2 _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34 _sve_ldr_p 0, \nxbase _sve_wrffr 0 -- cgit From 6d40f05fad0babfb3e991edb2d0bd28a30a2f10c Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 28 Aug 2020 19:11:50 +0100 Subject: arm64/fpsimdmacros: Allow the macro "for" to be used in more cases The current version of the macro "for" is not able to work when the counter is used to generate registers using mnemonics. This is because gas is not able to evaluate the expression generated if used in register's name (i.e x\n). Gas offers a way to evaluate macro arguments by using % in front of them under the alternate macro mode. The implementation of "for" is updated to use the alternate macro mode and %, so we can use the macro in more cases. As the alternate macro mode may have side-effects, this is disabled when expanding the body. While it is enough to prefix the argument of the macro "__for_body" with %, the arguments of "__for" are also prefixed to get a more bearable value in case of compilation error. 
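As a tiny standalone illustration of the gas behaviour being relied on (not code from this patch): with .altmacro in effect, an argument written as %expr is evaluated before substitution, so the macro body sees a bare number and can splice it into a register name:

  .altmacro
  .macro clear_reg n
	mov	x\n, xzr		// only assembles if \n expands to a plain number
  .endm

  clear_reg %(7 + 8)		// % evaluates the expression: expands to "mov x15, xzr"
  .noaltmacro

Without the leading %, the unevaluated text would be substituted and "x(7 + 8)" is not a valid register name.
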
Suggested-by: Dave Martin Signed-off-by: Julien Grall Signed-off-by: Mark Brown Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200828181155.17745-4-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimdmacros.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 60e29a3e41c5..feef5b371fba 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -166,19 +166,23 @@ .macro __for from:req, to:req .if (\from) == (\to) - _for__body \from + _for__body %\from .else - __for \from, (\from) + ((\to) - (\from)) / 2 - __for (\from) + ((\to) - (\from)) / 2 + 1, \to + __for %\from, %((\from) + ((\to) - (\from)) / 2) + __for %((\from) + ((\to) - (\from)) / 2 + 1), %\to .endif .endm .macro _for var:req, from:req, to:req, insn:vararg .macro _for__body \var:req + .noaltmacro \insn + .altmacro .endm + .altmacro __for \from, \to + .noaltmacro .purgem _for__body .endm -- cgit From 1e530f1352a2709d7bcacdb8b6d42c8900ba2c80 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 28 Aug 2020 19:11:52 +0100 Subject: arm64/sve: Implement a helper to flush SVE registers Introduce a new helper that will zero all SVE registers but the first 128-bits of each vector. This will be used by subsequent patches to avoid costly store/maipulate/reload sequences in places like do_sve_acc(). Signed-off-by: Julien Grall Signed-off-by: Mark Brown Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200828181155.17745-6-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 1 + arch/arm64/include/asm/fpsimdmacros.h | 19 +++++++++++++++++++ arch/arm64/kernel/entry-fpsimd.S | 8 ++++++++ 3 files changed, 28 insertions(+) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 59f10dd13f12..958f642e930d 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -69,6 +69,7 @@ static inline void *sve_pffr(struct thread_struct *thread) extern void sve_save_state(void *state, u32 *pfpsr); extern void sve_load_state(void const *state, u32 const *pfpsr, unsigned long vq_minus_1); +extern void sve_flush_live(void); extern unsigned int sve_get_vl(void); struct arm64_cpu_capabilities; diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index feef5b371fba..af43367534c7 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -164,6 +164,13 @@ | ((\np) << 5) .endm +/* PFALSE P\np.B */ +.macro _sve_pfalse np + _sve_check_preg \np + .inst 0x2518e400 \ + | (\np) +.endm + .macro __for from:req, to:req .if (\from) == (\to) _for__body %\from @@ -198,6 +205,18 @@ 921: .endm +/* Preserve the first 128-bits of Znz and zero the rest. 
*/ +.macro _sve_flush_z nz + _sve_check_zreg \nz + mov v\nz\().16b, v\nz\().16b +.endm + +.macro sve_flush + _for n, 0, 31, _sve_flush_z \n + _for n, 0, 15, _sve_pfalse \n + _sve_wrffr 0 +.endm + .macro sve_save nxbase, xpfpsr, nxtmp _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34 _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16 diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index f880dd63ddc3..fdf7a5a35c63 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -32,6 +32,7 @@ SYM_FUNC_START(fpsimd_load_state) SYM_FUNC_END(fpsimd_load_state) #ifdef CONFIG_ARM64_SVE + SYM_FUNC_START(sve_save_state) sve_save 0, x1, 2 ret @@ -46,4 +47,11 @@ SYM_FUNC_START(sve_get_vl) _sve_rdvl 0, 1 ret SYM_FUNC_END(sve_get_vl) + +/* Zero all SVE registers but the first 128-bits of each vector */ +SYM_FUNC_START(sve_flush_live) + sve_flush + ret +SYM_FUNC_END(sve_flush_live) + #endif /* CONFIG_ARM64_SVE */ -- cgit From 9c4b4c701e53183df94f6b9802b63762f0c0e1e3 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 28 Aug 2020 19:11:53 +0100 Subject: arm64/sve: Implement a helper to load SVE registers from FPSIMD state In a follow-up patch, we may save the FPSIMD rather than the full SVE state when the state has to be zeroed on return to userspace (e.g during a syscall). Introduce an helper to load SVE vectors from FPSIMD state and zero the rest of SVE registers. Signed-off-by: Julien Grall Signed-off-by: Mark Brown Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20200828181155.17745-7-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 2 ++ arch/arm64/kernel/entry-fpsimd.S | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 958f642e930d..bec5f14b622a 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -70,6 +70,8 @@ extern void sve_save_state(void *state, u32 *pfpsr); extern void sve_load_state(void const *state, u32 const *pfpsr, unsigned long vq_minus_1); extern void sve_flush_live(void); +extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state, + unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); struct arm64_cpu_capabilities; diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index fdf7a5a35c63..2ca395c25448 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -48,6 +48,23 @@ SYM_FUNC_START(sve_get_vl) ret SYM_FUNC_END(sve_get_vl) +/* + * Load SVE state from FPSIMD state. + * + * x0 = pointer to struct fpsimd_state + * x1 = VQ - 1 + * + * Each SVE vector will be loaded with the first 128-bits taken from FPSIMD + * and the rest zeroed. All the other SVE registers will be zeroed. + */ +SYM_FUNC_START(sve_load_from_fpsimd_state) + sve_load_vq x1, x2, x3 + fpsimd_restore x0, 8 + _for n, 0, 15, _sve_pfalse \n + _sve_wrffr 0 + ret +SYM_FUNC_END(sve_load_from_fpsimd_state) + /* Zero all SVE registers but the first 128-bits of each vector */ SYM_FUNC_START(sve_flush_live) sve_flush -- cgit From 9e0f085c2b33ebe13bcec53cbacce505fe78fde7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 21 Sep 2020 13:23:41 +0100 Subject: arm64: Move console stack display code to stacktrace.c Currently the code for displaying a stack trace on the console is located in traps.c rather than stacktrace.c, using the unwinding code that is in stacktrace.c. 
This can be confusing and make the code hard to find since such output is often referred to as a stack trace which might mislead the unwary. Due to this and since traps.c doesn't interact with this code except for via the public interfaces move the code to stacktrace.c to make it easier to find. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20200921122341.11280-1-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/stacktrace.c | 65 ++++++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/traps.c | 65 ------------------------------------------ 2 files changed, 65 insertions(+), 65 deletions(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 804d076b02cb..fa56af1a59c3 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -132,6 +132,71 @@ void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, } NOKPROBE_SYMBOL(walk_stackframe); +static void dump_backtrace_entry(unsigned long where, const char *loglvl) +{ + printk("%s %pS\n", loglvl, (void *)where); +} + +void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, + const char *loglvl) +{ + struct stackframe frame; + int skip = 0; + + pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); + + if (regs) { + if (user_mode(regs)) + return; + skip = 1; + } + + if (!tsk) + tsk = current; + + if (!try_get_task_stack(tsk)) + return; + + if (tsk == current) { + start_backtrace(&frame, + (unsigned long)__builtin_frame_address(0), + (unsigned long)dump_backtrace); + } else { + /* + * task blocked in __switch_to + */ + start_backtrace(&frame, + thread_saved_fp(tsk), + thread_saved_pc(tsk)); + } + + printk("%sCall trace:\n", loglvl); + do { + /* skip until specified stack frame */ + if (!skip) { + dump_backtrace_entry(frame.pc, loglvl); + } else if (frame.fp == regs->regs[29]) { + skip = 0; + /* + * Mostly, this is the case where this function is + * called in panic/abort. As exception handler's + * stack frame does not contain the corresponding pc + * at which an exception has taken place, use regs->pc + * instead. 
+ */ + dump_backtrace_entry(regs->pc, loglvl); + } + } while (!unwind_frame(tsk, &frame)); + + put_task_stack(tsk); +} + +void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) +{ + dump_backtrace(NULL, tsk, loglvl); + barrier(); +} + #ifdef CONFIG_STACKTRACE void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 13ebd5ca2070..77dc8e7701ed 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -53,11 +53,6 @@ static const char *handler[]= { int show_unhandled_signals = 0; -static void dump_backtrace_entry(unsigned long where, const char *loglvl) -{ - printk("%s %pS\n", loglvl, (void *)where); -} - static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); @@ -83,66 +78,6 @@ static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) printk("%sCode: %s\n", lvl, str); } -void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, - const char *loglvl) -{ - struct stackframe frame; - int skip = 0; - - pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); - - if (regs) { - if (user_mode(regs)) - return; - skip = 1; - } - - if (!tsk) - tsk = current; - - if (!try_get_task_stack(tsk)) - return; - - if (tsk == current) { - start_backtrace(&frame, - (unsigned long)__builtin_frame_address(0), - (unsigned long)dump_backtrace); - } else { - /* - * task blocked in __switch_to - */ - start_backtrace(&frame, - thread_saved_fp(tsk), - thread_saved_pc(tsk)); - } - - printk("%sCall trace:\n", loglvl); - do { - /* skip until specified stack frame */ - if (!skip) { - dump_backtrace_entry(frame.pc, loglvl); - } else if (frame.fp == regs->regs[29]) { - skip = 0; - /* - * Mostly, this is the case where this function is - * called in panic/abort. As exception handler's - * stack frame does not contain the corresponding pc - * at which an exception has taken place, use regs->pc - * instead. - */ - dump_backtrace_entry(regs->pc, loglvl); - } - } while (!unwind_frame(tsk, &frame)); - - put_task_stack(tsk); -} - -void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) -{ - dump_backtrace(NULL, tsk, loglvl); - barrier(); -} - #ifdef CONFIG_PREEMPT #define S_PREEMPT " PREEMPT" #elif defined(CONFIG_PREEMPT_RT) -- cgit From a194c5f2d2b3a05428805146afcabe5140b5d378 Mon Sep 17 00:00:00 2001 From: Zhengyuan Liu Date: Mon, 21 Sep 2020 10:39:36 +0800 Subject: arm64/mm: return cpu_all_mask when node is NUMA_NO_NODE The @node passed to cpumask_of_node() can be NUMA_NO_NODE, in that case it will trigger the following WARN_ON(node >= nr_node_ids) due to mismatched data types of @node and @nr_node_ids. Actually we should return cpu_all_mask just like most other architectures do if passed NUMA_NO_NODE. Also add a similar check to the inline cpumask_of_node() in numa.h. Signed-off-by: Zhengyuan Liu Reviewed-by: Gavin Shan Link: https://lore.kernel.org/r/20200921023936.21846-1-liuzhengyuan@tj.kylinos.cn Signed-off-by: Will Deacon --- arch/arm64/include/asm/numa.h | 3 +++ arch/arm64/mm/numa.c | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h index 626ad01e83bf..dd870390d639 100644 --- a/arch/arm64/include/asm/numa.h +++ b/arch/arm64/include/asm/numa.h @@ -25,6 +25,9 @@ const struct cpumask *cpumask_of_node(int node); /* Returns a pointer to the cpumask of CPUs on Node 'node'. 
*/ static inline const struct cpumask *cpumask_of_node(int node) { + if (node == NUMA_NO_NODE) + return cpu_all_mask; + return node_to_cpumask_map[node]; } #endif diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 73f8b49d485c..88e51aade0da 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -46,7 +46,11 @@ EXPORT_SYMBOL(node_to_cpumask_map); */ const struct cpumask *cpumask_of_node(int node) { - if (WARN_ON(node >= nr_node_ids)) + + if (node == NUMA_NO_NODE) + return cpu_all_mask; + + if (WARN_ON(node < 0 || node >= nr_node_ids)) return cpu_none_mask; if (WARN_ON(node_to_cpumask_map[node] == NULL)) -- cgit From f5be3a61fdb5dd11ef60173e2783ccf62685f892 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Tue, 22 Sep 2020 13:53:45 +0800 Subject: arm64: perf: Add support caps under sysfs ARMv8.4-PMU introduces the PMMIR_EL1 registers and some new PMU events, like STALL_SLOT etc, are related to it. Let's add a caps directory to /sys/bus/event_source/devices/armv8_pmuv3_0/ and support slots from PMMIR_EL1 registers in this entry. The user programs can get the slots from sysfs directly. /sys/bus/event_source/devices/armv8_pmuv3_0/caps/slots is exposed under sysfs. Both ARMv8.4-PMU and STALL_SLOT event are implemented, it returns the slots from PMMIR_EL1, otherwise it will return 0. Signed-off-by: Shaokun Zhang Cc: Will Deacon Cc: Mark Rutland Link: https://lore.kernel.org/r/1600754025-53535-1-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/perf_event.h | 3 ++ arch/arm64/include/asm/sysreg.h | 2 + arch/arm64/kernel/perf_event.c | 103 ++++++++++++++++++++++++------------ include/linux/perf/arm_pmu.h | 3 ++ 4 files changed, 78 insertions(+), 33 deletions(-) diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 2c2d7dbe8a02..60731f602d3e 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -236,6 +236,9 @@ #define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ #define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ +/* PMMIR_EL1.SLOTS mask */ +#define ARMV8_PMU_SLOTS_MASK 0xff + #ifdef CONFIG_PERF_EVENTS struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 554a7e8ecb07..921773adff5e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -321,6 +321,8 @@ #define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) #define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) +#define SYS_PMMIR_EL1 sys_reg(3, 0, 9, 14, 6) + #define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0) #define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 04d0ceb72a67..34a7cd3af699 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -305,6 +305,28 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { .attrs = armv8_pmuv3_format_attrs, }; +static ssize_t slots_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); + u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK; + + return snprintf(page, PAGE_SIZE, "0x%08x\n", slots); +} + +static DEVICE_ATTR_RO(slots); + +static struct attribute *armv8_pmuv3_caps_attrs[] = { + &dev_attr_slots.attr, + NULL, +}; + +static struct 
attribute_group armv8_pmuv3_caps_attr_group = { + .name = "caps", + .attrs = armv8_pmuv3_caps_attrs, +}; + /* * Perf Events' indices */ @@ -984,6 +1006,12 @@ static void __armv8pmu_probe_pmu(void *info) bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap, pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); + + /* store PMMIR_EL1 register for sysfs */ + if (pmuver >= ID_AA64DFR0_PMUVER_8_4 && (pmceid_raw[1] & BIT(31))) + cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1); + else + cpu_pmu->reg_pmmir = 0; } static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) @@ -1006,7 +1034,8 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, int (*map_event)(struct perf_event *event), const struct attribute_group *events, - const struct attribute_group *format) + const struct attribute_group *format, + const struct attribute_group *caps) { int ret = armv8pmu_probe_pmu(cpu_pmu); if (ret) @@ -1031,104 +1060,112 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, events : &armv8_pmuv3_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ? format : &armv8_pmuv3_format_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = caps ? + caps : &armv8_pmuv3_caps_attr_group; return 0; } +static int armv8_pmu_init_nogroups(struct arm_pmu *cpu_pmu, char *name, + int (*map_event)(struct perf_event *event)) +{ + return armv8_pmu_init(cpu_pmu, name, map_event, NULL, NULL, NULL); +} + static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_pmuv3", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_pmuv3", + armv8_pmuv3_map_event); } static int armv8_a34_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a34", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a34", + armv8_pmuv3_map_event); } static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a35", - armv8_a53_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a35", + armv8_a53_map_event); } static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a53", - armv8_a53_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a53", + armv8_a53_map_event); } static int armv8_a55_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a55", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a55", + armv8_pmuv3_map_event); } static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a57", - armv8_a57_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a57", + armv8_a57_map_event); } static int armv8_a65_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a65", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a65", + armv8_pmuv3_map_event); } static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a72", - armv8_a57_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a72", + armv8_a57_map_event); } static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a73", - armv8_a73_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a73", + 
armv8_a73_map_event); } static int armv8_a75_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a75", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a75", + armv8_pmuv3_map_event); } static int armv8_a76_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a76", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a76", + armv8_pmuv3_map_event); } static int armv8_a77_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a77", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a77", + armv8_pmuv3_map_event); } static int armv8_e1_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_neoverse_e1", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_e1", + armv8_pmuv3_map_event); } static int armv8_n1_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_neoverse_n1", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_n1", + armv8_pmuv3_map_event); } static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cavium_thunder", - armv8_thunder_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cavium_thunder", + armv8_thunder_map_event); } static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_brcm_vulcan", - armv8_vulcan_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_brcm_vulcan", + armv8_vulcan_map_event); } static const struct of_device_id armv8_pmu_of_device_ids[] = { diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 5b616dde9a4c..505480217cf1 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -73,6 +73,7 @@ enum armpmu_attr_groups { ARMPMU_ATTR_GROUP_COMMON, ARMPMU_ATTR_GROUP_EVENTS, ARMPMU_ATTR_GROUP_FORMATS, + ARMPMU_ATTR_GROUP_CAPS, ARMPMU_NR_ATTR_GROUPS }; @@ -109,6 +110,8 @@ struct arm_pmu { struct notifier_block cpu_pm_nb; /* the attr_groups array must be NULL-terminated */ const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1]; + /* store the PMMIR_EL1 to expose slots */ + u64 reg_pmmir; /* Only to be used by ACPI probing code */ unsigned long acpi_cpuid; -- cgit From c8fdbbfa981a7bc64acec234620788c97d1f6a88 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 18 Sep 2020 19:24:09 +0100 Subject: perf: Add Arm CMN-600 DT binding Document the requirements for the CMN-600 DT binding. The internal topology is almost entirely discoverable by walking a tree of ID registers, but sadly both the starting point for that walk and the exact format of those registers are configuration-dependent and not discoverable from some sane fixed location. Oh well. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- .../devicetree/bindings/perf/arm,cmn.yaml | 57 ++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 Documentation/devicetree/bindings/perf/arm,cmn.yaml diff --git a/Documentation/devicetree/bindings/perf/arm,cmn.yaml b/Documentation/devicetree/bindings/perf/arm,cmn.yaml new file mode 100644 index 000000000000..e4fcc0de25e2 --- /dev/null +++ b/Documentation/devicetree/bindings/perf/arm,cmn.yaml @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright 2020 Arm Ltd. 
+%YAML 1.2 +--- +$id: http://devicetree.org/schemas/perf/arm,cmn.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Arm CMN (Coherent Mesh Network) Performance Monitors + +maintainers: + - Robin Murphy + +properties: + compatible: + const: arm,cmn-600 + + reg: + items: + - description: Physical address of the base (PERIPHBASE) and + size (up to 64MB) of the configuration address space. + + interrupts: + minItems: 1 + maxItems: 4 + items: + - description: Overflow interrupt for DTC0 + - description: Overflow interrupt for DTC1 + - description: Overflow interrupt for DTC2 + - description: Overflow interrupt for DTC3 + description: One interrupt for each DTC domain implemented must + be specified, in order. DTC0 is always present. + + arm,root-node: + $ref: /schemas/types.yaml#/definitions/uint32 + description: Offset from PERIPHBASE of the configuration + discovery node (see TRM definition of ROOTNODEBASE). + +required: + - compatible + - reg + - interrupts + - arm,root-node + +additionalProperties: false + +examples: + - | + #include + #include + pmu@50000000 { + compatible = "arm,cmn-600"; + reg = <0x50000000 0x4000000>; + /* 4x2 mesh with one DTC, and CFG node at 0,1,1,0 */ + interrupts = ; + arm,root-node = <0x104000>; + }; +... -- cgit From 0ba64770a2f2e5a104bf835e133d78d3f82287ad Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 18 Sep 2020 14:28:38 +0100 Subject: perf: Add Arm CMN-600 PMU driver Initial driver for PMU event counting on the Arm CMN-600 interconnect. CMN sports an obnoxiously complex distributed PMU system as part of its debug and trace features, which can do all manner of things like sampling, cross-triggering and generating CoreSight trace. This driver covers the PMU functionality, plus the relevant aspects of watchpoints for simply counting matching flits. Tested-by: Tsahi Zidenberg Tested-by: Tuan Phan Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- Documentation/admin-guide/perf/arm-cmn.rst | 65 ++ Documentation/admin-guide/perf/index.rst | 1 + drivers/perf/Kconfig | 7 + drivers/perf/Makefile | 1 + drivers/perf/arm-cmn.c | 1641 ++++++++++++++++++++++++++++ 5 files changed, 1715 insertions(+) create mode 100644 Documentation/admin-guide/perf/arm-cmn.rst create mode 100644 drivers/perf/arm-cmn.c diff --git a/Documentation/admin-guide/perf/arm-cmn.rst b/Documentation/admin-guide/perf/arm-cmn.rst new file mode 100644 index 000000000000..0e4809346014 --- /dev/null +++ b/Documentation/admin-guide/perf/arm-cmn.rst @@ -0,0 +1,65 @@ +============================= +Arm Coherent Mesh Network PMU +============================= + +CMN-600 is a configurable mesh interconnect consisting of a rectangular +grid of crosspoints (XPs), with each crosspoint supporting up to two +device ports to which various AMBA CHI agents are attached. + +CMN implements a distributed PMU design as part of its debug and trace +functionality. This consists of a local monitor (DTM) at every XP, which +counts up to 4 event signals from the connected device nodes and/or the +XP itself. Overflow from these local counters is accumulated in up to 8 +global counters implemented by the main controller (DTC), which provides +overall PMU control and interrupts for global counter overflow. + +PMU events +---------- + +The PMU driver registers a single PMU device for the whole interconnect, +see /sys/bus/event_source/devices/arm_cmn. 
Multi-chip systems may link +more than one CMN together via external CCIX links - in this situation, +each mesh counts its own events entirely independently, and additional +PMU devices will be named arm_cmn_{1..n}. + +Most events are specified in a format based directly on the TRM +definitions - "type" selects the respective node type, and "eventid" the +event number. Some events require an additional occupancy ID, which is +specified by "occupid". + +* Since RN-D nodes do not have any distinct events from RN-I nodes, they + are treated as the same type (0xa), and the common event templates are + named "rnid_*". + +* The cycle counter is treated as a synthetic event belonging to the DTC + node ("type" == 0x3, "eventid" is ignored). + +* XP events also encode the port and channel in the "eventid" field, to + match the underlying pmu_event0_id encoding for the pmu_event_sel + register. The event templates are named with prefixes to cover all + permutations. + +By default each event provides an aggregate count over all nodes of the +given type. To target a specific node, "bynodeid" must be set to 1 and +"nodeid" to the appropriate value derived from the CMN configuration +(as defined in the "Node ID Mapping" section of the TRM). + +Watchpoints +----------- + +The PMU can also count watchpoint events to monitor specific flit +traffic. Watchpoints are treated as a synthetic event type, and like PMU +events can be global or targeted with a particular XP's "nodeid" value. +Since the watchpoint direction is otherwise implicit in the underlying +register selection, separate events are provided for flit uploads and +downloads. + +The flit match value and mask are passed in config1 and config2 ("val" +and "mask" respectively). "wp_dev_sel", "wp_chn_sel", "wp_grp" and +"wp_exclusive" are specified per the TRM definitions for dtm_wp_config0. +Where a watchpoint needs to match fields from both match groups on the +REQ or SNP channel, it can be specified as two events - one for each +group - with the same nonzero "combine" value. The count for such a +pair of combined events will be attributed to the primary match. +Watchpoint events with a "combine" value of 0 are considered independent +and will count individually. diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst index 47c99f40cc16..5a8f2529a033 100644 --- a/Documentation/admin-guide/perf/index.rst +++ b/Documentation/admin-guide/perf/index.rst @@ -12,6 +12,7 @@ Performance monitor support qcom_l2_pmu qcom_l3_pmu arm-ccn + arm-cmn xgene-pmu arm_dsu_pmu thunderx2-pmu diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 7305d57d1890..130327ff0b0e 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -41,6 +41,13 @@ config ARM_CCN PMU (perf) driver supporting the ARM CCN (Cache Coherent Network) interconnect. +config ARM_CMN + tristate "Arm CMN-600 PMU support" + depends on ARM64 || (COMPILE_TEST && 64BIT) + help + Support for PMU events monitoring on the Arm CMN-600 Coherent Mesh + Network interconnect. 
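As a usage sketch of the event format documented in arm-cmn.rst above (the event name and its type/eventid values are taken from the driver's event table further down; the nodeid value is purely illustrative, since real node IDs depend on the mesh configuration):

  $ perf stat -e arm_cmn/hnf_cache_miss/ -a sleep 1
  $ perf stat -e arm_cmn/type=0x5,eventid=0x1,bynodeid=1,nodeid=0x68/ -a sleep 1

The first form counts HN-F cache misses aggregated over every HN-F node; the second restricts the same event to a single node selected by "nodeid".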
+ config ARM_PMU depends on ARM || ARM64 bool "ARM PMU framework" diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 2ebb4de17815..5365fd56f88f 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_ARM_CCI_PMU) += arm-cci.o obj-$(CONFIG_ARM_CCN) += arm-ccn.o +obj-$(CONFIG_ARM_CMN) += arm-cmn.o obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c new file mode 100644 index 000000000000..e824b5b83ea2 --- /dev/null +++ b/drivers/perf/arm-cmn.c @@ -0,0 +1,1641 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2016-2020 Arm Limited +// CMN-600 Coherent Mesh Network PMU driver + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Common register stuff */ +#define CMN_NODE_INFO 0x0000 +#define CMN_NI_NODE_TYPE GENMASK_ULL(15, 0) +#define CMN_NI_NODE_ID GENMASK_ULL(31, 16) +#define CMN_NI_LOGICAL_ID GENMASK_ULL(47, 32) + +#define CMN_NODEID_DEVID(reg) ((reg) & 3) +#define CMN_NODEID_PID(reg) (((reg) >> 2) & 1) +#define CMN_NODEID_X(reg, bits) ((reg) >> (3 + (bits))) +#define CMN_NODEID_Y(reg, bits) (((reg) >> 3) & ((1U << (bits)) - 1)) + +#define CMN_CHILD_INFO 0x0080 +#define CMN_CI_CHILD_COUNT GENMASK_ULL(15, 0) +#define CMN_CI_CHILD_PTR_OFFSET GENMASK_ULL(31, 16) + +#define CMN_CHILD_NODE_ADDR GENMASK(27,0) +#define CMN_CHILD_NODE_EXTERNAL BIT(31) + +#define CMN_ADDR_NODE_PTR GENMASK(27, 14) + +#define CMN_NODE_PTR_DEVID(ptr) (((ptr) >> 2) & 3) +#define CMN_NODE_PTR_PID(ptr) ((ptr) & 1) +#define CMN_NODE_PTR_X(ptr, bits) ((ptr) >> (6 + (bits))) +#define CMN_NODE_PTR_Y(ptr, bits) (((ptr) >> 6) & ((1U << (bits)) - 1)) + +#define CMN_MAX_XPS (8 * 8) + +/* The CFG node has one other useful purpose */ +#define CMN_CFGM_PERIPH_ID_2 0x0010 +#define CMN_CFGM_PID2_REVISION GENMASK(7, 4) + +/* PMU registers occupy the 3rd 4KB page of each node's 16KB space */ +#define CMN_PMU_OFFSET 0x2000 + +/* For most nodes, this is all there is */ +#define CMN_PMU_EVENT_SEL 0x000 +#define CMN_PMU_EVENTn_ID_SHIFT(n) ((n) * 8) + +/* DTMs live in the PMU space of XP registers */ +#define CMN_DTM_WPn(n) (0x1A0 + (n) * 0x18) +#define CMN_DTM_WPn_CONFIG(n) (CMN_DTM_WPn(n) + 0x00) +#define CMN_DTM_WPn_CONFIG_WP_COMBINE BIT(6) +#define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE BIT(5) +#define CMN_DTM_WPn_CONFIG_WP_GRP BIT(4) +#define CMN_DTM_WPn_CONFIG_WP_CHN_SEL GENMASK_ULL(3, 1) +#define CMN_DTM_WPn_CONFIG_WP_DEV_SEL BIT(0) +#define CMN_DTM_WPn_VAL(n) (CMN_DTM_WPn(n) + 0x08) +#define CMN_DTM_WPn_MASK(n) (CMN_DTM_WPn(n) + 0x10) + +#define CMN_DTM_PMU_CONFIG 0x210 +#define CMN__PMEVCNT0_INPUT_SEL GENMASK_ULL(37, 32) +#define CMN__PMEVCNT0_INPUT_SEL_WP 0x00 +#define CMN__PMEVCNT0_INPUT_SEL_XP 0x04 +#define CMN__PMEVCNT0_INPUT_SEL_DEV 0x10 +#define CMN__PMEVCNT0_GLOBAL_NUM GENMASK_ULL(18, 16) +#define CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(n) ((n) * 4) +#define CMN__PMEVCNT_PAIRED(n) BIT(4 + (n)) +#define CMN__PMEVCNT23_COMBINED BIT(2) +#define CMN__PMEVCNT01_COMBINED BIT(1) +#define CMN_DTM_PMU_CONFIG_PMU_EN BIT(0) + +#define CMN_DTM_PMEVCNT 0x220 + +#define CMN_DTM_PMEVCNTSR 0x240 + +#define CMN_DTM_NUM_COUNTERS 4 + +/* The DTC node is where the magic happens */ +#define CMN_DT_DTC_CTL 0x0a00 +#define CMN_DT_DTC_CTL_DT_EN BIT(0) + +/* DTC counters are paired in 64-bit registers on a 16-byte stride. 
Yuck */ +#define _CMN_DT_CNT_REG(n) ((((n) / 2) * 4 + (n) % 2) * 4) +#define CMN_DT_PMEVCNT(n) (CMN_PMU_OFFSET + _CMN_DT_CNT_REG(n)) +#define CMN_DT_PMCCNTR (CMN_PMU_OFFSET + 0x40) + +#define CMN_DT_PMEVCNTSR(n) (CMN_PMU_OFFSET + 0x50 + _CMN_DT_CNT_REG(n)) +#define CMN_DT_PMCCNTRSR (CMN_PMU_OFFSET + 0x90) + +#define CMN_DT_PMCR (CMN_PMU_OFFSET + 0x100) +#define CMN_DT_PMCR_PMU_EN BIT(0) +#define CMN_DT_PMCR_CNTR_RST BIT(5) +#define CMN_DT_PMCR_OVFL_INTR_EN BIT(6) + +#define CMN_DT_PMOVSR (CMN_PMU_OFFSET + 0x118) +#define CMN_DT_PMOVSR_CLR (CMN_PMU_OFFSET + 0x120) + +#define CMN_DT_PMSSR (CMN_PMU_OFFSET + 0x128) +#define CMN_DT_PMSSR_SS_STATUS(n) BIT(n) + +#define CMN_DT_PMSRR (CMN_PMU_OFFSET + 0x130) +#define CMN_DT_PMSRR_SS_REQ BIT(0) + +#define CMN_DT_NUM_COUNTERS 8 +#define CMN_MAX_DTCS 4 + +/* + * Even in the worst case a DTC counter can't wrap in fewer than 2^42 cycles, + * so throwing away one bit to make overflow handling easy is no big deal. + */ +#define CMN_COUNTER_INIT 0x80000000 +/* Similarly for the 40-bit cycle counter */ +#define CMN_CC_INIT 0x8000000000ULL + + +/* Event attributes */ +#define CMN_CONFIG_TYPE GENMASK(15, 0) +#define CMN_CONFIG_EVENTID GENMASK(23, 16) +#define CMN_CONFIG_OCCUPID GENMASK(27, 24) +#define CMN_CONFIG_BYNODEID BIT(31) +#define CMN_CONFIG_NODEID GENMASK(47, 32) + +#define CMN_EVENT_TYPE(event) FIELD_GET(CMN_CONFIG_TYPE, (event)->attr.config) +#define CMN_EVENT_EVENTID(event) FIELD_GET(CMN_CONFIG_EVENTID, (event)->attr.config) +#define CMN_EVENT_OCCUPID(event) FIELD_GET(CMN_CONFIG_OCCUPID, (event)->attr.config) +#define CMN_EVENT_BYNODEID(event) FIELD_GET(CMN_CONFIG_BYNODEID, (event)->attr.config) +#define CMN_EVENT_NODEID(event) FIELD_GET(CMN_CONFIG_NODEID, (event)->attr.config) + +#define CMN_CONFIG_WP_COMBINE GENMASK(27, 24) +#define CMN_CONFIG_WP_DEV_SEL BIT(48) +#define CMN_CONFIG_WP_CHN_SEL GENMASK(50, 49) +#define CMN_CONFIG_WP_GRP BIT(52) +#define CMN_CONFIG_WP_EXCLUSIVE BIT(53) +#define CMN_CONFIG1_WP_VAL GENMASK(63, 0) +#define CMN_CONFIG2_WP_MASK GENMASK(63, 0) + +#define CMN_EVENT_WP_COMBINE(event) FIELD_GET(CMN_CONFIG_WP_COMBINE, (event)->attr.config) +#define CMN_EVENT_WP_DEV_SEL(event) FIELD_GET(CMN_CONFIG_WP_DEV_SEL, (event)->attr.config) +#define CMN_EVENT_WP_CHN_SEL(event) FIELD_GET(CMN_CONFIG_WP_CHN_SEL, (event)->attr.config) +#define CMN_EVENT_WP_GRP(event) FIELD_GET(CMN_CONFIG_WP_GRP, (event)->attr.config) +#define CMN_EVENT_WP_EXCLUSIVE(event) FIELD_GET(CMN_CONFIG_WP_EXCLUSIVE, (event)->attr.config) +#define CMN_EVENT_WP_VAL(event) FIELD_GET(CMN_CONFIG1_WP_VAL, (event)->attr.config1) +#define CMN_EVENT_WP_MASK(event) FIELD_GET(CMN_CONFIG2_WP_MASK, (event)->attr.config2) + +/* Made-up event IDs for watchpoint direction */ +#define CMN_WP_UP 0 +#define CMN_WP_DOWN 2 + + +/* r0px probably don't exist in silicon, thankfully */ +enum cmn_revision { + CMN600_R1P0, + CMN600_R1P1, + CMN600_R1P2, + CMN600_R1P3, + CMN600_R2P0, + CMN600_R3P0, +}; + +enum cmn_node_type { + CMN_TYPE_INVALID, + CMN_TYPE_DVM, + CMN_TYPE_CFG, + CMN_TYPE_DTC, + CMN_TYPE_HNI, + CMN_TYPE_HNF, + CMN_TYPE_XP, + CMN_TYPE_SBSX, + CMN_TYPE_RNI = 0xa, + CMN_TYPE_RND = 0xd, + CMN_TYPE_RNSAM = 0xf, + CMN_TYPE_CXRA = 0x100, + CMN_TYPE_CXHA = 0x101, + CMN_TYPE_CXLA = 0x102, + /* Not a real node type */ + CMN_TYPE_WP = 0x7770 +}; + +struct arm_cmn_node { + void __iomem *pmu_base; + u16 id, logid; + enum cmn_node_type type; + + union { + /* Device node */ + struct { + int to_xp; + /* DN/HN-F/CXHA */ + unsigned int occupid_val; + unsigned int occupid_count; + }; + /* XP */ + 
struct { + int dtc; + u32 pmu_config_low; + union { + u8 input_sel[4]; + __le32 pmu_config_high; + }; + s8 wp_event[4]; + }; + }; + + union { + u8 event[4]; + __le32 event_sel; + }; +}; + +struct arm_cmn_dtc { + void __iomem *base; + unsigned int irq; + int irq_friend; + bool cc_active; + + struct perf_event *counters[CMN_DT_NUM_COUNTERS]; + struct perf_event *cycles; +}; + +#define CMN_STATE_DISABLED BIT(0) +#define CMN_STATE_TXN BIT(1) + +struct arm_cmn { + struct device *dev; + void __iomem *base; + + enum cmn_revision rev; + u8 mesh_x; + u8 mesh_y; + u16 num_xps; + u16 num_dns; + struct arm_cmn_node *xps; + struct arm_cmn_node *dns; + + struct arm_cmn_dtc *dtc; + unsigned int num_dtcs; + + int cpu; + struct hlist_node cpuhp_node; + + unsigned int state; + struct pmu pmu; +}; + +#define to_cmn(p) container_of(p, struct arm_cmn, pmu) + +static int arm_cmn_hp_state; + +struct arm_cmn_hw_event { + struct arm_cmn_node *dn; + u64 dtm_idx[2]; + unsigned int dtc_idx; + u8 dtcs_used; + u8 num_dns; +}; + +#define for_each_hw_dn(hw, dn, i) \ + for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++) + +static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event) +{ + BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target)); + return (struct arm_cmn_hw_event *)&event->hw; +} + +static void arm_cmn_set_index(u64 x[], unsigned int pos, unsigned int val) +{ + x[pos / 32] |= (u64)val << ((pos % 32) * 2); +} + +static unsigned int arm_cmn_get_index(u64 x[], unsigned int pos) +{ + return (x[pos / 32] >> ((pos % 32) * 2)) & 3; +} + +struct arm_cmn_event_attr { + struct device_attribute attr; + enum cmn_node_type type; + u8 eventid; + u8 occupid; +}; + +struct arm_cmn_format_attr { + struct device_attribute attr; + u64 field; + int config; +}; + +static int arm_cmn_xyidbits(const struct arm_cmn *cmn) +{ + return cmn->mesh_x > 4 || cmn->mesh_y > 4 ? 3 : 2; +} + +static void arm_cmn_init_node_to_xp(const struct arm_cmn *cmn, + struct arm_cmn_node *dn) +{ + int bits = arm_cmn_xyidbits(cmn); + int x = CMN_NODEID_X(dn->id, bits); + int y = CMN_NODEID_Y(dn->id, bits); + int xp_idx = cmn->mesh_x * y + x; + + dn->to_xp = (cmn->xps + xp_idx) - dn; +} + +static struct arm_cmn_node *arm_cmn_node_to_xp(struct arm_cmn_node *dn) +{ + return dn->type == CMN_TYPE_XP ? 
dn : dn + dn->to_xp; +} + +static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn, + enum cmn_node_type type) +{ + int i; + + for (i = 0; i < cmn->num_dns; i++) + if (cmn->dns[i].type == type) + return &cmn->dns[i]; + return NULL; +} + +#define CMN_EVENT_ATTR(_name, _type, _eventid, _occupid) \ + (&((struct arm_cmn_event_attr[]) {{ \ + .attr = __ATTR(_name, 0444, arm_cmn_event_show, NULL), \ + .type = _type, \ + .eventid = _eventid, \ + .occupid = _occupid, \ + }})[0].attr.attr) + +static bool arm_cmn_is_occup_event(enum cmn_node_type type, unsigned int id) +{ + return (type == CMN_TYPE_DVM && id == 0x05) || + (type == CMN_TYPE_HNF && id == 0x0f); +} + +static ssize_t arm_cmn_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct arm_cmn_event_attr *eattr; + + eattr = container_of(attr, typeof(*eattr), attr); + + if (eattr->type == CMN_TYPE_DTC) + return snprintf(buf, PAGE_SIZE, "type=0x%x\n", eattr->type); + + if (eattr->type == CMN_TYPE_WP) + return snprintf(buf, PAGE_SIZE, + "type=0x%x,eventid=0x%x,wp_dev_sel=?,wp_chn_sel=?,wp_grp=?,wp_val=?,wp_mask=?\n", + eattr->type, eattr->eventid); + + if (arm_cmn_is_occup_event(eattr->type, eattr->eventid)) + return snprintf(buf, PAGE_SIZE, "type=0x%x,eventid=0x%x,occupid=0x%x\n", + eattr->type, eattr->eventid, eattr->occupid); + + return snprintf(buf, PAGE_SIZE, "type=0x%x,eventid=0x%x\n", + eattr->type, eattr->eventid); +} + +static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, + struct attribute *attr, + int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev)); + struct arm_cmn_event_attr *eattr; + enum cmn_node_type type; + + eattr = container_of(attr, typeof(*eattr), attr.attr); + type = eattr->type; + + /* Watchpoints aren't nodes */ + if (type == CMN_TYPE_WP) + type = CMN_TYPE_XP; + + /* Revision-specific differences */ + if (cmn->rev < CMN600_R1P2) { + if (type == CMN_TYPE_HNF && eattr->eventid == 0x1b) + return 0; + } + + if (!arm_cmn_node(cmn, type)) + return 0; + + return attr->mode; +} + +#define _CMN_EVENT_DVM(_name, _event, _occup) \ + CMN_EVENT_ATTR(dn_##_name, CMN_TYPE_DVM, _event, _occup) +#define CMN_EVENT_DTC(_name) \ + CMN_EVENT_ATTR(dtc_##_name, CMN_TYPE_DTC, 0, 0) +#define _CMN_EVENT_HNF(_name, _event, _occup) \ + CMN_EVENT_ATTR(hnf_##_name, CMN_TYPE_HNF, _event, _occup) +#define CMN_EVENT_HNI(_name, _event) \ + CMN_EVENT_ATTR(hni_##_name, CMN_TYPE_HNI, _event, 0) +#define __CMN_EVENT_XP(_name, _event) \ + CMN_EVENT_ATTR(mxp_##_name, CMN_TYPE_XP, _event, 0) +#define CMN_EVENT_SBSX(_name, _event) \ + CMN_EVENT_ATTR(sbsx_##_name, CMN_TYPE_SBSX, _event, 0) +#define CMN_EVENT_RNID(_name, _event) \ + CMN_EVENT_ATTR(rnid_##_name, CMN_TYPE_RNI, _event, 0) + +#define CMN_EVENT_DVM(_name, _event) \ + _CMN_EVENT_DVM(_name, _event, 0) +#define CMN_EVENT_HNF(_name, _event) \ + _CMN_EVENT_HNF(_name, _event, 0) +#define _CMN_EVENT_XP(_name, _event) \ + __CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)), \ + __CMN_EVENT_XP(w_##_name, (_event) | (1 << 2)), \ + __CMN_EVENT_XP(n_##_name, (_event) | (2 << 2)), \ + __CMN_EVENT_XP(s_##_name, (_event) | (3 << 2)), \ + __CMN_EVENT_XP(p0_##_name, (_event) | (4 << 2)), \ + __CMN_EVENT_XP(p1_##_name, (_event) | (5 << 2)) + +/* Good thing there are only 3 fundamental XP events... 
*/ +#define CMN_EVENT_XP(_name, _event) \ + _CMN_EVENT_XP(req_##_name, (_event) | (0 << 5)), \ + _CMN_EVENT_XP(rsp_##_name, (_event) | (1 << 5)), \ + _CMN_EVENT_XP(snp_##_name, (_event) | (2 << 5)), \ + _CMN_EVENT_XP(dat_##_name, (_event) | (3 << 5)) + + +static struct attribute *arm_cmn_event_attrs[] = { + CMN_EVENT_DTC(cycles), + + /* + * DVM node events conflict with HN-I events in the equivalent PMU + * slot, but our lazy short-cut of using the DTM counter index for + * the PMU index as well happens to avoid that by construction. + */ + CMN_EVENT_DVM(rxreq_dvmop, 0x01), + CMN_EVENT_DVM(rxreq_dvmsync, 0x02), + CMN_EVENT_DVM(rxreq_dvmop_vmid_filtered, 0x03), + CMN_EVENT_DVM(rxreq_retried, 0x04), + _CMN_EVENT_DVM(rxreq_trk_occupancy_all, 0x05, 0), + _CMN_EVENT_DVM(rxreq_trk_occupancy_dvmop, 0x05, 1), + _CMN_EVENT_DVM(rxreq_trk_occupancy_dvmsync, 0x05, 2), + + CMN_EVENT_HNF(cache_miss, 0x01), + CMN_EVENT_HNF(slc_sf_cache_access, 0x02), + CMN_EVENT_HNF(cache_fill, 0x03), + CMN_EVENT_HNF(pocq_retry, 0x04), + CMN_EVENT_HNF(pocq_reqs_recvd, 0x05), + CMN_EVENT_HNF(sf_hit, 0x06), + CMN_EVENT_HNF(sf_evictions, 0x07), + CMN_EVENT_HNF(dir_snoops_sent, 0x08), + CMN_EVENT_HNF(brd_snoops_sent, 0x09), + CMN_EVENT_HNF(slc_eviction, 0x0a), + CMN_EVENT_HNF(slc_fill_invalid_way, 0x0b), + CMN_EVENT_HNF(mc_retries, 0x0c), + CMN_EVENT_HNF(mc_reqs, 0x0d), + CMN_EVENT_HNF(qos_hh_retry, 0x0e), + _CMN_EVENT_HNF(qos_pocq_occupancy_all, 0x0f, 0), + _CMN_EVENT_HNF(qos_pocq_occupancy_read, 0x0f, 1), + _CMN_EVENT_HNF(qos_pocq_occupancy_write, 0x0f, 2), + _CMN_EVENT_HNF(qos_pocq_occupancy_atomic, 0x0f, 3), + _CMN_EVENT_HNF(qos_pocq_occupancy_stash, 0x0f, 4), + CMN_EVENT_HNF(pocq_addrhaz, 0x10), + CMN_EVENT_HNF(pocq_atomic_addrhaz, 0x11), + CMN_EVENT_HNF(ld_st_swp_adq_full, 0x12), + CMN_EVENT_HNF(cmp_adq_full, 0x13), + CMN_EVENT_HNF(txdat_stall, 0x14), + CMN_EVENT_HNF(txrsp_stall, 0x15), + CMN_EVENT_HNF(seq_full, 0x16), + CMN_EVENT_HNF(seq_hit, 0x17), + CMN_EVENT_HNF(snp_sent, 0x18), + CMN_EVENT_HNF(sfbi_dir_snp_sent, 0x19), + CMN_EVENT_HNF(sfbi_brd_snp_sent, 0x1a), + CMN_EVENT_HNF(snp_sent_untrk, 0x1b), + CMN_EVENT_HNF(intv_dirty, 0x1c), + CMN_EVENT_HNF(stash_snp_sent, 0x1d), + CMN_EVENT_HNF(stash_data_pull, 0x1e), + CMN_EVENT_HNF(snp_fwded, 0x1f), + + CMN_EVENT_HNI(rrt_rd_occ_cnt_ovfl, 0x20), + CMN_EVENT_HNI(rrt_wr_occ_cnt_ovfl, 0x21), + CMN_EVENT_HNI(rdt_rd_occ_cnt_ovfl, 0x22), + CMN_EVENT_HNI(rdt_wr_occ_cnt_ovfl, 0x23), + CMN_EVENT_HNI(wdb_occ_cnt_ovfl, 0x24), + CMN_EVENT_HNI(rrt_rd_alloc, 0x25), + CMN_EVENT_HNI(rrt_wr_alloc, 0x26), + CMN_EVENT_HNI(rdt_rd_alloc, 0x27), + CMN_EVENT_HNI(rdt_wr_alloc, 0x28), + CMN_EVENT_HNI(wdb_alloc, 0x29), + CMN_EVENT_HNI(txrsp_retryack, 0x2a), + CMN_EVENT_HNI(arvalid_no_arready, 0x2b), + CMN_EVENT_HNI(arready_no_arvalid, 0x2c), + CMN_EVENT_HNI(awvalid_no_awready, 0x2d), + CMN_EVENT_HNI(awready_no_awvalid, 0x2e), + CMN_EVENT_HNI(wvalid_no_wready, 0x2f), + CMN_EVENT_HNI(txdat_stall, 0x30), + CMN_EVENT_HNI(nonpcie_serialization, 0x31), + CMN_EVENT_HNI(pcie_serialization, 0x32), + + CMN_EVENT_XP(txflit_valid, 0x01), + CMN_EVENT_XP(txflit_stall, 0x02), + CMN_EVENT_XP(partial_dat_flit, 0x03), + /* We treat watchpoints as a special made-up class of XP events */ + CMN_EVENT_ATTR(watchpoint_up, CMN_TYPE_WP, 0, 0), + CMN_EVENT_ATTR(watchpoint_down, CMN_TYPE_WP, 2, 0), + + CMN_EVENT_SBSX(rd_req, 0x01), + CMN_EVENT_SBSX(wr_req, 0x02), + CMN_EVENT_SBSX(cmo_req, 0x03), + CMN_EVENT_SBSX(txrsp_retryack, 0x04), + CMN_EVENT_SBSX(txdat_flitv, 0x05), + CMN_EVENT_SBSX(txrsp_flitv, 0x06), + 
CMN_EVENT_SBSX(rd_req_trkr_occ_cnt_ovfl, 0x11), + CMN_EVENT_SBSX(wr_req_trkr_occ_cnt_ovfl, 0x12), + CMN_EVENT_SBSX(cmo_req_trkr_occ_cnt_ovfl, 0x13), + CMN_EVENT_SBSX(wdb_occ_cnt_ovfl, 0x14), + CMN_EVENT_SBSX(rd_axi_trkr_occ_cnt_ovfl, 0x15), + CMN_EVENT_SBSX(cmo_axi_trkr_occ_cnt_ovfl, 0x16), + CMN_EVENT_SBSX(arvalid_no_arready, 0x21), + CMN_EVENT_SBSX(awvalid_no_awready, 0x22), + CMN_EVENT_SBSX(wvalid_no_wready, 0x23), + CMN_EVENT_SBSX(txdat_stall, 0x24), + CMN_EVENT_SBSX(txrsp_stall, 0x25), + + CMN_EVENT_RNID(s0_rdata_beats, 0x01), + CMN_EVENT_RNID(s1_rdata_beats, 0x02), + CMN_EVENT_RNID(s2_rdata_beats, 0x03), + CMN_EVENT_RNID(rxdat_flits, 0x04), + CMN_EVENT_RNID(txdat_flits, 0x05), + CMN_EVENT_RNID(txreq_flits_total, 0x06), + CMN_EVENT_RNID(txreq_flits_retried, 0x07), + CMN_EVENT_RNID(rrt_occ_ovfl, 0x08), + CMN_EVENT_RNID(wrt_occ_ovfl, 0x09), + CMN_EVENT_RNID(txreq_flits_replayed, 0x0a), + CMN_EVENT_RNID(wrcancel_sent, 0x0b), + CMN_EVENT_RNID(s0_wdata_beats, 0x0c), + CMN_EVENT_RNID(s1_wdata_beats, 0x0d), + CMN_EVENT_RNID(s2_wdata_beats, 0x0e), + CMN_EVENT_RNID(rrt_alloc, 0x0f), + CMN_EVENT_RNID(wrt_alloc, 0x10), + CMN_EVENT_RNID(rdb_unord, 0x11), + CMN_EVENT_RNID(rdb_replay, 0x12), + CMN_EVENT_RNID(rdb_hybrid, 0x13), + CMN_EVENT_RNID(rdb_ord, 0x14), + + NULL +}; + +static const struct attribute_group arm_cmn_event_attrs_group = { + .name = "events", + .attrs = arm_cmn_event_attrs, + .is_visible = arm_cmn_event_attr_is_visible, +}; + +static ssize_t arm_cmn_format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct arm_cmn_format_attr *fmt = container_of(attr, typeof(*fmt), attr); + int lo = __ffs(fmt->field), hi = __fls(fmt->field); + + if (lo == hi) + return snprintf(buf, PAGE_SIZE, "config:%d\n", lo); + + if (!fmt->config) + return snprintf(buf, PAGE_SIZE, "config:%d-%d\n", lo, hi); + + return snprintf(buf, PAGE_SIZE, "config%d:%d-%d\n", fmt->config, lo, hi); +} + +#define _CMN_FORMAT_ATTR(_name, _cfg, _fld) \ + (&((struct arm_cmn_format_attr[]) {{ \ + .attr = __ATTR(_name, 0444, arm_cmn_format_show, NULL), \ + .config = _cfg, \ + .field = _fld, \ + }})[0].attr.attr) +#define CMN_FORMAT_ATTR(_name, _fld) _CMN_FORMAT_ATTR(_name, 0, _fld) + +static struct attribute *arm_cmn_format_attrs[] = { + CMN_FORMAT_ATTR(type, CMN_CONFIG_TYPE), + CMN_FORMAT_ATTR(eventid, CMN_CONFIG_EVENTID), + CMN_FORMAT_ATTR(occupid, CMN_CONFIG_OCCUPID), + CMN_FORMAT_ATTR(bynodeid, CMN_CONFIG_BYNODEID), + CMN_FORMAT_ATTR(nodeid, CMN_CONFIG_NODEID), + + CMN_FORMAT_ATTR(wp_dev_sel, CMN_CONFIG_WP_DEV_SEL), + CMN_FORMAT_ATTR(wp_chn_sel, CMN_CONFIG_WP_CHN_SEL), + CMN_FORMAT_ATTR(wp_grp, CMN_CONFIG_WP_GRP), + CMN_FORMAT_ATTR(wp_exclusive, CMN_CONFIG_WP_EXCLUSIVE), + CMN_FORMAT_ATTR(wp_combine, CMN_CONFIG_WP_COMBINE), + + _CMN_FORMAT_ATTR(wp_val, 1, CMN_CONFIG1_WP_VAL), + _CMN_FORMAT_ATTR(wp_mask, 2, CMN_CONFIG2_WP_MASK), + + NULL +}; + +static const struct attribute_group arm_cmn_format_attrs_group = { + .name = "format", + .attrs = arm_cmn_format_attrs, +}; + +static ssize_t arm_cmn_cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(cmn->cpu)); +} + +static struct device_attribute arm_cmn_cpumask_attr = + __ATTR(cpumask, 0444, arm_cmn_cpumask_show, NULL); + +static struct attribute *arm_cmn_cpumask_attrs[] = { + &arm_cmn_cpumask_attr.attr, + NULL, +}; + +static struct attribute_group arm_cmn_cpumask_attr_group = { + .attrs = arm_cmn_cpumask_attrs, +}; + 
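/*
 * Taken together, the attribute groups above should surface sysfs content
 * along these lines (illustrative values, derived from the show callbacks
 * and field definitions earlier in this file):
 *
 *   events/hnf_cache_miss  ->  "type=0x5,eventid=0x1"
 *   events/dtc_cycles      ->  "type=0x3"
 *   format/type            ->  "config:0-15"
 *   format/wp_val          ->  "config1:0-63"
 *   cpumask                ->  e.g. "0"
 */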
+static const struct attribute_group *arm_cmn_attr_groups[] = { + &arm_cmn_event_attrs_group, + &arm_cmn_format_attrs_group, + &arm_cmn_cpumask_attr_group, + NULL +}; + +static int arm_cmn_wp_idx(struct perf_event *event) +{ + return CMN_EVENT_EVENTID(event) + CMN_EVENT_WP_GRP(event); +} + +static u32 arm_cmn_wp_config(struct perf_event *event) +{ + u32 config; + u32 dev = CMN_EVENT_WP_DEV_SEL(event); + u32 chn = CMN_EVENT_WP_CHN_SEL(event); + u32 grp = CMN_EVENT_WP_GRP(event); + u32 exc = CMN_EVENT_WP_EXCLUSIVE(event); + u32 combine = CMN_EVENT_WP_COMBINE(event); + + config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) | + FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) | + FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) | + FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE, exc); + if (combine && !grp) + config |= CMN_DTM_WPn_CONFIG_WP_COMBINE; + + return config; +} + +static void arm_cmn_set_state(struct arm_cmn *cmn, u32 state) +{ + if (!cmn->state) + writel_relaxed(0, cmn->dtc[0].base + CMN_DT_PMCR); + cmn->state |= state; +} + +static void arm_cmn_clear_state(struct arm_cmn *cmn, u32 state) +{ + cmn->state &= ~state; + if (!cmn->state) + writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, + cmn->dtc[0].base + CMN_DT_PMCR); +} + +static void arm_cmn_pmu_enable(struct pmu *pmu) +{ + arm_cmn_clear_state(to_cmn(pmu), CMN_STATE_DISABLED); +} + +static void arm_cmn_pmu_disable(struct pmu *pmu) +{ + arm_cmn_set_state(to_cmn(pmu), CMN_STATE_DISABLED); +} + +static u64 arm_cmn_read_dtm(struct arm_cmn *cmn, struct arm_cmn_hw_event *hw, + bool snapshot) +{ + struct arm_cmn_node *dn; + unsigned int i, offset; + u64 count = 0; + + offset = snapshot ? CMN_DTM_PMEVCNTSR : CMN_DTM_PMEVCNT; + for_each_hw_dn(hw, dn, i) { + struct arm_cmn_node *xp = arm_cmn_node_to_xp(dn); + int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); + u64 reg = readq_relaxed(xp->pmu_base + offset); + u16 dtm_count = reg >> (dtm_idx * 16); + + count += dtm_count; + } + return count; +} + +static u64 arm_cmn_read_cc(struct arm_cmn_dtc *dtc) +{ + u64 val = readq_relaxed(dtc->base + CMN_DT_PMCCNTR); + + writeq_relaxed(CMN_CC_INIT, dtc->base + CMN_DT_PMCCNTR); + return (val - CMN_CC_INIT) & ((CMN_CC_INIT << 1) - 1); +} + +static u32 arm_cmn_read_counter(struct arm_cmn_dtc *dtc, int idx) +{ + u32 val, pmevcnt = CMN_DT_PMEVCNT(idx); + + val = readl_relaxed(dtc->base + pmevcnt); + writel_relaxed(CMN_COUNTER_INIT, dtc->base + pmevcnt); + return val - CMN_COUNTER_INIT; +} + +static void arm_cmn_init_counter(struct perf_event *event) +{ + struct arm_cmn *cmn = to_cmn(event->pmu); + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + unsigned int i, pmevcnt = CMN_DT_PMEVCNT(hw->dtc_idx); + u64 count; + + for (i = 0; hw->dtcs_used & (1U << i); i++) { + writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + pmevcnt); + cmn->dtc[i].counters[hw->dtc_idx] = event; + } + + count = arm_cmn_read_dtm(cmn, hw, false); + local64_set(&event->hw.prev_count, count); +} + +static void arm_cmn_event_read(struct perf_event *event) +{ + struct arm_cmn *cmn = to_cmn(event->pmu); + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + u64 delta, new, prev; + unsigned long flags; + unsigned int i; + + if (hw->dtc_idx == CMN_DT_NUM_COUNTERS) { + i = __ffs(hw->dtcs_used); + delta = arm_cmn_read_cc(cmn->dtc + i); + local64_add(delta, &event->count); + return; + } + new = arm_cmn_read_dtm(cmn, hw, false); + prev = local64_xchg(&event->hw.prev_count, new); + + delta = new - prev; + + local_irq_save(flags); + for (i = 0; hw->dtcs_used & (1U << i); i++) { + new = 
arm_cmn_read_counter(cmn->dtc + i, hw->dtc_idx); + delta += new << 16; + } + local_irq_restore(flags); + local64_add(delta, &event->count); +} + +static void arm_cmn_event_start(struct perf_event *event, int flags) +{ + struct arm_cmn *cmn = to_cmn(event->pmu); + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + struct arm_cmn_node *dn; + enum cmn_node_type type = CMN_EVENT_TYPE(event); + int i; + + if (type == CMN_TYPE_DTC) { + i = __ffs(hw->dtcs_used); + writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR); + cmn->dtc[i].cc_active = true; + } else if (type == CMN_TYPE_WP) { + int wp_idx = arm_cmn_wp_idx(event); + u64 val = CMN_EVENT_WP_VAL(event); + u64 mask = CMN_EVENT_WP_MASK(event); + + for_each_hw_dn(hw, dn, i) { + writeq_relaxed(val, dn->pmu_base + CMN_DTM_WPn_VAL(wp_idx)); + writeq_relaxed(mask, dn->pmu_base + CMN_DTM_WPn_MASK(wp_idx)); + } + } else for_each_hw_dn(hw, dn, i) { + int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); + + dn->event[dtm_idx] = CMN_EVENT_EVENTID(event); + writel_relaxed(le32_to_cpu(dn->event_sel), dn->pmu_base + CMN_PMU_EVENT_SEL); + } +} + +static void arm_cmn_event_stop(struct perf_event *event, int flags) +{ + struct arm_cmn *cmn = to_cmn(event->pmu); + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + struct arm_cmn_node *dn; + enum cmn_node_type type = CMN_EVENT_TYPE(event); + int i; + + if (type == CMN_TYPE_DTC) { + i = __ffs(hw->dtcs_used); + cmn->dtc[i].cc_active = false; + } else if (type == CMN_TYPE_WP) { + int wp_idx = arm_cmn_wp_idx(event); + + for_each_hw_dn(hw, dn, i) { + writeq_relaxed(0, dn->pmu_base + CMN_DTM_WPn_MASK(wp_idx)); + writeq_relaxed(~0ULL, dn->pmu_base + CMN_DTM_WPn_VAL(wp_idx)); + } + } else for_each_hw_dn(hw, dn, i) { + int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); + + dn->event[dtm_idx] = 0; + writel_relaxed(le32_to_cpu(dn->event_sel), dn->pmu_base + CMN_PMU_EVENT_SEL); + } + + arm_cmn_event_read(event); +} + +struct arm_cmn_val { + u8 dtm_count[CMN_MAX_XPS]; + u8 occupid[CMN_MAX_XPS]; + u8 wp[CMN_MAX_XPS][4]; + int dtc_count; + bool cycles; +}; + +static void arm_cmn_val_add_event(struct arm_cmn_val *val, struct perf_event *event) +{ + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + struct arm_cmn_node *dn; + enum cmn_node_type type; + int i; + u8 occupid; + + if (is_software_event(event)) + return; + + type = CMN_EVENT_TYPE(event); + if (type == CMN_TYPE_DTC) { + val->cycles = true; + return; + } + + val->dtc_count++; + if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) + occupid = CMN_EVENT_OCCUPID(event) + 1; + else + occupid = 0; + + for_each_hw_dn(hw, dn, i) { + int wp_idx, xp = arm_cmn_node_to_xp(dn)->logid; + + val->dtm_count[xp]++; + val->occupid[xp] = occupid; + + if (type != CMN_TYPE_WP) + continue; + + wp_idx = arm_cmn_wp_idx(event); + val->wp[xp][wp_idx] = CMN_EVENT_WP_COMBINE(event) + 1; + } +} + +static int arm_cmn_validate_group(struct perf_event *event) +{ + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + struct arm_cmn_node *dn; + struct perf_event *sibling, *leader = event->group_leader; + enum cmn_node_type type; + struct arm_cmn_val val; + int i; + u8 occupid; + + if (leader == event) + return 0; + + if (event->pmu != leader->pmu && !is_software_event(leader)) + return -EINVAL; + + memset(&val, 0, sizeof(val)); + + arm_cmn_val_add_event(&val, leader); + for_each_sibling_event(sibling, leader) + arm_cmn_val_add_event(&val, sibling); + + type = CMN_EVENT_TYPE(event); + if (type == CMN_TYPE_DTC) + return val.cycles ? 
-EINVAL : 0; + + if (val.dtc_count == CMN_DT_NUM_COUNTERS) + return -EINVAL; + + if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) + occupid = CMN_EVENT_OCCUPID(event) + 1; + else + occupid = 0; + + for_each_hw_dn(hw, dn, i) { + int wp_idx, wp_cmb, xp = arm_cmn_node_to_xp(dn)->logid; + + if (val.dtm_count[xp] == CMN_DTM_NUM_COUNTERS) + return -EINVAL; + + if (occupid && val.occupid[xp] && occupid != val.occupid[xp]) + return -EINVAL; + + if (type != CMN_TYPE_WP) + continue; + + wp_idx = arm_cmn_wp_idx(event); + if (val.wp[xp][wp_idx]) + return -EINVAL; + + wp_cmb = val.wp[xp][wp_idx ^ 1]; + if (wp_cmb && wp_cmb != CMN_EVENT_WP_COMBINE(event) + 1) + return -EINVAL; + } + + return 0; +} + +static int arm_cmn_event_init(struct perf_event *event) +{ + struct arm_cmn *cmn = to_cmn(event->pmu); + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + enum cmn_node_type type; + unsigned int i; + bool bynodeid; + u16 nodeid, eventid; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EINVAL; + + event->cpu = cmn->cpu; + if (event->cpu < 0) + return -EINVAL; + + type = CMN_EVENT_TYPE(event); + /* DTC events (i.e. cycles) already have everything they need */ + if (type == CMN_TYPE_DTC) + return 0; + + /* For watchpoints we need the actual XP node here */ + if (type == CMN_TYPE_WP) { + type = CMN_TYPE_XP; + /* ...and we need a "real" direction */ + eventid = CMN_EVENT_EVENTID(event); + if (eventid != CMN_WP_UP && eventid != CMN_WP_DOWN) + return -EINVAL; + } + + bynodeid = CMN_EVENT_BYNODEID(event); + nodeid = CMN_EVENT_NODEID(event); + + hw->dn = arm_cmn_node(cmn, type); + for (i = hw->dn - cmn->dns; i < cmn->num_dns && cmn->dns[i].type == type; i++) { + if (!bynodeid) { + hw->num_dns++; + } else if (cmn->dns[i].id != nodeid) { + hw->dn++; + } else { + hw->num_dns = 1; + break; + } + } + + if (!hw->num_dns) { + int bits = arm_cmn_xyidbits(cmn); + + dev_dbg(cmn->dev, "invalid node 0x%x (%d,%d,%d,%d) type 0x%x\n", + nodeid, CMN_NODEID_X(nodeid, bits), CMN_NODEID_Y(nodeid, bits), + CMN_NODEID_PID(nodeid), CMN_NODEID_DEVID(nodeid), type); + return -EINVAL; + } + /* + * By assuming events count in all DTC domains, we cunningly avoid + * needing to know anything about how XPs are assigned to domains. 
+ */ + hw->dtcs_used = (1U << cmn->num_dtcs) - 1; + + return arm_cmn_validate_group(event); +} + +static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event, + int i) +{ + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + enum cmn_node_type type = CMN_EVENT_TYPE(event); + + while (i--) { + struct arm_cmn_node *xp = arm_cmn_node_to_xp(hw->dn + i); + unsigned int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); + + if (type == CMN_TYPE_WP) + hw->dn[i].wp_event[arm_cmn_wp_idx(event)] = -1; + + if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) + hw->dn[i].occupid_count--; + + xp->pmu_config_low &= ~CMN__PMEVCNT_PAIRED(dtm_idx); + writel_relaxed(xp->pmu_config_low, xp->pmu_base + CMN_DTM_PMU_CONFIG); + } + memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx)); + + for (i = 0; hw->dtcs_used & (1U << i); i++) + cmn->dtc[i].counters[hw->dtc_idx] = NULL; +} + +static int arm_cmn_event_add(struct perf_event *event, int flags) +{ + struct arm_cmn *cmn = to_cmn(event->pmu); + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + struct arm_cmn_dtc *dtc = &cmn->dtc[0]; + struct arm_cmn_node *dn; + enum cmn_node_type type = CMN_EVENT_TYPE(event); + unsigned int i, dtc_idx, input_sel; + + if (type == CMN_TYPE_DTC) { + i = 0; + while (cmn->dtc[i].cycles) + if (++i == cmn->num_dtcs) + return -ENOSPC; + + cmn->dtc[i].cycles = event; + hw->dtc_idx = CMN_DT_NUM_COUNTERS; + hw->dtcs_used = 1U << i; + + if (flags & PERF_EF_START) + arm_cmn_event_start(event, 0); + return 0; + } + + /* Grab a free global counter first... */ + dtc_idx = 0; + while (dtc->counters[dtc_idx]) + if (++dtc_idx == CMN_DT_NUM_COUNTERS) + return -ENOSPC; + + hw->dtc_idx = dtc_idx; + + /* ...then the local counters to feed it. */ + for_each_hw_dn(hw, dn, i) { + struct arm_cmn_node *xp = arm_cmn_node_to_xp(dn); + unsigned int dtm_idx, shift; + u64 reg; + + dtm_idx = 0; + while (xp->pmu_config_low & CMN__PMEVCNT_PAIRED(dtm_idx)) + if (++dtm_idx == CMN_DTM_NUM_COUNTERS) + goto free_dtms; + + if (type == CMN_TYPE_XP) { + input_sel = CMN__PMEVCNT0_INPUT_SEL_XP + dtm_idx; + } else if (type == CMN_TYPE_WP) { + int tmp, wp_idx = arm_cmn_wp_idx(event); + u32 cfg = arm_cmn_wp_config(event); + + if (dn->wp_event[wp_idx] >= 0) + goto free_dtms; + + tmp = dn->wp_event[wp_idx ^ 1]; + if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) != + CMN_EVENT_WP_COMBINE(dtc->counters[tmp])) + goto free_dtms; + + input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx; + dn->wp_event[wp_idx] = dtc_idx; + writel_relaxed(cfg, dn->pmu_base + CMN_DTM_WPn_CONFIG(wp_idx)); + } else { + unsigned int port = CMN_NODEID_PID(dn->id); + unsigned int dev = CMN_NODEID_DEVID(dn->id); + + input_sel = CMN__PMEVCNT0_INPUT_SEL_DEV + dtm_idx + + (port << 4) + (dev << 2); + + if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) { + int occupid = CMN_EVENT_OCCUPID(event); + + if (dn->occupid_count == 0) { + dn->occupid_val = occupid; + writel_relaxed(occupid, + dn->pmu_base + CMN_PMU_EVENT_SEL + 4); + } else if (dn->occupid_val != occupid) { + goto free_dtms; + } + dn->occupid_count++; + } + } + + arm_cmn_set_index(hw->dtm_idx, i, dtm_idx); + + xp->input_sel[dtm_idx] = input_sel; + shift = CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx); + xp->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << shift); + xp->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << shift; + xp->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx); + reg = (u64)le32_to_cpu(xp->pmu_config_high) << 32 | xp->pmu_config_low; + writeq_relaxed(reg, xp->pmu_base + CMN_DTM_PMU_CONFIG); + } + + /* Go go go! 
*/ + arm_cmn_init_counter(event); + + if (flags & PERF_EF_START) + arm_cmn_event_start(event, 0); + + return 0; + +free_dtms: + arm_cmn_event_clear(cmn, event, i); + return -ENOSPC; +} + +static void arm_cmn_event_del(struct perf_event *event, int flags) +{ + struct arm_cmn *cmn = to_cmn(event->pmu); + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + enum cmn_node_type type = CMN_EVENT_TYPE(event); + + arm_cmn_event_stop(event, PERF_EF_UPDATE); + + if (type == CMN_TYPE_DTC) + cmn->dtc[__ffs(hw->dtcs_used)].cycles = NULL; + else + arm_cmn_event_clear(cmn, event, hw->num_dns); +} + +/* + * We stop the PMU for both add and read, to avoid skew across DTM counters. + * In theory we could use snapshots to read without stopping, but then it + * becomes a lot trickier to deal with overlow and racing against interrupts, + * plus it seems they don't work properly on some hardware anyway :( + */ +static void arm_cmn_start_txn(struct pmu *pmu, unsigned int flags) +{ + arm_cmn_set_state(to_cmn(pmu), CMN_STATE_TXN); +} + +static void arm_cmn_end_txn(struct pmu *pmu) +{ + arm_cmn_clear_state(to_cmn(pmu), CMN_STATE_TXN); +} + +static int arm_cmn_commit_txn(struct pmu *pmu) +{ + arm_cmn_end_txn(pmu); + return 0; +} + +static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct arm_cmn *cmn; + unsigned int target; + + cmn = hlist_entry_safe(node, struct arm_cmn, cpuhp_node); + if (cpu != cmn->cpu) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(&cmn->pmu, cpu, target); + cmn->cpu = target; + return 0; +} + +static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id) +{ + struct arm_cmn_dtc *dtc = dev_id; + irqreturn_t ret = IRQ_NONE; + + for (;;) { + u32 status = readl_relaxed(dtc->base + CMN_DT_PMOVSR); + u64 delta; + int i; + + for (i = 0; i < CMN_DTM_NUM_COUNTERS; i++) { + if (status & (1U << i)) { + ret = IRQ_HANDLED; + if (WARN_ON(!dtc->counters[i])) + continue; + delta = (u64)arm_cmn_read_counter(dtc, i) << 16; + local64_add(delta, &dtc->counters[i]->count); + } + } + + if (status & (1U << CMN_DT_NUM_COUNTERS)) { + ret = IRQ_HANDLED; + if (dtc->cc_active && !WARN_ON(!dtc->cycles)) { + delta = arm_cmn_read_cc(dtc); + local64_add(delta, &dtc->cycles->count); + } + } + + writel_relaxed(status, dtc->base + CMN_DT_PMOVSR_CLR); + + if (!dtc->irq_friend) + return ret; + dtc += dtc->irq_friend; + } +} + +/* We can reasonably accommodate DTCs of the same CMN sharing IRQs */ +static int arm_cmn_init_irqs(struct arm_cmn *cmn) +{ + int i, j, irq, err; + + for (i = 0; i < cmn->num_dtcs; i++) { + irq = cmn->dtc[i].irq; + for (j = i; j--; ) { + if (cmn->dtc[j].irq == irq) { + cmn->dtc[j].irq_friend = j - i; + goto next; + } + } + err = devm_request_irq(cmn->dev, irq, arm_cmn_handle_irq, + IRQF_NOBALANCING | IRQF_NO_THREAD, + dev_name(cmn->dev), &cmn->dtc[i]); + if (err) + return err; + + err = irq_set_affinity_hint(irq, cpumask_of(cmn->cpu)); + if (err) + return err; + next: + ; /* isn't C great? 
*/ + } + return 0; +} + +static void arm_cmn_init_dtm(struct arm_cmn_node *xp) +{ + int i; + + for (i = 0; i < 4; i++) { + xp->wp_event[i] = -1; + writeq_relaxed(0, xp->pmu_base + CMN_DTM_WPn_MASK(i)); + writeq_relaxed(~0ULL, xp->pmu_base + CMN_DTM_WPn_VAL(i)); + } + xp->pmu_config_low = CMN_DTM_PMU_CONFIG_PMU_EN; + xp->dtc = -1; +} + +static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int idx) +{ + struct arm_cmn_dtc *dtc = cmn->dtc + idx; + struct arm_cmn_node *xp; + + dtc->base = dn->pmu_base - CMN_PMU_OFFSET; + dtc->irq = platform_get_irq(to_platform_device(cmn->dev), idx); + if (dtc->irq < 0) + return dtc->irq; + + writel_relaxed(0, dtc->base + CMN_DT_PMCR); + writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR); + writel_relaxed(CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR); + + /* We do at least know that a DTC's XP must be in that DTC's domain */ + xp = arm_cmn_node_to_xp(dn); + xp->dtc = idx; + + return 0; +} + +static int arm_cmn_node_cmp(const void *a, const void *b) +{ + const struct arm_cmn_node *dna = a, *dnb = b; + int cmp; + + cmp = dna->type - dnb->type; + if (!cmp) + cmp = dna->logid - dnb->logid; + return cmp; +} + +static int arm_cmn_init_dtcs(struct arm_cmn *cmn) +{ + struct arm_cmn_node *dn; + int dtc_idx = 0; + + cmn->dtc = devm_kcalloc(cmn->dev, cmn->num_dtcs, sizeof(cmn->dtc[0]), GFP_KERNEL); + if (!cmn->dtc) + return -ENOMEM; + + sort(cmn->dns, cmn->num_dns, sizeof(cmn->dns[0]), arm_cmn_node_cmp, NULL); + + cmn->xps = arm_cmn_node(cmn, CMN_TYPE_XP); + + for (dn = cmn->dns; dn < cmn->dns + cmn->num_dns; dn++) { + if (dn->type != CMN_TYPE_XP) + arm_cmn_init_node_to_xp(cmn, dn); + else if (cmn->num_dtcs == 1) + dn->dtc = 0; + + if (dn->type == CMN_TYPE_DTC) + arm_cmn_init_dtc(cmn, dn, dtc_idx++); + + /* To the PMU, RN-Ds don't add anything over RN-Is, so smoosh them together */ + if (dn->type == CMN_TYPE_RND) + dn->type = CMN_TYPE_RNI; + } + + writel_relaxed(CMN_DT_DTC_CTL_DT_EN, cmn->dtc[0].base + CMN_DT_DTC_CTL); + + return 0; +} + +static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_cmn_node *node) +{ + int level; + u64 reg = readq_relaxed(cmn->base + offset + CMN_NODE_INFO); + + node->type = FIELD_GET(CMN_NI_NODE_TYPE, reg); + node->id = FIELD_GET(CMN_NI_NODE_ID, reg); + node->logid = FIELD_GET(CMN_NI_LOGICAL_ID, reg); + + node->pmu_base = cmn->base + offset + CMN_PMU_OFFSET; + + if (node->type == CMN_TYPE_CFG) + level = 0; + else if (node->type == CMN_TYPE_XP) + level = 1; + else + level = 2; + + dev_dbg(cmn->dev, "node%*c%#06hx%*ctype:%-#6hx id:%-4hd off:%#x\n", + (level * 2) + 1, ' ', node->id, 5 - (level * 2), ' ', + node->type, node->logid, offset); +} + +static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) +{ + void __iomem *cfg_region; + struct arm_cmn_node cfg, *dn; + u16 child_count, child_poff; + u32 xp_offset[CMN_MAX_XPS]; + u64 reg; + int i, j; + + cfg_region = cmn->base + rgn_offset; + reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_2); + cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg); + dev_dbg(cmn->dev, "periph_id_2 revision: %d\n", cmn->rev); + + arm_cmn_init_node_info(cmn, rgn_offset, &cfg); + if (cfg.type != CMN_TYPE_CFG) + return -ENODEV; + + reg = readq_relaxed(cfg_region + CMN_CHILD_INFO); + child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg); + child_poff = FIELD_GET(CMN_CI_CHILD_PTR_OFFSET, reg); + + cmn->num_xps = child_count; + cmn->num_dns = cmn->num_xps; + + /* Pass 1: visit the XPs, enumerate their children */ + for (i = 0; i < cmn->num_xps; i++) { + reg = 
readq_relaxed(cfg_region + child_poff + i * 8); + xp_offset[i] = reg & CMN_CHILD_NODE_ADDR; + + reg = readq_relaxed(cmn->base + xp_offset[i] + CMN_CHILD_INFO); + cmn->num_dns += FIELD_GET(CMN_CI_CHILD_COUNT, reg); + } + + /* Cheeky +1 to help terminate pointer-based iteration */ + cmn->dns = devm_kcalloc(cmn->dev, cmn->num_dns + 1, + sizeof(*cmn->dns), GFP_KERNEL); + if (!cmn->dns) + return -ENOMEM; + + /* Pass 2: now we can actually populate the nodes */ + dn = cmn->dns; + for (i = 0; i < cmn->num_xps; i++) { + void __iomem *xp_region = cmn->base + xp_offset[i]; + struct arm_cmn_node *xp = dn++; + + arm_cmn_init_node_info(cmn, xp_offset[i], xp); + arm_cmn_init_dtm(xp); + /* + * Thanks to the order in which XP logical IDs seem to be + * assigned, we can handily infer the mesh X dimension by + * looking out for the XP at (0,1) without needing to know + * the exact node ID format, which we can later derive. + */ + if (xp->id == (1 << 3)) + cmn->mesh_x = xp->logid; + + reg = readq_relaxed(xp_region + CMN_CHILD_INFO); + child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg); + child_poff = FIELD_GET(CMN_CI_CHILD_PTR_OFFSET, reg); + + for (j = 0; j < child_count; j++) { + reg = readq_relaxed(xp_region + child_poff + j * 8); + /* + * Don't even try to touch anything external, since in general + * we haven't a clue how to power up arbitrary CHI requesters. + * As of CMN-600r1 these could only be RN-SAMs or CXLAs, + * neither of which have any PMU events anyway. + * (Actually, CXLAs do seem to have grown some events in r1p2, + * but they don't go to regular XP DTMs, and they depend on + * secure configuration which we can't easily deal with) + */ + if (reg & CMN_CHILD_NODE_EXTERNAL) { + dev_dbg(cmn->dev, "ignoring external node %llx\n", reg); + continue; + } + + arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn); + + switch (dn->type) { + case CMN_TYPE_DTC: + cmn->num_dtcs++; + dn++; + break; + /* These guys have PMU events */ + case CMN_TYPE_DVM: + case CMN_TYPE_HNI: + case CMN_TYPE_HNF: + case CMN_TYPE_SBSX: + case CMN_TYPE_RNI: + case CMN_TYPE_RND: + case CMN_TYPE_CXRA: + case CMN_TYPE_CXHA: + dn++; + break; + /* Nothing to see here */ + case CMN_TYPE_RNSAM: + case CMN_TYPE_CXLA: + break; + /* Something has gone horribly wrong */ + default: + dev_err(cmn->dev, "invalid device node type: 0x%hx\n", dn->type); + return -ENODEV; + } + } + } + + /* Correct for any nodes we skipped */ + cmn->num_dns = dn - cmn->dns; + + /* + * If mesh_x wasn't set during discovery then we never saw + * an XP at (0,1), thus we must have an Nx1 configuration. + */ + if (!cmn->mesh_x) + cmn->mesh_x = cmn->num_xps; + cmn->mesh_y = cmn->num_xps / cmn->mesh_x; + + dev_dbg(cmn->dev, "mesh %dx%d, ID width %d\n", + cmn->mesh_x, cmn->mesh_y, arm_cmn_xyidbits(cmn)); + + return 0; +} + +static int arm_cmn_acpi_probe(struct platform_device *pdev, struct arm_cmn *cmn) +{ + struct resource *cfg, *root; + + cfg = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!cfg) + return -EINVAL; + + root = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!root) + return -EINVAL; + + if (!resource_contains(cfg, root)) + swap(cfg, root); + /* + * Note that devm_ioremap_resource() is dumb and won't let the platform + * device claim cfg when the ACPI companion device has already claimed + * root within it. But since they *are* already both claimed in the + * appropriate name, we don't really need to do it again here anyway. 
+ */ + cmn->base = devm_ioremap(cmn->dev, cfg->start, resource_size(cfg)); + if (!cmn->base) + return -ENOMEM; + + return root->start - cfg->start; +} + +static int arm_cmn_of_probe(struct platform_device *pdev, struct arm_cmn *cmn) +{ + struct device_node *np = pdev->dev.of_node; + u32 rootnode; + int ret; + + cmn->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(cmn->base)) + return PTR_ERR(cmn->base); + + ret = of_property_read_u32(np, "arm,root-node", &rootnode); + if (ret) + return ret; + + return rootnode; +} + +static int arm_cmn_probe(struct platform_device *pdev) +{ + struct arm_cmn *cmn; + const char *name; + static atomic_t id; + int err, rootnode, this_id; + + cmn = devm_kzalloc(&pdev->dev, sizeof(*cmn), GFP_KERNEL); + if (!cmn) + return -ENOMEM; + + cmn->dev = &pdev->dev; + platform_set_drvdata(pdev, cmn); + + if (has_acpi_companion(cmn->dev)) + rootnode = arm_cmn_acpi_probe(pdev, cmn); + else + rootnode = arm_cmn_of_probe(pdev, cmn); + if (rootnode < 0) + return rootnode; + + err = arm_cmn_discover(cmn, rootnode); + if (err) + return err; + + err = arm_cmn_init_dtcs(cmn); + if (err) + return err; + + err = arm_cmn_init_irqs(cmn); + if (err) + return err; + + cmn->cpu = raw_smp_processor_id(); + cmn->pmu = (struct pmu) { + .module = THIS_MODULE, + .attr_groups = arm_cmn_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .task_ctx_nr = perf_invalid_context, + .pmu_enable = arm_cmn_pmu_enable, + .pmu_disable = arm_cmn_pmu_disable, + .event_init = arm_cmn_event_init, + .add = arm_cmn_event_add, + .del = arm_cmn_event_del, + .start = arm_cmn_event_start, + .stop = arm_cmn_event_stop, + .read = arm_cmn_event_read, + .start_txn = arm_cmn_start_txn, + .commit_txn = arm_cmn_commit_txn, + .cancel_txn = arm_cmn_end_txn, + }; + + this_id = atomic_fetch_inc(&id); + if (this_id == 0) { + name = "arm_cmn"; + } else { + name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", this_id); + if (!name) + return -ENOMEM; + } + + err = cpuhp_state_add_instance(arm_cmn_hp_state, &cmn->cpuhp_node); + if (err) + return err; + + err = perf_pmu_register(&cmn->pmu, name, -1); + if (err) + cpuhp_state_remove_instance(arm_cmn_hp_state, &cmn->cpuhp_node); + return err; +} + +static int arm_cmn_remove(struct platform_device *pdev) +{ + struct arm_cmn *cmn = platform_get_drvdata(pdev); + int i; + + writel_relaxed(0, cmn->dtc[0].base + CMN_DT_DTC_CTL); + + perf_pmu_unregister(&cmn->pmu); + cpuhp_state_remove_instance(arm_cmn_hp_state, &cmn->cpuhp_node); + + for (i = 0; i < cmn->num_dtcs; i++) + irq_set_affinity_hint(cmn->dtc[i].irq, NULL); + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id arm_cmn_of_match[] = { + { .compatible = "arm,cmn-600", }, + {} +}; +MODULE_DEVICE_TABLE(of, arm_cmn_of_match); +#endif + +#ifdef CONFIG_ACPI +static const struct acpi_device_id arm_cmn_acpi_match[] = { + { "ARMHC600", }, + {} +}; +MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match); +#endif + +static struct platform_driver arm_cmn_driver = { + .driver = { + .name = "arm-cmn", + .of_match_table = of_match_ptr(arm_cmn_of_match), + .acpi_match_table = ACPI_PTR(arm_cmn_acpi_match), + }, + .probe = arm_cmn_probe, + .remove = arm_cmn_remove, +}; + +static int __init arm_cmn_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/arm/cmn:online", NULL, + arm_cmn_pmu_offline_cpu); + if (ret < 0) + return ret; + + arm_cmn_hp_state = ret; + ret = platform_driver_register(&arm_cmn_driver); + if (ret) + cpuhp_remove_multi_state(arm_cmn_hp_state); + return ret; +} + 
+static void __exit arm_cmn_exit(void) +{ + platform_driver_unregister(&arm_cmn_driver); + cpuhp_remove_multi_state(arm_cmn_hp_state); +} + +module_init(arm_cmn_init); +module_exit(arm_cmn_exit); + +MODULE_AUTHOR("Robin Murphy "); +MODULE_DESCRIPTION("Arm CMN-600 PMU driver"); +MODULE_LICENSE("GPL v2"); -- cgit From 490d7b7c0845eacf5593db333fd2ae7715416e16 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Thu, 24 Sep 2020 12:07:00 +0100 Subject: arm64: perf: Add missing ISB in armv8pmu_enable_counter() Writes to the PMXEVTYPER_EL0 register are not self-synchronising. In armv8pmu_enable_event(), the PE can reorder configuring the event type after we have enabled the counter and the interrupt. This can lead to an interrupt being asserted because of the previous event type that we were counting using the same counter, not the one that we've just configured. The same rationale applies to writes to the PMINTENSET_EL1 register. The PE can reorder enabling the interrupt at any point in the future after we have enabled the event. Prevent both situations from happening by adding an ISB just before we enable the event counter. Fixes: 030896885ade ("arm64: Performance counters support") Reported-by: Julien Thierry Signed-off-by: Alexandru Elisei Tested-by: Sumit Garg (Developerbox) Cc: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Catalin Marinas Link: https://lore.kernel.org/r/20200924110706.254996-2-alexandru.elisei@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 34a7cd3af699..e43ab350a5a2 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -541,6 +541,11 @@ static u32 armv8pmu_event_cnten_mask(struct perf_event *event) static inline void armv8pmu_enable_counter(u32 mask) { + /* + * Make sure event configuration register writes are visible before we + * enable the counter. + * */ + isb(); write_sysreg(mask, pmcntenset_el0); } -- cgit From 0fdf1bb75953a67e63e5055a7709c629ab6d7692 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Sep 2020 12:07:01 +0100 Subject: arm64: perf: Avoid PMXEV* indirection Currently we access the counter registers and their respective type registers indirectly. This requires us to write to PMSELR, issue an ISB, then access the relevant PMXEV* registers. This is unfortunate, because: * Under virtualization, accessing one register requires two traps to the hypervisor, even though we could access the register directly with a single trap. * We have to issue an ISB which we could otherwise avoid the cost of. * When we use NMIs, the NMI handler will have to save/restore the select register in case the code it preempted was attempting to access a counter or its type register. We can avoid these issues by directly accessing the relevant registers. This patch adds helpers to do so. In armv8pmu_enable_event() we still need the ISB to prevent the PE from reordering the write to PMINTENSET_EL1 register. If the interrupt is enabled before we disable the counter and the new event is configured, we might get an interrupt triggered by the previously programmed event overflowing, but which we wrongly attribute to the event that we are enabling. Execute an ISB after we disable the counter. In the process, remove the comment that refers to the ARMv7 PMU. 
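For illustration, the difference between the two access schemes boils down to the following sketch (hypothetical helpers, not part of the patch; read_pmevcntrn() is the direct accessor added below, the sysreg helpers are the usual arm64 ones):

	/* Old, indirect scheme: select the counter in PMSELR_EL0, synchronise
	 * with an ISB, then go through the PMXEVCNTR_EL0 alias. Under
	 * virtualization this is two traps instead of one. */
	static unsigned long read_counter_indirect(u32 counter)
	{
		write_sysreg(counter, pmselr_el0);
		isb();
		return read_sysreg(pmxevcntr_el0);
	}

	/* New, direct scheme: a single access to PMEVCNTR<n>_EL0, with no
	 * PMSELR_EL0 write and no ISB required. */
	static unsigned long read_counter_direct(u32 counter)
	{
		return read_pmevcntrn(counter);
	}
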
[Julien T.: Don't inline read/write functions to avoid big code-size increase, remove unused read_pmevtypern function, fix counter index issue.] [Alexandru E.: Removed comment, removed trailing semicolons in macros, added ISB] Signed-off-by: Mark Rutland Signed-off-by: Julien Thierry Signed-off-by: Alexandru Elisei Tested-by: Sumit Garg (Developerbox) Cc: Julien Thierry Cc: Will Deacon Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Catalin Marinas Link: https://lore.kernel.org/r/20200924110706.254996-3-alexandru.elisei@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 99 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 85 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index e43ab350a5a2..39596ed4ab86 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -371,6 +371,73 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event) #define ARMV8_IDX_TO_COUNTER(x) \ (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) +/* + * This code is really good + */ + +#define PMEVN_CASE(n, case_macro) \ + case n: case_macro(n); break + +#define PMEVN_SWITCH(x, case_macro) \ + do { \ + switch (x) { \ + PMEVN_CASE(0, case_macro); \ + PMEVN_CASE(1, case_macro); \ + PMEVN_CASE(2, case_macro); \ + PMEVN_CASE(3, case_macro); \ + PMEVN_CASE(4, case_macro); \ + PMEVN_CASE(5, case_macro); \ + PMEVN_CASE(6, case_macro); \ + PMEVN_CASE(7, case_macro); \ + PMEVN_CASE(8, case_macro); \ + PMEVN_CASE(9, case_macro); \ + PMEVN_CASE(10, case_macro); \ + PMEVN_CASE(11, case_macro); \ + PMEVN_CASE(12, case_macro); \ + PMEVN_CASE(13, case_macro); \ + PMEVN_CASE(14, case_macro); \ + PMEVN_CASE(15, case_macro); \ + PMEVN_CASE(16, case_macro); \ + PMEVN_CASE(17, case_macro); \ + PMEVN_CASE(18, case_macro); \ + PMEVN_CASE(19, case_macro); \ + PMEVN_CASE(20, case_macro); \ + PMEVN_CASE(21, case_macro); \ + PMEVN_CASE(22, case_macro); \ + PMEVN_CASE(23, case_macro); \ + PMEVN_CASE(24, case_macro); \ + PMEVN_CASE(25, case_macro); \ + PMEVN_CASE(26, case_macro); \ + PMEVN_CASE(27, case_macro); \ + PMEVN_CASE(28, case_macro); \ + PMEVN_CASE(29, case_macro); \ + PMEVN_CASE(30, case_macro); \ + default: WARN(1, "Invalid PMEV* index\n"); \ + } \ + } while (0) + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(pmevcntr##n##_el0) +static unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, pmevcntr##n##_el0) +static void write_pmevcntrn(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, pmevtyper##n##_el0) +static void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); +} + static inline u32 armv8pmu_pmcr_read(void) { return read_sysreg(pmcr_el0); @@ -393,17 +460,11 @@ static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); } -static inline void armv8pmu_select_counter(int idx) +static inline u32 armv8pmu_read_evcntr(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); - write_sysreg(counter, pmselr_el0); - isb(); -} -static inline u64 armv8pmu_read_evcntr(int idx) -{ - armv8pmu_select_counter(idx); - return read_sysreg(pmxevcntr_el0); + return read_pmevcntrn(counter); } static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) @@ -471,8 +532,9 @@ static u64 
armv8pmu_read_counter(struct perf_event *event) static inline void armv8pmu_write_evcntr(int idx, u64 value) { - armv8pmu_select_counter(idx); - write_sysreg(value, pmxevcntr_el0); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); + + write_pmevcntrn(counter, value); } static inline void armv8pmu_write_hw_counter(struct perf_event *event, @@ -503,9 +565,10 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value) static inline void armv8pmu_write_evtype(int idx, u32 val) { - armv8pmu_select_counter(idx); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); + val &= ARMV8_PMU_EVTYPE_MASK; - write_sysreg(val, pmxevtyper_el0); + write_pmevtypern(counter, val); } static inline void armv8pmu_write_event_type(struct perf_event *event) @@ -525,7 +588,10 @@ static inline void armv8pmu_write_event_type(struct perf_event *event) armv8pmu_write_evtype(idx - 1, hwc->config_base); armv8pmu_write_evtype(idx, chain_evt); } else { - armv8pmu_write_evtype(idx, hwc->config_base); + if (idx == ARMV8_IDX_CYCLE_COUNTER) + write_sysreg(hwc->config_base, pmccfiltr_el0); + else + armv8pmu_write_evtype(idx, hwc->config_base); } } @@ -564,6 +630,11 @@ static inline void armv8pmu_enable_event_counter(struct perf_event *event) static inline void armv8pmu_disable_counter(u32 mask) { write_sysreg(mask, pmcntenclr_el0); + /* + * Make sure the effects of disabling the counter are visible before we + * start configuring the event. + */ + isb(); } static inline void armv8pmu_disable_event_counter(struct perf_event *event) @@ -636,7 +707,7 @@ static void armv8pmu_enable_event(struct perf_event *event) armv8pmu_disable_event_counter(event); /* - * Set event (if destined for PMNx counters). + * Set event. */ armv8pmu_write_event_type(event); -- cgit From 2a0e2a02e4b719174547d6f04c27410c6fe456f5 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 24 Sep 2020 12:07:02 +0100 Subject: arm64: perf: Remove PMU locking The PMU is disabled and enabled, and the counters are programmed from contexts where interrupts or preemption is disabled. The functions to toggle the PMU and to program the PMU counters access the registers directly and don't access data modified by the interrupt handler. That, and the fact that they're always called from non-preemptible contexts, means that we don't need to disable interrupts or use a spinlock. [Alexandru E.: Explained why locking is not needed, removed WARN_ONs] Signed-off-by: Julien Thierry Signed-off-by: Alexandru Elisei Tested-by: Sumit Garg (Developerbox) Cc: Will Deacon Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Catalin Marinas Link: https://lore.kernel.org/r/20200924110706.254996-4-alexandru.elisei@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 39596ed4ab86..077df5aea19a 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -691,15 +691,10 @@ static inline u32 armv8pmu_getreset_flags(void) static void armv8pmu_enable_event(struct perf_event *event) { - unsigned long flags; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - /* * Enable counter and interrupt, and set the counter to count * the event that we're interested in. 
*/ - raw_spin_lock_irqsave(&events->pmu_lock, flags); /* * Disable counter @@ -720,21 +715,10 @@ static void armv8pmu_enable_event(struct perf_event *event) * Enable counter */ armv8pmu_enable_event_counter(event); - - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static void armv8pmu_disable_event(struct perf_event *event) { - unsigned long flags; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - /* - * Disable counter and interrupt - */ - raw_spin_lock_irqsave(&events->pmu_lock, flags); - /* * Disable counter */ @@ -744,30 +728,18 @@ static void armv8pmu_disable_event(struct perf_event *event) * Disable interrupt for this counter */ armv8pmu_disable_event_irq(event); - - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static void armv8pmu_start(struct arm_pmu *cpu_pmu) { - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - raw_spin_lock_irqsave(&events->pmu_lock, flags); /* Enable all counters */ armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static void armv8pmu_stop(struct arm_pmu *cpu_pmu) { - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - raw_spin_lock_irqsave(&events->pmu_lock, flags); /* Disable all counters */ armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) -- cgit From 05ab72813340d11205556c0d1bc08e6857a3856c Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 24 Sep 2020 12:07:03 +0100 Subject: arm64: perf: Defer irq_work to IPI_IRQ_WORK When handling events, armv8pmu_handle_irq() calls perf_event_overflow(), and subsequently calls irq_work_run() to handle any work queued by perf_event_overflow(). As perf_event_overflow() raises IPI_IRQ_WORK when queuing the work, this isn't strictly necessary and the work could be handled as part of the IPI_IRQ_WORK handler. In the common case the IPI handler will run immediately after the PMU IRQ handler, and where the PE is heavily loaded with interrupts other handlers may run first, widening the window where some counters are disabled. In practice this window is unlikely to be a significant issue, and removing the call to irq_work_run() would make the PMU IRQ handler NMI safe in addition to making it simpler, so let's do that. [Alexandru E.: Reworded commit message] Signed-off-by: Julien Thierry Signed-off-by: Alexandru Elisei Cc: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Catalin Marinas Link: https://lore.kernel.org/r/20200924110706.254996-5-alexandru.elisei@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 077df5aea19a..3605f77ad4df 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -792,20 +792,16 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) if (!armpmu_event_set_period(event)) continue; + /* + * Perf event overflow will queue the processing of the event as + * an irq_work which will be taken care of in the handling of + * IPI_IRQ_WORK. 
+ */ if (perf_event_overflow(event, &data, regs)) cpu_pmu->disable(event); } armv8pmu_start(cpu_pmu); - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - return IRQ_HANDLED; } -- cgit From 95e92e45a454a10a8114294d0f7aec930fb85891 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 24 Sep 2020 12:07:04 +0100 Subject: KVM: arm64: pmu: Make overflow handler NMI safe kvm_vcpu_kick() is not NMI safe. When the overflow handler is called from NMI context, defer waking the vcpu to an irq_work queue. A vcpu can be freed while it's not running by kvm_destroy_vm(). Prevent running the irq_work for a non-existent vcpu by calling irq_work_sync() on the PMU destroy path. [Alexandru E.: Added irq_work_sync()] Signed-off-by: Julien Thierry Signed-off-by: Alexandru Elisei Tested-by: Sumit Garg (Developerbox) Cc: Julien Thierry Cc: Marc Zyngier Cc: Will Deacon Cc: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Suzuki K Pouloze Cc: kvm@vger.kernel.org Cc: kvmarm@lists.cs.columbia.edu Link: https://lore.kernel.org/r/20200924110706.254996-6-alexandru.elisei@arm.com Signed-off-by: Will Deacon --- arch/arm64/kvm/pmu-emul.c | 26 +++++++++++++++++++++++++- include/kvm/arm_pmu.h | 1 + 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index f0d0312c0a55..81916e360b1e 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -269,6 +269,7 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) kvm_pmu_release_perf_event(&pmu->pmc[i]); + irq_work_sync(&vcpu->arch.pmu.overflow_work); } u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) @@ -433,6 +434,22 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) kvm_pmu_update_state(vcpu); } +/** + * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding + * to the event. + * This is why we need a callback to do it once outside of the NMI context. + */ +static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work) +{ + struct kvm_vcpu *vcpu; + struct kvm_pmu *pmu; + + pmu = container_of(work, struct kvm_pmu, overflow_work); + vcpu = kvm_pmc_to_vcpu(pmu->pmc); + + kvm_vcpu_kick(vcpu); +} + /** * When the perf event overflows, set the overflow status and inform the vcpu. 
*/ @@ -465,7 +482,11 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, if (kvm_pmu_overflow_status(vcpu)) { kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); - kvm_vcpu_kick(vcpu); + + if (!in_nmi()) + kvm_vcpu_kick(vcpu); + else + irq_work_queue(&vcpu->arch.pmu.overflow_work); } cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD); @@ -764,6 +785,9 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) return ret; } + init_irq_work(&vcpu->arch.pmu.overflow_work, + kvm_pmu_perf_overflow_notify_vcpu); + vcpu->arch.pmu.created = true; return 0; } diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 6db030439e29..dbf4f08d42e5 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -27,6 +27,7 @@ struct kvm_pmu { bool ready; bool created; bool irq_level; + struct irq_work overflow_work; }; #define kvm_arm_pmu_v3_ready(v) ((v)->arch.pmu.ready) -- cgit From f76b130bdb8949eac002b8e0ddb85576ed137838 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 24 Sep 2020 12:07:05 +0100 Subject: arm_pmu: Introduce pmu_irq_ops Currently the PMU interrupt can either be a normal irq or a percpu irq. Supporting NMI will introduce two cases for each existing one. It becomes a mess of 'if's when managing the interrupt. Define sets of callbacks for operations commonly done on the interrupt. The appropriate set of callbacks is selected at interrupt request time and simplifies interrupt enabling/disabling and freeing. Signed-off-by: Julien Thierry Signed-off-by: Alexandru Elisei Tested-by: Sumit Garg (Developerbox) Cc: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Link: https://lore.kernel.org/r/20200924110706.254996-7-alexandru.elisei@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 90 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 74 insertions(+), 16 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index df352b334ea7..a770726e98d4 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -26,8 +26,46 @@ #include +static int armpmu_count_irq_users(const int irq); + +struct pmu_irq_ops { + void (*enable_pmuirq)(unsigned int irq); + void (*disable_pmuirq)(unsigned int irq); + void (*free_pmuirq)(unsigned int irq, int cpu, void __percpu *devid); +}; + +static void armpmu_free_pmuirq(unsigned int irq, int cpu, void __percpu *devid) +{ + free_irq(irq, per_cpu_ptr(devid, cpu)); +} + +static const struct pmu_irq_ops pmuirq_ops = { + .enable_pmuirq = enable_irq, + .disable_pmuirq = disable_irq_nosync, + .free_pmuirq = armpmu_free_pmuirq +}; + +static void armpmu_enable_percpu_pmuirq(unsigned int irq) +{ + enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + +static void armpmu_free_percpu_pmuirq(unsigned int irq, int cpu, + void __percpu *devid) +{ + if (armpmu_count_irq_users(irq) == 1) + free_percpu_irq(irq, devid); +} + +static const struct pmu_irq_ops percpu_pmuirq_ops = { + .enable_pmuirq = armpmu_enable_percpu_pmuirq, + .disable_pmuirq = disable_percpu_irq, + .free_pmuirq = armpmu_free_percpu_pmuirq +}; + static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); static DEFINE_PER_CPU(int, cpu_irq); +static DEFINE_PER_CPU(const struct pmu_irq_ops *, cpu_irq_ops); static inline u64 arm_pmu_event_max_period(struct perf_event *event) { @@ -544,6 +582,23 @@ static int armpmu_count_irq_users(const int irq) return count; } +static const struct pmu_irq_ops *armpmu_find_irq_ops(int irq) +{ + const struct pmu_irq_ops *ops = NULL; + int cpu; + + for_each_possible_cpu(cpu) { + if (per_cpu(cpu_irq, cpu) != irq) + continue; + + ops = 
per_cpu(cpu_irq_ops, cpu); + if (ops) + break; + } + + return ops; +} + void armpmu_free_irq(int irq, int cpu) { if (per_cpu(cpu_irq, cpu) == 0) @@ -551,18 +606,18 @@ void armpmu_free_irq(int irq, int cpu) if (WARN_ON(irq != per_cpu(cpu_irq, cpu))) return; - if (!irq_is_percpu_devid(irq)) - free_irq(irq, per_cpu_ptr(&cpu_armpmu, cpu)); - else if (armpmu_count_irq_users(irq) == 1) - free_percpu_irq(irq, &cpu_armpmu); + per_cpu(cpu_irq_ops, cpu)->free_pmuirq(irq, cpu, &cpu_armpmu); per_cpu(cpu_irq, cpu) = 0; + per_cpu(cpu_irq_ops, cpu) = NULL; } int armpmu_request_irq(int irq, int cpu) { int err = 0; const irq_handler_t handler = armpmu_dispatch_irq; + const struct pmu_irq_ops *irq_ops; + if (!irq) return 0; @@ -584,15 +639,26 @@ int armpmu_request_irq(int irq, int cpu) irq_set_status_flags(irq, IRQ_NOAUTOEN); err = request_irq(irq, handler, irq_flags, "arm-pmu", per_cpu_ptr(&cpu_armpmu, cpu)); + + irq_ops = &pmuirq_ops; } else if (armpmu_count_irq_users(irq) == 0) { err = request_percpu_irq(irq, handler, "arm-pmu", &cpu_armpmu); + + irq_ops = &percpu_pmuirq_ops; + } else { + /* Per cpudevid irq was already requested by another CPU */ + irq_ops = armpmu_find_irq_ops(irq); + + if (WARN_ON(!irq_ops)) + err = -EINVAL; } if (err) goto err_out; per_cpu(cpu_irq, cpu) = irq; + per_cpu(cpu_irq_ops, cpu) = irq_ops; return 0; err_out: @@ -625,12 +691,8 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node) per_cpu(cpu_armpmu, cpu) = pmu; irq = armpmu_get_cpu_irq(pmu, cpu); - if (irq) { - if (irq_is_percpu_devid(irq)) - enable_percpu_irq(irq, IRQ_TYPE_NONE); - else - enable_irq(irq); - } + if (irq) + per_cpu(cpu_irq_ops, cpu)->enable_pmuirq(irq); return 0; } @@ -644,12 +706,8 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) return 0; irq = armpmu_get_cpu_irq(pmu, cpu); - if (irq) { - if (irq_is_percpu_devid(irq)) - disable_percpu_irq(irq); - else - disable_irq_nosync(irq); - } + if (irq) + per_cpu(cpu_irq_ops, cpu)->disable_pmuirq(irq); per_cpu(cpu_armpmu, cpu) = NULL; -- cgit From d8f6267f7ce5dc7b8920910e7e75216f77e06d21 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 24 Sep 2020 12:07:06 +0100 Subject: arm_pmu: arm64: Use NMIs for PMU Add required PMU interrupt operations for NMIs. Request interrupt lines as NMIs when possible, otherwise fall back to normal interrupts. NMIs are only supported on the arm64 architecture with a GICv3 irqchip. 
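Reduced to its core, the request-time decision described above looks like this (condensed sketch only; dev_id stands in for the real per-CPU cookie, and the full hunks follow below):

	err = request_nmi(irq, handler, irq_flags, "arm-pmu", dev_id);
	if (err) {
		/* No pseudo-NMI support (e.g. no GICv3): fall back to a plain IRQ */
		err = request_irq(irq, handler, irq_flags, "arm-pmu", dev_id);
		irq_ops = &pmuirq_ops;
	} else {
		has_nmi = true;
		irq_ops = &pmunmi_ops;
	}
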
[Alexandru E.: Added that NMIs only work on arm64 + GICv3, print message when PMU is using NMIs] Signed-off-by: Julien Thierry Signed-off-by: Alexandru Elisei Tested-by: Sumit Garg (Developerbox) Cc: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Link: https://lore.kernel.org/r/20200924110706.254996-8-alexandru.elisei@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 71 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 63 insertions(+), 8 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index a770726e98d4..cb2f55f450e4 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -45,6 +45,17 @@ static const struct pmu_irq_ops pmuirq_ops = { .free_pmuirq = armpmu_free_pmuirq }; +static void armpmu_free_pmunmi(unsigned int irq, int cpu, void __percpu *devid) +{ + free_nmi(irq, per_cpu_ptr(devid, cpu)); +} + +static const struct pmu_irq_ops pmunmi_ops = { + .enable_pmuirq = enable_nmi, + .disable_pmuirq = disable_nmi_nosync, + .free_pmuirq = armpmu_free_pmunmi +}; + static void armpmu_enable_percpu_pmuirq(unsigned int irq) { enable_percpu_irq(irq, IRQ_TYPE_NONE); @@ -63,10 +74,37 @@ static const struct pmu_irq_ops percpu_pmuirq_ops = { .free_pmuirq = armpmu_free_percpu_pmuirq }; +static void armpmu_enable_percpu_pmunmi(unsigned int irq) +{ + if (!prepare_percpu_nmi(irq)) + enable_percpu_nmi(irq, IRQ_TYPE_NONE); +} + +static void armpmu_disable_percpu_pmunmi(unsigned int irq) +{ + disable_percpu_nmi(irq); + teardown_percpu_nmi(irq); +} + +static void armpmu_free_percpu_pmunmi(unsigned int irq, int cpu, + void __percpu *devid) +{ + if (armpmu_count_irq_users(irq) == 1) + free_percpu_nmi(irq, devid); +} + +static const struct pmu_irq_ops percpu_pmunmi_ops = { + .enable_pmuirq = armpmu_enable_percpu_pmunmi, + .disable_pmuirq = armpmu_disable_percpu_pmunmi, + .free_pmuirq = armpmu_free_percpu_pmunmi +}; + static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); static DEFINE_PER_CPU(int, cpu_irq); static DEFINE_PER_CPU(const struct pmu_irq_ops *, cpu_irq_ops); +static bool has_nmi; + static inline u64 arm_pmu_event_max_period(struct perf_event *event) { if (event->hw.flags & ARMPMU_EVT_64BIT) @@ -637,15 +675,31 @@ int armpmu_request_irq(int irq, int cpu) IRQF_NO_THREAD; irq_set_status_flags(irq, IRQ_NOAUTOEN); - err = request_irq(irq, handler, irq_flags, "arm-pmu", + + err = request_nmi(irq, handler, irq_flags, "arm-pmu", per_cpu_ptr(&cpu_armpmu, cpu)); - irq_ops = &pmuirq_ops; + /* If cannot get an NMI, get a normal interrupt */ + if (err) { + err = request_irq(irq, handler, irq_flags, "arm-pmu", + per_cpu_ptr(&cpu_armpmu, cpu)); + irq_ops = &pmuirq_ops; + } else { + has_nmi = true; + irq_ops = &pmunmi_ops; + } } else if (armpmu_count_irq_users(irq) == 0) { - err = request_percpu_irq(irq, handler, "arm-pmu", - &cpu_armpmu); - - irq_ops = &percpu_pmuirq_ops; + err = request_percpu_nmi(irq, handler, "arm-pmu", &cpu_armpmu); + + /* If cannot get an NMI, get a normal interrupt */ + if (err) { + err = request_percpu_irq(irq, handler, "arm-pmu", + &cpu_armpmu); + irq_ops = &percpu_pmuirq_ops; + } else { + has_nmi= true; + irq_ops = &percpu_pmunmi_ops; + } } else { /* Per cpudevid irq was already requested by another CPU */ irq_ops = armpmu_find_irq_ops(irq); @@ -928,8 +982,9 @@ int armpmu_register(struct arm_pmu *pmu) if (!__oprofile_cpu_pmu) __oprofile_cpu_pmu = pmu; - pr_info("enabled with %s PMU driver, %d counters available\n", - pmu->name, pmu->num_events); + pr_info("enabled with %s PMU driver, %d counters available%s\n", + pmu->name, 
pmu->num_events, + has_nmi ? ", using NMIs" : ""); return 0; 
-- cgit From 5735f515843019257913432a8c36eb558be388db Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 22 Sep 2020 23:04:11 +1000 Subject: firmware: arm_sdei: Remove sdei_is_err() sdei_is_err() is only called by sdei_to_linux_errno(). The logic of checking the error number is common to both, so the two functions can be combined. This removes sdei_is_err() and folds its logic into sdei_to_linux_errno(). The assignment of @err to zero in invoke_sdei_fn() is also dropped because it's always overridden afterwards. This shouldn't cause functional changes. Signed-off-by: Gavin Shan Reviewed-by: James Morse Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20200922130423.10173-2-gshan@redhat.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index b4b9ce97f415..2d256b2ed4b4 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -114,26 +114,7 @@ static int sdei_to_linux_errno(unsigned long sdei_err) return -ENOMEM; } - /* Not an error value ... */ - return sdei_err; -} - -/* - * If x0 is any of these values, then the call failed, use sdei_to_linux_errno() - * to translate. - */ -static int sdei_is_err(struct arm_smccc_res *res) -{ - switch (res->a0) { - case SDEI_NOT_SUPPORTED: - case SDEI_INVALID_PARAMETERS: - case SDEI_DENIED: - case SDEI_PENDING: - case SDEI_OUT_OF_RESOURCE: - return true; - } - - return false; + return 0; } static int invoke_sdei_fn(unsigned long function_id, unsigned long arg0, @@ -141,14 +122,13 @@ static int invoke_sdei_fn(unsigned long function_id, unsigned long arg0, unsigned long arg3, unsigned long arg4, u64 *result) { - int err = 0; + int err; struct arm_smccc_res res; if (sdei_firmware_call) { sdei_firmware_call(function_id, arg0, arg1, arg2, arg3, arg4, &res); - if (sdei_is_err(&res)) - err = sdei_to_linux_errno(res.a0); + err = sdei_to_linux_errno(res.a0); } else { /* * !sdei_firmware_call means we failed to probe or called 
-- cgit From 119884249fdba34a06df250a4402a6b2f3697a47 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 22 Sep 2020 23:04:12 +1000 Subject: firmware: arm_sdei: Common block for failing path in sdei_event_create() There are multiple kfree(event) calls in the failing paths of sdei_event_create() to free the SDEI event. Each time more code is added to a failing path, another call has to be added, which is easy to miss and would introduce a memory leak. This introduces a common block for the failing path in sdei_event_create() to resolve the issue. This shouldn't cause functional changes. 
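The change adopts the usual single-exit error path. In generic form (struct foo and foo_setup() are made-up names for illustration, not taken from this driver):

	static struct foo *foo_create(void)
	{
		struct foo *foo;
		int err;

		foo = kzalloc(sizeof(*foo), GFP_KERNEL);
		if (!foo) {
			err = -ENOMEM;
			goto fail;
		}

		err = foo_setup(foo);
		if (err)
			goto fail;

		return foo;

	fail:
		/* kfree(NULL) is a no-op, so every failure shares one cleanup label */
		kfree(foo);
		return ERR_PTR(err);
	}
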
Signed-off-by: Gavin Shan Reviewed-by: Jonathan Cameron Acked-by: James Morse Link: https://lore.kernel.org/r/20200922130423.10173-3-gshan@redhat.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 2d256b2ed4b4..a126ab7e3490 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -190,33 +190,31 @@ static struct sdei_event *sdei_event_create(u32 event_num, lockdep_assert_held(&sdei_events_lock); event = kzalloc(sizeof(*event), GFP_KERNEL); - if (!event) - return ERR_PTR(-ENOMEM); + if (!event) { + err = -ENOMEM; + goto fail; + } INIT_LIST_HEAD(&event->list); event->event_num = event_num; err = sdei_api_event_get_info(event_num, SDEI_EVENT_INFO_EV_PRIORITY, &result); - if (err) { - kfree(event); - return ERR_PTR(err); - } + if (err) + goto fail; event->priority = result; err = sdei_api_event_get_info(event_num, SDEI_EVENT_INFO_EV_TYPE, &result); - if (err) { - kfree(event); - return ERR_PTR(err); - } + if (err) + goto fail; event->type = result; if (event->type == SDEI_EVENT_TYPE_SHARED) { reg = kzalloc(sizeof(*reg), GFP_KERNEL); if (!reg) { - kfree(event); - return ERR_PTR(-ENOMEM); + err = -ENOMEM; + goto fail; } reg->event_num = event_num; @@ -231,8 +229,8 @@ static struct sdei_event *sdei_event_create(u32 event_num, regs = alloc_percpu(struct sdei_registered_event); if (!regs) { - kfree(event); - return ERR_PTR(-ENOMEM); + err = -ENOMEM; + goto fail; } for_each_possible_cpu(cpu) { @@ -252,6 +250,10 @@ static struct sdei_event *sdei_event_create(u32 event_num, spin_unlock(&sdei_list_lock); return event; + +fail: + kfree(event); + return ERR_PTR(err); } static void sdei_event_destroy_llocked(struct sdei_event *event) -- cgit From 663c0e89c8defd75a9d34ae31dbc315d6cf894bd Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 22 Sep 2020 23:04:13 +1000 Subject: firmware: arm_sdei: Retrieve event number from event instance In sdei_event_create(), the event number is retrieved from the variable @event_num for the shared event. The event number was stored in the event instance. So we can fetch it from the event instance, similar to what we're doing for the private event. Signed-off-by: Gavin Shan Reviewed-by: Jonathan Cameron Acked-by: James Morse Link: https://lore.kernel.org/r/20200922130423.10173-4-gshan@redhat.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index a126ab7e3490..0f49fff20cc7 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -217,7 +217,7 @@ static struct sdei_event *sdei_event_create(u32 event_num, goto fail; } - reg->event_num = event_num; + reg->event_num = event->event_num; reg->priority = event->priority; reg->callback = cb; -- cgit From 10fd7c42b7953b3316806388493bff22abd3f78c Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 22 Sep 2020 23:04:14 +1000 Subject: firmware: arm_sdei: Avoid nested statements in sdei_init() In sdei_init(), the nested statements can be avoided by bailing on error from platform_driver_register() or absent ACPI SDEI table. With it, the code looks a bit more readable. 
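Schematically, the reshaped function follows the common bail-early pattern (register_things() and the optional-feature helpers below are made-up names for illustration):

	static int __init example_init(void)
	{
		int ret;

		ret = register_things();
		if (ret || !optional_feature_present())
			return ret;	/* bail out early instead of nesting the rest */

		return setup_optional_feature();
	}
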
Signed-off-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: James Morse Link: https://lore.kernel.org/r/20200922130423.10173-5-gshan@redhat.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 0f49fff20cc7..1fa4e577b78e 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -1081,17 +1081,18 @@ static bool __init sdei_present_acpi(void) static int __init sdei_init(void) { - int ret = platform_driver_register(&sdei_driver); - - if (!ret && sdei_present_acpi()) { - struct platform_device *pdev; - - pdev = platform_device_register_simple(sdei_driver.driver.name, - 0, NULL, 0); - if (IS_ERR(pdev)) - pr_info("Failed to register ACPI:SDEI platform device %ld\n", - PTR_ERR(pdev)); - } + struct platform_device *pdev; + int ret; + + ret = platform_driver_register(&sdei_driver); + if (ret || !sdei_present_acpi()) + return ret; + + pdev = platform_device_register_simple(sdei_driver.driver.name, + 0, NULL, 0); + if (IS_ERR(pdev)) + pr_info("Failed to register ACPI:SDEI platform device %ld\n", + PTR_ERR(pdev)); return ret; } 
-- cgit From 63627cae41e33763ca967bd2bb8cfe5d281bfe64 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 22 Sep 2020 23:04:15 +1000 Subject: firmware: arm_sdei: Unregister driver on error in sdei_init() The SDEI platform device is created from a device-tree node or the ACPI (SDEI) table. For the latter case, the platform device is created explicitly by this module. It'd be better to unregister the driver when creating the device fails, to keep the symmetry. The driver, owned by this module, isn't needed if the device doesn't exist. Besides, the errno (@ret) should be updated accordingly in this case. Signed-off-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: James Morse Link: https://lore.kernel.org/r/20200922130423.10173-6-gshan@redhat.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 1fa4e577b78e..e7e9059c395b 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -1090,9 +1090,12 @@ static int __init sdei_init(void) pdev = platform_device_register_simple(sdei_driver.driver.name, 0, NULL, 0); - if (IS_ERR(pdev)) - pr_info("Failed to register ACPI:SDEI platform device %ld\n", - PTR_ERR(pdev)); + if (IS_ERR(pdev)) { + ret = PTR_ERR(pdev); + platform_driver_unregister(&sdei_driver); + pr_info("Failed to register ACPI:SDEI platform device %d\n", + ret); + } return ret; } 
-- cgit From bc110fd322816c9684603a55404b51228c0feca8 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 22 Sep 2020 23:04:16 +1000 Subject: firmware: arm_sdei: Remove duplicate check in sdei_get_conduit() The two conditions in the following check are duplicates because @acpi_disabled doesn't depend on CONFIG_ACPI. So the duplicate check (IS_ENABLED(CONFIG_ACPI)) can be dropped. More detail is provided to keep the commit log complete: * @acpi_disabled is defined in arch/arm64/kernel/acpi.c when CONFIG_ACPI is enabled. * @acpi_disabled is defined in include/acpi.h when CONFIG_ACPI is disabled. 
Signed-off-by: Gavin Shan Reviewed-by: Jonathan Cameron Acked-by: James Morse Link: https://lore.kernel.org/r/20200922130423.10173-7-gshan@redhat.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index e7e9059c395b..5daa4e20595c 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -958,7 +958,7 @@ static int sdei_get_conduit(struct platform_device *pdev) } pr_warn("invalid \"method\" property: %s\n", method); - } else if (IS_ENABLED(CONFIG_ACPI) && !acpi_disabled) { + } else if (!acpi_disabled) { if (acpi_psci_use_hvc()) { sdei_firmware_call = &sdei_smccc_hvc; return SMCCC_CONDUIT_HVC; 
-- cgit From 101119a35ca108022391ad5b84d31e203970185c Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 22 Sep 2020 23:04:17 +1000 Subject: firmware: arm_sdei: Remove redundant error message in sdei_probe() This removes the redundant error message in sdei_probe() because the case can be identified from the errno in the next error message. Signed-off-by: Gavin Shan Reviewed-by: Jonathan Cameron Acked-by: James Morse Link: https://lore.kernel.org/r/20200922130423.10173-8-gshan@redhat.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 5daa4e20595c..4caeef3e1e0b 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -982,8 +982,6 @@ static int sdei_probe(struct platform_device *pdev) return 0; err = sdei_api_get_version(&ver); - if (err == -EOPNOTSUPP) - pr_err("advertised but not implemented in platform firmware\n"); if (err) { pr_err("Failed to get SDEI version: %d\n", err); sdei_mark_interface_broken(); 
-- cgit From 1bbc75518503fe7992c8ec09557b453ec7222dcf Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 22 Sep 2020 23:04:18 +1000 Subject: firmware: arm_sdei: Remove while loop in sdei_event_register() This removes the unnecessary while loop in sdei_event_register() for the following two reasons. This shouldn't cause any functional changes. * The while loop is only executed once, so it's not needed in theory. * With the while loop removed, the nested statements can be avoided, making the code a bit cleaner. 
Signed-off-by: Gavin Shan Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20200922130423.10173-9-gshan@redhat.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 52 ++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 4caeef3e1e0b..6595bd66aa73 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -590,36 +590,34 @@ int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg) WARN_ON(in_nmi()); mutex_lock(&sdei_events_lock); - do { - if (sdei_event_find(event_num)) { - pr_warn("Event %u already registered\n", event_num); - err = -EBUSY; - break; - } + if (sdei_event_find(event_num)) { + pr_warn("Event %u already registered\n", event_num); + err = -EBUSY; + goto unlock; + } - event = sdei_event_create(event_num, cb, arg); - if (IS_ERR(event)) { - err = PTR_ERR(event); - pr_warn("Failed to create event %u: %d\n", event_num, - err); - break; - } + event = sdei_event_create(event_num, cb, arg); + if (IS_ERR(event)) { + err = PTR_ERR(event); + pr_warn("Failed to create event %u: %d\n", event_num, err); + goto unlock; + } - cpus_read_lock(); - err = _sdei_event_register(event); - if (err) { - sdei_event_destroy(event); - pr_warn("Failed to register event %u: %d\n", event_num, - err); - } else { - spin_lock(&sdei_list_lock); - event->reregister = true; - spin_unlock(&sdei_list_lock); - } - cpus_read_unlock(); - } while (0); - mutex_unlock(&sdei_events_lock); + cpus_read_lock(); + err = _sdei_event_register(event); + if (err) { + sdei_event_destroy(event); + pr_warn("Failed to register event %u: %d\n", event_num, err); + goto cpu_unlock; + } + spin_lock(&sdei_list_lock); + event->reregister = true; + spin_unlock(&sdei_list_lock); +cpu_unlock: + cpus_read_unlock(); +unlock: + mutex_unlock(&sdei_events_lock); return err; } 
-- cgit From b06146b698e6e835253d01901ecf0ab84646591e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 22 Sep 2020 23:04:19 +1000 Subject: firmware: arm_sdei: Remove while loop in sdei_event_unregister() This removes the unnecessary while loop in sdei_event_unregister() for the following two reasons. This shouldn't cause any functional changes. * The while loop is only executed once, so it's not needed in theory. * With the while loop removed, the nested statements can be avoided, making the code a bit cleaner. 
Signed-off-by: Gavin Shan Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20200922130423.10173-10-gshan@redhat.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 6595bd66aa73..790bff70d169 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -491,24 +491,23 @@ int sdei_event_unregister(u32 event_num) mutex_lock(&sdei_events_lock); event = sdei_event_find(event_num); - do { - if (!event) { - pr_warn("Event %u not registered\n", event_num); - err = -ENOENT; - break; - } + if (!event) { + pr_warn("Event %u not registered\n", event_num); + err = -ENOENT; + goto unlock; + } - spin_lock(&sdei_list_lock); - event->reregister = false; - event->reenable = false; - spin_unlock(&sdei_list_lock); + spin_lock(&sdei_list_lock); + event->reregister = false; + event->reenable = false; + spin_unlock(&sdei_list_lock); - err = _sdei_event_unregister(event); - if (err) - break; + err = _sdei_event_unregister(event); + if (err) + goto unlock; - sdei_event_destroy(event); - } while (0); + sdei_event_destroy(event); +unlock: mutex_unlock(&sdei_events_lock); return err; -- cgit From a27c04e1de876297b78a6b812049e6fecc6e4d9a Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 22 Sep 2020 23:04:20 +1000 Subject: firmware: arm_sdei: Cleanup on cross call function This applies cleanup on the cross call functions, no functional changes are introduced: * Wrap the code block of CROSSCALL_INIT inside "do { } while (0)" as linux kernel usually does. Otherwise, scripts/checkpatch.pl reports warning regarding this. * Use smp_call_func_t for @fn argument in sdei_do_cross_call() as the function is called on target CPU(s). * Remove unnecessary space before @event in sdei_do_cross_call() Signed-off-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: James Morse Link: https://lore.kernel.org/r/20200922130423.10173-11-gshan@redhat.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 790bff70d169..786c1b6d4af3 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -78,11 +78,15 @@ struct sdei_crosscall_args { int first_error; }; -#define CROSSCALL_INIT(arg, event) (arg.event = event, \ - arg.first_error = 0, \ - atomic_set(&arg.errors, 0)) - -static inline int sdei_do_cross_call(void *fn, struct sdei_event * event) +#define CROSSCALL_INIT(arg, event) \ + do { \ + arg.event = event; \ + arg.first_error = 0; \ + atomic_set(&arg.errors, 0); \ + } while (0) + +static inline int sdei_do_cross_call(smp_call_func_t fn, + struct sdei_event *event) { struct sdei_crosscall_args arg; -- cgit From f4673625a52c69a12d2a8f71bcf408c685c148ec Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 22 Sep 2020 23:04:21 +1000 Subject: firmware: arm_sdei: Introduce sdei_do_local_call() During the CPU hotplug, the private events are registered, enabled or unregistered on the specific CPU. It repeats the same steps: initializing cross call argument, make function call on local CPU, check the returned error. This introduces sdei_do_local_call() to cover the first steps. The other benefit is to make CROSSCALL_INIT and struct sdei_crosscall_args are only visible to sdei_do_{cross, local}_call(). 
Signed-off-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: James Morse Link: https://lore.kernel.org/r/20200922130423.10173-12-gshan@redhat.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 786c1b6d4af3..cf1a37b40e24 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -85,6 +85,17 @@ struct sdei_crosscall_args { atomic_set(&arg.errors, 0); \ } while (0) +static inline int sdei_do_local_call(smp_call_func_t fn, + struct sdei_event *event) +{ + struct sdei_crosscall_args arg; + + CROSSCALL_INIT(arg, event); + fn(&arg); + + return arg.first_error; +} + static inline int sdei_do_cross_call(smp_call_func_t fn, struct sdei_event *event) { @@ -677,7 +688,7 @@ static int sdei_reregister_shared(void) static int sdei_cpuhp_down(unsigned int cpu) { struct sdei_event *event; - struct sdei_crosscall_args arg; + int err; /* un-register private events */ spin_lock(&sdei_list_lock); @@ -685,12 +696,11 @@ static int sdei_cpuhp_down(unsigned int cpu) if (event->type == SDEI_EVENT_TYPE_SHARED) continue; - CROSSCALL_INIT(arg, event); - /* call the cross-call function locally... */ - _local_event_unregister(&arg); - if (arg.first_error) + err = sdei_do_local_call(_local_event_unregister, event); + if (err) { pr_err("Failed to unregister event %u: %d\n", - event->event_num, arg.first_error); + event->event_num, err); + } } spin_unlock(&sdei_list_lock); @@ -700,7 +710,7 @@ static int sdei_cpuhp_down(unsigned int cpu) static int sdei_cpuhp_up(unsigned int cpu) { struct sdei_event *event; - struct sdei_crosscall_args arg; + int err; /* re-register/enable private events */ spin_lock(&sdei_list_lock); @@ -709,20 +719,19 @@ static int sdei_cpuhp_up(unsigned int cpu) continue; if (event->reregister) { - CROSSCALL_INIT(arg, event); - /* call the cross-call function locally... */ - _local_event_register(&arg); - if (arg.first_error) + err = sdei_do_local_call(_local_event_register, event); + if (err) { pr_err("Failed to re-register event %u: %d\n", - event->event_num, arg.first_error); + event->event_num, err); + } } if (event->reenable) { - CROSSCALL_INIT(arg, event); - _local_event_enable(&arg); - if (arg.first_error) + err = sdei_do_local_call(_local_event_enable, event); + if (err) { pr_err("Failed to re-enable event %u: %d\n", - event->event_num, arg.first_error); + event->event_num, err); + } } } spin_unlock(&sdei_list_lock); -- cgit From d2fc580d2dcaf92863fbb194d4e70ef1383b92c4 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 22 Sep 2020 23:04:22 +1000 Subject: firmware: arm_sdei: Remove _sdei_event_register() The function _sdei_event_register() is called by sdei_event_register() and sdei_device_thaw() as the following functional call chain shows. _sdei_event_register() covers the shared and private events, but sdei_device_thaw() only covers the shared events. So the logic to cover the private events in _sdei_event_register() isn't needed by sdei_device_thaw(). Similarly, sdei_reregister_event_llocked() covers the shared and private events in the regard of reenablement. The logic to cover the private events isn't needed by sdei_device_thaw() either. sdei_event_register sdei_device_thaw _sdei_event_register sdei_reregister_shared sdei_reregister_event_llocked _sdei_event_register This removes _sdei_event_register() and sdei_reregister_event_llocked(). 
Their logic is moved to sdei_event_register() and sdei_reregister_shared(). This shouldn't cause any logical changes. Signed-off-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: James Morse Link: https://lore.kernel.org/r/20200922130423.10173-13-gshan@redhat.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 77 +++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 49 deletions(-) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index cf1a37b40e24..361d142ad2a8 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -577,25 +577,6 @@ static void _local_event_register(void *data) sdei_cross_call_return(arg, err); } -static int _sdei_event_register(struct sdei_event *event) -{ - int err; - - lockdep_assert_held(&sdei_events_lock); - - if (event->type == SDEI_EVENT_TYPE_SHARED) - return sdei_api_event_register(event->event_num, - sdei_entry_point, - event->registered, - SDEI_EVENT_REGISTER_RM_ANY, 0); - - err = sdei_do_cross_call(_local_event_register, event); - if (err) - sdei_do_cross_call(_local_event_unregister, event); - - return err; -} - int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg) { int err; @@ -618,7 +599,17 @@ int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg) } cpus_read_lock(); - err = _sdei_event_register(event); + if (event->type == SDEI_EVENT_TYPE_SHARED) { + err = sdei_api_event_register(event->event_num, + sdei_entry_point, + event->registered, + SDEI_EVENT_REGISTER_RM_ANY, 0); + } else { + err = sdei_do_cross_call(_local_event_register, event); + if (err) + sdei_do_cross_call(_local_event_unregister, event); + } + if (err) { sdei_event_destroy(event); pr_warn("Failed to register event %u: %d\n", event_num, err); @@ -635,33 +626,6 @@ unlock: return err; } -static int sdei_reregister_event_llocked(struct sdei_event *event) -{ - int err; - - lockdep_assert_held(&sdei_events_lock); - lockdep_assert_held(&sdei_list_lock); - - err = _sdei_event_register(event); - if (err) { - pr_err("Failed to re-register event %u\n", event->event_num); - sdei_event_destroy_llocked(event); - return err; - } - - if (event->reenable) { - if (event->type == SDEI_EVENT_TYPE_SHARED) - err = sdei_api_event_enable(event->event_num); - else - err = sdei_do_cross_call(_local_event_enable, event); - } - - if (err) - pr_err("Failed to re-enable event %u\n", event->event_num); - - return err; -} - static int sdei_reregister_shared(void) { int err = 0; @@ -674,9 +638,24 @@ static int sdei_reregister_shared(void) continue; if (event->reregister) { - err = sdei_reregister_event_llocked(event); - if (err) + err = sdei_api_event_register(event->event_num, + sdei_entry_point, event->registered, + SDEI_EVENT_REGISTER_RM_ANY, 0); + if (err) { + pr_err("Failed to re-register event %u\n", + event->event_num); + sdei_event_destroy_llocked(event); break; + } + } + + if (event->reenable) { + err = sdei_api_event_enable(event->event_num); + if (err) { + pr_err("Failed to re-enable event %u\n", + event->event_num); + break; + } } } spin_unlock(&sdei_list_lock); -- cgit From 4b2b76cbbc8ff5dabc18123d94a1125143aea567 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 22 Sep 2020 23:04:23 +1000 Subject: firmware: arm_sdei: Remove _sdei_event_unregister() _sdei_event_unregister() is called by sdei_event_unregister() and sdei_device_freeze(). _sdei_event_unregister() covers the shared and private events, but sdei_device_freeze() only covers the shared events. 
So the logic to cover the private events isn't needed by sdei_device_freeze(). sdei_event_unregister sdei_device_freeze _sdei_event_unregister sdei_unregister_shared _sdei_event_unregister This removes _sdei_event_unregister(). Its logic is moved to its callers accordingly. This shouldn't cause any logical changes. Signed-off-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: James Morse Link: https://lore.kernel.org/r/20200922130423.10173-14-gshan@redhat.com Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 361d142ad2a8..840754dcc6ca 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -487,16 +487,6 @@ static void _local_event_unregister(void *data) sdei_cross_call_return(arg, err); } -static int _sdei_event_unregister(struct sdei_event *event) -{ - lockdep_assert_held(&sdei_events_lock); - - if (event->type == SDEI_EVENT_TYPE_SHARED) - return sdei_api_event_unregister(event->event_num); - - return sdei_do_cross_call(_local_event_unregister, event); -} - int sdei_event_unregister(u32 event_num) { int err; @@ -517,7 +507,11 @@ int sdei_event_unregister(u32 event_num) event->reenable = false; spin_unlock(&sdei_list_lock); - err = _sdei_event_unregister(event); + if (event->type == SDEI_EVENT_TYPE_SHARED) + err = sdei_api_event_unregister(event->event_num); + else + err = sdei_do_cross_call(_local_event_unregister, event); + if (err) goto unlock; @@ -543,7 +537,7 @@ static int sdei_unregister_shared(void) if (event->type != SDEI_EVENT_TYPE_SHARED) continue; - err = _sdei_event_unregister(event); + err = sdei_api_event_unregister(event->event_num); if (err) break; } -- cgit From 48118151d8cc7a457f61d91df0c053473d4b31b1 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 18 Sep 2020 12:18:44 +0200 Subject: arm64: mm: Pin down ASIDs for sharing mm with devices To enable address space sharing with the IOMMU, introduce arm64_mm_context_get() and arm64_mm_context_put(), that pin down a context and ensure that it will keep its ASID after a rollover. Export the symbols to let the modular SMMUv3 driver use them. Pinning is necessary because a device constantly needs a valid ASID, unlike tasks that only require one when running. Without pinning, we would need to notify the IOMMU when we're about to use a new ASID for a task, and it would get complicated when a new task is assigned a shared ASID. Consider the following scenario with no ASID pinned: 1. Task t1 is running on CPUx with shared ASID (gen=1, asid=1) 2. Task t2 is scheduled on CPUx, gets ASID (1, 2) 3. Task tn is scheduled on CPUy, a rollover occurs, tn gets ASID (2, 1) We would now have to immediately generate a new ASID for t1, notify the IOMMU, and finally enable task tn. We are holding the lock during all that time, since we can't afford having another CPU trigger a rollover. The IOMMU issues invalidation commands that can take tens of milliseconds. It gets needlessly complicated. All we wanted to do was schedule task tn, that has no business with the IOMMU. By letting the IOMMU pin tasks when needed, we avoid stalling the slow path, and let the pinning fail when we're out of shareable ASIDs. After a rollover, the allocator expects at least one ASID to be available in addition to the reserved ones (one per CPU). So (NR_ASIDS - NR_CPUS - 1) is the maximum number of ASIDs that can be shared with the IOMMU. 
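A consumer such as the SMMUv3 SVA code would use the new interface roughly along these lines (hypothetical call site, not part of this patch; the error value chosen here is arbitrary):

	/* Pin the ASID so it survives rollover while the device is using it */
	asid = arm64_mm_context_get(mm);
	if (!asid)
		return -ENOSPC;	/* pinning unsupported, or no shareable ASIDs left */

	/* ... install the ASID in the device's context descriptor ... */

	/* Drop the pin once the device no longer uses this address space */
	arm64_mm_context_put(mm);
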
Signed-off-by: Jean-Philippe Brucker Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20200918101852.582559-5-jean-philippe@linaro.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu.h | 3 + arch/arm64/include/asm/mmu_context.h | 11 +++- arch/arm64/mm/context.c | 105 +++++++++++++++++++++++++++++++++-- 3 files changed, 112 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index a7a5ecaa2e83..0fda85b2cc1b 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -17,11 +17,14 @@ #ifndef __ASSEMBLY__ +#include <linux/refcount.h> + typedef struct { atomic64_t id; #ifdef CONFIG_COMPAT void *sigpage; #endif + refcount_t pinned; void *vdso; unsigned long flags; } mm_context_t; diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index f2d7537d6f83..0672236e1aea 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -177,7 +177,13 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp) #define destroy_context(mm) do { } while(0) void check_and_switch_context(struct mm_struct *mm); -#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) +static inline int +init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + atomic64_set(&mm->context.id, 0); + refcount_set(&mm->context.pinned, 0); + return 0; +} #ifdef CONFIG_ARM64_SW_TTBR0_PAN static inline void update_saved_ttbr0(struct task_struct *tsk, @@ -248,6 +254,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, void verify_cpu_asid_bits(void); void post_ttbr_update_workaround(void); +unsigned long arm64_mm_context_get(struct mm_struct *mm); +void arm64_mm_context_put(struct mm_struct *mm); + #endif /* !__ASSEMBLY__ */ #endif /* !__ASM_MMU_CONTEXT_H */ diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 9b11c096a042..001737a8f309 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -27,6 +27,10 @@ static DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); static cpumask_t tlb_flush_pending; +static unsigned long max_pinned_asids; +static unsigned long nr_pinned_asids; +static unsigned long *pinned_asid_map; + #define ASID_MASK (~GENMASK(asid_bits - 1, 0)) #define ASID_FIRST_VERSION (1UL << asid_bits) @@ -72,7 +76,7 @@ void verify_cpu_asid_bits(void) } } -static void set_kpti_asid_bits(void) +static void set_kpti_asid_bits(unsigned long *map) { unsigned int len = BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned long); /* @@ -81,13 +85,15 @@ static void set_kpti_asid_bits(void) * is set, then the ASID will map only userspace. Thus * mark even as reserved for kernel. */ - memset(asid_map, 0xaa, len); + memset(map, 0xaa, len); } static void set_reserved_asid_bits(void) { - if (arm64_kernel_unmapped_at_el0()) - set_kpti_asid_bits(); + if (pinned_asid_map) + bitmap_copy(asid_map, pinned_asid_map, NUM_USER_ASIDS); + else if (arm64_kernel_unmapped_at_el0()) + set_kpti_asid_bits(asid_map); else bitmap_clear(asid_map, 0, NUM_USER_ASIDS); } @@ -165,6 +171,14 @@ static u64 new_context(struct mm_struct *mm) if (check_update_reserved_asid(asid, newasid)) return newasid; + /* + * If it is pinned, we can keep using it. Note that reserved + * takes priority, because even if it is also pinned, we need to + * update the generation into the reserved_asids. + */ + if (refcount_read(&mm->context.pinned)) + return newasid; + /* * We had a valid ASID in a previous life, so try to re-use * it if possible. 
@@ -256,6 +270,71 @@ switch_mm_fastpath: cpu_switch_mm(mm->pgd, mm); } +unsigned long arm64_mm_context_get(struct mm_struct *mm) +{ + unsigned long flags; + u64 asid; + + if (!pinned_asid_map) + return 0; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + + asid = atomic64_read(&mm->context.id); + + if (refcount_inc_not_zero(&mm->context.pinned)) + goto out_unlock; + + if (nr_pinned_asids >= max_pinned_asids) { + asid = 0; + goto out_unlock; + } + + if (!asid_gen_match(asid)) { + /* + * We went through one or more rollover since that ASID was + * used. Ensure that it is still valid, or generate a new one. + */ + asid = new_context(mm); + atomic64_set(&mm->context.id, asid); + } + + nr_pinned_asids++; + __set_bit(asid2idx(asid), pinned_asid_map); + refcount_set(&mm->context.pinned, 1); + +out_unlock: + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); + + asid &= ~ASID_MASK; + + /* Set the equivalent of USER_ASID_BIT */ + if (asid && arm64_kernel_unmapped_at_el0()) + asid |= 1; + + return asid; +} +EXPORT_SYMBOL_GPL(arm64_mm_context_get); + +void arm64_mm_context_put(struct mm_struct *mm) +{ + unsigned long flags; + u64 asid = atomic64_read(&mm->context.id); + + if (!pinned_asid_map) + return; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + + if (refcount_dec_and_test(&mm->context.pinned)) { + __clear_bit(asid2idx(asid), pinned_asid_map); + nr_pinned_asids--; + } + + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); +} +EXPORT_SYMBOL_GPL(arm64_mm_context_put); + /* Errata workaround post TTBRx_EL1 update. */ asmlinkage void post_ttbr_update_workaround(void) { @@ -296,8 +375,11 @@ static int asids_update_limit(void) { unsigned long num_available_asids = NUM_USER_ASIDS; - if (arm64_kernel_unmapped_at_el0()) + if (arm64_kernel_unmapped_at_el0()) { num_available_asids /= 2; + if (pinned_asid_map) + set_kpti_asid_bits(pinned_asid_map); + } /* * Expect allocation after rollover to fail if we don't have at least * one more ASID than CPUs. ASID #0 is reserved for init_mm. @@ -305,6 +387,13 @@ static int asids_update_limit(void) WARN_ON(num_available_asids - 1 <= num_possible_cpus()); pr_info("ASID allocator initialised with %lu entries\n", num_available_asids); + + /* + * There must always be an ASID available after rollover. Ensure that, + * even if all CPUs have a reserved ASID and the maximum number of ASIDs + * are pinned, there still is at least one empty slot in the ASID map. + */ + max_pinned_asids = num_available_asids - num_possible_cpus() - 2; return 0; } arch_initcall(asids_update_limit); @@ -319,13 +408,17 @@ static int asids_init(void) panic("Failed to allocate bitmap for %lu ASIDs\n", NUM_USER_ASIDS); + pinned_asid_map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS), + sizeof(*pinned_asid_map), GFP_KERNEL); + nr_pinned_asids = 0; + /* * We cannot call set_reserved_asid_bits() here because CPU * caps are not finalized yet, so it is safer to assume KPTI * and reserve kernel ASID's from beginning. */ if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) - set_kpti_asid_bits(); + set_kpti_asid_bits(asid_map); return 0; } early_initcall(asids_init); -- cgit From 6f3c4afae9805bda1682e0ad17b8ce56d85c9a83 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 18 Sep 2020 12:18:46 +0200 Subject: arm64: cpufeature: Export symbol read_sanitised_ftr_reg() The SMMUv3 driver would like to read the MMFR0 PARANGE field in order to share CPU page tables with devices. Allow the driver to be built as a module by exporting the read_sanitised_ftr_reg() cpufeature symbol. 
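For context, the sketch below shows how a modular driver might consume this export to read the PARANGE field. The helper name is hypothetical and the field macro assumes the sysreg definitions of this era; it is not a quote from the SMMUv3 driver.

  /* Sketch: read the sanitised PARANGE field from ID_AA64MMFR0_EL1. */
  #include <linux/types.h>
  #include <asm/cpufeature.h>
  #include <asm/sysreg.h>

  static unsigned int my_read_parange(void)	/* hypothetical helper */
  {
  	u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);

  	/* PARANGE is the 4-bit field at bits [3:0] of ID_AA64MMFR0_EL1 */
  	return cpuid_feature_extract_unsigned_field(mmfr0,
  					ID_AA64MMFR0_PARANGE_SHIFT);
  }

Using the sanitised value rather than reading the register directly ensures the driver sees a field that is valid on every CPU in the system.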
Signed-off-by: Jean-Philippe Brucker Reviewed-by: Jonathan Cameron Acked-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20200918101852.582559-7-jean-philippe@linaro.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6424584be01e..07f10ad8855c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1111,6 +1111,7 @@ u64 read_sanitised_ftr_reg(u32 id) return 0; return regp->sys_val; } +EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg); #define read_sysreg_case(r) \ case r: return read_sysreg_s(r) -- cgit From 6a1bdb173f9967b2329aab0f25bcba963f54e06b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 30 Sep 2020 13:20:40 +0100 Subject: arm64: mm: Make flush_tlb_fix_spurious_fault() a no-op Our use of broadcast TLB maintenance means that spurious page-faults that have been handled already by another CPU do not require additional TLB maintenance. Make flush_tlb_fix_spurious_fault() a no-op and rely on the existing TLB invalidation instead. Add an explicit flush_tlb_page() when making a page dirty, as the TLB is permitted to cache the old read-only entry. Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20200728092220.GA21800@willie-the-truck Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 8 ++++++++ arch/arm64/mm/fault.c | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index bc68da9f5706..02ad3105c14c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -50,6 +50,14 @@ extern void __pgd_error(const char *file, int line, unsigned long val); __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +/* + * Outside of a few very special situations (e.g. hibernation), we always + * use broadcast TLB invalidation instructions, therefore a spurious page + * fault on one CPU which has been handled concurrently by another CPU + * does not need to perform additional invalidation. + */ +#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0) + /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f07333e86c2f..a696a7921da4 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -218,7 +218,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma, pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval); } while (pteval != old_pteval); - flush_tlb_fix_spurious_fault(vma, address); + /* Invalidate a stale read-only entry */ + if (dirty) + flush_tlb_page(vma, address); return 1; } -- cgit From 80d6b466679c3dced3b359a6379c6d913de39afd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 1 Oct 2020 09:48:21 +0100 Subject: arm64: dbm: Invalidate local TLB when setting TCR_EL1.HD TCR_EL1.HD is permitted to be cached in a TLB, so invalidate the local TLB after setting the bit when support for the feature is detected. Although this isn't strictly necessary, since we can happily operate with the bit effectively clear, the current code uses an ISB in a half-hearted attempt to make the change effective, so let's just fix that up. 
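For reference, a simplified sketch of the enable path with the new invalidation in place; the TCR_HD read-modify-write is reconstructed from the surrounding kernel code rather than quoted from the diff below.

  #include <linux/types.h>
  #include <asm/barrier.h>
  #include <asm/pgtable-hwdef.h>
  #include <asm/sysreg.h>
  #include <asm/tlbflush.h>

  static inline void enable_hw_dbm_sketch(void)
  {
  	u64 tcr = read_sysreg(tcr_el1) | TCR_HD;	/* set TCR_EL1.HD */

  	write_sysreg(tcr, tcr_el1);
  	isb();			/* make the new TCR value visible to the walker */
  	local_flush_tlb_all();	/* drop walks cached while HD was clear */
  }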
Link: https://lore.kernel.org/r/20201001110405.18617-1-will@kernel.org Reviewed-by: Catalin Marinas Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6424584be01e..a474a4f39c95 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1443,6 +1443,7 @@ static inline void __cpu_enable_hw_dbm(void) write_sysreg(tcr, tcr_el1); isb(); + local_flush_tlb_all(); } static bool cpu_has_broken_dbm(void) -- cgit From d9ef632fab9ba81b708763bcbcfdbea9a55c95d2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 1 Oct 2020 11:54:54 +0100 Subject: perf: arm-cmn: Fix unsigned comparison to less than zero Ensure that the 'irq' field of 'struct arm_cmn_dtc' is a signed int so that it can be compared '< 0'. Link: https://lore.kernel.org/r/20200929170835.GA15956@embeddedor Addresses-Coverity-ID: 1497488 ("Unsigned compared against 0") Fixes: 0ba64770a2f2 ("perf: Add Arm CMN-600 PMU driver") Reported-by: Gustavo A. R. Silva Reviewed-by: Gustavo A. R. Silva Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index e824b5b83ea2..cd4da4c5dac0 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -217,7 +217,7 @@ struct arm_cmn_node { struct arm_cmn_dtc { void __iomem *base; - unsigned int irq; + int irq; int irq_friend; bool cc_active; -- cgit From 887e2cff0f8dc4dac4c35e631b711325c6dd65b8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 1 Oct 2020 11:57:01 +0100 Subject: perf: arm-cmn: Fix conversion specifiers for node type The node type field is an enum type, so print it as a 32-bit quantity rather than as an unsigned short. Link: https://lore.kernel.org/r/202009302350.QIzfkx62-lkp@intel.com Reported-by: kernel test robot Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index cd4da4c5dac0..a76ff594f3ca 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1324,7 +1324,7 @@ static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_c else level = 2; - dev_dbg(cmn->dev, "node%*c%#06hx%*ctype:%-#6hx id:%-4hd off:%#x\n", + dev_dbg(cmn->dev, "node%*c%#06hx%*ctype:%-#6x id:%-4hd off:%#x\n", (level * 2) + 1, ' ', node->id, 5 - (level * 2), ' ', node->type, node->logid, offset); } @@ -1430,7 +1430,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) break; /* Something has gone horribly wrong */ default: - dev_err(cmn->dev, "invalid device node type: 0x%hx\n", dn->type); + dev_err(cmn->dev, "invalid device node type: 0x%x\n", dn->type); return -ENODEV; } } -- cgit
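The conversion-specifier fix above comes down to C's default argument promotions: an enum passed through a variadic call is promoted to int, so "%hx" (which expects an unsigned short) draws a -Wformat warning from clang, while "%x" is fine. The standalone userspace program below illustrates the point; the enum values are made up for the example and have nothing to do with the CMN-600 node encodings.

  #include <stdio.h>

  enum node_type { NODE_TYPE_A = 0x1, NODE_TYPE_B = 0x2 };	/* made-up values */

  int main(void)
  {
  	enum node_type type = NODE_TYPE_B;

  	/* printf("0x%hx\n", type);  -- warns: format expects unsigned short */
  	printf("0x%x\n", type);	/* fine: the enum argument is promoted to int */
  	return 0;
  }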