summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/arc/Makefile2
-rw-r--r--arch/arc/include/asm/cmpxchg.h4
-rw-r--r--arch/arc/include/asm/page.h12
-rw-r--r--arch/arc/include/asm/pgtable.h12
-rw-r--r--arch/arc/include/uapi/asm/page.h1
-rw-r--r--arch/arc/include/uapi/asm/sigcontext.h1
-rw-r--r--arch/arc/kernel/entry.S4
-rw-r--r--arch/arc/kernel/kgdb.c1
-rw-r--r--arch/arc/kernel/process.c8
-rw-r--r--arch/arc/kernel/signal.c47
-rw-r--r--arch/arc/kernel/vmlinux.lds.S2
-rw-r--r--arch/arc/mm/init.c11
-rw-r--r--arch/arc/mm/ioremap.c5
-rw-r--r--arch/arc/mm/tlb.c2
-rw-r--r--arch/arm/boot/dts/imx6dl-yapp4-common.dtsi6
-rw-r--r--arch/arm/boot/dts/imx6q-dhcom-som.dtsi12
-rw-r--r--arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi2
-rw-r--r--arch/arm/boot/dts/imx7d-meerkat96.dts2
-rw-r--r--arch/arm/boot/dts/imx7d-pico.dtsi2
-rw-r--r--arch/arm/include/asm/cpuidle.h5
-rw-r--r--arch/arm/mach-imx/pm-imx27.c1
-rw-r--r--arch/arm/mach-npcm/Kconfig1
-rw-r--r--arch/arm/mach-omap1/board-ams-delta.c14
-rw-r--r--arch/arm/mach-omap1/board-h2.c4
-rw-r--r--arch/arm/mach-omap1/pm.c10
-rw-r--r--arch/arm/mach-omap2/board-n8x0.c2
-rw-r--r--arch/arm/mach-pxa/pxa_cplds_irqs.c7
-rw-r--r--arch/arm/tools/syscall.tbl2
-rw-r--r--arch/arm/xen/mm.c20
-rw-r--r--arch/arm64/Kbuild3
-rw-r--r--arch/arm64/Kconfig.platforms1
-rw-r--r--arch/arm64/Makefile3
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts3
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts5
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts10
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi23
-rw-r--r--arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi2
-rw-r--r--arch/arm64/boot/dts/renesas/r8a774a1.dtsi8
-rw-r--r--arch/arm64/boot/dts/renesas/r8a774b1.dtsi8
-rw-r--r--arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts2
-rw-r--r--arch/arm64/boot/dts/renesas/r8a774c0.dtsi4
-rw-r--r--arch/arm64/boot/dts/renesas/r8a774e1.dtsi8
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77950.dtsi4
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77951.dtsi12
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77960.dtsi8
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77961.dtsi8
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77965.dtsi8
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77970.dtsi4
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77980.dtsi8
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts2
-rw-r--r--arch/arm64/boot/dts/renesas/r8a77990.dtsi4
-rw-r--r--arch/arm64/boot/dts/renesas/salvator-common.dtsi3
-rw-r--r--arch/arm64/boot/dts/ti/k3-am64-main.dtsi11
-rw-r--r--arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi3
-rw-r--r--arch/arm64/boot/dts/ti/k3-am65-main.dtsi10
-rw-r--r--arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi4
-rw-r--r--arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi13
-rw-r--r--arch/arm64/boot/dts/ti/k3-am654-base-board.dts31
-rw-r--r--arch/arm64/boot/dts/ti/k3-j7200-main.dtsi8
-rw-r--r--arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi7
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721e-main.dtsi10
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi11
-rw-r--r--arch/arm64/include/asm/Kbuild2
-rw-r--r--arch/arm64/include/asm/barrier.h2
-rw-r--r--arch/arm64/include/asm/cpucaps.h74
-rw-r--r--arch/arm64/include/asm/kvm_asm.h3
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h5
-rw-r--r--arch/arm64/include/asm/unistd32.h3
-rw-r--r--arch/arm64/kvm/arm.c20
-rw-r--r--arch/arm64/kvm/hyp/exception.c18
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/adjust_pc.h18
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c8
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c3
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c3
-rw-r--r--arch/arm64/kvm/mmu.c12
-rw-r--r--arch/arm64/kvm/reset.c28
-rw-r--r--arch/arm64/kvm/sys_regs.c42
-rw-r--r--arch/arm64/mm/flush.c4
-rw-r--r--arch/arm64/mm/init.c3
-rw-r--r--arch/arm64/mm/mmu.c3
-rw-r--r--arch/arm64/mm/proc.S12
-rw-r--r--arch/arm64/tools/Makefile22
-rw-r--r--arch/arm64/tools/cpucaps65
-rwxr-xr-xarch/arm64/tools/gen-cpucaps.awk40
-rw-r--r--arch/ia64/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/m68k/kernel/signal.c3
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/mips/alchemy/board-xxs1500.c1
-rw-r--r--arch/mips/include/asm/mips-boards/launch.h5
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl2
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl2
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl2
-rw-r--r--arch/mips/lib/mips-atomic.c12
-rw-r--r--arch/mips/mm/cache.c30
-rw-r--r--arch/mips/ralink/of.c2
-rw-r--r--arch/openrisc/include/asm/barrier.h9
-rw-r--r--arch/openrisc/kernel/setup.c2
-rw-r--r--arch/openrisc/mm/init.c6
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/powerpc/boot/dts/fsl/p1010si-post.dtsi8
-rw-r--r--arch/powerpc/boot/dts/fsl/p2041si-post.dtsi16
-rw-r--r--arch/powerpc/include/asm/hvcall.h3
-rw-r--r--arch/powerpc/include/asm/interrupt.h9
-rw-r--r--arch/powerpc/include/asm/kvm_host.h1
-rw-r--r--arch/powerpc/include/asm/paravirt.h22
-rw-r--r--arch/powerpc/include/asm/plpar_wrappers.h6
-rw-r--r--arch/powerpc/include/asm/pte-walk.h29
-rw-r--r--arch/powerpc/include/asm/ptrace.h45
-rw-r--r--arch/powerpc/include/asm/syscall.h42
-rw-r--r--arch/powerpc/include/asm/uaccess.h2
-rw-r--r--arch/powerpc/kernel/eeh.c23
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S38
-rw-r--r--arch/powerpc/kernel/interrupt.c4
-rw-r--r--arch/powerpc/kernel/io-workarounds.c16
-rw-r--r--arch/powerpc/kernel/iommu.c11
-rw-r--r--arch/powerpc/kernel/kprobes.c4
-rw-r--r--arch/powerpc/kernel/legacy_serial.c7
-rw-r--r--arch/powerpc/kernel/setup_64.c4
-rw-r--r--arch/powerpc/kernel/signal.h4
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv.c3
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c15
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S7
-rw-r--r--arch/powerpc/lib/feature-fixups.c114
-rw-r--r--arch/powerpc/platforms/pseries/hvCall.S10
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c29
-rw-r--r--arch/s390/kernel/entry.S4
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/sh/kernel/traps.c1
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/x86/Makefile13
-rw-r--r--arch/x86/boot/compressed/Makefile7
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/boot/compressed/misc.h2
-rw-r--r--arch/x86/boot/compressed/sev.c (renamed from arch/x86/boot/compressed/sev-es.c)4
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl2
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl2
-rw-r--r--arch/x86/events/core.c6
-rw-r--r--arch/x86/events/intel/core.c2
-rw-r--r--arch/x86/events/intel/lbr.c26
-rw-r--r--arch/x86/events/intel/uncore_snbep.c9
-rw-r--r--arch/x86/events/perf_event.h6
-rw-r--r--arch/x86/include/asm/apic.h1
-rw-r--r--arch/x86/include/asm/disabled-features.h7
-rw-r--r--arch/x86/include/asm/fpu/api.h6
-rw-r--r--arch/x86/include/asm/fpu/internal.h7
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h16
-rw-r--r--arch/x86/include/asm/kvm_para.h10
-rw-r--r--arch/x86/include/asm/msr-index.h6
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/sev-common.h62
-rw-r--r--arch/x86/include/asm/sev.h (renamed from arch/x86/include/asm/sev-es.h)30
-rw-r--r--arch/x86/include/asm/thermal.h4
-rw-r--r--arch/x86/include/asm/vdso/clocksource.h2
-rw-r--r--arch/x86/include/uapi/asm/kvm.h2
-rw-r--r--arch/x86/kernel/Makefile6
-rw-r--r--arch/x86/kernel/alternative.c64
-rw-r--r--arch/x86/kernel/apic/apic.c1
-rw-r--r--arch/x86/kernel/apic/vector.c20
-rw-r--r--arch/x86/kernel/cpu/amd.c20
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c4
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c4
-rw-r--r--arch/x86/kernel/fpu/xstate.c57
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/kvm.c129
-rw-r--r--arch/x86/kernel/kvmclock.c26
-rw-r--r--arch/x86/kernel/mmconf-fam10h_64.c2
-rw-r--r--arch/x86/kernel/nmi.c2
-rw-r--r--arch/x86/kernel/setup.c44
-rw-r--r--arch/x86/kernel/sev-shared.c (renamed from arch/x86/kernel/sev-es-shared.c)21
-rw-r--r--arch/x86/kernel/sev.c (renamed from arch/x86/kernel/sev-es.c)140
-rw-r--r--arch/x86/kernel/signal_compat.c9
-rw-r--r--arch/x86/kernel/smpboot.c2
-rw-r--r--arch/x86/kvm/cpuid.c21
-rw-r--r--arch/x86/kvm/emulate.c7
-rw-r--r--arch/x86/kvm/hyperv.c8
-rw-r--r--arch/x86/kvm/kvm_emulate.h4
-rw-r--r--arch/x86/kvm/lapic.c38
-rw-r--r--arch/x86/kvm/mmu/mmu.c46
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h14
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c23
-rw-r--r--arch/x86/kvm/svm/avic.c12
-rw-r--r--arch/x86/kvm/svm/nested.c23
-rw-r--r--arch/x86/kvm/svm/sev.c58
-rw-r--r--arch/x86/kvm/svm/svm.c70
-rw-r--r--arch/x86/kvm/svm/svm.h41
-rw-r--r--arch/x86/kvm/trace.h6
-rw-r--r--arch/x86/kvm/vmx/capabilities.h6
-rw-r--r--arch/x86/kvm/vmx/nested.c29
-rw-r--r--arch/x86/kvm/vmx/posted_intr.c14
-rw-r--r--arch/x86/kvm/vmx/posted_intr.h1
-rw-r--r--arch/x86/kvm/vmx/vmx.c227
-rw-r--r--arch/x86/kvm/vmx/vmx.h12
-rw-r--r--arch/x86/kvm/x86.c207
-rw-r--r--arch/x86/mm/extable.c2
-rw-r--r--arch/x86/mm/fault.c4
-rw-r--r--arch/x86/mm/mem_encrypt_identity.c17
-rw-r--r--arch/x86/pci/amd_bus.c2
-rw-r--r--arch/x86/pci/fixup.c44
-rw-r--r--arch/x86/platform/efi/efi_64.c2
-rw-r--r--arch/x86/platform/efi/quirks.c12
-rw-r--r--arch/x86/realmode/init.c16
-rw-r--r--arch/x86/realmode/rm/trampoline_64.S4
-rw-r--r--arch/x86/xen/enlighten_pv.c8
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl2
214 files changed, 1989 insertions, 1207 deletions
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 5622578742fd..3000a2e8ee21 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -482,7 +482,7 @@
550 common process_madvise sys_process_madvise
551 common epoll_pwait2 sys_epoll_pwait2
552 common mount_setattr sys_mount_setattr
-553 common quotactl_path sys_quotactl_path
+# 553 reserved for quotactl_path
554 common landlock_create_ruleset sys_landlock_create_ruleset
555 common landlock_add_rule sys_landlock_add_rule
556 common landlock_restrict_self sys_landlock_restrict_self
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 4392c9c189c4..e47adc97a89b 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -31,7 +31,7 @@ endif
ifdef CONFIG_ARC_CURR_IN_REG
-# For a global register defintion, make sure it gets passed to every file
+# For a global register definition, make sure it gets passed to every file
# We had a customer reported bug where some code built in kernel was NOT using
# any kernel headers, and missing the r25 global register
# Can't do unconditionally because of recursive include issues
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h
index 9b87e162e539..dfeffa25499b 100644
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -116,7 +116,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
*
* Technically the lock is also needed for UP (boils down to irq save/restore)
* but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to
- * be disabled thus can't possibly be interrpted/preempted/clobbered by xchg()
+ * be disabled thus can't possibly be interrupted/preempted/clobbered by xchg()
* Other way around, xchg is one instruction anyways, so can't be interrupted
* as such
*/
@@ -143,7 +143,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
/*
* "atomic" variant of xchg()
* REQ: It needs to follow the same serialization rules as other atomic_xxx()
- * Since xchg() doesn't always do that, it would seem that following defintion
+ * Since xchg() doesn't always do that, it would seem that following definition
* is incorrect. But here's the rationale:
* SMP : Even xchg() takes the atomic_ops_lock, so OK.
* LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC
diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index ad9b7fe4dba3..4a9d33372fe2 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -7,6 +7,18 @@
#include <uapi/asm/page.h>
+#ifdef CONFIG_ARC_HAS_PAE40
+
+#define MAX_POSSIBLE_PHYSMEM_BITS 40
+#define PAGE_MASK_PHYS (0xff00000000ull | PAGE_MASK)
+
+#else /* CONFIG_ARC_HAS_PAE40 */
+
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
+#define PAGE_MASK_PHYS PAGE_MASK
+
+#endif /* CONFIG_ARC_HAS_PAE40 */
+
#ifndef __ASSEMBLY__
#define clear_page(paddr) memset((paddr), 0, PAGE_SIZE)
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 163641726a2b..5878846f00cf 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -107,8 +107,8 @@
#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
/* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL)
-
+#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
+ _PAGE_SPECIAL)
/* More Abbrevaited helpers */
#define PAGE_U_NONE __pgprot(___DEF)
#define PAGE_U_R __pgprot(___DEF | _PAGE_READ)
@@ -132,13 +132,7 @@
#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
-#ifdef CONFIG_ARC_HAS_PAE40
-#define PTE_BITS_NON_RWX_IN_PD1 (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE)
-#define MAX_POSSIBLE_PHYSMEM_BITS 40
-#else
-#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE)
-#define MAX_POSSIBLE_PHYSMEM_BITS 32
-#endif
+#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE)
/**************************************************************************
* Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h
index 2a97e2718a21..2a4ad619abfb 100644
--- a/arch/arc/include/uapi/asm/page.h
+++ b/arch/arc/include/uapi/asm/page.h
@@ -33,5 +33,4 @@
#define PAGE_MASK (~(PAGE_SIZE-1))
-
#endif /* _UAPI__ASM_ARC_PAGE_H */
diff --git a/arch/arc/include/uapi/asm/sigcontext.h b/arch/arc/include/uapi/asm/sigcontext.h
index 95f8a4380e11..7a5449dfcb29 100644
--- a/arch/arc/include/uapi/asm/sigcontext.h
+++ b/arch/arc/include/uapi/asm/sigcontext.h
@@ -18,6 +18,7 @@
*/
struct sigcontext {
struct user_regs_struct regs;
+ struct user_regs_arcv2 v2abi;
};
#endif /* _ASM_ARC_SIGCONTEXT_H */
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 1743506081da..2cb8dfe866b6 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -177,7 +177,7 @@ tracesys:
; Do the Sys Call as we normally would.
; Validate the Sys Call number
- cmp r8, NR_syscalls
+ cmp r8, NR_syscalls - 1
mov.hi r0, -ENOSYS
bhi tracesys_exit
@@ -255,7 +255,7 @@ ENTRY(EV_Trap)
;============ Normal syscall case
; syscall num shd not exceed the total system calls avail
- cmp r8, NR_syscalls
+ cmp r8, NR_syscalls - 1
mov.hi r0, -ENOSYS
bhi .Lret_from_system_call
diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index ecfbc42d3a40..345a0000554c 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -140,6 +140,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
ptr = &remcomInBuffer[1];
if (kgdb_hex2long(&ptr, &addr))
regs->ret = addr;
+ fallthrough;
case 'D':
case 'k':
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index d838d0d57696..3793876f42d9 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -50,14 +50,14 @@ SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new)
int ret;
/*
- * This is only for old cores lacking LLOCK/SCOND, which by defintion
+ * This is only for old cores lacking LLOCK/SCOND, which by definition
* can't possibly be SMP. Thus doesn't need to be SMP safe.
* And this also helps reduce the overhead for serializing in
* the UP case
*/
WARN_ON_ONCE(IS_ENABLED(CONFIG_SMP));
- /* Z indicates to userspace if operation succeded */
+ /* Z indicates to userspace if operation succeeded */
regs->status32 &= ~STATUS_Z_MASK;
ret = access_ok(uaddr, sizeof(*uaddr));
@@ -107,7 +107,7 @@ fail:
void arch_cpu_idle(void)
{
- /* Re-enable interrupts <= default irq priority before commiting SLEEP */
+ /* Re-enable interrupts <= default irq priority before committing SLEEP */
const unsigned int arg = 0x10 | ARCV2_IRQ_DEF_PRIO;
__asm__ __volatile__(
@@ -120,7 +120,7 @@ void arch_cpu_idle(void)
void arch_cpu_idle(void)
{
- /* sleep, but enable both set E1/E2 (levels of interrutps) before committing */
+ /* sleep, but enable both set E1/E2 (levels of interrupts) before committing */
__asm__ __volatile__("sleep 0x3 \n");
}
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index fdbe06c98895..cb2f88502baf 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -61,6 +61,41 @@ struct rt_sigframe {
unsigned int sigret_magic;
};
+static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
+{
+ int err = 0;
+#ifndef CONFIG_ISA_ARCOMPACT
+ struct user_regs_arcv2 v2abi;
+
+ v2abi.r30 = regs->r30;
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+ v2abi.r58 = regs->r58;
+ v2abi.r59 = regs->r59;
+#else
+ v2abi.r58 = v2abi.r59 = 0;
+#endif
+ err = __copy_to_user(&mctx->v2abi, &v2abi, sizeof(v2abi));
+#endif
+ return err;
+}
+
+static int restore_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
+{
+ int err = 0;
+#ifndef CONFIG_ISA_ARCOMPACT
+ struct user_regs_arcv2 v2abi;
+
+ err = __copy_from_user(&v2abi, &mctx->v2abi, sizeof(v2abi));
+
+ regs->r30 = v2abi.r30;
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+ regs->r58 = v2abi.r58;
+ regs->r59 = v2abi.r59;
+#endif
+#endif
+ return err;
+}
+
static int
stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
sigset_t *set)
@@ -94,6 +129,10 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), &uregs.scratch,
sizeof(sf->uc.uc_mcontext.regs.scratch));
+
+ if (is_isa_arcv2())
+ err |= save_arcv2_regs(&(sf->uc.uc_mcontext), regs);
+
err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t));
return err ? -EFAULT : 0;
@@ -109,6 +148,10 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
err |= __copy_from_user(&uregs.scratch,
&(sf->uc.uc_mcontext.regs.scratch),
sizeof(sf->uc.uc_mcontext.regs.scratch));
+
+ if (is_isa_arcv2())
+ err |= restore_arcv2_regs(&(sf->uc.uc_mcontext), regs);
+
if (err)
return -EFAULT;
@@ -259,7 +302,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
regs->r2 = (unsigned long)&sf->uc;
/*
- * small optim to avoid unconditonally calling do_sigaltstack
+ * small optim to avoid unconditionally calling do_sigaltstack
* in sigreturn path, now that we only have rt_sigreturn
*/
magic = MAGIC_SIGALTSTK;
@@ -391,7 +434,7 @@ void do_signal(struct pt_regs *regs)
void do_notify_resume(struct pt_regs *regs)
{
/*
- * ASM glue gaurantees that this is only called when returning to
+ * ASM glue guarantees that this is only called when returning to
* user mode
*/
if (test_thread_flag(TIF_NOTIFY_RESUME))
diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S
index 33ce59d91461..e2146a8da195 100644
--- a/arch/arc/kernel/vmlinux.lds.S
+++ b/arch/arc/kernel/vmlinux.lds.S
@@ -57,7 +57,6 @@ SECTIONS
.init.ramfs : { INIT_RAM_FS }
. = ALIGN(PAGE_SIZE);
- _stext = .;
HEAD_TEXT_SECTION
INIT_TEXT_SECTION(L1_CACHE_BYTES)
@@ -83,6 +82,7 @@ SECTIONS
.text : {
_text = .;
+ _stext = .;
TEXT_TEXT
SCHED_TEXT
CPUIDLE_TEXT
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 33832e36bdb7..e2ed355438c9 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -157,7 +157,16 @@ void __init setup_arch_memory(void)
min_high_pfn = PFN_DOWN(high_mem_start);
max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz);
- max_zone_pfn[ZONE_HIGHMEM] = min_low_pfn;
+ /*
+ * max_high_pfn should be ok here for both HIGHMEM and HIGHMEM+PAE.
+ * For HIGHMEM without PAE max_high_pfn should be less than
+ * min_low_pfn to guarantee that these two regions don't overlap.
+ * For PAE case highmem is greater than lowmem, so it is natural
+ * to use max_high_pfn.
+ *
+ * In both cases, holes should be handled by pfn_valid().
+ */
+ max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn;
high_memory = (void *)(min_high_pfn << PAGE_SHIFT);
diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c
index fac4adc90204..95c649fbc95a 100644
--- a/arch/arc/mm/ioremap.c
+++ b/arch/arc/mm/ioremap.c
@@ -53,9 +53,10 @@ EXPORT_SYMBOL(ioremap);
void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
unsigned long flags)
{
+ unsigned int off;
unsigned long vaddr;
struct vm_struct *area;
- phys_addr_t off, end;
+ phys_addr_t end;
pgprot_t prot = __pgprot(flags);
/* Don't allow wraparound, zero size */
@@ -72,7 +73,7 @@ void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
/* Mappings have to be page-aligned */
off = paddr & ~PAGE_MASK;
- paddr &= PAGE_MASK;
+ paddr &= PAGE_MASK_PHYS;
size = PAGE_ALIGN(end + 1) - paddr;
/*
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 9bb3c24f3677..9c7c68247289 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -576,7 +576,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
pte_t *ptep)
{
unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
- phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK;
+ phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS;
struct page *page = pfn_to_page(pte_pfn(*ptep));
create_tlb(vma, vaddr, ptep);
diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi
index 7d2c72562c73..9148a01ed6d9 100644
--- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi
+++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi
@@ -105,9 +105,13 @@
phy-reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>;
phy-reset-duration = <20>;
phy-supply = <&sw2_reg>;
- phy-handle = <&ethphy0>;
status = "okay";
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+
mdio {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi
index 236fc205c389..d0768ae429fa 100644
--- a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi
+++ b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi
@@ -406,6 +406,18 @@
vin-supply = <&sw1_reg>;
};
+&reg_pu {
+ vin-supply = <&sw1_reg>;
+};
+
+&reg_vdd1p1 {
+ vin-supply = <&sw2_reg>;
+};
+
+&reg_vdd2p5 {
+ vin-supply = <&sw2_reg>;
+};
+
&uart1 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_uart1>;
diff --git a/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi b/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi
index 828cf3e39784..c4e146f3341b 100644
--- a/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi
@@ -126,7 +126,7 @@
compatible = "nxp,pca8574";
reg = <0x3a>;
gpio-controller;
- #gpio-cells = <1>;
+ #gpio-cells = <2>;
};
};
diff --git a/arch/arm/boot/dts/imx7d-meerkat96.dts b/arch/arm/boot/dts/imx7d-meerkat96.dts
index 5339210b63d0..dd8003bd1fc0 100644
--- a/arch/arm/boot/dts/imx7d-meerkat96.dts
+++ b/arch/arm/boot/dts/imx7d-meerkat96.dts
@@ -193,7 +193,7 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_usdhc1>;
keep-power-in-suspend;
- tuning-step = <2>;
+ fsl,tuning-step = <2>;
vmmc-supply = <&reg_3p3v>;
no-1-8-v;
broken-cd;
diff --git a/arch/arm/boot/dts/imx7d-pico.dtsi b/arch/arm/boot/dts/imx7d-pico.dtsi
index e57da0d32b98..e519897fae08 100644
--- a/arch/arm/boot/dts/imx7d-pico.dtsi
+++ b/arch/arm/boot/dts/imx7d-pico.dtsi
@@ -351,7 +351,7 @@
pinctrl-2 = <&pinctrl_usdhc1_200mhz>;
cd-gpios = <&gpio5 0 GPIO_ACTIVE_LOW>;
bus-width = <4>;
- tuning-step = <2>;
+ fsl,tuning-step = <2>;
vmmc-supply = <&reg_3p3v>;
wakeup-source;
no-1-8-v;
diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h
index 0d67ed682e07..bc4ffa7ca04c 100644
--- a/arch/arm/include/asm/cpuidle.h
+++ b/arch/arm/include/asm/cpuidle.h
@@ -7,9 +7,11 @@
#ifdef CONFIG_CPU_IDLE
extern int arm_cpuidle_simple_enter(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index);
+#define __cpuidle_method_section __used __section("__cpuidle_method_of_table")
#else
static inline int arm_cpuidle_simple_enter(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index) { return -ENODEV; }
+#define __cpuidle_method_section __maybe_unused /* drop silently */
#endif
/* Common ARM WFI state */
@@ -42,8 +44,7 @@ struct of_cpuidle_method {
#define CPUIDLE_METHOD_OF_DECLARE(name, _method, _ops) \
static const struct of_cpuidle_method __cpuidle_method_of_table_##name \
- __used __section("__cpuidle_method_of_table") \
- = { .method = _method, .ops = _ops }
+ __cpuidle_method_section = { .method = _method, .ops = _ops }
extern int arm_cpuidle_suspend(int index);
diff --git a/arch/arm/mach-imx/pm-imx27.c b/arch/arm/mach-imx/pm-imx27.c
index 020e6deb67c8..237e8aa9fe83 100644
--- a/arch/arm/mach-imx/pm-imx27.c
+++ b/arch/arm/mach-imx/pm-imx27.c
@@ -12,6 +12,7 @@
#include <linux/suspend.h>
#include <linux/io.h>
+#include "common.h"
#include "hardware.h"
static int mx27_suspend_enter(suspend_state_t state)
diff --git a/arch/arm/mach-npcm/Kconfig b/arch/arm/mach-npcm/Kconfig
index 658c8efb4ca1..a71cf1d189ae 100644
--- a/arch/arm/mach-npcm/Kconfig
+++ b/arch/arm/mach-npcm/Kconfig
@@ -10,6 +10,7 @@ config ARCH_WPCM450
bool "Support for WPCM450 BMC (Hermon)"
depends on ARCH_MULTI_V5
select CPU_ARM926T
+ select WPCM450_AIC
select NPCM7XX_TIMER
help
General support for WPCM450 BMC (Hermon).
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index 2ee527c00284..1026a816dcc0 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -458,20 +458,6 @@ static struct gpiod_lookup_table leds_gpio_table = {
#ifdef CONFIG_LEDS_TRIGGERS
DEFINE_LED_TRIGGER(ams_delta_camera_led_trigger);
-
-static int ams_delta_camera_power(struct device *dev, int power)
-{
- /*
- * turn on camera LED
- */
- if (power)
- led_trigger_event(ams_delta_camera_led_trigger, LED_FULL);
- else
- led_trigger_event(ams_delta_camera_led_trigger, LED_OFF);
- return 0;
-}
-#else
-#define ams_delta_camera_power NULL
#endif
static struct platform_device ams_delta_audio_device = {
diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c
index c40cf5ef8607..977b0b744c22 100644
--- a/arch/arm/mach-omap1/board-h2.c
+++ b/arch/arm/mach-omap1/board-h2.c
@@ -320,7 +320,7 @@ static int tps_setup(struct i2c_client *client, void *context)
{
if (!IS_BUILTIN(CONFIG_TPS65010))
return -ENOSYS;
-
+
tps65010_config_vregs1(TPS_LDO2_ENABLE | TPS_VLDO2_3_0V |
TPS_LDO1_ENABLE | TPS_VLDO1_3_0V);
@@ -394,6 +394,8 @@ static void __init h2_init(void)
BUG_ON(gpio_request(H2_NAND_RB_GPIO_PIN, "NAND ready") < 0);
gpio_direction_input(H2_NAND_RB_GPIO_PIN);
+ gpiod_add_lookup_table(&isp1301_gpiod_table);
+
omap_cfg_reg(L3_1610_FLASH_CS2B_OE);
omap_cfg_reg(M8_1610_FLASH_CS2B_WE);
diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c
index 2c1e2b32b9b3..a745d64d4699 100644
--- a/arch/arm/mach-omap1/pm.c
+++ b/arch/arm/mach-omap1/pm.c
@@ -655,9 +655,13 @@ static int __init omap_pm_init(void)
irq = INT_7XX_WAKE_UP_REQ;
else if (cpu_is_omap16xx())
irq = INT_1610_WAKE_UP_REQ;
- if (request_irq(irq, omap_wakeup_interrupt, 0, "peripheral wakeup",
- NULL))
- pr_err("Failed to request irq %d (peripheral wakeup)\n", irq);
+ else
+ irq = -1;
+
+ if (irq >= 0) {
+ if (request_irq(irq, omap_wakeup_interrupt, 0, "peripheral wakeup", NULL))
+ pr_err("Failed to request irq %d (peripheral wakeup)\n", irq);
+ }
/* Program new power ramp-up time
* (0 for most boards since we don't lower voltage when in deep sleep)
diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index 418a61ecb827..5e86145db0e2 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -322,6 +322,7 @@ static int n8x0_mmc_get_cover_state(struct device *dev, int slot)
static void n8x0_mmc_callback(void *data, u8 card_mask)
{
+#ifdef CONFIG_MMC_OMAP
int bit, *openp, index;
if (board_is_n800()) {
@@ -339,7 +340,6 @@ static void n8x0_mmc_callback(void *data, u8 card_mask)
else
*openp = 0;
-#ifdef CONFIG_MMC_OMAP
omap_mmc_notify_cover_event(mmc_device, index, *openp);
#else
pr_warn("MMC: notify cover event not available\n");
diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c
index ec0d9b094744..bddfc7cd5d40 100644
--- a/arch/arm/mach-pxa/pxa_cplds_irqs.c
+++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c
@@ -121,8 +121,13 @@ static int cplds_probe(struct platform_device *pdev)
return fpga->irq;
base_irq = platform_get_irq(pdev, 1);
- if (base_irq < 0)
+ if (base_irq < 0) {
base_irq = 0;
+ } else {
+ ret = devm_irq_alloc_descs(&pdev->dev, base_irq, base_irq, CPLDS_NB_IRQ, 0);
+ if (ret < 0)
+ return ret;
+ }
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
fpga->base = devm_ioremap_resource(&pdev->dev, res);
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index c7679d7db98b..28e03b5fec00 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -456,7 +456,7 @@
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index f8f07469d259..a7e54a087b80 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -135,24 +135,18 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
return;
}
-int xen_swiotlb_detect(void)
-{
- if (!xen_domain())
- return 0;
- if (xen_feature(XENFEAT_direct_mapped))
- return 1;
- /* legacy case */
- if (!xen_feature(XENFEAT_not_direct_mapped) && xen_initial_domain())
- return 1;
- return 0;
-}
-
static int __init xen_mm_init(void)
{
struct gnttab_cache_flush cflush;
+ int rc;
+
if (!xen_swiotlb_detect())
return 0;
- xen_swiotlb_init();
+
+ rc = xen_swiotlb_init();
+ /* we can work with the default swiotlb */
+ if (rc < 0 && rc != -EEXIST)
+ return rc;
cflush.op = 0;
cflush.a.dev_bus_addr = 0;
diff --git a/arch/arm64/Kbuild b/arch/arm64/Kbuild
index d6465823b281..7b393cfec071 100644
--- a/arch/arm64/Kbuild
+++ b/arch/arm64/Kbuild
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y += kernel/ mm/
-obj-$(CONFIG_NET) += net/
+obj-y += kernel/ mm/ net/
obj-$(CONFIG_KVM) += kvm/
obj-$(CONFIG_XEN) += xen/
obj-$(CONFIG_CRYPTO) += crypto/
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 6409b47b73e4..7336c1fd0dda 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -165,6 +165,7 @@ config ARCH_MEDIATEK
config ARCH_MESON
bool "Amlogic Platforms"
+ select COMMON_CLK
select MESON_IRQ_GPIO
help
This enables support for the arm64 based Amlogic SoCs
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 7ef44478560d..b52481f0605d 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -175,6 +175,9 @@ vdso_install:
$(if $(CONFIG_COMPAT_VDSO), \
$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 $@)
+archprepare:
+ $(Q)$(MAKE) $(build)=arch/arm64/tools kapi
+
# We use MRPROPER_FILES and CLEAN_FILES now
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts
index 6c309b97587d..e8d31279b7a3 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts
@@ -46,7 +46,8 @@
eee-broken-100tx;
qca,clk-out-frequency = <125000000>;
qca,clk-out-strength = <AR803X_STRENGTH_FULL>;
- vddio-supply = <&vddh>;
+ qca,keep-pll-enabled;
+ vddio-supply = <&vddio>;
vddio: vddio-regulator {
regulator-name = "VDDIO";
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts
index df212ed5bb94..e65d1c477e2c 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts
@@ -31,11 +31,10 @@
reg = <0x4>;
eee-broken-1000t;
eee-broken-100tx;
-
qca,clk-out-frequency = <125000000>;
qca,clk-out-strength = <AR803X_STRENGTH_FULL>;
-
- vddio-supply = <&vddh>;
+ qca,keep-pll-enabled;
+ vddio-supply = <&vddio>;
vddio: vddio-regulator {
regulator-name = "VDDIO";
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index eca06a0c3cf8..a30249ebffa8 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -197,8 +197,8 @@
ddr: memory-controller@1080000 {
compatible = "fsl,qoriq-memory-controller";
reg = <0x0 0x1080000 0x0 0x1000>;
- interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>;
- big-endian;
+ interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
+ little-endian;
};
dcfg: syscon@1e00000 {
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts
index 631e01c1b9fd..be1e7d6f0ecb 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts
@@ -88,11 +88,11 @@
pinctrl-0 = <&pinctrl_codec2>;
reg = <0x18>;
#sound-dai-cells = <0>;
- HPVDD-supply = <&reg_3p3v>;
- SPRVDD-supply = <&reg_3p3v>;
- SPLVDD-supply = <&reg_3p3v>;
- AVDD-supply = <&reg_3p3v>;
- IOVDD-supply = <&reg_3p3v>;
+ HPVDD-supply = <&reg_gen_3p3>;
+ SPRVDD-supply = <&reg_gen_3p3>;
+ SPLVDD-supply = <&reg_gen_3p3>;
+ AVDD-supply = <&reg_gen_3p3>;
+ IOVDD-supply = <&reg_gen_3p3>;
DVDD-supply = <&vgen4_reg>;
reset-gpios = <&gpio3 4 GPIO_ACTIVE_HIGH>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
index 4dc8383478ee..a08a568c31d9 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
@@ -45,8 +45,8 @@
reg_12p0_main: regulator-12p0-main {
compatible = "regulator-fixed";
regulator-name = "12V_MAIN";
- regulator-min-microvolt = <5000000>;
- regulator-max-microvolt = <5000000>;
+ regulator-min-microvolt = <12000000>;
+ regulator-max-microvolt = <12000000>;
regulator-always-on;
};
@@ -77,15 +77,6 @@
regulator-always-on;
};
- reg_3p3v: regulator-3p3v {
- compatible = "regulator-fixed";
- vin-supply = <&reg_3p3_main>;
- regulator-name = "GEN_3V3";
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3300000>;
- regulator-always-on;
- };
-
reg_usdhc2_vmmc: regulator-vsd-3v3 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_reg_usdhc2>;
@@ -415,11 +406,11 @@
pinctrl-0 = <&pinctrl_codec1>;
reg = <0x18>;
#sound-dai-cells = <0>;
- HPVDD-supply = <&reg_3p3v>;
- SPRVDD-supply = <&reg_3p3v>;
- SPLVDD-supply = <&reg_3p3v>;
- AVDD-supply = <&reg_3p3v>;
- IOVDD-supply = <&reg_3p3v>;
+ HPVDD-supply = <&reg_gen_3p3>;
+ SPRVDD-supply = <&reg_gen_3p3>;
+ SPLVDD-supply = <&reg_gen_3p3>;
+ AVDD-supply = <&reg_gen_3p3>;
+ IOVDD-supply = <&reg_gen_3p3>;
DVDD-supply = <&vgen4_reg>;
reset-gpios = <&gpio3 3 GPIO_ACTIVE_LOW>;
};
diff --git a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi
index c62ddb9b2ba5..3771144a2ce4 100644
--- a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi
+++ b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi
@@ -14,7 +14,6 @@
ports {
port@0 {
- reg = <0>;
csi20_in: endpoint {
clock-lanes = <0>;
data-lanes = <1 2>;
@@ -29,7 +28,6 @@
ports {
port@0 {
- reg = <0>;
csi40_in: endpoint {
clock-lanes = <0>;
data-lanes = <1 2>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
index d64fb8b1b86c..46f8dbf68904 100644
--- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
@@ -2573,6 +2573,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -2628,6 +2632,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
index 5b05474dc272..d16a4be5ef77 100644
--- a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
@@ -2419,6 +2419,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -2474,6 +2478,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts
index e7b4a929bb17..2e3d1981cac4 100644
--- a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts
+++ b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts
@@ -33,7 +33,7 @@
status = "okay";
ports {
- port {
+ port@0 {
csi40_in: endpoint {
clock-lanes = <0>;
data-lanes = <1 2>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
index 20fa3caa050e..1aef34447abd 100644
--- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
@@ -1823,6 +1823,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
index 8eb006cbd9af..1f51237ab0a6 100644
--- a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
@@ -2709,6 +2709,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -2764,6 +2768,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77950.dtsi b/arch/arm64/boot/dts/renesas/r8a77950.dtsi
index 25b87da32eeb..b643d3079db1 100644
--- a/arch/arm64/boot/dts/renesas/r8a77950.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77950.dtsi
@@ -192,6 +192,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77951.dtsi b/arch/arm64/boot/dts/renesas/r8a77951.dtsi
index 5c39152e4570..85d66d15465a 100644
--- a/arch/arm64/boot/dts/renesas/r8a77951.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77951.dtsi
@@ -3097,6 +3097,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -3152,6 +3156,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -3191,6 +3199,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77960.dtsi b/arch/arm64/boot/dts/renesas/r8a77960.dtsi
index 25d947a81b29..12476e354d74 100644
--- a/arch/arm64/boot/dts/renesas/r8a77960.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77960.dtsi
@@ -2761,6 +2761,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -2816,6 +2820,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77961.dtsi b/arch/arm64/boot/dts/renesas/r8a77961.dtsi
index ab081f14af9a..d9804768425a 100644
--- a/arch/arm64/boot/dts/renesas/r8a77961.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77961.dtsi
@@ -2499,6 +2499,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -2554,6 +2558,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
index 657b20d3533b..dcb9df861d74 100644
--- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
@@ -2575,6 +2575,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -2630,6 +2634,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77970.dtsi b/arch/arm64/boot/dts/renesas/r8a77970.dtsi
index 5a5d5649332a..e8f6352c3665 100644
--- a/arch/arm64/boot/dts/renesas/r8a77970.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77970.dtsi
@@ -1106,6 +1106,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77980.dtsi b/arch/arm64/boot/dts/renesas/r8a77980.dtsi
index 1ffa4a995a7a..7b51d464de0e 100644
--- a/arch/arm64/boot/dts/renesas/r8a77980.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77980.dtsi
@@ -1439,6 +1439,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
@@ -1478,6 +1482,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts
index 295d34f1d216..4715e4a4abe0 100644
--- a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts
+++ b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts
@@ -298,8 +298,6 @@
ports {
port@0 {
- reg = <0>;
-
csi40_in: endpoint {
clock-lanes = <0>;
data-lanes = <1 2>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
index 5010f23fafcc..0eaea58f4210 100644
--- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
@@ -1970,6 +1970,10 @@
#address-cells = <1>;
#size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ };
+
port@1 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
index e18747df219f..453ffcef24fa 100644
--- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi
+++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
@@ -349,7 +349,6 @@
ports {
port@0 {
- reg = <0>;
csi20_in: endpoint {
clock-lanes = <0>;
data-lanes = <1>;
@@ -364,8 +363,6 @@
ports {
port@0 {
- reg = <0>;
-
csi40_in: endpoint {
clock-lanes = <0>;
data-lanes = <1 2 3 4>;
diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
index b2bcbf23eefd..ca59d1f711f8 100644
--- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
@@ -42,12 +42,12 @@
};
};
- dmss: dmss {
+ dmss: bus@48000000 {
compatible = "simple-mfd";
#address-cells = <2>;
#size-cells = <2>;
dma-ranges;
- ranges;
+ ranges = <0x00 0x48000000 0x00 0x48000000 0x00 0x06400000>;
ti,sci-dev-id = <25>;
@@ -134,7 +134,7 @@
};
};
- dmsc: dmsc@44043000 {
+ dmsc: system-controller@44043000 {
compatible = "ti,k2g-sci";
ti,host-id = <12>;
mbox-names = "rx", "tx";
@@ -148,7 +148,7 @@
#power-domain-cells = <2>;
};
- k3_clks: clocks {
+ k3_clks: clock-controller {
compatible = "ti,k2g-sci-clk";
#clock-cells = <2>;
};
@@ -373,8 +373,9 @@
clocks = <&k3_clks 145 0>;
};
- main_gpio_intr: interrupt-controller0 {
+ main_gpio_intr: interrupt-controller@a00000 {
compatible = "ti,sci-intr";
+ reg = <0x00 0x00a00000 0x00 0x800>;
ti,intr-trigger-type = <1>;
interrupt-controller;
interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi
index 99e94dee1bd4..deb19ae5e168 100644
--- a/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi
@@ -74,8 +74,9 @@
clocks = <&k3_clks 148 0>;
};
- mcu_gpio_intr: interrupt-controller1 {
+ mcu_gpio_intr: interrupt-controller@4210000 {
compatible = "ti,sci-intr";
+ reg = <0x00 0x04210000 0x00 0x200>;
ti,intr-trigger-type = <1>;
interrupt-controller;
interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
index cb340d1b401f..6cd3131eb9ff 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
@@ -433,8 +433,9 @@
#phy-cells = <0>;
};
- intr_main_gpio: interrupt-controller0 {
+ intr_main_gpio: interrupt-controller@a00000 {
compatible = "ti,sci-intr";
+ reg = <0x0 0x00a00000 0x0 0x400>;
ti,intr-trigger-type = <1>;
interrupt-controller;
interrupt-parent = <&gic500>;
@@ -444,18 +445,19 @@
ti,interrupt-ranges = <0 392 32>;
};
- main-navss {
+ main_navss: bus@30800000 {
compatible = "simple-mfd";
#address-cells = <2>;
#size-cells = <2>;
- ranges;
+ ranges = <0x0 0x30800000 0x0 0x30800000 0x0 0xbc00000>;
dma-coherent;
dma-ranges;
ti,sci-dev-id = <118>;
- intr_main_navss: interrupt-controller1 {
+ intr_main_navss: interrupt-controller@310e0000 {
compatible = "ti,sci-intr";
+ reg = <0x0 0x310e0000 0x0 0x2000>;
ti,intr-trigger-type = <4>;
interrupt-controller;
interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
index 0388c02c2203..f5b8ef2f5f77 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
@@ -116,11 +116,11 @@
};
};
- mcu-navss {
+ mcu_navss: bus@28380000 {
compatible = "simple-mfd";
#address-cells = <2>;
#size-cells = <2>;
- ranges;
+ ranges = <0x00 0x28380000 0x00 0x28380000 0x00 0x03880000>;
dma-coherent;
dma-ranges;
diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
index ed42f13e7663..7cb864b4d74a 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
@@ -6,24 +6,24 @@
*/
&cbass_wakeup {
- dmsc: dmsc {
+ dmsc: system-controller@44083000 {
compatible = "ti,am654-sci";
ti,host-id = <12>;
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
mbox-names = "rx", "tx";
mboxes= <&secure_proxy_main 11>,
<&secure_proxy_main 13>;
+ reg-names = "debug_messages";
+ reg = <0x44083000 0x1000>;
+
k3_pds: power-controller {
compatible = "ti,sci-pm-domain";
#power-domain-cells = <2>;
};
- k3_clks: clocks {
+ k3_clks: clock-controller {
compatible = "ti,k2g-sci-clk";
#clock-cells = <2>;
};
@@ -69,8 +69,9 @@
power-domains = <&k3_pds 115 TI_SCI_PD_EXCLUSIVE>;
};
- intr_wkup_gpio: interrupt-controller2 {
+ intr_wkup_gpio: interrupt-controller@42200000 {
compatible = "ti,sci-intr";
+ reg = <0x42200000 0x200>;
ti,intr-trigger-type = <1>;
interrupt-controller;
interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
index 9e87fb313a54..eddb2ffb93ca 100644
--- a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
+++ b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
@@ -85,12 +85,6 @@
gpios = <&wkup_gpio0 27 GPIO_ACTIVE_LOW>;
};
};
-
- clk_ov5640_fixed: clock {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <24000000>;
- };
};
&wkup_pmx0 {
@@ -287,23 +281,6 @@
pinctrl-names = "default";
pinctrl-0 = <&main_i2c1_pins_default>;
clock-frequency = <400000>;
-
- ov5640: camera@3c {
- compatible = "ovti,ov5640";
- reg = <0x3c>;
-
- clocks = <&clk_ov5640_fixed>;
- clock-names = "xclk";
-
- port {
- csi2_cam0: endpoint {
- remote-endpoint = <&csi2_phy0>;
- clock-lanes = <0>;
- data-lanes = <1 2>;
- };
- };
- };
-
};
&main_i2c2 {
@@ -496,14 +473,6 @@
};
};
-&csi2_0 {
- csi2_phy0: endpoint {
- remote-endpoint = <&csi2_cam0>;
- clock-lanes = <0>;
- data-lanes = <1 2>;
- };
-};
-
&mcu_cpsw {
pinctrl-names = "default";
pinctrl-0 = <&mcu_cpsw_pins_default &mcu_mdio_pins_default>;
diff --git a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
index f86c493a44f1..19fea8adbcff 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
@@ -68,8 +68,9 @@
};
};
- main_gpio_intr: interrupt-controller0 {
+ main_gpio_intr: interrupt-controller@a00000 {
compatible = "ti,sci-intr";
+ reg = <0x00 0x00a00000 0x00 0x800>;
ti,intr-trigger-type = <1>;
interrupt-controller;
interrupt-parent = <&gic500>;
@@ -85,9 +86,12 @@
#size-cells = <2>;
ranges = <0x00 0x30000000 0x00 0x30000000 0x00 0x0c400000>;
ti,sci-dev-id = <199>;
+ dma-coherent;
+ dma-ranges;
- main_navss_intr: interrupt-controller1 {
+ main_navss_intr: interrupt-controller@310e0000 {
compatible = "ti,sci-intr";
+ reg = <0x00 0x310e0000 0x00 0x4000>;
ti,intr-trigger-type = <4>;
interrupt-controller;
interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
index 5e74e43822c3..5663fe3ea466 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
@@ -6,7 +6,7 @@
*/
&cbass_mcu_wakeup {
- dmsc: dmsc@44083000 {
+ dmsc: system-controller@44083000 {
compatible = "ti,k2g-sci";
ti,host-id = <12>;
@@ -23,7 +23,7 @@
#power-domain-cells = <2>;
};
- k3_clks: clocks {
+ k3_clks: clock-controller {
compatible = "ti,k2g-sci-clk";
#clock-cells = <2>;
};
@@ -96,8 +96,9 @@
clock-names = "fclk";
};
- wkup_gpio_intr: interrupt-controller2 {
+ wkup_gpio_intr: interrupt-controller@42200000 {
compatible = "ti,sci-intr";
+ reg = <0x00 0x42200000 0x00 0x400>;
ti,intr-trigger-type = <1>;
interrupt-controller;
interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
index c2aa45a3ac79..3bcafe4c1742 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
@@ -76,8 +76,9 @@
};
};
- main_gpio_intr: interrupt-controller0 {
+ main_gpio_intr: interrupt-controller@a00000 {
compatible = "ti,sci-intr";
+ reg = <0x00 0x00a00000 0x00 0x800>;
ti,intr-trigger-type = <1>;
interrupt-controller;
interrupt-parent = <&gic500>;
@@ -87,18 +88,19 @@
ti,interrupt-ranges = <8 392 56>;
};
- main-navss {
+ main_navss: bus@30000000 {
compatible = "simple-mfd";
#address-cells = <2>;
#size-cells = <2>;
- ranges;
+ ranges = <0x00 0x30000000 0x00 0x30000000 0x00 0x0c400000>;
dma-coherent;
dma-ranges;
ti,sci-dev-id = <199>;
- main_navss_intr: interrupt-controller1 {
+ main_navss_intr: interrupt-controller@310e0000 {
compatible = "ti,sci-intr";
+ reg = <0x0 0x310e0000 0x0 0x4000>;
ti,intr-trigger-type = <4>;
interrupt-controller;
interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
index d56e3475aee7..5e825e4d0306 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
@@ -6,7 +6,7 @@
*/
&cbass_mcu_wakeup {
- dmsc: dmsc@44083000 {
+ dmsc: system-controller@44083000 {
compatible = "ti,k2g-sci";
ti,host-id = <12>;
@@ -23,7 +23,7 @@
#power-domain-cells = <2>;
};
- k3_clks: clocks {
+ k3_clks: clock-controller {
compatible = "ti,k2g-sci-clk";
#clock-cells = <2>;
};
@@ -96,8 +96,9 @@
clock-names = "fclk";
};
- wkup_gpio_intr: interrupt-controller2 {
+ wkup_gpio_intr: interrupt-controller@42200000 {
compatible = "ti,sci-intr";
+ reg = <0x00 0x42200000 0x00 0x400>;
ti,intr-trigger-type = <1>;
interrupt-controller;
interrupt-parent = <&gic500>;
@@ -249,11 +250,11 @@
};
};
- mcu-navss {
+ mcu_navss: bus@28380000 {
compatible = "simple-mfd";
#address-cells = <2>;
#size-cells = <2>;
- ranges;
+ ranges = <0x00 0x28380000 0x00 0x28380000 0x00 0x03880000>;
dma-coherent;
dma-ranges;
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 07ac208edc89..26889dbfe904 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -5,3 +5,5 @@ generic-y += qrwlock.h
generic-y += qspinlock.h
generic-y += set_memory.h
generic-y += user.h
+
+generated-y += cpucaps.h
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 2175ec0004ed..451e11e5fd23 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -74,7 +74,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx,
* This insanity brought to you by speculative system register reads,
* out-of-order memory accesses, sequence locks and Thomas Gleixner.
*
- * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html
+ * https://lore.kernel.org/r/alpine.DEB.2.21.1902081950260.1662@nanos.tec.linutronix.de/
*/
#define arch_counter_enforce_ordering(val) do { \
u64 tmp, _val = (val); \
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
deleted file mode 100644
index b0c5eda0498f..000000000000
--- a/arch/arm64/include/asm/cpucaps.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * arch/arm64/include/asm/cpucaps.h
- *
- * Copyright (C) 2016 ARM Ltd.
- */
-#ifndef __ASM_CPUCAPS_H
-#define __ASM_CPUCAPS_H
-
-#define ARM64_WORKAROUND_CLEAN_CACHE 0
-#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1
-#define ARM64_WORKAROUND_845719 2
-#define ARM64_HAS_SYSREG_GIC_CPUIF 3
-#define ARM64_HAS_PAN 4
-#define ARM64_HAS_LSE_ATOMICS 5
-#define ARM64_WORKAROUND_CAVIUM_23154 6
-#define ARM64_WORKAROUND_834220 7
-#define ARM64_HAS_NO_HW_PREFETCH 8
-#define ARM64_HAS_VIRT_HOST_EXTN 11
-#define ARM64_WORKAROUND_CAVIUM_27456 12
-#define ARM64_HAS_32BIT_EL0 13
-#define ARM64_SPECTRE_V3A 14
-#define ARM64_HAS_CNP 15
-#define ARM64_HAS_NO_FPSIMD 16
-#define ARM64_WORKAROUND_REPEAT_TLBI 17
-#define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18
-#define ARM64_WORKAROUND_858921 19
-#define ARM64_WORKAROUND_CAVIUM_30115 20
-#define ARM64_HAS_DCPOP 21
-#define ARM64_SVE 22
-#define ARM64_UNMAP_KERNEL_AT_EL0 23
-#define ARM64_SPECTRE_V2 24
-#define ARM64_HAS_RAS_EXTN 25
-#define ARM64_WORKAROUND_843419 26
-#define ARM64_HAS_CACHE_IDC 27
-#define ARM64_HAS_CACHE_DIC 28
-#define ARM64_HW_DBM 29
-#define ARM64_SPECTRE_V4 30
-#define ARM64_MISMATCHED_CACHE_TYPE 31
-#define ARM64_HAS_STAGE2_FWB 32
-#define ARM64_HAS_CRC32 33
-#define ARM64_SSBS 34
-#define ARM64_WORKAROUND_1418040 35
-#define ARM64_HAS_SB 36
-#define ARM64_WORKAROUND_SPECULATIVE_AT 37
-#define ARM64_HAS_ADDRESS_AUTH_ARCH 38
-#define ARM64_HAS_ADDRESS_AUTH_IMP_DEF 39
-#define ARM64_HAS_GENERIC_AUTH_ARCH 40
-#define ARM64_HAS_GENERIC_AUTH_IMP_DEF 41
-#define ARM64_HAS_IRQ_PRIO_MASKING 42
-#define ARM64_HAS_DCPODP 43
-#define ARM64_WORKAROUND_1463225 44
-#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45
-#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46
-#define ARM64_WORKAROUND_1542419 47
-#define ARM64_HAS_E0PD 48
-#define ARM64_HAS_RNG 49
-#define ARM64_HAS_AMU_EXTN 50
-#define ARM64_HAS_ADDRESS_AUTH 51
-#define ARM64_HAS_GENERIC_AUTH 52
-#define ARM64_HAS_32BIT_EL1 53
-#define ARM64_BTI 54
-#define ARM64_HAS_ARMv8_4_TTL 55
-#define ARM64_HAS_TLB_RANGE 56
-#define ARM64_MTE 57
-#define ARM64_WORKAROUND_1508412 58
-#define ARM64_HAS_LDAPR 59
-#define ARM64_KVM_PROTECTED_MODE 60
-#define ARM64_WORKAROUND_NVIDIA_CARMEL_CNP 61
-#define ARM64_HAS_EPAN 62
-
-#define ARM64_NCAPS 63
-
-#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index cf8df032b9c3..5e9b33cbac51 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -63,6 +63,7 @@
#define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18
#define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19
#define __KVM_HOST_SMCCC_FUNC___pkvm_mark_hyp 20
+#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc 21
#ifndef __ASSEMBLY__
@@ -201,6 +202,8 @@ extern void __kvm_timer_set_cntvoff(u64 cntvoff);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu);
+
extern u64 __vgic_v3_get_gic_config(void);
extern u64 __vgic_v3_read_vmcr(void);
extern void __vgic_v3_write_vmcr(u32 vmcr);
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index f612c090f2e4..01b9857757f2 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -463,4 +463,9 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
}
+static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
+{
+ return test_bit(feature, vcpu->arch.features);
+}
+
#endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 7859749d6628..5dab69d2c22b 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -893,8 +893,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
__SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2)
#define __NR_mount_setattr 442
__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-#define __NR_quotactl_path 443
-__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+/* 443 is reserved for quotactl_path */
#define __NR_landlock_create_ruleset 444
__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
#define __NR_landlock_add_rule 445
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 1cb39c0803a4..e720148232a0 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -720,11 +720,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
return ret;
}
- if (run->immediate_exit)
- return -EINTR;
-
vcpu_load(vcpu);
+ if (run->immediate_exit) {
+ ret = -EINTR;
+ goto out;
+ }
+
kvm_sigset_activate(vcpu);
ret = 1;
@@ -897,6 +899,18 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_sigset_deactivate(vcpu);
+out:
+ /*
+ * In the unlikely event that we are returning to userspace
+ * with pending exceptions or PC adjustment, commit these
+ * adjustments in order to give userspace a consistent view of
+ * the vcpu state. Note that this relies on __kvm_adjust_pc()
+ * being preempt-safe on VHE.
+ */
+ if (unlikely(vcpu->arch.flags & (KVM_ARM64_PENDING_EXCEPTION |
+ KVM_ARM64_INCREMENT_PC)))
+ kvm_call_hyp(__kvm_adjust_pc, vcpu);
+
vcpu_put(vcpu);
return ret;
}
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 73629094f903..11541b94b328 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -296,7 +296,7 @@ static void enter_exception32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
*vcpu_pc(vcpu) = vect_offset;
}
-void kvm_inject_exception(struct kvm_vcpu *vcpu)
+static void kvm_inject_exception(struct kvm_vcpu *vcpu)
{
if (vcpu_el1_is_32bit(vcpu)) {
switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) {
@@ -329,3 +329,19 @@ void kvm_inject_exception(struct kvm_vcpu *vcpu)
}
}
}
+
+/*
+ * Adjust the guest PC (and potentially exception state) depending on
+ * flags provided by the emulation code.
+ */
+void __kvm_adjust_pc(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) {
+ kvm_inject_exception(vcpu);
+ vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
+ KVM_ARM64_EXCEPT_MASK);
+ } else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) {
+ kvm_skip_instr(vcpu);
+ vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC;
+ }
+}
diff --git a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h
index 61716359035d..4fdfeabefeb4 100644
--- a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h
+++ b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h
@@ -13,8 +13,6 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
-void kvm_inject_exception(struct kvm_vcpu *vcpu);
-
static inline void kvm_skip_instr(struct kvm_vcpu *vcpu)
{
if (vcpu_mode_is_32bit(vcpu)) {
@@ -44,22 +42,6 @@ static inline void __kvm_skip_instr(struct kvm_vcpu *vcpu)
}
/*
- * Adjust the guest PC on entry, depending on flags provided by EL1
- * for the purpose of emulation (MMIO, sysreg) or exception injection.
- */
-static inline void __adjust_pc(struct kvm_vcpu *vcpu)
-{
- if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) {
- kvm_inject_exception(vcpu);
- vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
- KVM_ARM64_EXCEPT_MASK);
- } else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) {
- kvm_skip_instr(vcpu);
- vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC;
- }
-}
-
-/*
* Skip an instruction while host sysregs are live.
* Assumes host is always 64-bit.
*/
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index f36420a80474..1632f001f4ed 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -28,6 +28,13 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __kvm_vcpu_run(kern_hyp_va(vcpu));
}
+static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
+
+ __kvm_adjust_pc(kern_hyp_va(vcpu));
+}
+
static void handle___kvm_flush_vm_context(struct kvm_cpu_context *host_ctxt)
{
__kvm_flush_vm_context();
@@ -170,6 +177,7 @@ typedef void (*hcall_t)(struct kvm_cpu_context *);
static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_vcpu_run),
+ HANDLE_FUNC(__kvm_adjust_pc),
HANDLE_FUNC(__kvm_flush_vm_context),
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
HANDLE_FUNC(__kvm_tlb_flush_vmid),
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index e342f7f4f4fb..4b60c0056c04 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -23,8 +23,8 @@
extern unsigned long hyp_nr_cpus;
struct host_kvm host_kvm;
-struct hyp_pool host_s2_mem;
-struct hyp_pool host_s2_dev;
+static struct hyp_pool host_s2_mem;
+static struct hyp_pool host_s2_dev;
/*
* Copies of the host's CPU features registers holding sanitized values.
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 7488f53b0aa2..a3d3a275344e 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -17,7 +17,6 @@
#include <nvhe/trap_handler.h>
struct hyp_pool hpool;
-struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
unsigned long hyp_nr_cpus;
#define hyp_percpu_size ((unsigned long)__per_cpu_end - \
@@ -27,6 +26,7 @@ static void *vmemmap_base;
static void *hyp_pgt_base;
static void *host_s2_mem_pgt_base;
static void *host_s2_dev_pgt_base;
+static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
static int divide_memory_pool(void *virt, unsigned long size)
{
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index e9f6ea704d07..f7af9688c1f7 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -4,7 +4,6 @@
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
-#include <hyp/adjust_pc.h>
#include <hyp/switch.h>
#include <hyp/sysreg-sr.h>
@@ -201,7 +200,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
*/
__debug_save_host_buffers_nvhe(vcpu);
- __adjust_pc(vcpu);
+ __kvm_adjust_pc(vcpu);
/*
* We must restore the 32-bit state before the sysregs, thanks
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 7b8f7db5c1ed..b3229924d243 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -4,7 +4,6 @@
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
-#include <hyp/adjust_pc.h>
#include <hyp/switch.h>
#include <linux/arm-smccc.h>
@@ -132,7 +131,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
__load_guest_stage2(vcpu->arch.hw_mmu);
__activate_traps(vcpu);
- __adjust_pc(vcpu);
+ __kvm_adjust_pc(vcpu);
sysreg_restore_guest_state_vhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index c5d1f3c87dbd..c10207fed2f3 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1156,13 +1156,13 @@ out_unlock:
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
{
if (!kvm->arch.mmu.pgt)
- return 0;
+ return false;
__unmap_stage2_range(&kvm->arch.mmu, range->start << PAGE_SHIFT,
(range->end - range->start) << PAGE_SHIFT,
range->may_block);
- return 0;
+ return false;
}
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
@@ -1170,7 +1170,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
kvm_pfn_t pfn = pte_pfn(range->pte);
if (!kvm->arch.mmu.pgt)
- return 0;
+ return false;
WARN_ON(range->end - range->start != 1);
@@ -1190,7 +1190,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
PAGE_SIZE, __pfn_to_phys(pfn),
KVM_PGTABLE_PROT_R, NULL);
- return 0;
+ return false;
}
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
@@ -1200,7 +1200,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
pte_t pte;
if (!kvm->arch.mmu.pgt)
- return 0;
+ return false;
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
@@ -1213,7 +1213,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
if (!kvm->arch.mmu.pgt)
- return 0;
+ return false;
return kvm_pgtable_stage2_is_young(kvm->arch.mmu.pgt,
range->start << PAGE_SHIFT);
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 956cdc240148..d37ebee085cf 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -166,6 +166,25 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
return 0;
}
+static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu *tmp;
+ bool is32bit;
+ int i;
+
+ is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
+ if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
+ return false;
+
+ /* Check that the vcpus are either all 32bit or all 64bit */
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit)
+ return false;
+ }
+
+ return true;
+}
+
/**
* kvm_reset_vcpu - sets core registers and sys_regs to reset value
* @vcpu: The VCPU pointer
@@ -217,13 +236,14 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
}
}
+ if (!vcpu_allowed_register_width(vcpu)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
switch (vcpu->arch.target) {
default:
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
- if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) {
- ret = -EINVAL;
- goto out;
- }
pstate = VCPU_RESET_PSTATE_SVC;
} else {
pstate = VCPU_RESET_PSTATE_EL1;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 76ea2800c33e..1a7968ad078c 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -399,14 +399,14 @@ static bool trap_bvr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
if (p->is_write)
reg_to_dbg(vcpu, p, rd, dbg_reg);
else
dbg_to_reg(vcpu, p, rd, dbg_reg);
- trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+ trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
return true;
}
@@ -414,7 +414,7 @@ static bool trap_bvr(struct kvm_vcpu *vcpu,
static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
@@ -424,7 +424,7 @@ static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
@@ -434,21 +434,21 @@ static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
static void reset_bvr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
- vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val;
+ vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = rd->val;
}
static bool trap_bcr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
+ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
if (p->is_write)
reg_to_dbg(vcpu, p, rd, dbg_reg);
else
dbg_to_reg(vcpu, p, rd, dbg_reg);
- trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+ trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
return true;
}
@@ -456,7 +456,7 @@ static bool trap_bcr(struct kvm_vcpu *vcpu,
static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
@@ -467,7 +467,7 @@ static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
@@ -477,22 +477,22 @@ static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
static void reset_bcr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
- vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val;
+ vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = rd->val;
}
static bool trap_wvr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
+ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
if (p->is_write)
reg_to_dbg(vcpu, p, rd, dbg_reg);
else
dbg_to_reg(vcpu, p, rd, dbg_reg);
- trace_trap_reg(__func__, rd->reg, p->is_write,
- vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]);
+ trace_trap_reg(__func__, rd->CRm, p->is_write,
+ vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]);
return true;
}
@@ -500,7 +500,7 @@ static bool trap_wvr(struct kvm_vcpu *vcpu,
static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
@@ -510,7 +510,7 @@ static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
@@ -520,21 +520,21 @@ static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
static void reset_wvr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
- vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val;
+ vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = rd->val;
}
static bool trap_wcr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
+ u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
if (p->is_write)
reg_to_dbg(vcpu, p, rd, dbg_reg);
else
dbg_to_reg(vcpu, p, rd, dbg_reg);
- trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+ trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
return true;
}
@@ -542,7 +542,7 @@ static bool trap_wcr(struct kvm_vcpu *vcpu,
static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
@@ -552,7 +552,7 @@ static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
+ __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
@@ -562,7 +562,7 @@ static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
static void reset_wcr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
- vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val;
+ vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = rd->val;
}
static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index ac485163a4a7..6d44c028d1c9 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -55,8 +55,10 @@ void __sync_icache_dcache(pte_t pte)
{
struct page *page = pte_page(pte);
- if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+ if (!test_bit(PG_dcache_clean, &page->flags)) {
sync_icache_aliases(page_address(page), page_size(page));
+ set_bit(PG_dcache_clean, &page->flags);
+ }
}
EXPORT_SYMBOL_GPL(__sync_icache_dcache);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 16a2b2b1c54d..e55409caaee3 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -43,6 +43,7 @@
#include <linux/sizes.h>
#include <asm/tlb.h>
#include <asm/alternative.h>
+#include <asm/xen/swiotlb-xen.h>
/*
* We need to be able to catch inadvertent references to memstart_addr
@@ -482,7 +483,7 @@ void __init mem_init(void)
if (swiotlb_force == SWIOTLB_FORCE ||
max_pfn > PFN_DOWN(arm64_dma_phys_limit))
swiotlb_init(1);
- else
+ else if (!xen_swiotlb_detect())
swiotlb_force = SWIOTLB_NO_FORCE;
set_max_mapnr(max_pfn - PHYS_PFN_OFFSET);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 6dd9369e3ea0..89b66ef43a0f 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -515,7 +515,8 @@ static void __init map_mem(pgd_t *pgdp)
*/
BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
- if (rodata_full || crash_mem_map || debug_pagealloc_enabled())
+ if (rodata_full || crash_mem_map || debug_pagealloc_enabled() ||
+ IS_ENABLED(CONFIG_KFENCE))
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 0a48191534ff..97d7bcd8d4f2 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -447,6 +447,18 @@ SYM_FUNC_START(__cpu_setup)
mov x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK)
msr_s SYS_GCR_EL1, x10
+ /*
+ * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then
+ * RGSR_EL1.SEED must be non-zero for IRG to produce
+ * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we
+ * must initialize it.
+ */
+ mrs x10, CNTVCT_EL0
+ ands x10, x10, #SYS_RGSR_EL1_SEED_MASK
+ csinc x10, x10, xzr, ne
+ lsl x10, x10, #SYS_RGSR_EL1_SEED_SHIFT
+ msr_s SYS_RGSR_EL1, x10
+
/* clear any pending tag check faults in TFSR*_EL1 */
msr_s SYS_TFSR_EL1, xzr
msr_s SYS_TFSRE0_EL1, xzr
diff --git a/arch/arm64/tools/Makefile b/arch/arm64/tools/Makefile
new file mode 100644
index 000000000000..932b4fe5c768
--- /dev/null
+++ b/arch/arm64/tools/Makefile
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+
+gen := arch/$(ARCH)/include/generated
+kapi := $(gen)/asm
+
+kapi-hdrs-y := $(kapi)/cpucaps.h
+
+targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y))
+
+PHONY += kapi
+
+kapi: $(kapi-hdrs-y) $(gen-y)
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
+
+quiet_cmd_gen_cpucaps = GEN $@
+ cmd_gen_cpucaps = mkdir -p $(dir $@) && \
+ $(AWK) -f $(filter-out $(PHONY),$^) > $@
+
+$(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE
+ $(call if_changed,gen_cpucaps)
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
new file mode 100644
index 000000000000..21fbdda7086e
--- /dev/null
+++ b/arch/arm64/tools/cpucaps
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Internal CPU capabilities constants, keep this list sorted
+
+BTI
+HAS_32BIT_EL0
+HAS_32BIT_EL1
+HAS_ADDRESS_AUTH
+HAS_ADDRESS_AUTH_ARCH
+HAS_ADDRESS_AUTH_IMP_DEF
+HAS_AMU_EXTN
+HAS_ARMv8_4_TTL
+HAS_CACHE_DIC
+HAS_CACHE_IDC
+HAS_CNP
+HAS_CRC32
+HAS_DCPODP
+HAS_DCPOP
+HAS_E0PD
+HAS_EPAN
+HAS_GENERIC_AUTH
+HAS_GENERIC_AUTH_ARCH
+HAS_GENERIC_AUTH_IMP_DEF
+HAS_IRQ_PRIO_MASKING
+HAS_LDAPR
+HAS_LSE_ATOMICS
+HAS_NO_FPSIMD
+HAS_NO_HW_PREFETCH
+HAS_PAN
+HAS_RAS_EXTN
+HAS_RNG
+HAS_SB
+HAS_STAGE2_FWB
+HAS_SYSREG_GIC_CPUIF
+HAS_TLB_RANGE
+HAS_VIRT_HOST_EXTN
+HW_DBM
+KVM_PROTECTED_MODE
+MISMATCHED_CACHE_TYPE
+MTE
+SPECTRE_V2
+SPECTRE_V3A
+SPECTRE_V4
+SSBS
+SVE
+UNMAP_KERNEL_AT_EL0
+WORKAROUND_834220
+WORKAROUND_843419
+WORKAROUND_845719
+WORKAROUND_858921
+WORKAROUND_1418040
+WORKAROUND_1463225
+WORKAROUND_1508412
+WORKAROUND_1542419
+WORKAROUND_CAVIUM_23154
+WORKAROUND_CAVIUM_27456
+WORKAROUND_CAVIUM_30115
+WORKAROUND_CAVIUM_TX2_219_PRFM
+WORKAROUND_CAVIUM_TX2_219_TVM
+WORKAROUND_CLEAN_CACHE
+WORKAROUND_DEVICE_LOAD_ACQUIRE
+WORKAROUND_NVIDIA_CARMEL_CNP
+WORKAROUND_QCOM_FALKOR_E1003
+WORKAROUND_REPEAT_TLBI
+WORKAROUND_SPECULATIVE_AT
diff --git a/arch/arm64/tools/gen-cpucaps.awk b/arch/arm64/tools/gen-cpucaps.awk
new file mode 100755
index 000000000000..00c9e72a200a
--- /dev/null
+++ b/arch/arm64/tools/gen-cpucaps.awk
@@ -0,0 +1,40 @@
+#!/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
+# gen-cpucaps.awk: arm64 cpucaps header generator
+#
+# Usage: awk -f gen-cpucaps.awk cpucaps.txt
+
+# Log an error and terminate
+function fatal(msg) {
+ print "Error at line " NR ": " msg > "/dev/stderr"
+ exit 1
+}
+
+# skip blank lines and comment lines
+/^$/ { next }
+/^#/ { next }
+
+BEGIN {
+ print "#ifndef __ASM_CPUCAPS_H"
+ print "#define __ASM_CPUCAPS_H"
+ print ""
+ print "/* Generated file - do not edit */"
+ cap_num = 0
+ print ""
+}
+
+/^[vA-Z0-9_]+$/ {
+ printf("#define ARM64_%-30s\t%d\n", $0, cap_num++)
+ next
+}
+
+END {
+ printf("#define ARM64_NCAPS\t\t\t\t%d\n", cap_num)
+ print ""
+ print "#endif /* __ASM_CPUCAPS_H */"
+}
+
+# Any lines not handled by previous rules are unexpected
+{
+ fatal("unhandled statement")
+}
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 1ee8e736a48e..bb11fe4c875a 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -363,7 +363,7 @@
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index a4b7ee1df211..8f215e79e70e 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -623,7 +623,8 @@ static inline void siginfo_build_tests(void)
BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x12);
/* _sigfault._perf */
- BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x14);
/* _sigpoll */
BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x0c);
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 0dd019dc2136..79c2d24c89dd 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -442,7 +442,7 @@
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 2ac716984ca2..b11395a20c20 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -448,7 +448,7 @@
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
diff --git a/arch/mips/alchemy/board-xxs1500.c b/arch/mips/alchemy/board-xxs1500.c
index b184baa4e56a..f175bce2987f 100644
--- a/arch/mips/alchemy/board-xxs1500.c
+++ b/arch/mips/alchemy/board-xxs1500.c
@@ -18,6 +18,7 @@
#include <asm/reboot.h>
#include <asm/setup.h>
#include <asm/mach-au1x00/au1000.h>
+#include <asm/mach-au1x00/gpio-au1000.h>
#include <prom.h>
const char *get_system_type(void)
diff --git a/arch/mips/include/asm/mips-boards/launch.h b/arch/mips/include/asm/mips-boards/launch.h
index f93aa5ee2e2e..3481ed4c117b 100644
--- a/arch/mips/include/asm/mips-boards/launch.h
+++ b/arch/mips/include/asm/mips-boards/launch.h
@@ -3,6 +3,9 @@
*
*/
+#ifndef _ASM_MIPS_BOARDS_LAUNCH_H
+#define _ASM_MIPS_BOARDS_LAUNCH_H
+
#ifndef _ASSEMBLER_
struct cpulaunch {
@@ -34,3 +37,5 @@ struct cpulaunch {
/* Polling period in count cycles for secondary CPU's */
#define LAUNCHPERIOD 10000
+
+#endif /* _ASM_MIPS_BOARDS_LAUNCH_H */
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 5e0096657251..9220909526f9 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -381,7 +381,7 @@
440 n32 process_madvise sys_process_madvise
441 n32 epoll_pwait2 compat_sys_epoll_pwait2
442 n32 mount_setattr sys_mount_setattr
-443 n32 quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 n32 landlock_create_ruleset sys_landlock_create_ruleset
445 n32 landlock_add_rule sys_landlock_add_rule
446 n32 landlock_restrict_self sys_landlock_restrict_self
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 9974f5f8e49b..9cd1c34f31b5 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -357,7 +357,7 @@
440 n64 process_madvise sys_process_madvise
441 n64 epoll_pwait2 sys_epoll_pwait2
442 n64 mount_setattr sys_mount_setattr
-443 n64 quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 n64 landlock_create_ruleset sys_landlock_create_ruleset
445 n64 landlock_add_rule sys_landlock_add_rule
446 n64 landlock_restrict_self sys_landlock_restrict_self
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 39d6e71e57b6..d560c467a8c6 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -430,7 +430,7 @@
440 o32 process_madvise sys_process_madvise
441 o32 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
442 o32 mount_setattr sys_mount_setattr
-443 o32 quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 o32 landlock_create_ruleset sys_landlock_create_ruleset
445 o32 landlock_add_rule sys_landlock_add_rule
446 o32 landlock_restrict_self sys_landlock_restrict_self
diff --git a/arch/mips/lib/mips-atomic.c b/arch/mips/lib/mips-atomic.c
index de03838b343b..a9b72eacfc0b 100644
--- a/arch/mips/lib/mips-atomic.c
+++ b/arch/mips/lib/mips-atomic.c
@@ -37,7 +37,7 @@
*/
notrace void arch_local_irq_disable(void)
{
- preempt_disable();
+ preempt_disable_notrace();
__asm__ __volatile__(
" .set push \n"
@@ -53,7 +53,7 @@ notrace void arch_local_irq_disable(void)
: /* no inputs */
: "memory");
- preempt_enable();
+ preempt_enable_notrace();
}
EXPORT_SYMBOL(arch_local_irq_disable);
@@ -61,7 +61,7 @@ notrace unsigned long arch_local_irq_save(void)
{
unsigned long flags;
- preempt_disable();
+ preempt_disable_notrace();
__asm__ __volatile__(
" .set push \n"
@@ -78,7 +78,7 @@ notrace unsigned long arch_local_irq_save(void)
: /* no inputs */
: "memory");
- preempt_enable();
+ preempt_enable_notrace();
return flags;
}
@@ -88,7 +88,7 @@ notrace void arch_local_irq_restore(unsigned long flags)
{
unsigned long __tmp1;
- preempt_disable();
+ preempt_disable_notrace();
__asm__ __volatile__(
" .set push \n"
@@ -106,7 +106,7 @@ notrace void arch_local_irq_restore(unsigned long flags)
: "0" (flags)
: "memory");
- preempt_enable();
+ preempt_enable_notrace();
}
EXPORT_SYMBOL(arch_local_irq_restore);
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index a7bf0c80371c..830ab91e574f 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -158,31 +158,29 @@ unsigned long _page_cachable_default;
EXPORT_SYMBOL(_page_cachable_default);
#define PM(p) __pgprot(_page_cachable_default | (p))
-#define PVA(p) PM(_PAGE_VALID | _PAGE_ACCESSED | (p))
static inline void setup_protection_map(void)
{
protection_map[0] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
- protection_map[1] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC);
- protection_map[2] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
- protection_map[3] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC);
- protection_map[4] = PVA(_PAGE_PRESENT);
- protection_map[5] = PVA(_PAGE_PRESENT);
- protection_map[6] = PVA(_PAGE_PRESENT);
- protection_map[7] = PVA(_PAGE_PRESENT);
+ protection_map[1] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
+ protection_map[2] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
+ protection_map[3] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
+ protection_map[4] = PM(_PAGE_PRESENT);
+ protection_map[5] = PM(_PAGE_PRESENT);
+ protection_map[6] = PM(_PAGE_PRESENT);
+ protection_map[7] = PM(_PAGE_PRESENT);
protection_map[8] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
- protection_map[9] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC);
- protection_map[10] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE |
+ protection_map[9] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
+ protection_map[10] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE |
_PAGE_NO_READ);
- protection_map[11] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
- protection_map[12] = PVA(_PAGE_PRESENT);
- protection_map[13] = PVA(_PAGE_PRESENT);
- protection_map[14] = PVA(_PAGE_PRESENT);
- protection_map[15] = PVA(_PAGE_PRESENT);
+ protection_map[11] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
+ protection_map[12] = PM(_PAGE_PRESENT);
+ protection_map[13] = PM(_PAGE_PRESENT);
+ protection_map[14] = PM(_PAGE_PRESENT | _PAGE_WRITE);
+ protection_map[15] = PM(_PAGE_PRESENT | _PAGE_WRITE);
}
-#undef _PVA
#undef PM
void cpu_cache_init(void)
diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c
index 0c5de07da097..0135376c5de5 100644
--- a/arch/mips/ralink/of.c
+++ b/arch/mips/ralink/of.c
@@ -8,6 +8,7 @@
#include <linux/io.h>
#include <linux/clk.h>
+#include <linux/export.h>
#include <linux/init.h>
#include <linux/sizes.h>
#include <linux/of_fdt.h>
@@ -25,6 +26,7 @@
__iomem void *rt_sysc_membase;
__iomem void *rt_memc_membase;
+EXPORT_SYMBOL_GPL(rt_sysc_membase);
__iomem void *plat_of_remap_node(const char *node)
{
diff --git a/arch/openrisc/include/asm/barrier.h b/arch/openrisc/include/asm/barrier.h
new file mode 100644
index 000000000000..7538294721be
--- /dev/null
+++ b/arch/openrisc/include/asm/barrier.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_BARRIER_H
+#define __ASM_BARRIER_H
+
+#define mb() asm volatile ("l.msync" ::: "memory")
+
+#include <asm-generic/barrier.h>
+
+#endif /* __ASM_BARRIER_H */
diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index 2416a9f91533..c6f9e7b9f7cb 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -278,6 +278,8 @@ void calibrate_delay(void)
pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",
loops_per_jiffy / (500000 / HZ),
(loops_per_jiffy / (5000 / HZ)) % 100, loops_per_jiffy);
+
+ of_node_put(cpu);
}
void __init setup_arch(char **cmdline_p)
diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index d5641198b90c..cfef61a7b6c2 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -75,7 +75,6 @@ static void __init map_ram(void)
/* These mark extents of read-only kernel pages...
* ...from vmlinux.lds.S
*/
- struct memblock_region *region;
v = PAGE_OFFSET;
@@ -121,7 +120,7 @@ static void __init map_ram(void)
}
printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__,
- region->base, region->base + region->size);
+ start, end);
}
}
@@ -129,7 +128,6 @@ void __init paging_init(void)
{
extern void tlb_init(void);
- unsigned long end;
int i;
printk(KERN_INFO "Setting up paging and PTEs.\n");
@@ -145,8 +143,6 @@ void __init paging_init(void)
*/
current_pgd[smp_processor_id()] = init_mm.pgd;
- end = (unsigned long)__va(max_low_pfn * PAGE_SIZE);
-
map_ram();
zone_sizes_init();
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 5ac80b83d745..aabc37f8cae3 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -440,7 +440,7 @@
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
index c2717f31925a..ccda0a91abf0 100644
--- a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
@@ -122,7 +122,15 @@
};
/include/ "pq3-i2c-0.dtsi"
+ i2c@3000 {
+ fsl,i2c-erratum-a004447;
+ };
+
/include/ "pq3-i2c-1.dtsi"
+ i2c@3100 {
+ fsl,i2c-erratum-a004447;
+ };
+
/include/ "pq3-duart-0.dtsi"
/include/ "pq3-espi-0.dtsi"
spi0: spi@7000 {
diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
index 872e4485dc3f..ddc018d42252 100644
--- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
@@ -371,7 +371,23 @@
};
/include/ "qoriq-i2c-0.dtsi"
+ i2c@118000 {
+ fsl,i2c-erratum-a004447;
+ };
+
+ i2c@118100 {
+ fsl,i2c-erratum-a004447;
+ };
+
/include/ "qoriq-i2c-1.dtsi"
+ i2c@119000 {
+ fsl,i2c-erratum-a004447;
+ };
+
+ i2c@119100 {
+ fsl,i2c-erratum-a004447;
+ };
+
/include/ "qoriq-duart-0.dtsi"
/include/ "qoriq-duart-1.dtsi"
/include/ "qoriq-gpio-0.dtsi"
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 443050906018..e3b29eda8074 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -448,6 +448,9 @@
*/
long plpar_hcall_norets(unsigned long opcode, ...);
+/* Variant which does not do hcall tracing */
+long plpar_hcall_norets_notrace(unsigned long opcode, ...);
+
/**
* plpar_hcall: - Make a pseries hypervisor call
* @opcode: The hypervisor call to make.
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 44cde2e129b8..59f704408d65 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -153,8 +153,6 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
*/
static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
- if (user_mode(regs))
- kuep_unlock();
}
static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
@@ -222,6 +220,13 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !(regs->msr & MSR_PR) &&
+ regs->nip < (unsigned long)__end_interrupts) {
+ // Kernel code running below __end_interrupts is
+ // implicitly soft-masked.
+ regs->softe = IRQS_ALL_DISABLED;
+ }
+
/* Don't do any per-CPU operations until interrupt state is fixed */
if (nmi_disables_ftrace(regs)) {
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1e83359f286b..7f2e90db2050 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -51,6 +51,7 @@
/* PPC-specific vcpu->requests bit members */
#define KVM_REQ_WATCHDOG KVM_ARCH_REQ(0)
#define KVM_REQ_EPR_EXIT KVM_ARCH_REQ(1)
+#define KVM_REQ_PENDING_TIMER KVM_ARCH_REQ(2)
#include <linux/mmu_notifier.h>
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
index 5d1726bb28e7..bcb7b5f917be 100644
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -28,19 +28,35 @@ static inline u32 yield_count_of(int cpu)
return be32_to_cpu(yield_count);
}
+/*
+ * Spinlock code confers and prods, so don't trace the hcalls because the
+ * tracing code takes spinlocks which can cause recursion deadlocks.
+ *
+ * These calls are made while the lock is not held: the lock slowpath yields if
+ * it can not acquire the lock, and unlock slow path might prod if a waiter has
+ * yielded). So this may not be a problem for simple spin locks because the
+ * tracing does not technically recurse on the lock, but we avoid it anyway.
+ *
+ * However the queued spin lock contended path is more strictly ordered: the
+ * H_CONFER hcall is made after the task has queued itself on the lock, so then
+ * recursing on that lock will cause the task to then queue up again behind the
+ * first instance (or worse: queued spinlocks use tricks that assume a context
+ * never waits on more than one spinlock, so such recursion may cause random
+ * corruption in the lock code).
+ */
static inline void yield_to_preempted(int cpu, u32 yield_count)
{
- plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
+ plpar_hcall_norets_notrace(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
}
static inline void prod_cpu(int cpu)
{
- plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
+ plpar_hcall_norets_notrace(H_PROD, get_hard_smp_processor_id(cpu));
}
static inline void yield_to_any(void)
{
- plpar_hcall_norets(H_CONFER, -1, 0);
+ plpar_hcall_norets_notrace(H_CONFER, -1, 0);
}
#else
static inline bool is_shared_processor(void)
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index ece84a430701..83e0f701ebc6 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -28,7 +28,11 @@ static inline void set_cede_latency_hint(u8 latency_hint)
static inline long cede_processor(void)
{
- return plpar_hcall_norets(H_CEDE);
+ /*
+ * We cannot call tracepoints inside RCU idle regions which
+ * means we must not trace H_CEDE.
+ */
+ return plpar_hcall_norets_notrace(H_CEDE);
}
static inline long extended_cede_processor(unsigned long latency_hint)
diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h
index 33fa5dd8ee6a..714a35f0d425 100644
--- a/arch/powerpc/include/asm/pte-walk.h
+++ b/arch/powerpc/include/asm/pte-walk.h
@@ -31,6 +31,35 @@ static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift)
pgd_t *pgdir = init_mm.pgd;
return __find_linux_pte(pgdir, ea, NULL, hshift);
}
+
+/*
+ * Convert a kernel vmap virtual address (vmalloc or ioremap space) to a
+ * physical address, without taking locks. This can be used in real-mode.
+ */
+static inline phys_addr_t ppc_find_vmap_phys(unsigned long addr)
+{
+ pte_t *ptep;
+ phys_addr_t pa;
+ int hugepage_shift;
+
+ /*
+ * init_mm does not free page tables, and does not do THP. It may
+ * have huge pages from huge vmalloc / ioremap etc.
+ */
+ ptep = find_init_mm_pte(addr, &hugepage_shift);
+ if (WARN_ON(!ptep))
+ return 0;
+
+ pa = PFN_PHYS(pte_pfn(*ptep));
+
+ if (!hugepage_shift)
+ hugepage_shift = PAGE_SHIFT;
+
+ pa |= addr & ((1ul << hugepage_shift) - 1);
+
+ return pa;
+}
+
/*
* This is what we should always use. Any other lockless page table lookup needs
* careful audit against THP split.
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 9c9ab2746168..b476a685f066 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -19,6 +19,7 @@
#ifndef _ASM_POWERPC_PTRACE_H
#define _ASM_POWERPC_PTRACE_H
+#include <linux/err.h>
#include <uapi/asm/ptrace.h>
#include <asm/asm-const.h>
@@ -152,25 +153,6 @@ extern unsigned long profile_pc(struct pt_regs *regs);
long do_syscall_trace_enter(struct pt_regs *regs);
void do_syscall_trace_leave(struct pt_regs *regs);
-#define kernel_stack_pointer(regs) ((regs)->gpr[1])
-static inline int is_syscall_success(struct pt_regs *regs)
-{
- return !(regs->ccr & 0x10000000);
-}
-
-static inline long regs_return_value(struct pt_regs *regs)
-{
- if (is_syscall_success(regs))
- return regs->gpr[3];
- else
- return -regs->gpr[3];
-}
-
-static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
-{
- regs->gpr[3] = rc;
-}
-
#ifdef __powerpc64__
#define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1)
#else
@@ -235,6 +217,31 @@ static __always_inline void set_trap_norestart(struct pt_regs *regs)
regs->trap |= 0x1;
}
+#define kernel_stack_pointer(regs) ((regs)->gpr[1])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+ if (trap_is_scv(regs))
+ return !IS_ERR_VALUE((unsigned long)regs->gpr[3]);
+ else
+ return !(regs->ccr & 0x10000000);
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+ if (trap_is_scv(regs))
+ return regs->gpr[3];
+
+ if (is_syscall_success(regs))
+ return regs->gpr[3];
+ else
+ return -regs->gpr[3];
+}
+
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+ regs->gpr[3] = rc;
+}
+
#define arch_has_single_step() (1)
#define arch_has_block_step() (true)
#define ARCH_HAS_USER_SINGLE_STEP_REPORT
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index fd1b518eed17..ba0f88f3a30d 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -41,11 +41,17 @@ static inline void syscall_rollback(struct task_struct *task,
static inline long syscall_get_error(struct task_struct *task,
struct pt_regs *regs)
{
- /*
- * If the system call failed,
- * regs->gpr[3] contains a positive ERRORCODE.
- */
- return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0;
+ if (trap_is_scv(regs)) {
+ unsigned long error = regs->gpr[3];
+
+ return IS_ERR_VALUE(error) ? error : 0;
+ } else {
+ /*
+ * If the system call failed,
+ * regs->gpr[3] contains a positive ERRORCODE.
+ */
+ return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0;
+ }
}
static inline long syscall_get_return_value(struct task_struct *task,
@@ -58,18 +64,22 @@ static inline void syscall_set_return_value(struct task_struct *task,
struct pt_regs *regs,
int error, long val)
{
- /*
- * In the general case it's not obvious that we must deal with CCR
- * here, as the syscall exit path will also do that for us. However
- * there are some places, eg. the signal code, which check ccr to
- * decide if the value in r3 is actually an error.
- */
- if (error) {
- regs->ccr |= 0x10000000L;
- regs->gpr[3] = error;
+ if (trap_is_scv(regs)) {
+ regs->gpr[3] = (long) error ?: val;
} else {
- regs->ccr &= ~0x10000000L;
- regs->gpr[3] = val;
+ /*
+ * In the general case it's not obvious that we must deal with
+ * CCR here, as the syscall exit path will also do that for us.
+ * However there are some places, eg. the signal code, which
+ * check ccr to decide if the value in r3 is actually an error.
+ */
+ if (error) {
+ regs->ccr |= 0x10000000L;
+ regs->gpr[3] = error;
+ } else {
+ regs->ccr &= ~0x10000000L;
+ regs->gpr[3] = val;
+ }
}
}
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index a09e4240c5b1..22c79ab40006 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -157,7 +157,7 @@ do { \
"2: lwz%X1 %L0, %L1\n" \
EX_TABLE(1b, %l2) \
EX_TABLE(2b, %l2) \
- : "=r" (x) \
+ : "=&r" (x) \
: "m" (*addr) \
: \
: label)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index f24cd53ff26e..3bbdcc86d01b 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -346,28 +346,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
*/
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
- pte_t *ptep;
- unsigned long pa;
- int hugepage_shift;
-
- /*
- * We won't find hugepages here(this is iomem). Hence we are not
- * worried about _PAGE_SPLITTING/collapse. Also we will not hit
- * page table free, because of init_mm.
- */
- ptep = find_init_mm_pte(token, &hugepage_shift);
- if (!ptep)
- return token;
-
- pa = pte_pfn(*ptep);
-
- /* On radix we can do hugepage mappings for io, so handle that */
- if (!hugepage_shift)
- hugepage_shift = PAGE_SHIFT;
-
- pa <<= PAGE_SHIFT;
- pa |= token & ((1ul << hugepage_shift) - 1);
- return pa;
+ return ppc_find_vmap_phys(token);
}
/*
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 7c3654b0d0f4..f1ae710274bc 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -340,6 +340,12 @@ ret_from_mc_except:
andi. r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \
bne masked_interrupt_book3e_##n
+/*
+ * Additional regs must be re-loaded from paca before EXCEPTION_COMMON* is
+ * called, because that does SAVE_NVGPRS which must see the original register
+ * values, otherwise the scratch values might be restored when exiting the
+ * interrupt.
+ */
#define PROLOG_ADDITION_2REGS_GEN(n) \
std r14,PACA_EXGEN+EX_R14(r13); \
std r15,PACA_EXGEN+EX_R15(r13)
@@ -535,6 +541,10 @@ __end_interrupts:
PROLOG_ADDITION_2REGS)
mfspr r14,SPRN_DEAR
mfspr r15,SPRN_ESR
+ std r14,_DAR(r1)
+ std r15,_DSISR(r1)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
EXCEPTION_COMMON(0x300)
b storage_fault_common
@@ -544,6 +554,10 @@ __end_interrupts:
PROLOG_ADDITION_2REGS)
li r15,0
mr r14,r10
+ std r14,_DAR(r1)
+ std r15,_DSISR(r1)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
EXCEPTION_COMMON(0x400)
b storage_fault_common
@@ -557,6 +571,10 @@ __end_interrupts:
PROLOG_ADDITION_2REGS)
mfspr r14,SPRN_DEAR
mfspr r15,SPRN_ESR
+ std r14,_DAR(r1)
+ std r15,_DSISR(r1)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
EXCEPTION_COMMON(0x600)
b alignment_more /* no room, go out of line */
@@ -565,10 +583,10 @@ __end_interrupts:
NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM,
PROLOG_ADDITION_1REG)
mfspr r14,SPRN_ESR
- EXCEPTION_COMMON(0x700)
std r14,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
ld r14,PACA_EXGEN+EX_R14(r13)
+ EXCEPTION_COMMON(0x700)
+ addi r3,r1,STACK_FRAME_OVERHEAD
bl program_check_exception
REST_NVGPRS(r1)
b interrupt_return
@@ -725,11 +743,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
* normal exception
*/
mfspr r14,SPRN_DBSR
- EXCEPTION_COMMON_CRIT(0xd00)
std r14,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
ld r14,PACA_EXCRIT+EX_R14(r13)
ld r15,PACA_EXCRIT+EX_R15(r13)
+ EXCEPTION_COMMON_CRIT(0xd00)
+ addi r3,r1,STACK_FRAME_OVERHEAD
bl DebugException
REST_NVGPRS(r1)
b interrupt_return
@@ -796,11 +814,11 @@ kernel_dbg_exc:
* normal exception
*/
mfspr r14,SPRN_DBSR
- EXCEPTION_COMMON_DBG(0xd08)
std r14,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
ld r14,PACA_EXDBG+EX_R14(r13)
ld r15,PACA_EXDBG+EX_R15(r13)
+ EXCEPTION_COMMON_DBG(0xd08)
+ addi r3,r1,STACK_FRAME_OVERHEAD
bl DebugException
REST_NVGPRS(r1)
b interrupt_return
@@ -931,11 +949,7 @@ masked_interrupt_book3e_0x2c0:
* original values stashed away in the PACA
*/
storage_fault_common:
- std r14,_DAR(r1)
- std r15,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- ld r14,PACA_EXGEN+EX_R14(r13)
- ld r15,PACA_EXGEN+EX_R15(r13)
bl do_page_fault
b interrupt_return
@@ -944,11 +958,7 @@ storage_fault_common:
* continues here.
*/
alignment_more:
- std r14,_DAR(r1)
- std r15,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- ld r14,PACA_EXGEN+EX_R14(r13)
- ld r15,PACA_EXGEN+EX_R15(r13)
bl alignment_exception
REST_NVGPRS(r1)
b interrupt_return
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index e4559f8914eb..e0938ba298f2 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -34,9 +34,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
syscall_fn f;
kuep_lock();
-#ifdef CONFIG_PPC32
- kuap_save_and_lock(regs);
-#endif
regs->orig_gpr3 = r3;
@@ -427,6 +424,7 @@ again:
/* Restore user access locks last */
kuap_user_restore(regs);
+ kuep_unlock();
return ret;
}
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index 51bbaae94ccc..c877f074d174 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -55,7 +55,6 @@ static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr)
#ifdef CONFIG_PPC_INDIRECT_MMIO
struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
{
- unsigned hugepage_shift;
struct iowa_bus *bus;
int token;
@@ -65,22 +64,13 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
bus = &iowa_busses[token - 1];
else {
unsigned long vaddr, paddr;
- pte_t *ptep;
vaddr = (unsigned long)PCI_FIX_ADDR(addr);
if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
return NULL;
- /*
- * We won't find huge pages here (iomem). Also can't hit
- * a page table free due to init_mm
- */
- ptep = find_init_mm_pte(vaddr, &hugepage_shift);
- if (ptep == NULL)
- paddr = 0;
- else {
- WARN_ON(hugepage_shift);
- paddr = pte_pfn(*ptep) << PAGE_SHIFT;
- }
+
+ paddr = ppc_find_vmap_phys(vaddr);
+
bus = iowa_pci_find(vaddr, paddr);
if (bus == NULL)
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 57d6b85e9b96..2af89a5e379f 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -898,7 +898,6 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
unsigned int order;
unsigned int nio_pages, io_order;
struct page *page;
- size_t size_io = size;
size = PAGE_ALIGN(size);
order = get_order(size);
@@ -925,9 +924,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
memset(ret, 0, size);
/* Set up tces to cover the allocated range */
- size_io = IOMMU_PAGE_ALIGN(size_io, tbl);
- nio_pages = size_io >> tbl->it_page_shift;
- io_order = get_iommu_order(size_io, tbl);
+ nio_pages = size >> tbl->it_page_shift;
+ io_order = get_iommu_order(size, tbl);
mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
mask >> tbl->it_page_shift, io_order, 0);
if (mapping == DMA_MAPPING_ERROR) {
@@ -942,9 +940,10 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
void *vaddr, dma_addr_t dma_handle)
{
if (tbl) {
- size_t size_io = IOMMU_PAGE_ALIGN(size, tbl);
- unsigned int nio_pages = size_io >> tbl->it_page_shift;
+ unsigned int nio_pages;
+ size = PAGE_ALIGN(size);
+ nio_pages = size >> tbl->it_page_shift;
iommu_free(tbl, dma_handle, nio_pages);
size = PAGE_ALIGN(size);
free_pages((unsigned long)vaddr, get_order(size));
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 01ab2163659e..e8c2a6373157 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -108,7 +108,6 @@ int arch_prepare_kprobe(struct kprobe *p)
int ret = 0;
struct kprobe *prev;
struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr);
- struct ppc_inst prefix = ppc_inst_read((struct ppc_inst *)(p->addr - 1));
if ((unsigned long)p->addr & 0x03) {
printk("Attempt to register kprobe at an unaligned address\n");
@@ -116,7 +115,8 @@ int arch_prepare_kprobe(struct kprobe *p)
} else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) {
printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n");
ret = -EINVAL;
- } else if (ppc_inst_prefixed(prefix)) {
+ } else if ((unsigned long)p->addr & ~PAGE_MASK &&
+ ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)(p->addr - 1)))) {
printk("Cannot register a kprobe on the second word of prefixed instruction\n");
ret = -EINVAL;
}
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 8b2c1a8553a0..cfc03e016ff2 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -356,13 +356,16 @@ static void __init setup_legacy_serial_console(int console)
static int __init ioremap_legacy_serial_console(void)
{
- struct legacy_serial_info *info = &legacy_serial_infos[legacy_serial_console];
- struct plat_serial8250_port *port = &legacy_serial_ports[legacy_serial_console];
+ struct plat_serial8250_port *port;
+ struct legacy_serial_info *info;
void __iomem *vaddr;
if (legacy_serial_console < 0)
return 0;
+ info = &legacy_serial_infos[legacy_serial_console];
+ port = &legacy_serial_ports[legacy_serial_console];
+
if (!info->early_addr)
return 0;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index b779d25761cf..e42b85e4f1aa 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -369,11 +369,11 @@ void __init early_setup(unsigned long dt_ptr)
apply_feature_fixups();
setup_feature_keys();
- early_ioremap_setup();
-
/* Initialize the hash table or TLB handling */
early_init_mmu();
+ early_ioremap_setup();
+
/*
* After firmware and early platform setup code has set things up,
* we note the SPR values for configurable control/performance
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index f4aafa337c2e..1f07317964e4 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -166,9 +166,9 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from)
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
#else
-#define unsafe_copy_fpr_to_user(to, task, label) do { } while (0)
+#define unsafe_copy_fpr_to_user(to, task, label) do { if (0) goto label;} while (0)
-#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0)
+#define unsafe_copy_fpr_from_user(task, from, label) do { if (0) goto label;} while (0)
static inline unsigned long
copy_fpr_to_user(void __user *to, struct task_struct *task)
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 2e68fbb57cc6..8f052ff4058c 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -522,7 +522,7 @@
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2d9193cd73be..c63e263312a4 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -840,7 +840,7 @@ bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range)
kvm_unmap_radix(kvm, range->slot, gfn);
} else {
for (gfn = range->start; gfn < range->end; gfn++)
- kvm_unmap_rmapp(kvm, range->slot, range->start);
+ kvm_unmap_rmapp(kvm, range->slot, gfn);
}
return false;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 28a80d240b76..bc0813644666 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3936,7 +3936,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
break;
}
cur = ktime_get();
- } while (single_task_running() && ktime_before(cur, stop));
+ } while (kvm_vcpu_can_poll(cur, stop));
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
@@ -4455,7 +4455,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
mtspr(SPRN_EBBRR, ebb_regs[1]);
mtspr(SPRN_BESCR, ebb_regs[2]);
mtspr(SPRN_TAR, user_tar);
- mtspr(SPRN_FSCR, current->thread.fscr);
}
mtspr(SPRN_VRSAVE, user_vrsave);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7af7c70f1468..7a0f12404e0e 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -23,20 +23,9 @@
#include <asm/pte-walk.h>
/* Translate address of a vmalloc'd thing to a linear map address */
-static void *real_vmalloc_addr(void *x)
+static void *real_vmalloc_addr(void *addr)
{
- unsigned long addr = (unsigned long) x;
- pte_t *p;
- /*
- * assume we don't have huge pages in vmalloc space...
- * So don't worry about THP collapse/split. Called
- * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore.
- */
- p = find_init_mm_pte(addr, NULL);
- if (!p || !pte_present(*p))
- return NULL;
- addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
- return __va(addr);
+ return __va(ppc_find_vmap_phys((unsigned long)addr));
}
/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 5e634db4809b..004f0d4e665f 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -59,6 +59,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define STACK_SLOT_UAMOR (SFS-88)
#define STACK_SLOT_DAWR1 (SFS-96)
#define STACK_SLOT_DAWRX1 (SFS-104)
+#define STACK_SLOT_FSCR (SFS-112)
/* the following is used by the P9 short path */
#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */
@@ -686,6 +687,8 @@ BEGIN_FTR_SECTION
std r6, STACK_SLOT_DAWR0(r1)
std r7, STACK_SLOT_DAWRX0(r1)
std r8, STACK_SLOT_IAMR(r1)
+ mfspr r5, SPRN_FSCR
+ std r5, STACK_SLOT_FSCR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
mfspr r6, SPRN_DAWR1
@@ -1663,6 +1666,10 @@ FTR_SECTION_ELSE
ld r7, STACK_SLOT_HFSCR(r1)
mtspr SPRN_HFSCR, r7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
+BEGIN_FTR_SECTION
+ ld r5, STACK_SLOT_FSCR(r1)
+ mtspr SPRN_FSCR, r5
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/*
* Restore various registers to 0, where non-zero values
* set by the guest could disrupt the host.
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 1fd31b4b0e13..fe26f2fa0f3f 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -14,6 +14,7 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/sched/mm.h>
+#include <linux/stop_machine.h>
#include <asm/cputable.h>
#include <asm/code-patching.h>
#include <asm/page.h>
@@ -149,17 +150,17 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
pr_devel("patching dest %lx\n", (unsigned long)dest);
- patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
- if (types & STF_BARRIER_FALLBACK)
+ // See comment in do_entry_flush_fixups() RE order of patching
+ if (types & STF_BARRIER_FALLBACK) {
+ patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+ patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
patch_branch((struct ppc_inst *)(dest + 1),
- (unsigned long)&stf_barrier_fallback,
- BRANCH_SET_LINK);
- else
- patch_instruction((struct ppc_inst *)(dest + 1),
- ppc_inst(instrs[1]));
-
- patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK);
+ } else {
+ patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
+ patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+ }
}
printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
@@ -227,11 +228,25 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
: "unknown");
}
+static int __do_stf_barrier_fixups(void *data)
+{
+ enum stf_barrier_type *types = data;
+
+ do_stf_entry_barrier_fixups(*types);
+ do_stf_exit_barrier_fixups(*types);
+
+ return 0;
+}
void do_stf_barrier_fixups(enum stf_barrier_type types)
{
- do_stf_entry_barrier_fixups(types);
- do_stf_exit_barrier_fixups(types);
+ /*
+ * The call to the fallback entry flush, and the fallback/sync-ori exit
+ * flush can not be safely patched in/out while other CPUs are executing
+ * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs
+ * spin in the stop machine core with interrupts hard disabled.
+ */
+ stop_machine(__do_stf_barrier_fixups, &types, NULL);
}
void do_uaccess_flush_fixups(enum l1d_flush_type types)
@@ -284,8 +299,9 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types)
: "unknown");
}
-void do_entry_flush_fixups(enum l1d_flush_type types)
+static int __do_entry_flush_fixups(void *data)
{
+ enum l1d_flush_type types = *(enum l1d_flush_type *)data;
unsigned int instrs[3], *dest;
long *start, *end;
int i;
@@ -309,6 +325,31 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
if (types & L1D_FLUSH_MTTRIG)
instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+ /*
+ * If we're patching in or out the fallback flush we need to be careful about the
+ * order in which we patch instructions. That's because it's possible we could
+ * take a page fault after patching one instruction, so the sequence of
+ * instructions must be safe even in a half patched state.
+ *
+ * To make that work, when patching in the fallback flush we patch in this order:
+ * - the mflr (dest)
+ * - the mtlr (dest + 2)
+ * - the branch (dest + 1)
+ *
+ * That ensures the sequence is safe to execute at any point. In contrast if we
+ * patch the mtlr last, it's possible we could return from the branch and not
+ * restore LR, leading to a crash later.
+ *
+ * When patching out the fallback flush (either with nops or another flush type),
+ * we patch in this order:
+ * - the branch (dest + 1)
+ * - the mtlr (dest + 2)
+ * - the mflr (dest)
+ *
+ * Note we are protected by stop_machine() from other CPUs executing the code in a
+ * semi-patched state.
+ */
+
start = PTRRELOC(&__start___entry_flush_fixup);
end = PTRRELOC(&__stop___entry_flush_fixup);
for (i = 0; start < end; start++, i++) {
@@ -316,15 +357,16 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
pr_devel("patching dest %lx\n", (unsigned long)dest);
- patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
- if (types == L1D_FLUSH_FALLBACK)
- patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback,
- BRANCH_SET_LINK);
- else
+ if (types == L1D_FLUSH_FALLBACK) {
+ patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+ patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ patch_branch((struct ppc_inst *)(dest + 1),
+ (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK);
+ } else {
patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-
- patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+ }
}
start = PTRRELOC(&__start___scv_entry_flush_fixup);
@@ -334,15 +376,16 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
pr_devel("patching dest %lx\n", (unsigned long)dest);
- patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
- if (types == L1D_FLUSH_FALLBACK)
- patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&scv_entry_flush_fallback,
- BRANCH_SET_LINK);
- else
+ if (types == L1D_FLUSH_FALLBACK) {
+ patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+ patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ patch_branch((struct ppc_inst *)(dest + 1),
+ (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK);
+ } else {
patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-
- patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+ }
}
@@ -354,6 +397,19 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
: "ori type" :
(types & L1D_FLUSH_MTTRIG) ? "mttrig type"
: "unknown");
+
+ return 0;
+}
+
+void do_entry_flush_fixups(enum l1d_flush_type types)
+{
+ /*
+ * The call to the fallback flush can not be safely patched in/out while
+ * other CPUs are executing it. So call __do_entry_flush_fixups() on one
+ * CPU while all other CPUs spin in the stop machine core with interrupts
+ * hard disabled.
+ */
+ stop_machine(__do_entry_flush_fixups, &types, NULL);
}
void do_rfi_flush_fixups(enum l1d_flush_type types)
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 2136e42833af..8a2b8d64265b 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -102,6 +102,16 @@ END_FTR_SECTION(0, 1); \
#define HCALL_BRANCH(LABEL)
#endif
+_GLOBAL_TOC(plpar_hcall_norets_notrace)
+ HMT_MEDIUM
+
+ mfcr r0
+ stw r0,8(r1)
+ HVSC /* invoke the hypervisor */
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+ blr /* return r3 = status */
+
_GLOBAL_TOC(plpar_hcall_norets)
HMT_MEDIUM
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 1f3152ad7213..dab356e3ff87 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1829,30 +1829,28 @@ void hcall_tracepoint_unregfunc(void)
#endif
/*
- * Since the tracing code might execute hcalls we need to guard against
- * recursion. One example of this are spinlocks calling H_YIELD on
- * shared processor partitions.
+ * Keep track of hcall tracing depth and prevent recursion. Warn if any is
+ * detected because it may indicate a problem. This will not catch all
+ * problems with tracing code making hcalls, because the tracing might have
+ * been invoked from a non-hcall, so the first hcall could recurse into it
+ * without warning here, but this better than nothing.
+ *
+ * Hcalls with specific problems being traced should use the _notrace
+ * plpar_hcall variants.
*/
static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
-void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
+notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
{
unsigned long flags;
unsigned int *depth;
- /*
- * We cannot call tracepoints inside RCU idle regions which
- * means we must not trace H_CEDE.
- */
- if (opcode == H_CEDE)
- return;
-
local_irq_save(flags);
depth = this_cpu_ptr(&hcall_trace_depth);
- if (*depth)
+ if (WARN_ON_ONCE(*depth))
goto out;
(*depth)++;
@@ -1864,19 +1862,16 @@ out:
local_irq_restore(flags);
}
-void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
+notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
{
unsigned long flags;
unsigned int *depth;
- if (opcode == H_CEDE)
- return;
-
local_irq_save(flags);
depth = this_cpu_ptr(&hcall_trace_depth);
- if (*depth)
+ if (*depth) /* Don't warn again on the way out */
goto out;
(*depth)++;
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 12de7a9c85b3..9cc71ca9a88f 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -651,9 +651,9 @@ ENDPROC(stack_overflow)
.Lcleanup_sie_mcck:
larl %r13,.Lsie_entry
slgr %r9,%r13
- larl %r13,.Lsie_skip
+ lghi %r13,.Lsie_skip - .Lsie_entry
clgr %r9,%r13
- jh .Lcleanup_sie_int
+ jhe .Lcleanup_sie_int
oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
.Lcleanup_sie_int:
BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 7e4a2aba366d..0690263df1dd 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -445,7 +445,7 @@
440 common process_madvise sys_process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index f47a0dc55445..0b91499ebdcf 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -445,7 +445,7 @@
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c
index f5beecdac693..e76b22157099 100644
--- a/arch/sh/kernel/traps.c
+++ b/arch/sh/kernel/traps.c
@@ -180,7 +180,6 @@ static inline void arch_ftrace_nmi_exit(void) { }
BUILD_TRAP_HANDLER(nmi)
{
- unsigned int cpu = smp_processor_id();
TRAP_HANDLER_DECL;
arch_ftrace_nmi_enter();
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index b9e1c0e735b7..e34cc30ef22c 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -488,7 +488,7 @@
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index c77c5d8a7b3e..cb5e8d39cac1 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -178,11 +178,6 @@ ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,)
endif
-ifdef CONFIG_LTO_CLANG
-KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel \
- -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
-endif
-
# Workaround for a gcc prelease that unfortunately was shipped in a suse release
KBUILD_CFLAGS += -Wno-sign-compare
#
@@ -202,7 +197,13 @@ ifdef CONFIG_RETPOLINE
endif
endif
-KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
+KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
+
+ifdef CONFIG_LTO_CLANG
+ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0)
+KBUILD_LDFLAGS += -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
+endif
+endif
ifdef CONFIG_X86_NEED_RELOCS
LDFLAGS_vmlinux := --emit-relocs --discard-none
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 6e5522aebbbd..431bf7f846c3 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -30,6 +30,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
KBUILD_CFLAGS := -m$(BITS) -O2
KBUILD_CFLAGS += -fno-strict-aliasing -fPIE
+KBUILD_CFLAGS += -Wundef
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
cflags-$(CONFIG_X86_32) := -march=i386
cflags-$(CONFIG_X86_64) := -mcmodel=small -mno-red-zone
@@ -48,10 +49,10 @@ KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no)
KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h
KBUILD_CFLAGS += $(CLANG_FLAGS)
-# sev-es.c indirectly inludes inat-table.h which is generated during
+# sev.c indirectly inludes inat-table.h which is generated during
# compilation and stored in $(objtree). Add the directory to the includes so
# that the compiler finds it even with out-of-tree builds (make O=/some/path).
-CFLAGS_sev-es.o += -I$(objtree)/arch/x86/lib/
+CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
@@ -93,7 +94,7 @@ ifdef CONFIG_X86_64
vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o
vmlinux-objs-y += $(obj)/mem_encrypt.o
vmlinux-objs-y += $(obj)/pgtable_64.o
- vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev-es.o
+ vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o
endif
vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index dde042f64cca..743f13ea25c1 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -172,7 +172,7 @@ void __puthex(unsigned long value)
}
}
-#if CONFIG_X86_NEED_RELOCS
+#ifdef CONFIG_X86_NEED_RELOCS
static void handle_relocations(void *output, unsigned long output_len,
unsigned long virt_addr)
{
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index e5612f035498..31139256859f 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -79,7 +79,7 @@ struct mem_vector {
u64 size;
};
-#if CONFIG_RANDOMIZE_BASE
+#ifdef CONFIG_RANDOMIZE_BASE
/* kaslr.c */
void choose_random_location(unsigned long input,
unsigned long input_size,
diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev.c
index 82041bd380e5..670e998fe930 100644
--- a/arch/x86/boot/compressed/sev-es.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -13,7 +13,7 @@
#include "misc.h"
#include <asm/pgtable_types.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
#include <asm/trapnr.h>
#include <asm/trap_pf.h>
#include <asm/msr-index.h>
@@ -117,7 +117,7 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
#include "../../lib/insn.c"
/* Include code for early handlers */
-#include "../../kernel/sev-es-shared.c"
+#include "../../kernel/sev-shared.c"
static bool early_setup_sev_es(void)
{
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 28a1423ce32e..4bbc267fb36b 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -447,7 +447,7 @@
440 i386 process_madvise sys_process_madvise
441 i386 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
442 i386 mount_setattr sys_mount_setattr
-443 i386 quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 i386 landlock_create_ruleset sys_landlock_create_ruleset
445 i386 landlock_add_rule sys_landlock_add_rule
446 i386 landlock_restrict_self sys_landlock_restrict_self
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index ecd551b08d05..ce18119ea0d0 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -364,7 +364,7 @@
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 8e509325c2c3..8f71dd72ef95 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -396,10 +396,12 @@ int x86_reserve_hardware(void)
if (!atomic_inc_not_zero(&pmc_refcount)) {
mutex_lock(&pmc_reserve_mutex);
if (atomic_read(&pmc_refcount) == 0) {
- if (!reserve_pmc_hardware())
+ if (!reserve_pmc_hardware()) {
err = -EBUSY;
- else
+ } else {
reserve_ds_buffers();
+ reserve_lbr_buffers();
+ }
}
if (!err)
atomic_inc(&pmc_refcount);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2521d03de5e0..e28892270c58 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6253,7 +6253,7 @@ __init int intel_pmu_init(void)
* Check all LBT MSR here.
* Disable LBR access if any LBR MSRs can not be accessed.
*/
- if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
+ if (x86_pmu.lbr_tos && !check_msr(x86_pmu.lbr_tos, 0x3UL))
x86_pmu.lbr_nr = 0;
for (i = 0; i < x86_pmu.lbr_nr; i++) {
if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 76dbab6ac9fb..4409d2cccfda 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -658,7 +658,6 @@ static inline bool branch_user_callstack(unsigned br_sel)
void intel_pmu_lbr_add(struct perf_event *event)
{
- struct kmem_cache *kmem_cache = event->pmu->task_ctx_cache;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (!x86_pmu.lbr_nr)
@@ -696,11 +695,6 @@ void intel_pmu_lbr_add(struct perf_event *event)
perf_sched_cb_inc(event->ctx->pmu);
if (!cpuc->lbr_users++ && !event->total_time_running)
intel_pmu_lbr_reset();
-
- if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
- kmem_cache && !cpuc->lbr_xsave &&
- (cpuc->lbr_users != cpuc->lbr_pebs_users))
- cpuc->lbr_xsave = kmem_cache_alloc(kmem_cache, GFP_KERNEL);
}
void release_lbr_buffers(void)
@@ -722,6 +716,26 @@ void release_lbr_buffers(void)
}
}
+void reserve_lbr_buffers(void)
+{
+ struct kmem_cache *kmem_cache;
+ struct cpu_hw_events *cpuc;
+ int cpu;
+
+ if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
+ return;
+
+ for_each_possible_cpu(cpu) {
+ cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
+ kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
+ if (!kmem_cache || cpuc->lbr_xsave)
+ continue;
+
+ cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, GFP_KERNEL,
+ cpu_to_node(cpu));
+ }
+}
+
void intel_pmu_lbr_del(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 63f097289a84..3a75a2c601c2 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1406,6 +1406,8 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
die_id = i;
else
die_id = topology_phys_to_logical_pkg(i);
+ if (die_id < 0)
+ die_id = -ENODEV;
map->pbus_to_dieid[bus] = die_id;
break;
}
@@ -1452,14 +1454,14 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
i = -1;
if (reverse) {
for (bus = 255; bus >= 0; bus--) {
- if (map->pbus_to_dieid[bus] >= 0)
+ if (map->pbus_to_dieid[bus] != -1)
i = map->pbus_to_dieid[bus];
else
map->pbus_to_dieid[bus] = i;
}
} else {
for (bus = 0; bus <= 255; bus++) {
- if (map->pbus_to_dieid[bus] >= 0)
+ if (map->pbus_to_dieid[bus] != -1)
i = map->pbus_to_dieid[bus];
else
map->pbus_to_dieid[bus] = i;
@@ -5097,9 +5099,10 @@ static struct intel_uncore_type icx_uncore_m2m = {
.perf_ctr = SNR_M2M_PCI_PMON_CTR0,
.event_ctl = SNR_M2M_PCI_PMON_CTL0,
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .event_mask_ext = SNR_M2M_PCI_PMON_UMASK_EXT,
.box_ctl = SNR_M2M_PCI_PMON_BOX_CTL,
.ops = &snr_m2m_uncore_pci_ops,
- .format_group = &skx_uncore_format_group,
+ .format_group = &snr_m2m_uncore_format_group,
};
static struct attribute *icx_upi_uncore_formats_attr[] = {
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 27fa85e7d4fd..ad87cb36f7c8 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1244,6 +1244,8 @@ void reserve_ds_buffers(void);
void release_lbr_buffers(void);
+void reserve_lbr_buffers(void);
+
extern struct event_constraint bts_constraint;
extern struct event_constraint vlbr_constraint;
@@ -1393,6 +1395,10 @@ static inline void release_lbr_buffers(void)
{
}
+static inline void reserve_lbr_buffers(void)
+{
+}
+
static inline int intel_pmu_init(void)
{
return 0;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 412b51e059c8..48067af94678 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -174,6 +174,7 @@ static inline int apic_is_clustered_box(void)
extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
extern void lapic_assign_system_vectors(void);
extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
+extern void lapic_update_legacy_vectors(void);
extern void lapic_online(void);
extern void lapic_offline(void);
extern bool apic_needs_pit(void);
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index b7dd944dc867..8f28fafa98b3 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -56,11 +56,8 @@
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
-#ifdef CONFIG_IOMMU_SUPPORT
-# define DISABLE_ENQCMD 0
-#else
-# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
-#endif
+/* Force disable because it's broken beyond repair */
+#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
#ifdef CONFIG_X86_SGX
# define DISABLE_SGX 0
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index ed33a14188f6..23bef08a8388 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -106,10 +106,6 @@ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
*/
#define PASID_DISABLED 0
-#ifdef CONFIG_IOMMU_SUPPORT
-/* Update current's PASID MSR/state by mm's PASID. */
-void update_pasid(void);
-#else
static inline void update_pasid(void) { }
-#endif
+
#endif /* _ASM_X86_FPU_API_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 8d33ad80704f..ceeba9f63172 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -584,13 +584,6 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
pkru_val = pk->pkru;
}
__write_pkru(pkru_val);
-
- /*
- * Expensive PASID MSR write will be avoided in update_pasid() because
- * TIF_NEED_FPU_LOAD was set. And the PASID state won't be updated
- * unless it's different from mm->pasid to reduce overhead.
- */
- update_pasid();
}
#endif /* _ASM_X86_FPU_INTERNAL_H */
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 323641097f63..e7bef91cee04 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -99,6 +99,7 @@ KVM_X86_OP_NULL(post_block)
KVM_X86_OP_NULL(vcpu_blocking)
KVM_X86_OP_NULL(vcpu_unblocking)
KVM_X86_OP_NULL(update_pi_irte)
+KVM_X86_OP_NULL(start_assignment)
KVM_X86_OP_NULL(apicv_post_state_restore)
KVM_X86_OP_NULL(dy_apicv_has_pending_interrupt)
KVM_X86_OP_NULL(set_hv_timer)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index cbbcee0a84f9..9c7ced0e3171 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -113,6 +113,7 @@
#define VALID_PAGE(x) ((x) != INVALID_PAGE)
#define UNMAPPED_GVA (~(gpa_t)0)
+#define INVALID_GPA (~(gpa_t)0)
/* KVM Hugepage definitions for x86 */
#define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G
@@ -199,6 +200,7 @@ enum x86_intercept_stage;
#define KVM_NR_DB_REGS 4
+#define DR6_BUS_LOCK (1 << 11)
#define DR6_BD (1 << 13)
#define DR6_BS (1 << 14)
#define DR6_BT (1 << 15)
@@ -212,7 +214,7 @@ enum x86_intercept_stage;
* DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
*/
#define DR6_ACTIVE_LOW 0xffff0ff0
-#define DR6_VOLATILE 0x0001e00f
+#define DR6_VOLATILE 0x0001e80f
#define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE)
#define DR7_BP_EN_MASK 0x000000ff
@@ -407,7 +409,7 @@ struct kvm_mmu {
u32 pkru_mask;
u64 *pae_root;
- u64 *lm_root;
+ u64 *pml4_root;
/*
* check zero bits on shadow page table entries, these
@@ -1350,6 +1352,7 @@ struct kvm_x86_ops {
int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
+ void (*start_assignment)(struct kvm *kvm);
void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
@@ -1417,6 +1420,7 @@ struct kvm_arch_async_pf {
bool direct_map;
};
+extern u32 __read_mostly kvm_nr_uret_msrs;
extern u64 __read_mostly host_efer;
extern bool __read_mostly allow_smaller_maxphyaddr;
extern struct kvm_x86_ops kvm_x86_ops;
@@ -1775,9 +1779,15 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit);
-void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_add_user_return_msr(u32 msr);
+int kvm_find_user_return_msr(u32 msr);
int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
+static inline bool kvm_is_supported_user_return_msr(u32 msr)
+{
+ return kvm_find_user_return_msr(msr) >= 0;
+}
+
u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 338119852512..69299878b200 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -7,8 +7,6 @@
#include <linux/interrupt.h>
#include <uapi/asm/kvm_para.h>
-extern void kvmclock_init(void);
-
#ifdef CONFIG_KVM_GUEST
bool kvm_check_and_clear_guest_paused(void);
#else
@@ -86,13 +84,14 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
}
#ifdef CONFIG_KVM_GUEST
+void kvmclock_init(void);
+void kvmclock_disable(void);
bool kvm_para_available(void);
unsigned int kvm_arch_para_features(void);
unsigned int kvm_arch_para_hints(void);
void kvm_async_pf_task_wait_schedule(u32 token);
void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_apf_flags(void);
-void kvm_disable_steal_time(void);
bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);
DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -137,11 +136,6 @@ static inline u32 kvm_read_and_reset_apf_flags(void)
return 0;
}
-static inline void kvm_disable_steal_time(void)
-{
- return;
-}
-
static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
return false;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 742d89a00721..211ba3375ee9 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -537,9 +537,9 @@
/* K8 MSRs */
#define MSR_K8_TOP_MEM1 0xc001001a
#define MSR_K8_TOP_MEM2 0xc001001d
-#define MSR_K8_SYSCFG 0xc0010010
-#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23
-#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG 0xc0010010
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
#define MSR_K8_INT_PENDING_MSG 0xc0010055
/* C1E active bits in int pending message */
#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 154321d29050..556b2b17c3e2 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -787,8 +787,10 @@ DECLARE_PER_CPU(u64, msr_misc_features_shadow);
#ifdef CONFIG_CPU_SUP_AMD
extern u32 amd_get_nodes_per_socket(void);
+extern u32 amd_get_highest_perf(void);
#else
static inline u32 amd_get_nodes_per_socket(void) { return 0; }
+static inline u32 amd_get_highest_perf(void) { return 0; }
#endif
static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
new file mode 100644
index 000000000000..629c3df243f0
--- /dev/null
+++ b/arch/x86/include/asm/sev-common.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD SEV header common between the guest and the hypervisor.
+ *
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ */
+
+#ifndef __ASM_X86_SEV_COMMON_H
+#define __ASM_X86_SEV_COMMON_H
+
+#define GHCB_MSR_INFO_POS 0
+#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1)
+
+#define GHCB_MSR_SEV_INFO_RESP 0x001
+#define GHCB_MSR_SEV_INFO_REQ 0x002
+#define GHCB_MSR_VER_MAX_POS 48
+#define GHCB_MSR_VER_MAX_MASK 0xffff
+#define GHCB_MSR_VER_MIN_POS 32
+#define GHCB_MSR_VER_MIN_MASK 0xffff
+#define GHCB_MSR_CBIT_POS 24
+#define GHCB_MSR_CBIT_MASK 0xff
+#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \
+ ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \
+ (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \
+ (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \
+ GHCB_MSR_SEV_INFO_RESP)
+#define GHCB_MSR_INFO(v) ((v) & 0xfffUL)
+#define GHCB_MSR_PROTO_MAX(v) (((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK)
+#define GHCB_MSR_PROTO_MIN(v) (((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK)
+
+#define GHCB_MSR_CPUID_REQ 0x004
+#define GHCB_MSR_CPUID_RESP 0x005
+#define GHCB_MSR_CPUID_FUNC_POS 32
+#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff
+#define GHCB_MSR_CPUID_VALUE_POS 32
+#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff
+#define GHCB_MSR_CPUID_REG_POS 30
+#define GHCB_MSR_CPUID_REG_MASK 0x3
+#define GHCB_CPUID_REQ_EAX 0
+#define GHCB_CPUID_REQ_EBX 1
+#define GHCB_CPUID_REQ_ECX 2
+#define GHCB_CPUID_REQ_EDX 3
+#define GHCB_CPUID_REQ(fn, reg) \
+ (GHCB_MSR_CPUID_REQ | \
+ (((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \
+ (((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS))
+
+#define GHCB_MSR_TERM_REQ 0x100
+#define GHCB_MSR_TERM_REASON_SET_POS 12
+#define GHCB_MSR_TERM_REASON_SET_MASK 0xf
+#define GHCB_MSR_TERM_REASON_POS 16
+#define GHCB_MSR_TERM_REASON_MASK 0xff
+#define GHCB_SEV_TERM_REASON(reason_set, reason_val) \
+ (((((u64)reason_set) & GHCB_MSR_TERM_REASON_SET_MASK) << GHCB_MSR_TERM_REASON_SET_POS) | \
+ ((((u64)reason_val) & GHCB_MSR_TERM_REASON_MASK) << GHCB_MSR_TERM_REASON_POS))
+
+#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0
+#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1
+
+#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK)
+
+#endif
diff --git a/arch/x86/include/asm/sev-es.h b/arch/x86/include/asm/sev.h
index cf1d957c7091..fa5cd05d3b5b 100644
--- a/arch/x86/include/asm/sev-es.h
+++ b/arch/x86/include/asm/sev.h
@@ -10,34 +10,12 @@
#include <linux/types.h>
#include <asm/insn.h>
+#include <asm/sev-common.h>
-#define GHCB_SEV_INFO 0x001UL
-#define GHCB_SEV_INFO_REQ 0x002UL
-#define GHCB_INFO(v) ((v) & 0xfffUL)
-#define GHCB_PROTO_MAX(v) (((v) >> 48) & 0xffffUL)
-#define GHCB_PROTO_MIN(v) (((v) >> 32) & 0xffffUL)
-#define GHCB_PROTO_OUR 0x0001UL
-#define GHCB_SEV_CPUID_REQ 0x004UL
-#define GHCB_CPUID_REQ_EAX 0
-#define GHCB_CPUID_REQ_EBX 1
-#define GHCB_CPUID_REQ_ECX 2
-#define GHCB_CPUID_REQ_EDX 3
-#define GHCB_CPUID_REQ(fn, reg) (GHCB_SEV_CPUID_REQ | \
- (((unsigned long)reg & 3) << 30) | \
- (((unsigned long)fn) << 32))
+#define GHCB_PROTO_OUR 0x0001UL
+#define GHCB_PROTOCOL_MAX 1ULL
+#define GHCB_DEFAULT_USAGE 0ULL
-#define GHCB_PROTOCOL_MAX 0x0001UL
-#define GHCB_DEFAULT_USAGE 0x0000UL
-
-#define GHCB_SEV_CPUID_RESP 0x005UL
-#define GHCB_SEV_TERMINATE 0x100UL
-#define GHCB_SEV_TERMINATE_REASON(reason_set, reason_val) \
- (((((u64)reason_set) & 0x7) << 12) | \
- ((((u64)reason_val) & 0xff) << 16))
-#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0
-#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1
-
-#define GHCB_SEV_GHCB_RESP_CODE(v) ((v) & 0xfff)
#define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); }
enum es_result {
diff --git a/arch/x86/include/asm/thermal.h b/arch/x86/include/asm/thermal.h
index ddbdefd5b94f..91a7b6687c3b 100644
--- a/arch/x86/include/asm/thermal.h
+++ b/arch/x86/include/asm/thermal.h
@@ -3,11 +3,13 @@
#define _ASM_X86_THERMAL_H
#ifdef CONFIG_X86_THERMAL_VECTOR
+void therm_lvt_init(void);
void intel_init_thermal(struct cpuinfo_x86 *c);
bool x86_thermal_enabled(void);
void intel_thermal_interrupt(void);
#else
-static inline void intel_init_thermal(struct cpuinfo_x86 *c) { }
+static inline void therm_lvt_init(void) { }
+static inline void intel_init_thermal(struct cpuinfo_x86 *c) { }
#endif
#endif /* _ASM_X86_THERMAL_H */
diff --git a/arch/x86/include/asm/vdso/clocksource.h b/arch/x86/include/asm/vdso/clocksource.h
index 119ac8612d89..136e5e57cfe1 100644
--- a/arch/x86/include/asm/vdso/clocksource.h
+++ b/arch/x86/include/asm/vdso/clocksource.h
@@ -7,4 +7,6 @@
VDSO_CLOCKMODE_PVCLOCK, \
VDSO_CLOCKMODE_HVCLOCK
+#define HAVE_VDSO_CLOCKMODE_HVCLOCK
+
#endif /* __ASM_VDSO_CLOCKSOURCE_H */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5a3022c8af82..0662f644aad9 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr {
__u16 flags;
} smm;
+ __u16 pad;
+
__u32 flags;
__u64 preemption_timer_deadline;
};
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0704c2a94272..0f66682ac02a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -20,7 +20,7 @@ CFLAGS_REMOVE_kvmclock.o = -pg
CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
CFLAGS_REMOVE_head64.o = -pg
-CFLAGS_REMOVE_sev-es.o = -pg
+CFLAGS_REMOVE_sev.o = -pg
endif
KASAN_SANITIZE_head$(BITS).o := n
@@ -28,7 +28,7 @@ KASAN_SANITIZE_dumpstack.o := n
KASAN_SANITIZE_dumpstack_$(BITS).o := n
KASAN_SANITIZE_stacktrace.o := n
KASAN_SANITIZE_paravirt.o := n
-KASAN_SANITIZE_sev-es.o := n
+KASAN_SANITIZE_sev.o := n
# With some compiler versions the generated code results in boot hangs, caused
# by several compilation units. To be safe, disable all instrumentation.
@@ -148,7 +148,7 @@ obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
-obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev-es.o
+obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 6974b5174495..6fe5b44fcbc9 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -183,41 +183,69 @@ done:
}
/*
+ * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
+ *
+ * @instr: instruction byte stream
+ * @instrlen: length of the above
+ * @off: offset within @instr where the first NOP has been detected
+ *
+ * Return: number of NOPs found (and replaced).
+ */
+static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
+{
+ unsigned long flags;
+ int i = off, nnops;
+
+ while (i < instrlen) {
+ if (instr[i] != 0x90)
+ break;
+
+ i++;
+ }
+
+ nnops = i - off;
+
+ if (nnops <= 1)
+ return nnops;
+
+ local_irq_save(flags);
+ add_nops(instr + off, nnops);
+ local_irq_restore(flags);
+
+ DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);
+
+ return nnops;
+}
+
+/*
* "noinline" to cause control flow change and thus invalidate I$ and
* cause refetch after modification.
*/
static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
{
- unsigned long flags;
struct insn insn;
- int nop, i = 0;
+ int i = 0;
/*
- * Jump over the non-NOP insns, the remaining bytes must be single-byte
- * NOPs, optimize them.
+ * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
+ * ones.
*/
for (;;) {
if (insn_decode_kernel(&insn, &instr[i]))
return;
+ /*
+ * See if this and any potentially following NOPs can be
+ * optimized.
+ */
if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
- break;
-
- if ((i += insn.length) >= a->instrlen)
- return;
- }
+ i += optimize_nops_range(instr, a->instrlen, i);
+ else
+ i += insn.length;
- for (nop = i; i < a->instrlen; i++) {
- if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
+ if (i >= a->instrlen)
return;
}
-
- local_irq_save(flags);
- add_nops(instr + nop, i - nop);
- local_irq_restore(flags);
-
- DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
- instr, nop, a->instrlen);
}
/*
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 4a39fb429f15..d262811ce14b 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2604,6 +2604,7 @@ static void __init apic_bsp_setup(bool upmode)
end_local_APIC_setup();
irq_remap_enable_fault_handling();
setup_IO_APIC();
+ lapic_update_legacy_vectors();
}
#ifdef CONFIG_UP_LATE_INIT
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 6dbdc7c22bb7..fb67ed5e7e6a 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -738,6 +738,26 @@ void lapic_assign_legacy_vector(unsigned int irq, bool replace)
irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace);
}
+void __init lapic_update_legacy_vectors(void)
+{
+ unsigned int i;
+
+ if (IS_ENABLED(CONFIG_X86_IO_APIC) && nr_ioapics > 0)
+ return;
+
+ /*
+ * If the IO/APIC is disabled via config, kernel command line or
+ * lack of enumeration then all legacy interrupts are routed
+ * through the PIC. Make sure that they are marked as legacy
+ * vectors. PIC_CASCADE_IRQ has already been marked in
+ * lapic_assign_system_vectors().
+ */
+ for (i = 0; i < nr_legacy_irqs(); i++) {
+ if (i != PIC_CASCADE_IR)
+ lapic_assign_legacy_vector(i, true);
+ }
+}
+
void __init lapic_assign_system_vectors(void)
{
unsigned int i, vector = 0;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 2d11384dc9ab..c06ac56eae4d 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -593,8 +593,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
*/
if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) {
/* Check if memory encryption is enabled */
- rdmsrl(MSR_K8_SYSCFG, msr);
- if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+ rdmsrl(MSR_AMD64_SYSCFG, msr);
+ if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
goto clear_all;
/*
@@ -1165,3 +1165,19 @@ void set_dr_addr_mask(unsigned long mask, int dr)
break;
}
}
+
+u32 amd_get_highest_perf(void)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) ||
+ (c->x86_model >= 0x70 && c->x86_model < 0x80)))
+ return 166;
+
+ if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) ||
+ (c->x86_model >= 0x40 && c->x86_model < 0x70)))
+ return 166;
+
+ return 255;
+}
+EXPORT_SYMBOL_GPL(amd_get_highest_perf);
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 0c3b372318b7..b5f43049fa5f 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -836,7 +836,7 @@ int __init amd_special_default_mtrr(void)
if (boot_cpu_data.x86 < 0xf)
return 0;
/* In case some hypervisor doesn't pass SYSCFG through: */
- if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
+ if (rdmsr_safe(MSR_AMD64_SYSCFG, &l, &h) < 0)
return 0;
/*
* Memory between 4GB and top of mem is forced WB by this magic bit.
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index b90f3f437765..558108296f3c 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -53,13 +53,13 @@ static inline void k8_check_syscfg_dram_mod_en(void)
(boot_cpu_data.x86 >= 0x0f)))
return;
- rdmsr(MSR_K8_SYSCFG, lo, hi);
+ rdmsr(MSR_AMD64_SYSCFG, lo, hi);
if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
" not cleared by BIOS, clearing this bit\n",
smp_processor_id());
lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
- mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi);
+ mtrr_wrmsr(MSR_AMD64_SYSCFG, lo, hi);
}
}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 3ef5868ac588..7aecb2fc3186 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -63,7 +63,7 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
case 15:
return msr - MSR_P4_BPU_PERFCTR0;
}
- fallthrough;
+ break;
case X86_VENDOR_ZHAOXIN:
case X86_VENDOR_CENTAUR:
return msr - MSR_ARCH_PERFMON_PERFCTR0;
@@ -96,7 +96,7 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
case 15:
return msr - MSR_P4_BSU_ESCR0;
}
- fallthrough;
+ break;
case X86_VENDOR_ZHAOXIN:
case X86_VENDOR_CENTAUR:
return msr - MSR_ARCH_PERFMON_EVENTSEL0;
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index a85c64000218..d0eef963aad1 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1402,60 +1402,3 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
return 0;
}
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
-
-#ifdef CONFIG_IOMMU_SUPPORT
-void update_pasid(void)
-{
- u64 pasid_state;
- u32 pasid;
-
- if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
- return;
-
- if (!current->mm)
- return;
-
- pasid = READ_ONCE(current->mm->pasid);
- /* Set the valid bit in the PASID MSR/state only for valid pasid. */
- pasid_state = pasid == PASID_DISABLED ?
- pasid : pasid | MSR_IA32_PASID_VALID;
-
- /*
- * No need to hold fregs_lock() since the task's fpstate won't
- * be changed by others (e.g. ptrace) while the task is being
- * switched to or is in IPI.
- */
- if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
- /* The MSR is active and can be directly updated. */
- wrmsrl(MSR_IA32_PASID, pasid_state);
- } else {
- struct fpu *fpu = &current->thread.fpu;
- struct ia32_pasid_state *ppasid_state;
- struct xregs_state *xsave;
-
- /*
- * The CPU's xstate registers are not currently active. Just
- * update the PASID state in the memory buffer here. The
- * PASID MSR will be loaded when returning to user mode.
- */
- xsave = &fpu->state.xsave;
- xsave->header.xfeatures |= XFEATURE_MASK_PASID;
- ppasid_state = get_xsave_addr(xsave, XFEATURE_PASID);
- /*
- * Since XFEATURE_MASK_PASID is set in xfeatures, ppasid_state
- * won't be NULL and no need to check its value.
- *
- * Only update the task's PASID state when it's different
- * from the mm's pasid.
- */
- if (ppasid_state->pasid != pasid_state) {
- /*
- * Invalid fpregs so that state restoring will pick up
- * the PASID state.
- */
- __fpu_invalidate_fpregs_state(fpu);
- ppasid_state->pasid = pasid_state;
- }
- }
-}
-#endif /* CONFIG_IOMMU_SUPPORT */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 18be44163a50..de01903c3735 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -39,7 +39,7 @@
#include <asm/realmode.h>
#include <asm/extable.h>
#include <asm/trapnr.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
/*
* Manage page tables very early on.
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index d307c22e5c18..a26643dc6bd6 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -26,6 +26,7 @@
#include <linux/kprobes.h>
#include <linux/nmi.h>
#include <linux/swait.h>
+#include <linux/syscore_ops.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
@@ -37,6 +38,7 @@
#include <asm/tlb.h>
#include <asm/cpuidle_haltpoll.h>
#include <asm/ptrace.h>
+#include <asm/reboot.h>
#include <asm/svm.h>
DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -345,7 +347,7 @@ static void kvm_guest_cpu_init(void)
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
- pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
+ pr_info("setup async PF for cpu %d\n", smp_processor_id());
}
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
@@ -371,34 +373,17 @@ static void kvm_pv_disable_apf(void)
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
__this_cpu_write(apf_reason.enabled, 0);
- pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
+ pr_info("disable async PF for cpu %d\n", smp_processor_id());
}
-static void kvm_pv_guest_cpu_reboot(void *unused)
+static void kvm_disable_steal_time(void)
{
- /*
- * We disable PV EOI before we load a new kernel by kexec,
- * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
- * New kernel can re-enable when it boots.
- */
- if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
- wrmsrl(MSR_KVM_PV_EOI_EN, 0);
- kvm_pv_disable_apf();
- kvm_disable_steal_time();
-}
+ if (!has_steal_clock)
+ return;
-static int kvm_pv_reboot_notify(struct notifier_block *nb,
- unsigned long code, void *unused)
-{
- if (code == SYS_RESTART)
- on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
- return NOTIFY_DONE;
+ wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
}
-static struct notifier_block kvm_pv_reboot_nb = {
- .notifier_call = kvm_pv_reboot_notify,
-};
-
static u64 kvm_steal_clock(int cpu)
{
u64 steal;
@@ -416,14 +401,6 @@ static u64 kvm_steal_clock(int cpu)
return steal;
}
-void kvm_disable_steal_time(void)
-{
- if (!has_steal_clock)
- return;
-
- wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
-}
-
static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
{
early_set_memory_decrypted((unsigned long) ptr, size);
@@ -451,6 +428,27 @@ static void __init sev_map_percpu_data(void)
}
}
+static void kvm_guest_cpu_offline(bool shutdown)
+{
+ kvm_disable_steal_time();
+ if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+ wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+ kvm_pv_disable_apf();
+ if (!shutdown)
+ apf_task_wake_all();
+ kvmclock_disable();
+}
+
+static int kvm_cpu_online(unsigned int cpu)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ kvm_guest_cpu_init();
+ local_irq_restore(flags);
+ return 0;
+}
+
#ifdef CONFIG_SMP
static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
@@ -635,31 +633,64 @@ static void __init kvm_smp_prepare_boot_cpu(void)
kvm_spinlock_init();
}
-static void kvm_guest_cpu_offline(void)
+static int kvm_cpu_down_prepare(unsigned int cpu)
{
- kvm_disable_steal_time();
- if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
- wrmsrl(MSR_KVM_PV_EOI_EN, 0);
- kvm_pv_disable_apf();
- apf_task_wake_all();
+ unsigned long flags;
+
+ local_irq_save(flags);
+ kvm_guest_cpu_offline(false);
+ local_irq_restore(flags);
+ return 0;
}
-static int kvm_cpu_online(unsigned int cpu)
+#endif
+
+static int kvm_suspend(void)
{
- local_irq_disable();
- kvm_guest_cpu_init();
- local_irq_enable();
+ kvm_guest_cpu_offline(false);
+
return 0;
}
-static int kvm_cpu_down_prepare(unsigned int cpu)
+static void kvm_resume(void)
{
- local_irq_disable();
- kvm_guest_cpu_offline();
- local_irq_enable();
- return 0;
+ kvm_cpu_online(raw_smp_processor_id());
+}
+
+static struct syscore_ops kvm_syscore_ops = {
+ .suspend = kvm_suspend,
+ .resume = kvm_resume,
+};
+
+static void kvm_pv_guest_cpu_reboot(void *unused)
+{
+ kvm_guest_cpu_offline(true);
+}
+
+static int kvm_pv_reboot_notify(struct notifier_block *nb,
+ unsigned long code, void *unused)
+{
+ if (code == SYS_RESTART)
+ on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
+ return NOTIFY_DONE;
}
+static struct notifier_block kvm_pv_reboot_nb = {
+ .notifier_call = kvm_pv_reboot_notify,
+};
+
+/*
+ * After a PV feature is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shutdown, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means a random memory location will be kept being written.
+ */
+#ifdef CONFIG_KEXEC_CORE
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+ kvm_guest_cpu_offline(true);
+ native_machine_crash_shutdown(regs);
+}
#endif
static void __init kvm_guest_init(void)
@@ -704,6 +735,12 @@ static void __init kvm_guest_init(void)
kvm_guest_cpu_init();
#endif
+#ifdef CONFIG_KEXEC_CORE
+ machine_ops.crash_shutdown = kvm_crash_shutdown;
+#endif
+
+ register_syscore_ops(&kvm_syscore_ops);
+
/*
* Hard lockup detection is enabled by default. Disable it, as guests
* can get false positives too easily, for example if the host is
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d37ed4e1d033..ad273e5861c1 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -20,7 +20,6 @@
#include <asm/hypervisor.h>
#include <asm/mem_encrypt.h>
#include <asm/x86_init.h>
-#include <asm/reboot.h>
#include <asm/kvmclock.h>
static int kvmclock __initdata = 1;
@@ -203,28 +202,9 @@ static void kvm_setup_secondary_clock(void)
}
#endif
-/*
- * After the clock is registered, the host will keep writing to the
- * registered memory location. If the guest happens to shutdown, this memory
- * won't be valid. In cases like kexec, in which you install a new kernel, this
- * means a random memory location will be kept being written. So before any
- * kind of shutdown from our side, we unregister the clock by writing anything
- * that does not have the 'enable' bit set in the msr
- */
-#ifdef CONFIG_KEXEC_CORE
-static void kvm_crash_shutdown(struct pt_regs *regs)
-{
- native_write_msr(msr_kvm_system_time, 0, 0);
- kvm_disable_steal_time();
- native_machine_crash_shutdown(regs);
-}
-#endif
-
-static void kvm_shutdown(void)
+void kvmclock_disable(void)
{
native_write_msr(msr_kvm_system_time, 0, 0);
- kvm_disable_steal_time();
- native_machine_shutdown();
}
static void __init kvmclock_init_mem(void)
@@ -351,10 +331,6 @@ void __init kvmclock_init(void)
#endif
x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
- machine_ops.shutdown = kvm_shutdown;
-#ifdef CONFIG_KEXEC_CORE
- machine_ops.crash_shutdown = kvm_crash_shutdown;
-#endif
kvm_get_preset_lpj();
/*
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index b5cb49e57df8..c94dec6a1834 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -95,7 +95,7 @@ static void get_fam10h_pci_mmconf_base(void)
return;
/* SYS_CFG */
- address = MSR_K8_SYSCFG;
+ address = MSR_AMD64_SYSCFG;
rdmsrl(address, val);
/* TOP_MEM2 is not enabled? */
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 2ef961cf4cfc..4bce802d25fb 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -33,7 +33,7 @@
#include <asm/reboot.h>
#include <asm/cache.h>
#include <asm/nospec-branch.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 72920af0b3c0..1e720626069a 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -44,6 +44,7 @@
#include <asm/pci-direct.h>
#include <asm/prom.h>
#include <asm/proto.h>
+#include <asm/thermal.h>
#include <asm/unwind.h>
#include <asm/vsyscall.h>
#include <linux/vmalloc.h>
@@ -637,11 +638,11 @@ static void __init trim_snb_memory(void)
* them from accessing certain memory ranges, namely anything below
* 1M and in the pages listed in bad_pages[] above.
*
- * To avoid these pages being ever accessed by SNB gfx devices
- * reserve all memory below the 1 MB mark and bad_pages that have
- * not already been reserved at boot time.
+ * To avoid these pages being ever accessed by SNB gfx devices reserve
+ * bad_pages that have not already been reserved at boot time.
+ * All memory below the 1 MB mark is anyway reserved later during
+ * setup_arch(), so there is no need to reserve it here.
*/
- memblock_reserve(0, 1<<20);
for (i = 0; i < ARRAY_SIZE(bad_pages); i++) {
if (memblock_reserve(bad_pages[i], PAGE_SIZE))
@@ -733,14 +734,14 @@ static void __init early_reserve_memory(void)
* The first 4Kb of memory is a BIOS owned area, but generally it is
* not listed as such in the E820 table.
*
- * Reserve the first memory page and typically some additional
- * memory (64KiB by default) since some BIOSes are known to corrupt
- * low memory. See the Kconfig help text for X86_RESERVE_LOW.
+ * Reserve the first 64K of memory since some BIOSes are known to
+ * corrupt low memory. After the real mode trampoline is allocated the
+ * rest of the memory below 640k is reserved.
*
* In addition, make sure page 0 is always reserved because on
* systems with L1TF its contents can be leaked to user processes.
*/
- memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE));
+ memblock_reserve(0, SZ_64K);
early_reserve_initrd();
@@ -751,6 +752,7 @@ static void __init early_reserve_memory(void)
reserve_ibft_region();
reserve_bios_regions();
+ trim_snb_memory();
}
/*
@@ -1081,14 +1083,20 @@ void __init setup_arch(char **cmdline_p)
(max_pfn_mapped<<PAGE_SHIFT) - 1);
#endif
- reserve_real_mode();
-
/*
- * Reserving memory causing GPU hangs on Sandy Bridge integrated
- * graphics devices should be done after we allocated memory under
- * 1M for the real mode trampoline.
+ * Find free memory for the real mode trampoline and place it
+ * there.
+ * If there is not enough free memory under 1M, on EFI-enabled
+ * systems there will be additional attempt to reclaim the memory
+ * for the real mode trampoline at efi_free_boot_services().
+ *
+ * Unconditionally reserve the entire first 1M of RAM because
+ * BIOSes are know to corrupt low memory and several
+ * hundred kilobytes are not worth complex detection what memory gets
+ * clobbered. Moreover, on machines with SandyBridge graphics or in
+ * setups that use crashkernel the entire 1M is reserved anyway.
*/
- trim_snb_memory();
+ reserve_real_mode();
init_mem_mapping();
@@ -1226,6 +1234,14 @@ void __init setup_arch(char **cmdline_p)
x86_init.timers.wallclock_init();
+ /*
+ * This needs to run before setup_local_APIC() which soft-disables the
+ * local APIC temporarily and that masks the thermal LVT interrupt,
+ * leading to softlockups on machines which have configured SMI
+ * interrupt delivery.
+ */
+ therm_lvt_init();
+
mcheck_init();
register_refined_jiffies(CLOCK_TICK_RATE);
diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-shared.c
index 0aa9f13efd57..9f90f460a28c 100644
--- a/arch/x86/kernel/sev-es-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -26,13 +26,13 @@ static bool __init sev_es_check_cpu_features(void)
static void __noreturn sev_es_terminate(unsigned int reason)
{
- u64 val = GHCB_SEV_TERMINATE;
+ u64 val = GHCB_MSR_TERM_REQ;
/*
* Tell the hypervisor what went wrong - only reason-set 0 is
* currently supported.
*/
- val |= GHCB_SEV_TERMINATE_REASON(0, reason);
+ val |= GHCB_SEV_TERM_REASON(0, reason);
/* Request Guest Termination from Hypvervisor */
sev_es_wr_ghcb_msr(val);
@@ -47,15 +47,15 @@ static bool sev_es_negotiate_protocol(void)
u64 val;
/* Do the GHCB protocol version negotiation */
- sev_es_wr_ghcb_msr(GHCB_SEV_INFO_REQ);
+ sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
VMGEXIT();
val = sev_es_rd_ghcb_msr();
- if (GHCB_INFO(val) != GHCB_SEV_INFO)
+ if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
return false;
- if (GHCB_PROTO_MAX(val) < GHCB_PROTO_OUR ||
- GHCB_PROTO_MIN(val) > GHCB_PROTO_OUR)
+ if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTO_OUR ||
+ GHCB_MSR_PROTO_MIN(val) > GHCB_PROTO_OUR)
return false;
return true;
@@ -63,6 +63,7 @@ static bool sev_es_negotiate_protocol(void)
static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
{
+ ghcb->save.sw_exit_code = 0;
memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}
@@ -153,28 +154,28 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX));
VMGEXIT();
val = sev_es_rd_ghcb_msr();
- if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+ if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
goto fail;
regs->ax = val >> 32;
sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX));
VMGEXIT();
val = sev_es_rd_ghcb_msr();
- if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+ if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
goto fail;
regs->bx = val >> 32;
sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX));
VMGEXIT();
val = sev_es_rd_ghcb_msr();
- if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+ if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
goto fail;
regs->cx = val >> 32;
sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX));
VMGEXIT();
val = sev_es_rd_ghcb_msr();
- if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+ if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
goto fail;
regs->dx = val >> 32;
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev.c
index 73873b007838..651b81cd648e 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev.c
@@ -22,7 +22,7 @@
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
#include <asm/insn-eval.h>
#include <asm/fpu/internal.h>
#include <asm/processor.h>
@@ -203,8 +203,18 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
if (unlikely(data->ghcb_active)) {
/* GHCB is already in use - save its contents */
- if (unlikely(data->backup_ghcb_active))
- return NULL;
+ if (unlikely(data->backup_ghcb_active)) {
+ /*
+ * Backup-GHCB is also already in use. There is no way
+ * to continue here so just kill the machine. To make
+ * panic() work, mark GHCBs inactive so that messages
+ * can be printed out.
+ */
+ data->ghcb_active = false;
+ data->backup_ghcb_active = false;
+
+ panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
+ }
/* Mark backup_ghcb active before writing to it */
data->backup_ghcb_active = true;
@@ -221,24 +231,6 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
return ghcb;
}
-static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
-{
- struct sev_es_runtime_data *data;
- struct ghcb *ghcb;
-
- data = this_cpu_read(runtime_data);
- ghcb = &data->ghcb_page;
-
- if (state->ghcb) {
- /* Restore GHCB from Backup */
- *ghcb = *state->ghcb;
- data->backup_ghcb_active = false;
- state->ghcb = NULL;
- } else {
- data->ghcb_active = false;
- }
-}
-
/* Needed in vc_early_forward_exception */
void do_early_exception(struct pt_regs *regs, int trapnr);
@@ -323,31 +315,44 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
u16 d2;
u8 d1;
- /* If instruction ran in kernel mode and the I/O buffer is in kernel space */
- if (!user_mode(ctxt->regs) && !access_ok(target, size)) {
- memcpy(dst, buf, size);
- return ES_OK;
- }
-
+ /*
+ * This function uses __put_user() independent of whether kernel or user
+ * memory is accessed. This works fine because __put_user() does no
+ * sanity checks of the pointer being accessed. All that it does is
+ * to report when the access failed.
+ *
+ * Also, this function runs in atomic context, so __put_user() is not
+ * allowed to sleep. The page-fault handler detects that it is running
+ * in atomic context and will not try to take mmap_sem and handle the
+ * fault, so additional pagefault_enable()/disable() calls are not
+ * needed.
+ *
+ * The access can't be done via copy_to_user() here because
+ * vc_write_mem() must not use string instructions to access unsafe
+ * memory. The reason is that MOVS is emulated by the #VC handler by
+ * splitting the move up into a read and a write and taking a nested #VC
+ * exception on whatever of them is the MMIO access. Using string
+ * instructions here would cause infinite nesting.
+ */
switch (size) {
case 1:
memcpy(&d1, buf, 1);
- if (put_user(d1, target))
+ if (__put_user(d1, target))
goto fault;
break;
case 2:
memcpy(&d2, buf, 2);
- if (put_user(d2, target))
+ if (__put_user(d2, target))
goto fault;
break;
case 4:
memcpy(&d4, buf, 4);
- if (put_user(d4, target))
+ if (__put_user(d4, target))
goto fault;
break;
case 8:
memcpy(&d8, buf, 8);
- if (put_user(d8, target))
+ if (__put_user(d8, target))
goto fault;
break;
default:
@@ -378,30 +383,43 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
u16 d2;
u8 d1;
- /* If instruction ran in kernel mode and the I/O buffer is in kernel space */
- if (!user_mode(ctxt->regs) && !access_ok(s, size)) {
- memcpy(buf, src, size);
- return ES_OK;
- }
-
+ /*
+ * This function uses __get_user() independent of whether kernel or user
+ * memory is accessed. This works fine because __get_user() does no
+ * sanity checks of the pointer being accessed. All that it does is
+ * to report when the access failed.
+ *
+ * Also, this function runs in atomic context, so __get_user() is not
+ * allowed to sleep. The page-fault handler detects that it is running
+ * in atomic context and will not try to take mmap_sem and handle the
+ * fault, so additional pagefault_enable()/disable() calls are not
+ * needed.
+ *
+ * The access can't be done via copy_from_user() here because
+ * vc_read_mem() must not use string instructions to access unsafe
+ * memory. The reason is that MOVS is emulated by the #VC handler by
+ * splitting the move up into a read and a write and taking a nested #VC
+ * exception on whatever of them is the MMIO access. Using string
+ * instructions here would cause infinite nesting.
+ */
switch (size) {
case 1:
- if (get_user(d1, s))
+ if (__get_user(d1, s))
goto fault;
memcpy(buf, &d1, 1);
break;
case 2:
- if (get_user(d2, s))
+ if (__get_user(d2, s))
goto fault;
memcpy(buf, &d2, 2);
break;
case 4:
- if (get_user(d4, s))
+ if (__get_user(d4, s))
goto fault;
memcpy(buf, &d4, 4);
break;
case 8:
- if (get_user(d8, s))
+ if (__get_user(d8, s))
goto fault;
memcpy(buf, &d8, 8);
break;
@@ -459,7 +477,30 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt
}
/* Include code shared with pre-decompression boot stage */
-#include "sev-es-shared.c"
+#include "sev-shared.c"
+
+static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
+{
+ struct sev_es_runtime_data *data;
+ struct ghcb *ghcb;
+
+ data = this_cpu_read(runtime_data);
+ ghcb = &data->ghcb_page;
+
+ if (state->ghcb) {
+ /* Restore GHCB from Backup */
+ *ghcb = *state->ghcb;
+ data->backup_ghcb_active = false;
+ state->ghcb = NULL;
+ } else {
+ /*
+ * Invalidate the GHCB so a VMGEXIT instruction issued
+ * from userspace won't appear to be valid.
+ */
+ vc_ghcb_invalidate(ghcb);
+ data->ghcb_active = false;
+ }
+}
void noinstr __sev_es_nmi_complete(void)
{
@@ -1255,6 +1296,10 @@ static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
case X86_TRAP_UD:
exc_invalid_op(ctxt->regs);
break;
+ case X86_TRAP_PF:
+ write_cr2(ctxt->fi.cr2);
+ exc_page_fault(ctxt->regs, error_code);
+ break;
case X86_TRAP_AC:
exc_alignment_check(ctxt->regs, error_code);
break;
@@ -1284,7 +1329,6 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
*/
DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
{
- struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
irqentry_state_t irq_state;
struct ghcb_state state;
struct es_em_ctxt ctxt;
@@ -1310,16 +1354,6 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
*/
ghcb = sev_es_get_ghcb(&state);
- if (!ghcb) {
- /*
- * Mark GHCBs inactive so that panic() is able to print the
- * message.
- */
- data->ghcb_active = false;
- data->backup_ghcb_active = false;
-
- panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
- }
vc_ghcb_invalidate(ghcb);
result = vc_init_em_ctxt(&ctxt, regs, error_code);
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index 0e5d0a7e203b..06743ec054d2 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -127,6 +127,9 @@ static inline void signal_compat_build_tests(void)
BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x10);
BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr) != 0x0C);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_trapno) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_trapno) != 0x10);
+
BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x18);
BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr_lsb) != 0x10);
@@ -138,8 +141,10 @@ static inline void signal_compat_build_tests(void)
BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20);
BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14);
- BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x18);
- BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x18);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x20);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_data) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_type) != 0x14);
CHECK_CSI_OFFSET(_sigpoll);
CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int));
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0ad5214f598a..7770245cc7fa 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -2043,7 +2043,7 @@ static bool amd_set_max_freq_ratio(void)
return false;
}
- highest_perf = perf_caps.highest_perf;
+ highest_perf = amd_get_highest_perf();
nominal_perf = perf_caps.nominal_perf;
if (!highest_perf || !nominal_perf) {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 19606a341888..b4da665bb892 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -458,7 +458,7 @@ void kvm_set_cpu_caps(void)
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
- F(SGX_LC)
+ F(SGX_LC) | F(BUS_LOCK_DETECT)
);
/* Set LA57 based on hardware capability. */
if (cpuid_ecx(7) & F(LA57))
@@ -567,6 +567,21 @@ void kvm_set_cpu_caps(void)
F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
F(PMM) | F(PMM_EN)
);
+
+ /*
+ * Hide RDTSCP and RDPID if either feature is reported as supported but
+ * probing MSR_TSC_AUX failed. This is purely a sanity check and
+ * should never happen, but the guest will likely crash if RDTSCP or
+ * RDPID is misreported, and KVM has botched MSR_TSC_AUX emulation in
+ * the past. For example, the sanity check may fire if this instance of
+ * KVM is running as L1 on top of an older, broken KVM.
+ */
+ if (WARN_ON((kvm_cpu_cap_has(X86_FEATURE_RDTSCP) ||
+ kvm_cpu_cap_has(X86_FEATURE_RDPID)) &&
+ !kvm_is_supported_user_return_msr(MSR_TSC_AUX))) {
+ kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
+ kvm_cpu_cap_clear(X86_FEATURE_RDPID);
+ }
}
EXPORT_SYMBOL_GPL(kvm_set_cpu_caps);
@@ -637,8 +652,10 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
case 7:
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
entry->eax = 0;
- entry->ecx = F(RDPID);
+ if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
+ entry->ecx = F(RDPID);
++array->nent;
+ break;
default:
break;
}
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 77e1c89a95a7..5e5de05a8fbf 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4502,7 +4502,7 @@ static const struct opcode group8[] = {
* from the register case of group9.
*/
static const struct gprefix pfx_0f_c7_7 = {
- N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp),
+ N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
};
@@ -5111,7 +5111,7 @@ done:
return rc;
}
-int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
+int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
{
int rc = X86EMUL_CONTINUE;
int mode = ctxt->mode;
@@ -5322,7 +5322,8 @@ done_prefixes:
ctxt->execute = opcode.u.execute;
- if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD)))
+ if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
+ likely(!(ctxt->d & EmulateOnUD)))
return EMULATION_FAILED;
if (unlikely(ctxt->d &
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index f98370a39936..f00830e5202f 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1172,6 +1172,7 @@ void kvm_hv_invalidate_tsc_page(struct kvm *kvm)
{
struct kvm_hv *hv = to_kvm_hv(kvm);
u64 gfn;
+ int idx;
if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET ||
@@ -1190,9 +1191,16 @@ void kvm_hv_invalidate_tsc_page(struct kvm *kvm)
gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
hv->tsc_ref.tsc_sequence = 0;
+
+ /*
+ * Take the srcu lock as memslots will be accessed to check the gfn
+ * cache generation against the memslots generation.
+ */
+ idx = srcu_read_lock(&kvm->srcu);
if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
+ srcu_read_unlock(&kvm->srcu, idx);
out_unlock:
mutex_unlock(&hv->hv_lock);
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 0d359115429a..3e870bf9ca4d 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -314,7 +314,6 @@ struct x86_emulate_ctxt {
int interruptibility;
bool perm_ok; /* do not check permissions if true */
- bool ud; /* inject an #UD if host doesn't support insn */
bool tf; /* TF value before instruction (after for syscall/sysret) */
bool have_exception;
@@ -468,6 +467,7 @@ enum x86_intercept {
x86_intercept_clgi,
x86_intercept_skinit,
x86_intercept_rdtscp,
+ x86_intercept_rdpid,
x86_intercept_icebp,
x86_intercept_wbinvd,
x86_intercept_monitor,
@@ -490,7 +490,7 @@ enum x86_intercept {
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
#endif
-int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
+int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type);
bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
#define EMULATION_FAILED -1
#define EMULATION_OK 0
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 152591f9243a..17fa4ab1b834 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1410,6 +1410,9 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
if (!apic_x2apic_mode(apic))
valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
+ if (alignment + len > 4)
+ return 1;
+
if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
return 1;
@@ -1494,6 +1497,15 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
static void cancel_hv_timer(struct kvm_lapic *apic);
+static void cancel_apic_timer(struct kvm_lapic *apic)
+{
+ hrtimer_cancel(&apic->lapic_timer.timer);
+ preempt_disable();
+ if (apic->lapic_timer.hv_timer_in_use)
+ cancel_hv_timer(apic);
+ preempt_enable();
+}
+
static void apic_update_lvtt(struct kvm_lapic *apic)
{
u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
@@ -1502,11 +1514,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
if (apic->lapic_timer.timer_mode != timer_mode) {
if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
APIC_LVT_TIMER_TSCDEADLINE)) {
- hrtimer_cancel(&apic->lapic_timer.timer);
- preempt_disable();
- if (apic->lapic_timer.hv_timer_in_use)
- cancel_hv_timer(apic);
- preempt_enable();
+ cancel_apic_timer(apic);
kvm_lapic_set_reg(apic, APIC_TMICT, 0);
apic->lapic_timer.period = 0;
apic->lapic_timer.tscdeadline = 0;
@@ -1598,11 +1606,19 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
+ if (lapic_timer_advance_dynamic) {
+ adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
+ /*
+ * If the timer fired early, reread the TSC to account for the
+ * overhead of the above adjustment to avoid waiting longer
+ * than is necessary.
+ */
+ if (guest_tsc < tsc_deadline)
+ guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+ }
+
if (guest_tsc < tsc_deadline)
__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
-
- if (lapic_timer_advance_dynamic)
- adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
}
void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
@@ -1661,7 +1677,7 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
}
atomic_inc(&apic->lapic_timer.pending);
- kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+ kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
if (from_timer_fn)
kvm_vcpu_kick(vcpu);
}
@@ -1913,8 +1929,8 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
if (!apic->lapic_timer.hv_timer_in_use)
goto out;
WARN_ON(rcuwait_active(&vcpu->wait));
- cancel_hv_timer(apic);
apic_timer_expired(apic, false);
+ cancel_hv_timer(apic);
if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
advance_periodic_target_expiration(apic);
@@ -2084,7 +2100,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
if (apic_lvtt_tscdeadline(apic))
break;
- hrtimer_cancel(&apic->lapic_timer.timer);
+ cancel_apic_timer(apic);
kvm_lapic_set_reg(apic, APIC_TMICT, val);
start_apic_timer(apic);
break;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4b3ee244ebe0..8d5876dfc6b7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3310,12 +3310,12 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) {
pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
- if (WARN_ON_ONCE(!mmu->lm_root)) {
+ if (WARN_ON_ONCE(!mmu->pml4_root)) {
r = -EIO;
goto out_unlock;
}
- mmu->lm_root[0] = __pa(mmu->pae_root) | pm_mask;
+ mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask;
}
for (i = 0; i < 4; ++i) {
@@ -3335,7 +3335,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
}
if (mmu->shadow_root_level == PT64_ROOT_4LEVEL)
- mmu->root_hpa = __pa(mmu->lm_root);
+ mmu->root_hpa = __pa(mmu->pml4_root);
else
mmu->root_hpa = __pa(mmu->pae_root);
@@ -3350,7 +3350,7 @@ out_unlock:
static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *mmu = vcpu->arch.mmu;
- u64 *lm_root, *pae_root;
+ u64 *pml4_root, *pae_root;
/*
* When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP
@@ -3369,14 +3369,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
if (WARN_ON_ONCE(mmu->shadow_root_level != PT64_ROOT_4LEVEL))
return -EIO;
- if (mmu->pae_root && mmu->lm_root)
+ if (mmu->pae_root && mmu->pml4_root)
return 0;
/*
* The special roots should always be allocated in concert. Yell and
* bail if KVM ends up in a state where only one of the roots is valid.
*/
- if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->lm_root))
+ if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root))
return -EIO;
/*
@@ -3387,14 +3387,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
if (!pae_root)
return -ENOMEM;
- lm_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
- if (!lm_root) {
+ pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+ if (!pml4_root) {
free_page((unsigned long)pae_root);
return -ENOMEM;
}
mmu->pae_root = pae_root;
- mmu->lm_root = lm_root;
+ mmu->pml4_root = pml4_root;
return 0;
}
@@ -4739,9 +4739,33 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
context->inject_page_fault = kvm_inject_page_fault;
}
+static union kvm_mmu_role kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu)
+{
+ union kvm_mmu_role role = kvm_calc_shadow_root_page_role_common(vcpu, false);
+
+ /*
+ * Nested MMUs are used only for walking L2's gva->gpa, they never have
+ * shadow pages of their own and so "direct" has no meaning. Set it
+ * to "true" to try to detect bogus usage of the nested MMU.
+ */
+ role.base.direct = true;
+
+ if (!is_paging(vcpu))
+ role.base.level = 0;
+ else if (is_long_mode(vcpu))
+ role.base.level = is_la57_mode(vcpu) ? PT64_ROOT_5LEVEL :
+ PT64_ROOT_4LEVEL;
+ else if (is_pae(vcpu))
+ role.base.level = PT32E_ROOT_LEVEL;
+ else
+ role.base.level = PT32_ROOT_LEVEL;
+
+ return role;
+}
+
static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
{
- union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false);
+ union kvm_mmu_role new_role = kvm_calc_nested_mmu_role(vcpu);
struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
if (new_role.as_u64 == g_context->mmu_role.as_u64)
@@ -5261,7 +5285,7 @@ static void free_mmu_pages(struct kvm_mmu *mmu)
if (!tdp_enabled && mmu->pae_root)
set_memory_encrypted((unsigned long)mmu->pae_root, 1);
free_page((unsigned long)mmu->pae_root);
- free_page((unsigned long)mmu->lm_root);
+ free_page((unsigned long)mmu->pml4_root);
}
static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 70b7e44e3035..823a5919f9fa 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -90,8 +90,8 @@ struct guest_walker {
gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
bool pte_writable[PT_MAX_FULL_LEVELS];
- unsigned pt_access;
- unsigned pte_access;
+ unsigned int pt_access[PT_MAX_FULL_LEVELS];
+ unsigned int pte_access;
gfn_t gfn;
struct x86_exception fault;
};
@@ -418,13 +418,15 @@ retry_walk:
}
walker->ptes[walker->level - 1] = pte;
+
+ /* Convert to ACC_*_MASK flags for struct guest_walker. */
+ walker->pt_access[walker->level - 1] = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
} while (!is_last_gpte(mmu, walker->level, pte));
pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
/* Convert to ACC_*_MASK flags for struct guest_walker. */
- walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask);
errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
if (unlikely(errcode))
@@ -463,7 +465,8 @@ retry_walk:
}
pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
- __func__, (u64)pte, walker->pte_access, walker->pt_access);
+ __func__, (u64)pte, walker->pte_access,
+ walker->pt_access[walker->level - 1]);
return 1;
error:
@@ -643,7 +646,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
struct kvm_mmu_page *sp = NULL;
struct kvm_shadow_walk_iterator it;
- unsigned direct_access, access = gw->pt_access;
+ unsigned int direct_access, access;
int top_level, level, req_level, ret;
gfn_t base_gfn = gw->gfn;
@@ -675,6 +678,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
sp = NULL;
if (!is_shadow_present_pte(*it.sptep)) {
table_gfn = gw->table_gfn[it.level - 2];
+ access = gw->pt_access[it.level - 2];
sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
false, access);
}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 88f69a6cc492..237317b1eddd 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -388,7 +388,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
}
/**
- * handle_changed_spte - handle bookkeeping associated with an SPTE change
+ * __handle_changed_spte - handle bookkeeping associated with an SPTE change
* @kvm: kvm instance
* @as_id: the address space of the paging structure the SPTE was a part of
* @gfn: the base GFN that was mapped by the SPTE
@@ -444,6 +444,13 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);
+ if (is_large_pte(old_spte) != is_large_pte(new_spte)) {
+ if (is_large_pte(old_spte))
+ atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages);
+ else
+ atomic64_add(1, (atomic64_t*)&kvm->stat.lpages);
+ }
+
/*
* The only times a SPTE should be changed from a non-present to
* non-present state is when an MMIO entry is installed/modified/
@@ -1009,6 +1016,14 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
}
if (!is_shadow_present_pte(iter.old_spte)) {
+ /*
+ * If SPTE has been forzen by another thread, just
+ * give up and retry, avoiding unnecessary page table
+ * allocation and free.
+ */
+ if (is_removed_spte(iter.old_spte))
+ break;
+
sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
child_pt = sp->spt;
@@ -1177,9 +1192,9 @@ bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
}
/*
- * Remove write access from all the SPTEs mapping GFNs [start, end). If
- * skip_4k is set, SPTEs that map 4k pages, will not be write-protected.
- * Returns true if an SPTE has been changed and the TLBs need to be flushed.
+ * Remove write access from all SPTEs at or above min_level that map GFNs
+ * [start, end). Returns true if an SPTE has been changed and the TLBs need to
+ * be flushed.
*/
static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t start, gfn_t end, int min_level)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 712b4e0de481..5e7e920113f3 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -28,10 +28,8 @@
#include "svm.h"
/* enable / disable AVIC */
-int avic;
-#ifdef CONFIG_X86_LOCAL_APIC
-module_param(avic, int, S_IRUGO);
-#endif
+bool avic;
+module_param(avic, bool, S_IRUGO);
#define SVM_AVIC_DOORBELL 0xc001011b
@@ -223,7 +221,7 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
return &avic_physical_id_table[index];
}
-/**
+/*
* Note:
* AVIC hardware walks the nested page table to check permissions,
* but does not use the SPA address specified in the leaf page
@@ -766,7 +764,7 @@ out:
return ret;
}
-/**
+/*
* Note:
* The HW cannot support posting multicast/broadcast
* interrupts to a vCPU. So, we still use legacy interrupt
@@ -1007,7 +1005,7 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
-/**
+/*
* This function is called during VCPU halt/unhalt.
*/
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 540d43ba2cf4..5e8d8443154e 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -764,7 +764,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
svm_switch_vmcb(svm, &svm->vmcb01);
- WARN_ON_ONCE(svm->vmcb->control.exit_code != SVM_EXIT_VMRUN);
/*
* On vmexit the GIF is set to false and
@@ -872,6 +871,15 @@ void svm_free_nested(struct vcpu_svm *svm)
__free_page(virt_to_page(svm->nested.vmcb02.ptr));
svm->nested.vmcb02.ptr = NULL;
+ /*
+ * When last_vmcb12_gpa matches the current vmcb12 gpa,
+ * some vmcb12 fields are not loaded if they are marked clean
+ * in the vmcb12, since in this case they are up to date already.
+ *
+ * When the vmcb02 is freed, this optimization becomes invalid.
+ */
+ svm->nested.last_vmcb12_gpa = INVALID_GPA;
+
svm->nested.initialized = false;
}
@@ -884,9 +892,11 @@ void svm_leave_nested(struct vcpu_svm *svm)
if (is_guest_mode(vcpu)) {
svm->nested.nested_run_pending = 0;
+ svm->nested.vmcb12_gpa = INVALID_GPA;
+
leave_guest_mode(vcpu);
- svm_switch_vmcb(svm, &svm->nested.vmcb02);
+ svm_switch_vmcb(svm, &svm->vmcb01);
nested_svm_uninit_mmu_context(vcpu);
vmcb_mark_all_dirty(svm->vmcb);
@@ -1298,12 +1308,17 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
* L2 registers if needed are moved from the current VMCB to VMCB02.
*/
+ if (is_guest_mode(vcpu))
+ svm_leave_nested(svm);
+ else
+ svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
+
+ svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
+
svm->nested.nested_run_pending =
!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
- if (svm->current_vmcb == &svm->vmcb01)
- svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
svm->vmcb01.ptr->save.es = save->es;
svm->vmcb01.ptr->save.cs = save->cs;
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 1356ee095cd5..8d36f0c73071 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -199,9 +199,19 @@ static void sev_asid_free(struct kvm_sev_info *sev)
sev->misc_cg = NULL;
}
-static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+static void sev_decommission(unsigned int handle)
{
struct sev_data_decommission decommission;
+
+ if (!handle)
+ return;
+
+ decommission.handle = handle;
+ sev_guest_decommission(&decommission, NULL);
+}
+
+static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+{
struct sev_data_deactivate deactivate;
if (!handle)
@@ -214,9 +224,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
sev_guest_deactivate(&deactivate, NULL);
up_read(&sev_deactivate_lock);
- /* decommission handle */
- decommission.handle = handle;
- sev_guest_decommission(&decommission, NULL);
+ sev_decommission(handle);
}
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
@@ -341,8 +349,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Bind ASID to this guest */
ret = sev_bind_asid(kvm, start.handle, error);
- if (ret)
+ if (ret) {
+ sev_decommission(start.handle);
goto e_free_session;
+ }
/* return handle to userspace */
params.handle = start.handle;
@@ -763,7 +773,7 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
}
static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
- unsigned long __user dst_uaddr,
+ void __user *dst_uaddr,
unsigned long dst_paddr,
int size, int *err)
{
@@ -787,8 +797,7 @@ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
if (tpage) {
offset = paddr & 15;
- if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
- page_address(tpage) + offset, size))
+ if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size))
ret = -EFAULT;
}
@@ -800,9 +809,9 @@ e_free:
}
static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
- unsigned long __user vaddr,
+ void __user *vaddr,
unsigned long dst_paddr,
- unsigned long __user dst_vaddr,
+ void __user *dst_vaddr,
int size, int *error)
{
struct page *src_tpage = NULL;
@@ -810,13 +819,12 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
int ret, len = size;
/* If source buffer is not aligned then use an intermediate buffer */
- if (!IS_ALIGNED(vaddr, 16)) {
+ if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
src_tpage = alloc_page(GFP_KERNEL);
if (!src_tpage)
return -ENOMEM;
- if (copy_from_user(page_address(src_tpage),
- (void __user *)(uintptr_t)vaddr, size)) {
+ if (copy_from_user(page_address(src_tpage), vaddr, size)) {
__free_page(src_tpage);
return -EFAULT;
}
@@ -830,7 +838,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
* - copy the source buffer in an intermediate buffer
* - use the intermediate buffer as source buffer
*/
- if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
+ if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
int dst_offset;
dst_tpage = alloc_page(GFP_KERNEL);
@@ -855,7 +863,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
page_address(src_tpage), size);
else {
if (copy_from_user(page_address(dst_tpage) + dst_offset,
- (void __user *)(uintptr_t)vaddr, size)) {
+ vaddr, size)) {
ret = -EFAULT;
goto e_free;
}
@@ -935,15 +943,15 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
if (dec)
ret = __sev_dbg_decrypt_user(kvm,
__sme_page_pa(src_p[0]) + s_off,
- dst_vaddr,
+ (void __user *)dst_vaddr,
__sme_page_pa(dst_p[0]) + d_off,
len, &argp->error);
else
ret = __sev_dbg_encrypt_user(kvm,
__sme_page_pa(src_p[0]) + s_off,
- vaddr,
+ (void __user *)vaddr,
__sme_page_pa(dst_p[0]) + d_off,
- dst_vaddr,
+ (void __user *)dst_vaddr,
len, &argp->error);
sev_unpin_memory(kvm, src_p, n);
@@ -1105,10 +1113,9 @@ __sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp,
struct sev_data_send_start data;
int ret;
+ memset(&data, 0, sizeof(data));
data.handle = sev->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
- if (ret < 0)
- return ret;
params->session_len = data.session_len;
if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
@@ -1217,10 +1224,9 @@ __sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp,
struct sev_data_send_update_data data;
int ret;
+ memset(&data, 0, sizeof(data));
data.handle = sev->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
- if (ret < 0)
- return ret;
params->hdr_len = data.hdr_len;
params->trans_len = data.trans_len;
@@ -1764,7 +1770,8 @@ e_mirror_unlock:
e_source_unlock:
mutex_unlock(&source_kvm->lock);
e_source_put:
- fput(source_kvm_file);
+ if (source_kvm_file)
+ fput(source_kvm_file);
return ret;
}
@@ -2198,7 +2205,7 @@ vmgexit_err:
return -EINVAL;
}
-static void pre_sev_es_run(struct vcpu_svm *svm)
+void sev_es_unmap_ghcb(struct vcpu_svm *svm)
{
if (!svm->ghcb)
return;
@@ -2234,9 +2241,6 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
int asid = sev_get_asid(svm->vcpu.kvm);
- /* Perform any SEV-ES pre-run actions */
- pre_sev_es_run(svm);
-
/* Assign the asid allocated with this SEV guest */
svm->asid = asid;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b649f92287a2..e088086f3de6 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -212,7 +212,7 @@ DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
* RDTSCP and RDPID are not used in the kernel, specifically to allow KVM to
* defer the restoration of TSC_AUX until the CPU returns to userspace.
*/
-#define TSC_AUX_URET_SLOT 0
+static int tsc_aux_uret_slot __read_mostly = -1;
static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
@@ -447,6 +447,11 @@ static int has_svm(void)
return 0;
}
+ if (pgtable_l5_enabled()) {
+ pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n");
+ return 0;
+ }
+
return 1;
}
@@ -858,8 +863,8 @@ static __init void svm_adjust_mmio_mask(void)
return;
/* If memory encryption is not enabled, use existing mask */
- rdmsrl(MSR_K8_SYSCFG, msr);
- if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+ rdmsrl(MSR_AMD64_SYSCFG, msr);
+ if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
return;
enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
@@ -959,8 +964,7 @@ static __init int svm_hardware_setup(void)
kvm_tsc_scaling_ratio_frac_bits = 32;
}
- if (boot_cpu_has(X86_FEATURE_RDTSCP))
- kvm_define_user_return_msr(TSC_AUX_URET_SLOT, MSR_TSC_AUX);
+ tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
/* Check for pause filtering support */
if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
@@ -1006,9 +1010,7 @@ static __init int svm_hardware_setup(void)
}
if (avic) {
- if (!npt_enabled ||
- !boot_cpu_has(X86_FEATURE_AVIC) ||
- !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) {
+ if (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC)) {
avic = false;
} else {
pr_info("AVIC enabled\n");
@@ -1100,7 +1102,9 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
return svm->vmcb->control.tsc_offset;
}
-static void svm_check_invpcid(struct vcpu_svm *svm)
+/* Evaluate instruction intercepts that depend on guest CPUID features. */
+static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
+ struct vcpu_svm *svm)
{
/*
* Intercept INVPCID if shadow paging is enabled to sync/free shadow
@@ -1113,6 +1117,13 @@ static void svm_check_invpcid(struct vcpu_svm *svm)
else
svm_clr_intercept(svm, INTERCEPT_INVPCID);
}
+
+ if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) {
+ if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+ svm_clr_intercept(svm, INTERCEPT_RDTSCP);
+ else
+ svm_set_intercept(svm, INTERCEPT_RDTSCP);
+ }
}
static void init_vmcb(struct kvm_vcpu *vcpu)
@@ -1235,8 +1246,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm->current_vmcb->asid_generation = 0;
svm->asid = 0;
- svm->nested.vmcb12_gpa = 0;
- svm->nested.last_vmcb12_gpa = 0;
+ svm->nested.vmcb12_gpa = INVALID_GPA;
+ svm->nested.last_vmcb12_gpa = INVALID_GPA;
vcpu->arch.hflags = 0;
if (!kvm_pause_in_guest(vcpu->kvm)) {
@@ -1248,7 +1259,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm_clr_intercept(svm, INTERCEPT_PAUSE);
}
- svm_check_invpcid(svm);
+ svm_recalc_instruction_intercepts(vcpu, svm);
/*
* If the host supports V_SPEC_CTRL then disable the interception
@@ -1424,6 +1435,9 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
+ if (sev_es_guest(vcpu->kvm))
+ sev_es_unmap_ghcb(svm);
+
if (svm->guest_state_loaded)
return;
@@ -1445,8 +1459,8 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
}
}
- if (static_cpu_has(X86_FEATURE_RDTSCP))
- kvm_set_user_return_msr(TSC_AUX_URET_SLOT, svm->tsc_aux, -1ull);
+ if (likely(tsc_aux_uret_slot >= 0))
+ kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
svm->guest_state_loaded = true;
}
@@ -2655,11 +2669,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
break;
case MSR_TSC_AUX:
- if (!boot_cpu_has(X86_FEATURE_RDTSCP))
- return 1;
- if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
- return 1;
msr_info->data = svm->tsc_aux;
break;
/*
@@ -2876,30 +2885,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
break;
case MSR_TSC_AUX:
- if (!boot_cpu_has(X86_FEATURE_RDTSCP))
- return 1;
-
- if (!msr->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
- return 1;
-
- /*
- * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
- * incomplete and conflicting architectural behavior. Current
- * AMD CPUs completely ignore bits 63:32, i.e. they aren't
- * reserved and always read as zeros. Emulate AMD CPU behavior
- * to avoid explosions if the vCPU is migrated from an AMD host
- * to an Intel host.
- */
- data = (u32)data;
-
/*
* TSC_AUX is usually changed only during boot and never read
* directly. Intercept TSC_AUX instead of exposing it to the
* guest via direct_access_msrs, and switch it via user return.
*/
preempt_disable();
- r = kvm_set_user_return_msr(TSC_AUX_URET_SLOT, data, -1ull);
+ r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
preempt_enable();
if (r)
return 1;
@@ -3084,6 +3076,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[SVM_EXIT_STGI] = stgi_interception,
[SVM_EXIT_CLGI] = clgi_interception,
[SVM_EXIT_SKINIT] = skinit_interception,
+ [SVM_EXIT_RDTSCP] = kvm_handle_invalid_op,
[SVM_EXIT_WBINVD] = kvm_emulate_wbinvd,
[SVM_EXIT_MONITOR] = kvm_emulate_monitor,
[SVM_EXIT_MWAIT] = kvm_emulate_mwait,
@@ -3972,8 +3965,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
- /* Check again if INVPCID interception if required */
- svm_check_invpcid(svm);
+ svm_recalc_instruction_intercepts(vcpu, svm);
/* For sev guests, the memory encryption bit is not reserved in CR3. */
if (sev_guest(vcpu->kvm)) {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 84b3133c2251..2908c6ab5bb4 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -20,6 +20,7 @@
#include <linux/bits.h>
#include <asm/svm.h>
+#include <asm/sev-common.h>
#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
@@ -479,7 +480,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
-extern int avic;
+extern bool avic;
static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
{
@@ -525,40 +526,9 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
/* sev.c */
-#define GHCB_VERSION_MAX 1ULL
-#define GHCB_VERSION_MIN 1ULL
-
-#define GHCB_MSR_INFO_POS 0
-#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1)
-
-#define GHCB_MSR_SEV_INFO_RESP 0x001
-#define GHCB_MSR_SEV_INFO_REQ 0x002
-#define GHCB_MSR_VER_MAX_POS 48
-#define GHCB_MSR_VER_MAX_MASK 0xffff
-#define GHCB_MSR_VER_MIN_POS 32
-#define GHCB_MSR_VER_MIN_MASK 0xffff
-#define GHCB_MSR_CBIT_POS 24
-#define GHCB_MSR_CBIT_MASK 0xff
-#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \
- ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \
- (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \
- (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \
- GHCB_MSR_SEV_INFO_RESP)
-
-#define GHCB_MSR_CPUID_REQ 0x004
-#define GHCB_MSR_CPUID_RESP 0x005
-#define GHCB_MSR_CPUID_FUNC_POS 32
-#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff
-#define GHCB_MSR_CPUID_VALUE_POS 32
-#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff
-#define GHCB_MSR_CPUID_REG_POS 30
-#define GHCB_MSR_CPUID_REG_MASK 0x3
-
-#define GHCB_MSR_TERM_REQ 0x100
-#define GHCB_MSR_TERM_REASON_SET_POS 12
-#define GHCB_MSR_TERM_REASON_SET_MASK 0xf
-#define GHCB_MSR_TERM_REASON_POS 16
-#define GHCB_MSR_TERM_REASON_MASK 0xff
+#define GHCB_VERSION_MAX 1ULL
+#define GHCB_VERSION_MIN 1ULL
+
extern unsigned int max_sev_asid;
@@ -581,6 +551,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm);
void sev_es_create_vcpu(struct vcpu_svm *svm);
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu);
+void sev_es_unmap_ghcb(struct vcpu_svm *svm);
/* vmenter.S */
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index a61c015870e3..4f839148948b 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1550,16 +1550,16 @@ TRACE_EVENT(kvm_nested_vmenter_failed,
TP_ARGS(msg, err),
TP_STRUCT__entry(
- __field(const char *, msg)
+ __string(msg, msg)
__field(u32, err)
),
TP_fast_assign(
- __entry->msg = msg;
+ __assign_str(msg, msg);
__entry->err = err;
),
- TP_printk("%s%s", __entry->msg, !__entry->err ? "" :
+ TP_printk("%s%s", __get_str(msg), !__entry->err ? "" :
__print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS))
);
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index d1d77985e889..aa0e7872fcc9 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -90,8 +90,7 @@ static inline bool cpu_has_vmx_preemption_timer(void)
static inline bool cpu_has_vmx_posted_intr(void)
{
- return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
- vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
+ return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
}
static inline bool cpu_has_load_ia32_efer(void)
@@ -398,6 +397,9 @@ static inline u64 vmx_supported_debugctl(void)
{
u64 debugctl = 0;
+ if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+ debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
+
if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
debugctl |= DEBUGCTLMSR_LBR_MASK;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bced76637823..6058a65a6ede 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3098,15 +3098,8 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
nested_vmx_handle_enlightened_vmptrld(vcpu, false);
if (evmptrld_status == EVMPTRLD_VMFAIL ||
- evmptrld_status == EVMPTRLD_ERROR) {
- pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
- __func__);
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror =
- KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
+ evmptrld_status == EVMPTRLD_ERROR)
return false;
- }
}
return true;
@@ -3194,8 +3187,16 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
{
- if (!nested_get_evmcs_page(vcpu))
+ if (!nested_get_evmcs_page(vcpu)) {
+ pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
+ __func__);
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror =
+ KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+
return false;
+ }
if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))
return false;
@@ -4435,7 +4436,15 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
/* Similarly, triple faults in L2 should never escape. */
WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));
- kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+ if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
+ /*
+ * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map
+ * Enlightened VMCS after migration and we still need to
+ * do that when something is forcing L2->L1 exit prior to
+ * the first L2 run.
+ */
+ (void)nested_get_evmcs_page(vcpu);
+ }
/* Service the TLB flush request for L2 before switching to L1. */
if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index 459748680daf..5f81ef092bd4 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -238,6 +238,20 @@ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
/*
+ * Bail out of the block loop if the VM has an assigned
+ * device, but the blocking vCPU didn't reconfigure the
+ * PI.NV to the wakeup vector, i.e. the assigned device
+ * came along after the initial check in pi_pre_block().
+ */
+void vmx_pi_start_assignment(struct kvm *kvm)
+{
+ if (!irq_remapping_cap(IRQ_POSTING_CAP))
+ return;
+
+ kvm_make_all_cpus_request(kvm, KVM_REQ_UNBLOCK);
+}
+
+/*
* pi_update_irte - set IRTE for Posted-Interrupts
*
* @kvm: kvm
diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
index 0bdc41391c5b..7f7b2326caf5 100644
--- a/arch/x86/kvm/vmx/posted_intr.h
+++ b/arch/x86/kvm/vmx/posted_intr.h
@@ -95,5 +95,6 @@ void __init pi_init_cpu(int cpu);
bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
bool set);
+void vmx_pi_start_assignment(struct kvm *kvm);
#endif /* __KVM_X86_VMX_POSTED_INTR_H */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d000cddbd734..c2a779b688e6 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -455,21 +455,6 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
static unsigned long host_idt_base;
-/*
- * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
- * will emulate SYSCALL in legacy mode if the vendor string in guest
- * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
- * support this emulation, IA32_STAR must always be included in
- * vmx_uret_msrs_list[], even in i386 builds.
- */
-static const u32 vmx_uret_msrs_list[] = {
-#ifdef CONFIG_X86_64
- MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
-#endif
- MSR_EFER, MSR_TSC_AUX, MSR_STAR,
- MSR_IA32_TSX_CTRL,
-};
-
#if IS_ENABLED(CONFIG_HYPERV)
static bool __read_mostly enlightened_vmcs = true;
module_param(enlightened_vmcs, bool, 0444);
@@ -697,21 +682,11 @@ static bool is_valid_passthrough_msr(u32 msr)
return r;
}
-static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
-{
- int i;
-
- for (i = 0; i < vmx->nr_uret_msrs; ++i)
- if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr)
- return i;
- return -1;
-}
-
struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
{
int i;
- i = __vmx_find_uret_msr(vmx, msr);
+ i = kvm_find_user_return_msr(msr);
if (i >= 0)
return &vmx->guest_uret_msrs[i];
return NULL;
@@ -720,13 +695,14 @@ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
struct vmx_uret_msr *msr, u64 data)
{
+ unsigned int slot = msr - vmx->guest_uret_msrs;
int ret = 0;
u64 old_msr_data = msr->data;
msr->data = data;
- if (msr - vmx->guest_uret_msrs < vmx->nr_active_uret_msrs) {
+ if (msr->load_into_hardware) {
preempt_disable();
- ret = kvm_set_user_return_msr(msr->slot, msr->data, msr->mask);
+ ret = kvm_set_user_return_msr(slot, msr->data, msr->mask);
preempt_enable();
if (ret)
msr->data = old_msr_data;
@@ -1078,7 +1054,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx)
return false;
}
- i = __vmx_find_uret_msr(vmx, MSR_EFER);
+ i = kvm_find_user_return_msr(MSR_EFER);
if (i < 0)
return false;
@@ -1240,11 +1216,14 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
*/
if (!vmx->guest_uret_msrs_loaded) {
vmx->guest_uret_msrs_loaded = true;
- for (i = 0; i < vmx->nr_active_uret_msrs; ++i)
- kvm_set_user_return_msr(vmx->guest_uret_msrs[i].slot,
+ for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+ if (!vmx->guest_uret_msrs[i].load_into_hardware)
+ continue;
+
+ kvm_set_user_return_msr(i,
vmx->guest_uret_msrs[i].data,
vmx->guest_uret_msrs[i].mask);
-
+ }
}
if (vmx->nested.need_vmcs12_to_shadow_sync)
@@ -1751,19 +1730,16 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
vmx_clear_hlt(vcpu);
}
-static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)
+static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr,
+ bool load_into_hardware)
{
- struct vmx_uret_msr tmp;
- int from, to;
+ struct vmx_uret_msr *uret_msr;
- from = __vmx_find_uret_msr(vmx, msr);
- if (from < 0)
+ uret_msr = vmx_find_uret_msr(vmx, msr);
+ if (!uret_msr)
return;
- to = vmx->nr_active_uret_msrs++;
- tmp = vmx->guest_uret_msrs[to];
- vmx->guest_uret_msrs[to] = vmx->guest_uret_msrs[from];
- vmx->guest_uret_msrs[from] = tmp;
+ uret_msr->load_into_hardware = load_into_hardware;
}
/*
@@ -1773,29 +1749,42 @@ static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)
*/
static void setup_msrs(struct vcpu_vmx *vmx)
{
- vmx->guest_uret_msrs_loaded = false;
- vmx->nr_active_uret_msrs = 0;
#ifdef CONFIG_X86_64
+ bool load_syscall_msrs;
+
/*
* The SYSCALL MSRs are only needed on long mode guests, and only
* when EFER.SCE is set.
*/
- if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) {
- vmx_setup_uret_msr(vmx, MSR_STAR);
- vmx_setup_uret_msr(vmx, MSR_LSTAR);
- vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK);
- }
+ load_syscall_msrs = is_long_mode(&vmx->vcpu) &&
+ (vmx->vcpu.arch.efer & EFER_SCE);
+
+ vmx_setup_uret_msr(vmx, MSR_STAR, load_syscall_msrs);
+ vmx_setup_uret_msr(vmx, MSR_LSTAR, load_syscall_msrs);
+ vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK, load_syscall_msrs);
#endif
- if (update_transition_efer(vmx))
- vmx_setup_uret_msr(vmx, MSR_EFER);
+ vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx));
- if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
- vmx_setup_uret_msr(vmx, MSR_TSC_AUX);
+ vmx_setup_uret_msr(vmx, MSR_TSC_AUX,
+ guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||
+ guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID));
- vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL);
+ /*
+ * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations of new
+ * kernel and old userspace. If those guests run on a tsx=off host, do
+ * allow guests to use TSX_CTRL, but don't change the value in hardware
+ * so that TSX remains always disabled.
+ */
+ vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM));
if (cpu_has_vmx_msr_bitmap())
vmx_update_msr_bitmap(&vmx->vcpu);
+
+ /*
+ * The set of MSRs to load may have changed, reload MSRs before the
+ * next VM-Enter.
+ */
+ vmx->guest_uret_msrs_loaded = false;
}
static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
@@ -1993,11 +1982,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
else
msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
break;
- case MSR_TSC_AUX:
- if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
- return 1;
- goto find_uret_msr;
case MSR_IA32_DEBUGCTLMSR:
msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
break;
@@ -2031,6 +2015,9 @@ static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu)
if (!intel_pmu_lbr_is_enabled(vcpu))
debugctl &= ~DEBUGCTLMSR_LBR_MASK;
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
+ debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
+
return debugctl;
}
@@ -2313,14 +2300,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
else
vmx->pt_desc.guest.addr_a[index / 2] = data;
break;
- case MSR_TSC_AUX:
- if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
- return 1;
- /* Check reserved bit, higher 32 bits should be zero */
- if ((data >> 32) != 0)
- return 1;
- goto find_uret_msr;
case MSR_IA32_PERF_CAPABILITIES:
if (data && !vcpu_to_pmu(vcpu)->version)
return 1;
@@ -4369,7 +4348,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
xsaves_enabled, false);
}
- vmx_adjust_sec_exec_feature(vmx, &exec_control, rdtscp, RDTSCP);
+ /*
+ * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either
+ * feature is exposed to the guest. This creates a virtualization hole
+ * if both are supported in hardware but only one is exposed to the
+ * guest, but letting the guest execute RDTSCP or RDPID when either one
+ * is advertised is preferable to emulating the advertised instruction
+ * in KVM on #UD, and obviously better than incorrectly injecting #UD.
+ */
+ if (cpu_has_vmx_rdtscp()) {
+ bool rdpid_or_rdtscp_enabled =
+ guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
+ guest_cpuid_has(vcpu, X86_FEATURE_RDPID);
+
+ vmx_adjust_secondary_exec_control(vmx, &exec_control,
+ SECONDARY_EXEC_ENABLE_RDTSCP,
+ rdpid_or_rdtscp_enabled, false);
+ }
vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
@@ -4848,7 +4843,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_run *kvm_run = vcpu->run;
u32 intr_info, ex_no, error_code;
- unsigned long cr2, rip, dr6;
+ unsigned long cr2, dr6;
u32 vect_info;
vect_info = vmx->idt_vectoring_info;
@@ -4938,8 +4933,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
vmx->vcpu.arch.event_exit_inst_len =
vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
kvm_run->exit_reason = KVM_EXIT_DEBUG;
- rip = kvm_rip_read(vcpu);
- kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
+ kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
kvm_run->debug.arch.exception = ex_no;
break;
case AC_VECTOR:
@@ -6253,6 +6247,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
switch (kvm_get_apic_mode(vcpu)) {
case LAPIC_MODE_INVALID:
WARN_ONCE(true, "Invalid local APIC state");
+ break;
case LAPIC_MODE_DISABLED:
break;
case LAPIC_MODE_XAPIC:
@@ -6855,6 +6850,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
{
+ struct vmx_uret_msr *tsx_ctrl;
struct vcpu_vmx *vmx;
int i, cpu, err;
@@ -6877,43 +6873,19 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
goto free_vpid;
}
- BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
-
- for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
- u32 index = vmx_uret_msrs_list[i];
- u32 data_low, data_high;
- int j = vmx->nr_uret_msrs;
-
- if (rdmsr_safe(index, &data_low, &data_high) < 0)
- continue;
- if (wrmsr_safe(index, data_low, data_high) < 0)
- continue;
-
- vmx->guest_uret_msrs[j].slot = i;
- vmx->guest_uret_msrs[j].data = 0;
- switch (index) {
- case MSR_IA32_TSX_CTRL:
- /*
- * TSX_CTRL_CPUID_CLEAR is handled in the CPUID
- * interception. Keep the host value unchanged to avoid
- * changing CPUID bits under the host kernel's feet.
- *
- * hle=0, rtm=0, tsx_ctrl=1 can be found with some
- * combinations of new kernel and old userspace. If
- * those guests run on a tsx=off host, do allow guests
- * to use TSX_CTRL, but do not change the value on the
- * host so that TSX remains always disabled.
- */
- if (boot_cpu_has(X86_FEATURE_RTM))
- vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
- else
- vmx->guest_uret_msrs[j].mask = 0;
- break;
- default:
- vmx->guest_uret_msrs[j].mask = -1ull;
- break;
- }
- ++vmx->nr_uret_msrs;
+ for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+ vmx->guest_uret_msrs[i].data = 0;
+ vmx->guest_uret_msrs[i].mask = -1ull;
+ }
+ if (boot_cpu_has(X86_FEATURE_RTM)) {
+ /*
+ * TSX_CTRL_CPUID_CLEAR is handled in the CPUID interception.
+ * Keep the host value unchanged to avoid changing CPUID bits
+ * under the host kernel's feet.
+ */
+ tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
+ if (tsx_ctrl)
+ vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
}
err = alloc_loaded_vmcs(&vmx->vmcs01);
@@ -7344,9 +7316,11 @@ static __init void vmx_set_cpu_caps(void)
if (!cpu_has_vmx_xsaves())
kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
- /* CPUID 0x80000001 */
- if (!cpu_has_vmx_rdtscp())
+ /* CPUID 0x80000001 and 0x7 (RDPID) */
+ if (!cpu_has_vmx_rdtscp()) {
kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
+ kvm_cpu_cap_clear(X86_FEATURE_RDPID);
+ }
if (cpu_has_vmx_waitpkg())
kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
@@ -7402,8 +7376,9 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
/*
* RDPID causes #UD if disabled through secondary execution controls.
* Because it is marked as EmulateOnUD, we need to intercept it here.
+ * Note, RDPID is hidden behind ENABLE_RDTSCP.
*/
- case x86_intercept_rdtscp:
+ case x86_intercept_rdpid:
if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) {
exception->vector = UD_VECTOR;
exception->error_code_valid = false;
@@ -7746,6 +7721,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.nested_ops = &vmx_nested_ops,
.update_pi_irte = pi_update_irte,
+ .start_assignment = vmx_pi_start_assignment,
#ifdef CONFIG_X86_64
.set_hv_timer = vmx_set_hv_timer,
@@ -7769,17 +7745,42 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
};
+static __init void vmx_setup_user_return_msrs(void)
+{
+
+ /*
+ * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
+ * will emulate SYSCALL in legacy mode if the vendor string in guest
+ * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
+ * support this emulation, MSR_STAR is included in the list for i386,
+ * but is never loaded into hardware. MSR_CSTAR is also never loaded
+ * into hardware and is here purely for emulation purposes.
+ */
+ const u32 vmx_uret_msrs_list[] = {
+ #ifdef CONFIG_X86_64
+ MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
+ #endif
+ MSR_EFER, MSR_TSC_AUX, MSR_STAR,
+ MSR_IA32_TSX_CTRL,
+ };
+ int i;
+
+ BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
+
+ for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
+ kvm_add_user_return_msr(vmx_uret_msrs_list[i]);
+}
+
static __init int hardware_setup(void)
{
unsigned long host_bndcfgs;
struct desc_ptr dt;
- int r, i, ept_lpage_level;
+ int r, ept_lpage_level;
store_idt(&dt);
host_idt_base = dt.address;
- for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
- kvm_define_user_return_msr(i, vmx_uret_msrs_list[i]);
+ vmx_setup_user_return_msrs();
if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
return -EIO;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 008cb87ff088..16e4e457ba23 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -36,7 +36,7 @@ struct vmx_msrs {
};
struct vmx_uret_msr {
- unsigned int slot; /* The MSR's slot in kvm_user_return_msrs. */
+ bool load_into_hardware;
u64 data;
u64 mask;
};
@@ -245,8 +245,16 @@ struct vcpu_vmx {
u32 idt_vectoring_info;
ulong rflags;
+ /*
+ * User return MSRs are always emulated when enabled in the guest, but
+ * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
+ * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to
+ * be loaded into hardware if those conditions aren't met.
+ * nr_active_uret_msrs tracks the number of MSRs that need to be loaded
+ * into hardware when running the guest. guest_uret_msrs[] is resorted
+ * whenever the number of "active" uret MSRs is modified.
+ */
struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
- int nr_uret_msrs;
int nr_active_uret_msrs;
bool guest_uret_msrs_loaded;
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6eda2834fc05..e0f4a46649d7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -184,11 +184,6 @@ module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
*/
#define KVM_MAX_NR_USER_RETURN_MSRS 16
-struct kvm_user_return_msrs_global {
- int nr;
- u32 msrs[KVM_MAX_NR_USER_RETURN_MSRS];
-};
-
struct kvm_user_return_msrs {
struct user_return_notifier urn;
bool registered;
@@ -198,7 +193,9 @@ struct kvm_user_return_msrs {
} values[KVM_MAX_NR_USER_RETURN_MSRS];
};
-static struct kvm_user_return_msrs_global __read_mostly user_return_msrs_global;
+u32 __read_mostly kvm_nr_uret_msrs;
+EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs);
+static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS];
static struct kvm_user_return_msrs __percpu *user_return_msrs;
#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
@@ -330,23 +327,53 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
user_return_notifier_unregister(urn);
}
local_irq_restore(flags);
- for (slot = 0; slot < user_return_msrs_global.nr; ++slot) {
+ for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) {
values = &msrs->values[slot];
if (values->host != values->curr) {
- wrmsrl(user_return_msrs_global.msrs[slot], values->host);
+ wrmsrl(kvm_uret_msrs_list[slot], values->host);
values->curr = values->host;
}
}
}
-void kvm_define_user_return_msr(unsigned slot, u32 msr)
+static int kvm_probe_user_return_msr(u32 msr)
+{
+ u64 val;
+ int ret;
+
+ preempt_disable();
+ ret = rdmsrl_safe(msr, &val);
+ if (ret)
+ goto out;
+ ret = wrmsrl_safe(msr, val);
+out:
+ preempt_enable();
+ return ret;
+}
+
+int kvm_add_user_return_msr(u32 msr)
{
- BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS);
- user_return_msrs_global.msrs[slot] = msr;
- if (slot >= user_return_msrs_global.nr)
- user_return_msrs_global.nr = slot + 1;
+ BUG_ON(kvm_nr_uret_msrs >= KVM_MAX_NR_USER_RETURN_MSRS);
+
+ if (kvm_probe_user_return_msr(msr))
+ return -1;
+
+ kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr;
+ return kvm_nr_uret_msrs++;
+}
+EXPORT_SYMBOL_GPL(kvm_add_user_return_msr);
+
+int kvm_find_user_return_msr(u32 msr)
+{
+ int i;
+
+ for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+ if (kvm_uret_msrs_list[i] == msr)
+ return i;
+ }
+ return -1;
}
-EXPORT_SYMBOL_GPL(kvm_define_user_return_msr);
+EXPORT_SYMBOL_GPL(kvm_find_user_return_msr);
static void kvm_user_return_msr_cpu_online(void)
{
@@ -355,8 +382,8 @@ static void kvm_user_return_msr_cpu_online(void)
u64 value;
int i;
- for (i = 0; i < user_return_msrs_global.nr; ++i) {
- rdmsrl_safe(user_return_msrs_global.msrs[i], &value);
+ for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+ rdmsrl_safe(kvm_uret_msrs_list[i], &value);
msrs->values[i].host = value;
msrs->values[i].curr = value;
}
@@ -371,7 +398,7 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
value = (value & mask) | (msrs->values[slot].host & ~mask);
if (value == msrs->values[slot].curr)
return 0;
- err = wrmsrl_safe(user_return_msrs_global.msrs[slot], value);
+ err = wrmsrl_safe(kvm_uret_msrs_list[slot], value);
if (err)
return 1;
@@ -1149,6 +1176,9 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
fixed |= DR6_RTM;
+
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
+ fixed |= DR6_BUS_LOCK;
return fixed;
}
@@ -1615,6 +1645,30 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
* invokes 64-bit SYSENTER.
*/
data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
+ break;
+ case MSR_TSC_AUX:
+ if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
+ return 1;
+
+ if (!host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
+ return 1;
+
+ /*
+ * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
+ * incomplete and conflicting architectural behavior. Current
+ * AMD CPUs completely ignore bits 63:32, i.e. they aren't
+ * reserved and always read as zeros. Enforce Intel's reserved
+ * bits check if and only if the guest CPU is Intel, and clear
+ * the bits in all other cases. This ensures cross-vendor
+ * migration will provide consistent behavior for the guest.
+ */
+ if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)
+ return 1;
+
+ data = (u32)data;
+ break;
}
msr.data = data;
@@ -1651,6 +1705,18 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
return KVM_MSR_RET_FILTERED;
+ switch (index) {
+ case MSR_TSC_AUX:
+ if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
+ return 1;
+
+ if (!host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
+ return 1;
+ break;
+ }
+
msr.index = index;
msr.host_initiated = host_initiated;
@@ -3006,6 +3072,19 @@ static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
{
++vcpu->stat.tlb_flush;
+
+ if (!tdp_enabled) {
+ /*
+ * A TLB flush on behalf of the guest is equivalent to
+ * INVPCID(all), toggling CR4.PGE, etc., which requires
+ * a forced sync of the shadow page tables. Unload the
+ * entire MMU here and the subsequent load will sync the
+ * shadow page tables, and also flush the TLB.
+ */
+ kvm_mmu_unload(vcpu);
+ return;
+ }
+
static_call(kvm_x86_tlb_flush_guest)(vcpu);
}
@@ -3035,10 +3114,14 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
* expensive IPIs.
*/
if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
+ u8 st_preempted = xchg(&st->preempted, 0);
+
trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
- st->preempted & KVM_VCPU_FLUSH_TLB);
- if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
+ st_preempted & KVM_VCPU_FLUSH_TLB);
+ if (st_preempted & KVM_VCPU_FLUSH_TLB)
kvm_vcpu_flush_tlb_guest(vcpu);
+ } else {
+ st->preempted = 0;
}
vcpu->arch.st.preempted = 0;
@@ -3402,7 +3485,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_LASTBRANCHTOIP:
case MSR_IA32_LASTINTFROMIP:
case MSR_IA32_LASTINTTOIP:
- case MSR_K8_SYSCFG:
+ case MSR_AMD64_SYSCFG:
case MSR_K8_TSEG_ADDR:
case MSR_K8_TSEG_MASK:
case MSR_VM_HSAVE_PA:
@@ -5468,14 +5551,18 @@ static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
struct kvm_msr_filter_range *user_range)
{
- struct msr_bitmap_range range;
unsigned long *bitmap = NULL;
size_t bitmap_size;
- int r;
if (!user_range->nmsrs)
return 0;
+ if (user_range->flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE))
+ return -EINVAL;
+
+ if (!user_range->flags)
+ return -EINVAL;
+
bitmap_size = BITS_TO_LONGS(user_range->nmsrs) * sizeof(long);
if (!bitmap_size || bitmap_size > KVM_MSR_FILTER_MAX_BITMAP_SIZE)
return -EINVAL;
@@ -5484,31 +5571,15 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
if (IS_ERR(bitmap))
return PTR_ERR(bitmap);
- range = (struct msr_bitmap_range) {
+ msr_filter->ranges[msr_filter->count] = (struct msr_bitmap_range) {
.flags = user_range->flags,
.base = user_range->base,
.nmsrs = user_range->nmsrs,
.bitmap = bitmap,
};
- if (range.flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) {
- r = -EINVAL;
- goto err;
- }
-
- if (!range.flags) {
- r = -EINVAL;
- goto err;
- }
-
- /* Everything ok, add this range identifier. */
- msr_filter->ranges[msr_filter->count] = range;
msr_filter->count++;
-
return 0;
-err:
- kfree(bitmap);
- return r;
}
static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
@@ -5937,7 +6008,8 @@ static void kvm_init_msr_list(void)
continue;
break;
case MSR_TSC_AUX:
- if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
+ if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP) &&
+ !kvm_cpu_cap_has(X86_FEATURE_RDPID))
continue;
break;
case MSR_IA32_UMWAIT_CONTROL:
@@ -7034,7 +7106,10 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
{
- emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
+ vcpu->arch.hflags = emul_flags;
+ kvm_mmu_reset_context(vcpu);
}
static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
@@ -7171,6 +7246,11 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
+ ctxt->interruptibility = 0;
+ ctxt->have_exception = false;
+ ctxt->exception.vector = -1;
+ ctxt->perm_ok = false;
+
init_decode_cache(ctxt);
vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
}
@@ -7506,14 +7586,7 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
kvm_vcpu_check_breakpoint(vcpu, &r))
return r;
- ctxt->interruptibility = 0;
- ctxt->have_exception = false;
- ctxt->exception.vector = -1;
- ctxt->perm_ok = false;
-
- ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
-
- r = x86_decode_insn(ctxt, insn, insn_len);
+ r = x86_decode_insn(ctxt, insn, insn_len, emulation_type);
trace_kvm_emulate_insn_start(vcpu);
++vcpu->stat.insn_emulation;
@@ -8040,6 +8113,18 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
/*
+ * Indirection to move queue_work() out of the tk_core.seq write held
+ * region to prevent possible deadlocks against time accessors which
+ * are invoked with work related locks held.
+ */
+static void pvclock_irq_work_fn(struct irq_work *w)
+{
+ queue_work(system_long_wq, &pvclock_gtod_work);
+}
+
+static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn);
+
+/*
* Notification about pvclock gtod data update.
*/
static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
@@ -8050,13 +8135,14 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
update_pvclock_gtod(tk);
- /* disable master clock if host does not trust, or does not
- * use, TSC based clocksource.
+ /*
+ * Disable master clock if host does not trust, or does not use,
+ * TSC based clocksource. Delegate queue_work() to irq_work as
+ * this is invoked with tk_core.seq write held.
*/
if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
atomic_read(&kvm_guest_has_master_clock) != 0)
- queue_work(system_long_wq, &pvclock_gtod_work);
-
+ irq_work_queue(&pvclock_irq_work);
return 0;
}
@@ -8118,6 +8204,7 @@ int kvm_arch_init(void *opaque)
printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");
goto out_free_x86_emulator_cache;
}
+ kvm_nr_uret_msrs = 0;
r = kvm_mmu_module_init();
if (r)
@@ -8168,10 +8255,13 @@ void kvm_arch_exit(void)
cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
#ifdef CONFIG_X86_64
pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+ irq_work_sync(&pvclock_irq_work);
+ cancel_work_sync(&pvclock_gtod_work);
#endif
kvm_x86_ops.hardware_enable = NULL;
kvm_mmu_module_exit();
free_percpu(user_return_msrs);
+ kmem_cache_destroy(x86_emulator_cache);
kmem_cache_destroy(x86_fpu_cache);
#ifdef CONFIG_KVM_XEN
static_key_deferred_flush(&kvm_xen_enabled);
@@ -8289,6 +8379,9 @@ static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
vcpu->stat.directed_yield_attempted++;
+ if (single_task_running())
+ goto no_yield;
+
rcu_read_lock();
map = rcu_dereference(vcpu->kvm->arch.apic_map);
@@ -9425,7 +9518,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
if (r <= 0)
break;
- kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu);
+ kvm_clear_request(KVM_REQ_UNBLOCK, vcpu);
if (kvm_cpu_has_pending_timer(vcpu))
kvm_inject_pending_timer_irqs(vcpu);
@@ -10044,8 +10137,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
kvm_update_dr7(vcpu);
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
- get_segment_base(vcpu, VCPU_SREG_CS);
+ vcpu->arch.singlestep_rip = kvm_get_linear_rip(vcpu);
/*
* Trigger an rflags update that will inject or remove the trace
@@ -11428,7 +11520,8 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
void kvm_arch_start_assignment(struct kvm *kvm)
{
- atomic_inc(&kvm->arch.assigned_device_count);
+ if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1)
+ static_call_cond(kvm_x86_start_assignment)(kvm);
}
EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index b93d6cd08a7f..121921b2927c 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -5,7 +5,7 @@
#include <xen/xen.h>
#include <asm/fpu/internal.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
#include <asm/traps.h>
#include <asm/kdebug.h>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1c548ad00752..6bda7f67d737 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -836,8 +836,8 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
if (si_code == SEGV_PKUERR)
force_sig_pkuerr((void __user *)address, pkey);
-
- force_sig_fault(SIGSEGV, si_code, (void __user *)address);
+ else
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address);
local_irq_disable();
}
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 04aba7e80a36..470b20208430 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -504,10 +504,6 @@ void __init sme_enable(struct boot_params *bp)
#define AMD_SME_BIT BIT(0)
#define AMD_SEV_BIT BIT(1)
- /* Check the SEV MSR whether SEV or SME is enabled */
- sev_status = __rdmsr(MSR_AMD64_SEV);
- feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
-
/*
* Check for the SME/SEV feature:
* CPUID Fn8000_001F[EAX]
@@ -519,17 +515,22 @@ void __init sme_enable(struct boot_params *bp)
eax = 0x8000001f;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
- if (!(eax & feature_mask))
+ /* Check whether SEV or SME is supported */
+ if (!(eax & (AMD_SEV_BIT | AMD_SME_BIT)))
return;
me_mask = 1UL << (ebx & 0x3f);
+ /* Check the SEV MSR whether SEV or SME is enabled */
+ sev_status = __rdmsr(MSR_AMD64_SEV);
+ feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
+
/* Check if memory encryption is enabled */
if (feature_mask == AMD_SME_BIT) {
/*
* No SME if Hypervisor bit is set. This check is here to
* prevent a guest from trying to enable SME. For running as a
- * KVM guest the MSR_K8_SYSCFG will be sufficient, but there
+ * KVM guest the MSR_AMD64_SYSCFG will be sufficient, but there
* might be other hypervisors which emulate that MSR as non-zero
* or even pass it through to the guest.
* A malicious hypervisor can still trick a guest into this
@@ -542,8 +543,8 @@ void __init sme_enable(struct boot_params *bp)
return;
/* For SME, check the SYSCFG MSR */
- msr = __rdmsr(MSR_K8_SYSCFG);
- if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+ msr = __rdmsr(MSR_AMD64_SYSCFG);
+ if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
return;
} else {
/* SEV state cannot be controlled by a command line option */
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index ae744b6a0785..dd40d3fea74e 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -284,7 +284,7 @@ static int __init early_root_info_init(void)
/* need to take out [4G, TOM2) for RAM*/
/* SYS_CFG */
- address = MSR_K8_SYSCFG;
+ address = MSR_AMD64_SYSCFG;
rdmsrl(address, val);
/* TOP_MEM2 is enabled? */
if (val & (1<<21)) {
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 02dc64625e64..2edd86649468 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -779,4 +779,48 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
+#define RS690_LOWER_TOP_OF_DRAM2 0x30
+#define RS690_LOWER_TOP_OF_DRAM2_VALID 0x1
+#define RS690_UPPER_TOP_OF_DRAM2 0x31
+#define RS690_HTIU_NB_INDEX 0xA8
+#define RS690_HTIU_NB_INDEX_WR_ENABLE 0x100
+#define RS690_HTIU_NB_DATA 0xAC
+
+/*
+ * Some BIOS implementations support RAM above 4GB, but do not configure the
+ * PCI host to respond to bus master accesses for these addresses. These
+ * implementations set the TOP_OF_DRAM_SLOT1 register correctly, so PCI DMA
+ * works as expected for addresses below 4GB.
+ *
+ * Reference: "AMD RS690 ASIC Family Register Reference Guide" (pg. 2-57)
+ * https://www.amd.com/system/files/TechDocs/43372_rs690_rrg_3.00o.pdf
+ */
+static void rs690_fix_64bit_dma(struct pci_dev *pdev)
+{
+ u32 val = 0;
+ phys_addr_t top_of_dram = __pa(high_memory - 1) + 1;
+
+ if (top_of_dram <= (1ULL << 32))
+ return;
+
+ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+ RS690_LOWER_TOP_OF_DRAM2);
+ pci_read_config_dword(pdev, RS690_HTIU_NB_DATA, &val);
+
+ if (val)
+ return;
+
+ pci_info(pdev, "Adjusting top of DRAM to %pa for 64-bit DMA support\n", &top_of_dram);
+
+ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+ RS690_UPPER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE);
+ pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, top_of_dram >> 32);
+
+ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+ RS690_LOWER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE);
+ pci_write_config_dword(pdev, RS690_HTIU_NB_DATA,
+ top_of_dram | RS690_LOWER_TOP_OF_DRAM2_VALID);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma);
+
#endif
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index df7b5477fc4f..7515e78ef898 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -47,7 +47,7 @@
#include <asm/realmode.h>
#include <asm/time.h>
#include <asm/pgalloc.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
/*
* We allocate runtime services regions top-down, starting from -4G, i.e.
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 7850111008a8..b15ebfe40a73 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -450,6 +450,18 @@ void __init efi_free_boot_services(void)
size -= rm_size;
}
+ /*
+ * Don't free memory under 1M for two reasons:
+ * - BIOS might clobber it
+ * - Crash kernel needs it to be reserved
+ */
+ if (start + size < SZ_1M)
+ continue;
+ if (start < SZ_1M) {
+ size -= (SZ_1M - start);
+ start = SZ_1M;
+ }
+
memblock_free_late(start, size);
}
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 1be71ef5e4c4..6534c92d0f83 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -9,7 +9,7 @@
#include <asm/realmode.h>
#include <asm/tlbflush.h>
#include <asm/crash.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
struct real_mode_header *real_mode_header;
u32 *trampoline_cr4_features;
@@ -29,14 +29,16 @@ void __init reserve_real_mode(void)
/* Has to be under 1M so we can execute real-mode AP code. */
mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
- if (!mem) {
+ if (!mem)
pr_info("No sub-1M memory is available for the trampoline\n");
- return;
- }
+ else
+ set_real_mode_mem(mem);
- memblock_reserve(mem, size);
- set_real_mode_mem(mem);
- crash_reserve_low_1M();
+ /*
+ * Unconditionally reserve the entire fisrt 1M, see comment in
+ * setup_arch().
+ */
+ memblock_reserve(0, SZ_1M);
}
static void sme_sev_setup_real_mode(struct trampoline_header *th)
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index 84c5d1b33d10..cc8391f86cdb 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -123,9 +123,9 @@ SYM_CODE_START(startup_32)
*/
btl $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
jnc .Ldone
- movl $MSR_K8_SYSCFG, %ecx
+ movl $MSR_AMD64_SYSCFG, %ecx
rdmsr
- bts $MSR_K8_SYSCFG_MEM_ENCRYPT_BIT, %eax
+ bts $MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT, %eax
jc .Ldone
/*
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 17503fed2017..e87699aa2dc8 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1273,16 +1273,16 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* Get mfn list */
xen_build_dynamic_phys_to_machine();
+ /* Work out if we support NX */
+ get_cpu_cap(&boot_cpu_data);
+ x86_configure_nx();
+
/*
* Set up kernel GDT and segment registers, mainly so that
* -fstack-protector code can be executed.
*/
xen_setup_gdt(0);
- /* Work out if we support NX */
- get_cpu_cap(&boot_cpu_data);
- x86_configure_nx();
-
/* Determine virtual and physical address sizes */
get_cpu_address_sizes(&boot_cpu_data);
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 9d76d433d3d6..fd2f30227d96 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -413,7 +413,7 @@
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path
+# 443 reserved for quotactl_path
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self