Diffstat (limited to 'arch/x86/include/asm')
-rw-r--r--  arch/x86/include/asm/acpi.h | 4
-rw-r--r--  arch/x86/include/asm/alternative.h | 7
-rw-r--r--  arch/x86/include/asm/amd/fch.h | 13
-rw-r--r--  arch/x86/include/asm/amd/node.h | 1
-rw-r--r--  arch/x86/include/asm/apic.h | 77
-rw-r--r--  arch/x86/include/asm/apicdef.h | 2
-rw-r--r--  arch/x86/include/asm/archrandom.h | 6
-rw-r--r--  arch/x86/include/asm/asm.h | 36
-rw-r--r--  arch/x86/include/asm/bitops.h | 30
-rw-r--r--  arch/x86/include/asm/boot.h | 2
-rw-r--r--  arch/x86/include/asm/bug.h | 170
-rw-r--r--  arch/x86/include/asm/ce4100.h | 6
-rw-r--r--  arch/x86/include/asm/cfi.h | 28
-rw-r--r--  arch/x86/include/asm/cmpxchg.h | 12
-rw-r--r--  arch/x86/include/asm/cmpxchg_32.h | 6
-rw-r--r--  arch/x86/include/asm/cmpxchg_64.h | 3
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 3
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 33
-rw-r--r--  arch/x86/include/asm/cpuid.h | 8
-rw-r--r--  arch/x86/include/asm/cpumask.h | 2
-rw-r--r--  arch/x86/include/asm/debugreg.h | 19
-rw-r--r--  arch/x86/include/asm/div64.h | 39
-rw-r--r--  arch/x86/include/asm/entry-common.h | 7
-rw-r--r--  arch/x86/include/asm/floppy.h | 8
-rw-r--r--  arch/x86/include/asm/fpu/sched.h | 2
-rw-r--r--  arch/x86/include/asm/fpu/types.h | 49
-rw-r--r--  arch/x86/include/asm/fpu/xstate.h | 9
-rw-r--r--  arch/x86/include/asm/fred.h | 2
-rw-r--r--  arch/x86/include/asm/ftrace.h | 5
-rw-r--r--  arch/x86/include/asm/futex.h | 75
-rw-r--r--  arch/x86/include/asm/hardirq.h | 4
-rw-r--r--  arch/x86/include/asm/hw_irq.h | 12
-rw-r--r--  arch/x86/include/asm/hypervisor.h | 2
-rw-r--r--  arch/x86/include/asm/ibt.h | 10
-rw-r--r--  arch/x86/include/asm/idtentry.h | 13
-rw-r--r--  arch/x86/include/asm/inat.h | 15
-rw-r--r--  arch/x86/include/asm/init.h | 6
-rw-r--r--  arch/x86/include/asm/insn-eval.h | 2
-rw-r--r--  arch/x86/include/asm/insn.h | 56
-rw-r--r--  arch/x86/include/asm/intel-family.h | 14
-rw-r--r--  arch/x86/include/asm/intel_ds.h | 10
-rw-r--r--  arch/x86/include/asm/intel_telemetry.h | 37
-rw-r--r--  arch/x86/include/asm/irq_remapping.h | 24
-rw-r--r--  arch/x86/include/asm/irq_stack.h | 2
-rw-r--r--  arch/x86/include/asm/irqflags.h | 6
-rw-r--r--  arch/x86/include/asm/jump_label.h | 1
-rw-r--r--  arch/x86/include/asm/kexec.h | 12
-rw-r--r--  arch/x86/include/asm/kvm-x86-ops.h | 8
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 194
-rw-r--r--  arch/x86/include/asm/kvm_para.h | 2
-rw-r--r--  arch/x86/include/asm/kvm_types.h | 15
-rw-r--r--  arch/x86/include/asm/mce.h | 25
-rw-r--r--  arch/x86/include/asm/module.h | 8
-rw-r--r--  arch/x86/include/asm/mshyperv.h | 204
-rw-r--r--  arch/x86/include/asm/msr-index.h | 72
-rw-r--r--  arch/x86/include/asm/mtrr.h | 15
-rw-r--r--  arch/x86/include/asm/mwait.h | 35
-rw-r--r--  arch/x86/include/asm/nospec-branch.h | 60
-rw-r--r--  arch/x86/include/asm/page_64.h | 17
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 2
-rw-r--r--  arch/x86/include/asm/percpu.h | 17
-rw-r--r--  arch/x86/include/asm/perf_event.h | 124
-rw-r--r--  arch/x86/include/asm/pgtable.h | 53
-rw-r--r--  arch/x86/include/asm/pgtable_64_types.h | 3
-rw-r--r--  arch/x86/include/asm/pgtable_types.h | 8
-rw-r--r--  arch/x86/include/asm/processor.h | 2
-rw-r--r--  arch/x86/include/asm/ptrace.h | 24
-rw-r--r--  arch/x86/include/asm/realmode.h | 2
-rw-r--r--  arch/x86/include/asm/resctrl.h | 16
-rw-r--r--  arch/x86/include/asm/rmwcc.h | 26
-rw-r--r--  arch/x86/include/asm/runtime-const.h | 4
-rw-r--r--  arch/x86/include/asm/segment.h | 8
-rw-r--r--  arch/x86/include/asm/setup.h | 1
-rw-r--r--  arch/x86/include/asm/sev-common.h | 1
-rw-r--r--  arch/x86/include/asm/sev-internal.h | 28
-rw-r--r--  arch/x86/include/asm/sev.h | 94
-rw-r--r--  arch/x86/include/asm/sgx.h | 97
-rw-r--r--  arch/x86/include/asm/shared/msr.h | 15
-rw-r--r--  arch/x86/include/asm/shared/tdx.h | 2
-rw-r--r--  arch/x86/include/asm/shstk.h | 8
-rw-r--r--  arch/x86/include/asm/sighandling.h | 22
-rw-r--r--  arch/x86/include/asm/signal.h | 3
-rw-r--r--  arch/x86/include/asm/smap.h | 49
-rw-r--r--  arch/x86/include/asm/smp.h | 25
-rw-r--r--  arch/x86/include/asm/special_insns.h | 39
-rw-r--r--  arch/x86/include/asm/static_call.h | 2
-rw-r--r--  arch/x86/include/asm/string.h | 26
-rw-r--r--  arch/x86/include/asm/string_64.h | 6
-rw-r--r--  arch/x86/include/asm/svm.h | 20
-rw-r--r--  arch/x86/include/asm/tdx.h | 37
-rw-r--r--  arch/x86/include/asm/text-patching.h | 20
-rw-r--r--  arch/x86/include/asm/thread_info.h | 76
-rw-r--r--  arch/x86/include/asm/tlbflush.h | 5
-rw-r--r--  arch/x86/include/asm/topology.h | 22
-rw-r--r--  arch/x86/include/asm/uaccess.h | 19
-rw-r--r--  arch/x86/include/asm/uaccess_64.h | 12
-rw-r--r--  arch/x86/include/asm/unwind_user.h | 41
-rw-r--r--  arch/x86/include/asm/uprobes.h | 16
-rw-r--r--  arch/x86/include/asm/uv/bios.h | 2
-rw-r--r--  arch/x86/include/asm/video.h | 2
-rw-r--r--  arch/x86/include/asm/vmx.h | 9
-rw-r--r--  arch/x86/include/asm/x86_init.h | 28
-rw-r--r--  arch/x86/include/asm/xen/hypercall.h | 5
-rw-r--r--  arch/x86/include/asm/xen/page.h | 14
104 files changed, 1649 insertions, 931 deletions
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 5ab1a4598d00..a03aa6f999d1 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -158,13 +158,13 @@ static inline bool acpi_has_cpu_in_madt(void)
}
#define ACPI_HAVE_ARCH_SET_ROOT_POINTER
-static inline void acpi_arch_set_root_pointer(u64 addr)
+static __always_inline void acpi_arch_set_root_pointer(u64 addr)
{
x86_init.acpi.set_root_pointer(addr);
}
#define ACPI_HAVE_ARCH_GET_ROOT_POINTER
-static inline u64 acpi_arch_get_root_pointer(void)
+static __always_inline u64 acpi_arch_get_root_pointer(void)
{
return x86_init.acpi.get_root_pointer();
}
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 15bc07a5ebb3..03364510d5fe 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -197,7 +197,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
"773:\n"
#define ALTINSTR_ENTRY(ft_flags) \
- ".pushsection .altinstructions,\"a\"\n" \
+ ".pushsection .altinstructions, \"aM\", @progbits, " \
+ __stringify(ALT_INSTR_SIZE) "\n" \
" .long 771b - .\n" /* label */ \
" .long 774f - .\n" /* new instruction */ \
" .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \
@@ -207,6 +208,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define ALTINSTR_REPLACEMENT(newinstr) /* replacement */ \
".pushsection .altinstr_replacement, \"ax\"\n" \
+ ANNOTATE_DATA_SPECIAL "\n" \
"# ALT: replacement\n" \
"774:\n\t" newinstr "\n775:\n" \
".popsection\n"
@@ -360,11 +362,12 @@ void nop_func(void);
741: \
.skip -(((744f-743f)-(741b-740b)) > 0) * ((744f-743f)-(741b-740b)),0x90 ;\
742: \
- .pushsection .altinstructions,"a" ; \
+ .pushsection .altinstructions, "aM", @progbits, ALT_INSTR_SIZE ;\
altinstr_entry 740b,743f,flag,742b-740b,744f-743f ; \
.popsection ; \
.pushsection .altinstr_replacement,"ax" ; \
743: \
+ ANNOTATE_DATA_SPECIAL ; \
newinst ; \
744: \
.popsection ;
diff --git a/arch/x86/include/asm/amd/fch.h b/arch/x86/include/asm/amd/fch.h
deleted file mode 100644
index 2cf5153edbc2..000000000000
--- a/arch/x86/include/asm/amd/fch.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_AMD_FCH_H_
-#define _ASM_X86_AMD_FCH_H_
-
-#define FCH_PM_BASE 0xFED80300
-
-/* Register offsets from PM base: */
-#define FCH_PM_DECODEEN 0x00
-#define FCH_PM_DECODEEN_SMBUS0SEL GENMASK(20, 19)
-#define FCH_PM_SCRATCH 0x80
-#define FCH_PM_S5_RESET_STATUS 0xC0
-
-#endif /* _ASM_X86_AMD_FCH_H_ */
diff --git a/arch/x86/include/asm/amd/node.h b/arch/x86/include/asm/amd/node.h
index 23fe617898a8..a672b8765fa8 100644
--- a/arch/x86/include/asm/amd/node.h
+++ b/arch/x86/include/asm/amd/node.h
@@ -23,7 +23,6 @@
#define AMD_NODE0_PCI_SLOT 0x18
struct pci_dev *amd_node_get_func(u16 node, u8 func);
-struct pci_dev *amd_node_get_root(u16 node);
static inline u16 amd_num_nodes(void)
{
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 23d86c9750b9..a26e66d66444 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -305,6 +305,8 @@ struct apic {
/* Probe, setup and smpboot functions */
int (*probe)(void);
+ void (*setup)(void);
+ void (*teardown)(void);
int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
void (*init_apic_ldr)(void);
@@ -317,6 +319,8 @@ struct apic {
/* wakeup secondary CPU using 64-bit wakeup point */
int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip, unsigned int cpu);
+ void (*update_vector)(unsigned int cpu, unsigned int vector, bool set);
+
char *name;
};
@@ -470,6 +474,12 @@ static __always_inline bool apic_id_valid(u32 apic_id)
return apic_id <= apic->max_apic_id;
}
+static __always_inline void apic_update_vector(unsigned int cpu, unsigned int vector, bool set)
+{
+ if (apic->update_vector)
+ apic->update_vector(cpu, vector, set);
+}
+
#else /* CONFIG_X86_LOCAL_APIC */
static inline u32 apic_read(u32 reg) { return 0; }
@@ -481,6 +491,7 @@ static inline void apic_wait_icr_idle(void) { }
static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
static inline void apic_native_eoi(void) { WARN_ON_ONCE(1); }
static inline void apic_setup_apic_calls(void) { }
+static inline void apic_update_vector(unsigned int cpu, unsigned int vector, bool set) { }
#define apic_update_callback(_callback, _fn) do { } while (0)
@@ -488,11 +499,14 @@ static inline void apic_setup_apic_calls(void) { }
extern void apic_ack_irq(struct irq_data *data);
+#define APIC_VECTOR_TO_BIT_NUMBER(v) ((unsigned int)(v) % 32)
+#define APIC_VECTOR_TO_REG_OFFSET(v) ((unsigned int)(v) / 32 * 0x10)
+
static inline bool lapic_vector_set_in_irr(unsigned int vector)
{
- u32 irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+ u32 irr = apic_read(APIC_IRR + APIC_VECTOR_TO_REG_OFFSET(vector));
- return !!(irr & (1U << (vector % 32)));
+ return !!(irr & (1U << APIC_VECTOR_TO_BIT_NUMBER(vector)));
}
static inline bool is_vector_pending(unsigned int vector)
@@ -500,6 +514,65 @@ static inline bool is_vector_pending(unsigned int vector)
return lapic_vector_set_in_irr(vector) || pi_pending_this_cpu(vector);
}
+#define MAX_APIC_VECTOR 256
+#define APIC_VECTORS_PER_REG 32
+
+/*
+ * Vector states are maintained by APIC in 32-bit registers that are
+ * 16 bytes aligned. The status of each vector is kept in a single
+ * bit.
+ */
+static inline int apic_find_highest_vector(void *bitmap)
+{
+ int vec;
+ u32 *reg;
+
+ for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; vec >= 0; vec -= APIC_VECTORS_PER_REG) {
+ reg = bitmap + APIC_VECTOR_TO_REG_OFFSET(vec);
+ if (*reg)
+ return __fls(*reg) + vec;
+ }
+
+ return -1;
+}
+
+static inline u32 apic_get_reg(void *regs, int reg)
+{
+ return *((u32 *) (regs + reg));
+}
+
+static inline void apic_set_reg(void *regs, int reg, u32 val)
+{
+ *((u32 *) (regs + reg)) = val;
+}
+
+static __always_inline u64 apic_get_reg64(void *regs, int reg)
+{
+ BUILD_BUG_ON(reg != APIC_ICR);
+ return *((u64 *) (regs + reg));
+}
+
+static __always_inline void apic_set_reg64(void *regs, int reg, u64 val)
+{
+ BUILD_BUG_ON(reg != APIC_ICR);
+ *((u64 *) (regs + reg)) = val;
+}
+
+static inline void apic_clear_vector(int vec, void *bitmap)
+{
+ clear_bit(APIC_VECTOR_TO_BIT_NUMBER(vec), bitmap + APIC_VECTOR_TO_REG_OFFSET(vec));
+}
+
+static inline void apic_set_vector(int vec, void *bitmap)
+{
+ set_bit(APIC_VECTOR_TO_BIT_NUMBER(vec), bitmap + APIC_VECTOR_TO_REG_OFFSET(vec));
+}
+
+static inline int apic_test_vector(int vec, void *bitmap)
+{
+ return test_bit(APIC_VECTOR_TO_BIT_NUMBER(vec), bitmap + APIC_VECTOR_TO_REG_OFFSET(vec));
+}
+
/*
* Warm reset vector position:
*/
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 094106b6a538..be39a543fbe5 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -135,6 +135,8 @@
#define APIC_TDR_DIV_128 0xA
#define APIC_EFEAT 0x400
#define APIC_ECTRL 0x410
+#define APIC_SEOI 0x420
+#define APIC_IER 0x480
#define APIC_EILVTn(n) (0x500 + 0x10 * n)
#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */
#define APIC_EILVT_NR_AMD_10H 4
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 02bae8e0758b..4c305305871b 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -23,8 +23,7 @@ static inline bool __must_check rdrand_long(unsigned long *v)
unsigned int retry = RDRAND_RETRY_LOOPS;
do {
asm volatile("rdrand %[out]"
- CC_SET(c)
- : CC_OUT(c) (ok), [out] "=r" (*v));
+ : "=@ccc" (ok), [out] "=r" (*v));
if (ok)
return true;
} while (--retry);
@@ -35,8 +34,7 @@ static inline bool __must_check rdseed_long(unsigned long *v)
{
bool ok;
asm volatile("rdseed %[out]"
- CC_SET(c)
- : CC_OUT(c) (ok), [out] "=r" (*v));
+ : "=@ccc" (ok), [out] "=r" (*v));
return ok;
}
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index f963848024a5..0e8c611bc9e2 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_ASM_H
#define _ASM_X86_ASM_H
+#include <linux/annotate.h>
+
#ifdef __ASSEMBLER__
# define __ASM_FORM(x, ...) x,## __VA_ARGS__
# define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__
@@ -122,31 +124,23 @@ static __always_inline __pure void *rip_rel_ptr(void *p)
}
#endif
-/*
- * Macros to generate condition code outputs from inline assembly,
- * The output operand must be type "bool".
- */
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-# define CC_SET(c) "\n\t/* output condition code " #c "*/\n"
-# define CC_OUT(c) "=@cc" #c
-#else
-# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n"
-# define CC_OUT(c) [_cc_ ## c] "=qm"
-#endif
-
#ifdef __KERNEL__
+#ifndef COMPILE_OFFSETS
+#include <asm/asm-offsets.h>
+#endif
+
# include <asm/extable_fixup_types.h>
/* Exception table entry */
#ifdef __ASSEMBLER__
-# define _ASM_EXTABLE_TYPE(from, to, type) \
- .pushsection "__ex_table","a" ; \
- .balign 4 ; \
- .long (from) - . ; \
- .long (to) - . ; \
- .long type ; \
+# define _ASM_EXTABLE_TYPE(from, to, type) \
+ .pushsection "__ex_table", "aM", @progbits, EXTABLE_SIZE ; \
+ .balign 4 ; \
+ .long (from) - . ; \
+ .long (to) - . ; \
+ .long type ; \
.popsection
# ifdef CONFIG_KPROBES
@@ -189,7 +183,8 @@ static __always_inline __pure void *rip_rel_ptr(void *p)
".purgem extable_type_reg\n"
# define _ASM_EXTABLE_TYPE(from, to, type) \
- " .pushsection \"__ex_table\",\"a\"\n" \
+ " .pushsection __ex_table, \"aM\", @progbits, " \
+ __stringify(EXTABLE_SIZE) "\n" \
" .balign 4\n" \
" .long (" #from ") - .\n" \
" .long (" #to ") - .\n" \
@@ -197,7 +192,8 @@ static __always_inline __pure void *rip_rel_ptr(void *p)
" .popsection\n"
# define _ASM_EXTABLE_TYPE_REG(from, to, type, reg) \
- " .pushsection \"__ex_table\",\"a\"\n" \
+ " .pushsection __ex_table, \"aM\", @progbits, " \
+ __stringify(EXTABLE_SIZE) "\n" \
" .balign 4\n" \
" .long (" #from ") - .\n" \
" .long (" #to ") - .\n" \
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index eebbc8889e70..c2ce213f2b9b 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -99,8 +99,7 @@ static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
{
bool negative;
asm_inline volatile(LOCK_PREFIX "xorb %2,%1"
- CC_SET(s)
- : CC_OUT(s) (negative), WBYTE_ADDR(addr)
+ : "=@ccs" (negative), WBYTE_ADDR(addr)
: "iq" ((char)mask) : "memory");
return negative;
}
@@ -149,8 +148,7 @@ arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
bool oldbit;
asm(__ASM_SIZE(bts) " %2,%1"
- CC_SET(c)
- : CC_OUT(c) (oldbit)
+ : "=@ccc" (oldbit)
: ADDR, "Ir" (nr) : "memory");
return oldbit;
}
@@ -175,8 +173,7 @@ arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
bool oldbit;
asm volatile(__ASM_SIZE(btr) " %2,%1"
- CC_SET(c)
- : CC_OUT(c) (oldbit)
+ : "=@ccc" (oldbit)
: ADDR, "Ir" (nr) : "memory");
return oldbit;
}
@@ -187,8 +184,7 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
bool oldbit;
asm volatile(__ASM_SIZE(btc) " %2,%1"
- CC_SET(c)
- : CC_OUT(c) (oldbit)
+ : "=@ccc" (oldbit)
: ADDR, "Ir" (nr) : "memory");
return oldbit;
@@ -211,8 +207,7 @@ static __always_inline bool constant_test_bit_acquire(long nr, const volatile un
bool oldbit;
asm volatile("testb %2,%1"
- CC_SET(nz)
- : CC_OUT(nz) (oldbit)
+ : "=@ccnz" (oldbit)
: "m" (((unsigned char *)addr)[nr >> 3]),
"i" (1 << (nr & 7))
:"memory");
@@ -225,8 +220,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
bool oldbit;
asm volatile(__ASM_SIZE(bt) " %2,%1"
- CC_SET(c)
- : CC_OUT(c) (oldbit)
+ : "=@ccc" (oldbit)
: "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
return oldbit;
@@ -246,7 +240,7 @@ arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr)
variable_test_bit(nr, addr);
}
-static __always_inline unsigned long variable__ffs(unsigned long word)
+static __always_inline __attribute_const__ unsigned long variable__ffs(unsigned long word)
{
asm("tzcnt %1,%0"
: "=r" (word)
@@ -265,7 +259,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word)
(unsigned long)__builtin_ctzl(word) : \
variable__ffs(word))
-static __always_inline unsigned long variable_ffz(unsigned long word)
+static __always_inline __attribute_const__ unsigned long variable_ffz(unsigned long word)
{
return variable__ffs(~word);
}
@@ -287,7 +281,7 @@ static __always_inline unsigned long variable_ffz(unsigned long word)
*
* Undefined if no set bit exists, so code should check against 0 first.
*/
-static __always_inline unsigned long __fls(unsigned long word)
+static __always_inline __attribute_const__ unsigned long __fls(unsigned long word)
{
if (__builtin_constant_p(word))
return BITS_PER_LONG - 1 - __builtin_clzl(word);
@@ -301,7 +295,7 @@ static __always_inline unsigned long __fls(unsigned long word)
#undef ADDR
#ifdef __KERNEL__
-static __always_inline int variable_ffs(int x)
+static __always_inline __attribute_const__ int variable_ffs(int x)
{
int r;
@@ -355,7 +349,7 @@ static __always_inline int variable_ffs(int x)
* set bit if value is nonzero. The last (most significant) bit is
* at position 32.
*/
-static __always_inline int fls(unsigned int x)
+static __always_inline __attribute_const__ int fls(unsigned int x)
{
int r;
@@ -400,7 +394,7 @@ static __always_inline int fls(unsigned int x)
* at position 64.
*/
#ifdef CONFIG_X86_64
-static __always_inline int fls64(__u64 x)
+static __always_inline __attribute_const__ int fls64(__u64 x)
{
int bitpos = -1;
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 02b23aa78955..f7b67cb73915 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -82,6 +82,8 @@
#ifndef __ASSEMBLER__
extern unsigned int output_len;
extern const unsigned long kernel_text_size;
+extern const unsigned long kernel_inittext_offset;
+extern const unsigned long kernel_inittext_size;
extern const unsigned long kernel_total_size;
unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index f0e9acf72547..9b4e04690e1a 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -5,14 +5,24 @@
#include <linux/stringify.h>
#include <linux/instrumentation.h>
#include <linux/objtool.h>
+#include <asm/asm.h>
+
+#ifndef __ASSEMBLY__
+struct bug_entry;
+extern void __WARN_trap(struct bug_entry *bug, ...);
+#endif
/*
* Despite that some emulators terminate on UD2, we use it for WARN().
*/
-#define ASM_UD2 ".byte 0x0f, 0x0b"
+#define ASM_UD2 __ASM_FORM(ud2)
#define INSN_UD2 0x0b0f
#define LEN_UD2 2
+#define ASM_UDB _ASM_BYTES(0xd6)
+#define INSN_UDB 0xd6
+#define LEN_UDB 1
+
/*
* In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit.
*/
@@ -26,55 +36,77 @@
#define BUG_UD2 0xfffe
#define BUG_UD1 0xfffd
#define BUG_UD1_UBSAN 0xfffc
-#define BUG_EA 0xffea
+#define BUG_UD1_WARN 0xfffb
+#define BUG_UDB 0xffd6
#define BUG_LOCK 0xfff0
#ifdef CONFIG_GENERIC_BUG
-#ifdef CONFIG_X86_32
-# define __BUG_REL(val) ".long " __stringify(val)
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+#define __BUG_ENTRY_VERBOSE(file, line) \
+ "\t.long " file " - .\t# bug_entry::file\n" \
+ "\t.word " line "\t# bug_entry::line\n"
#else
-# define __BUG_REL(val) ".long " __stringify(val) " - ."
+#define __BUG_ENTRY_VERBOSE(file, line)
#endif
-#ifdef CONFIG_DEBUG_BUGVERBOSE
+#if defined(CONFIG_X86_64) || defined(CONFIG_DEBUG_BUGVERBOSE_DETAILED)
+#define HAVE_ARCH_BUG_FORMAT
+#define __BUG_ENTRY_FORMAT(format) \
+ "\t.long " format " - .\t# bug_entry::format\n"
+#else
+#define __BUG_ENTRY_FORMAT(format)
+#endif
-#define _BUG_FLAGS(ins, flags, extra) \
-do { \
- asm_inline volatile("1:\t" ins "\n" \
- ".pushsection __bug_table,\"aw\"\n" \
- "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
- "\t" __BUG_REL(%c0) "\t# bug_entry::file\n" \
- "\t.word %c1" "\t# bug_entry::line\n" \
- "\t.word %c2" "\t# bug_entry::flags\n" \
- "\t.org 2b+%c3\n" \
- ".popsection\n" \
- extra \
- : : "i" (__FILE__), "i" (__LINE__), \
- "i" (flags), \
- "i" (sizeof(struct bug_entry))); \
-} while (0)
+#ifdef CONFIG_X86_64
+#define HAVE_ARCH_BUG_FORMAT_ARGS
+#endif
-#else /* !CONFIG_DEBUG_BUGVERBOSE */
+#define __BUG_ENTRY(format, file, line, flags) \
+ "\t.long 1b - ." "\t# bug_entry::bug_addr\n" \
+ __BUG_ENTRY_FORMAT(format) \
+ __BUG_ENTRY_VERBOSE(file, line) \
+ "\t.word " flags "\t# bug_entry::flags\n"
+
+#define _BUG_FLAGS_ASM(format, file, line, flags, size, extra) \
+ ".pushsection __bug_table,\"aw\"\n\t" \
+ ANNOTATE_DATA_SPECIAL "\n\t" \
+ "2:\n\t" \
+ __BUG_ENTRY(format, file, line, flags) \
+ "\t.org 2b + " size "\n" \
+ ".popsection\n" \
+ extra
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE_DETAILED
+#define WARN_CONDITION_STR(cond_str) cond_str
+#else
+#define WARN_CONDITION_STR(cond_str) ""
+#endif
-#define _BUG_FLAGS(ins, flags, extra) \
+#define _BUG_FLAGS(cond_str, ins, flags, extra) \
do { \
asm_inline volatile("1:\t" ins "\n" \
- ".pushsection __bug_table,\"aw\"\n" \
- "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
- "\t.word %c0" "\t# bug_entry::flags\n" \
- "\t.org 2b+%c1\n" \
- ".popsection\n" \
- extra \
- : : "i" (flags), \
- "i" (sizeof(struct bug_entry))); \
+ _BUG_FLAGS_ASM("%c[fmt]", "%c[file]", \
+ "%c[line]", "%c[fl]", \
+ "%c[size]", extra) \
+ : : [fmt] "i" (WARN_CONDITION_STR(cond_str)), \
+ [file] "i" (__FILE__), \
+ [line] "i" (__LINE__), \
+ [fl] "i" (flags), \
+ [size] "i" (sizeof(struct bug_entry))); \
} while (0)
-#endif /* CONFIG_DEBUG_BUGVERBOSE */
+#define ARCH_WARN_ASM(file, line, flags, size) \
+ ".pushsection .rodata.str1.1, \"aMS\", @progbits, 1\n" \
+ "99:\n" \
+ "\t.string \"\"\n" \
+ ".popsection\n" \
+ "1:\t " ASM_UD2 "\n" \
+ _BUG_FLAGS_ASM("99b", file, line, flags, size, "")
#else
-#define _BUG_FLAGS(ins, flags, extra) asm volatile(ins)
+#define _BUG_FLAGS(cond_str, ins, flags, extra) asm volatile(ins)
#endif /* CONFIG_GENERIC_BUG */
@@ -82,7 +114,7 @@ do { \
#define BUG() \
do { \
instrumentation_begin(); \
- _BUG_FLAGS(ASM_UD2, 0, ""); \
+ _BUG_FLAGS("", ASM_UD2, 0, ""); \
__builtin_unreachable(); \
} while (0)
@@ -92,14 +124,72 @@ do { \
* were to trigger, we'd rather wreck the machine in an attempt to get the
* message out than not know about it.
*/
-#define __WARN_FLAGS(flags) \
-do { \
- __auto_type __flags = BUGFLAG_WARNING|(flags); \
- instrumentation_begin(); \
- _BUG_FLAGS(ASM_UD2, __flags, ANNOTATE_REACHABLE(1b)); \
- instrumentation_end(); \
+
+#define ARCH_WARN_REACHABLE ANNOTATE_REACHABLE(1b)
+
+#define __WARN_FLAGS(cond_str, flags) \
+do { \
+ auto __flags = BUGFLAG_WARNING|(flags); \
+ instrumentation_begin(); \
+ _BUG_FLAGS(cond_str, ASM_UD2, __flags, ARCH_WARN_REACHABLE); \
+ instrumentation_end(); \
} while (0)
+#ifdef HAVE_ARCH_BUG_FORMAT_ARGS
+
+#ifndef __ASSEMBLY__
+#include <linux/static_call_types.h>
+DECLARE_STATIC_CALL(WARN_trap, __WARN_trap);
+
+struct pt_regs;
+struct sysv_va_list { /* from AMD64 System V ABI */
+ unsigned int gp_offset;
+ unsigned int fp_offset;
+ void *overflow_arg_area;
+ void *reg_save_area;
+};
+struct arch_va_list {
+ unsigned long regs[6];
+ struct sysv_va_list args;
+};
+extern void *__warn_args(struct arch_va_list *args, struct pt_regs *regs);
+#endif /* __ASSEMBLY__ */
+
+#define __WARN_bug_entry(flags, format) ({ \
+ struct bug_entry *bug; \
+ asm_inline volatile("lea (2f)(%%rip), %[addr]\n1:\n" \
+ _BUG_FLAGS_ASM("%c[fmt]", "%c[file]", \
+ "%c[line]", "%c[fl]", \
+ "%c[size]", "") \
+ : [addr] "=r" (bug) \
+ : [fmt] "i" (format), \
+ [file] "i" (__FILE__), \
+ [line] "i" (__LINE__), \
+ [fl] "i" (flags), \
+ [size] "i" (sizeof(struct bug_entry))); \
+ bug; })
+
+#define __WARN_print_arg(flags, format, arg...) \
+do { \
+ int __flags = (flags) | BUGFLAG_WARNING | BUGFLAG_ARGS ; \
+ static_call_mod(WARN_trap)(__WARN_bug_entry(__flags, format), ## arg); \
+ asm (""); /* inhibit tail-call optimization */ \
+} while (0)
+
+#define __WARN_printf(taint, fmt, arg...) \
+ __WARN_print_arg(BUGFLAG_TAINT(taint), fmt, ## arg)
+
+#define WARN_ONCE(cond, format, arg...) ({ \
+ int __ret_warn_on = !!(cond); \
+ if (unlikely(__ret_warn_on)) { \
+ __WARN_print_arg(BUGFLAG_ONCE|BUGFLAG_TAINT(TAINT_WARN),\
+ format, ## arg); \
+ } \
+ __ret_warn_on; \
+})
+
+#endif /* HAVE_ARCH_BUG_FORMAT_ARGS */
+
#include <asm-generic/bug.h>
#endif /* _ASM_X86_BUG_H */
diff --git a/arch/x86/include/asm/ce4100.h b/arch/x86/include/asm/ce4100.h
index 2930f560d7f3..e1f965bb1e31 100644
--- a/arch/x86/include/asm/ce4100.h
+++ b/arch/x86/include/asm/ce4100.h
@@ -4,4 +4,10 @@
int ce4100_pci_init(void);
+#ifdef CONFIG_SERIAL_8250
+void __init sdv_serial_fixup(void);
+#else
+static inline void sdv_serial_fixup(void) {};
+#endif
+
#endif
diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h
index 3e51ba459154..c40b9ebc1fb4 100644
--- a/arch/x86/include/asm/cfi.h
+++ b/arch/x86/include/asm/cfi.h
@@ -71,12 +71,10 @@
*
* __cfi_foo:
* endbr64
- * subl 0x12345678, %r10d
- * jz foo
- * ud2
- * nop
+ * subl 0x12345678, %eax
+ * jne.32,pn foo+3
* foo:
- * osp nop3 # was endbr64
+ * nopl -42(%rax) # was endbr64
* ... code here ...
* ret
*
@@ -86,9 +84,9 @@
* indirect caller:
* lea foo(%rip), %r11
* ...
- * movl $0x12345678, %r10d
- * subl $16, %r11
- * nop4
+ * movl $0x12345678, %eax
+ * lea -0x10(%r11), %r11
+ * nop5
* call *%r11
*
*/
@@ -113,11 +111,9 @@ extern bhi_thunk __bhi_args_end[];
struct pt_regs;
-#ifdef CONFIG_CFI_CLANG
+#ifdef CONFIG_CFI
enum bug_trap_type handle_cfi_failure(struct pt_regs *regs);
#define __bpfcall
-extern u32 cfi_bpf_hash;
-extern u32 cfi_bpf_subprog_hash;
static inline int cfi_get_offset(void)
{
@@ -135,6 +131,8 @@ static inline int cfi_get_offset(void)
#define cfi_get_offset cfi_get_offset
extern u32 cfi_get_func_hash(void *func);
+#define cfi_get_func_hash cfi_get_func_hash
+
extern int cfi_get_func_arity(void *func);
#ifdef CONFIG_FINEIBT
@@ -153,17 +151,11 @@ static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
{
return BUG_TRAP_TYPE_NONE;
}
-#define cfi_bpf_hash 0U
-#define cfi_bpf_subprog_hash 0U
-static inline u32 cfi_get_func_hash(void *func)
-{
- return 0;
-}
static inline int cfi_get_func_arity(void *func)
{
return 0;
}
-#endif /* CONFIG_CFI_CLANG */
+#endif /* CONFIG_CFI */
#if HAS_KERNEL_IBT == 1
#define CFI_NOSEAL(x) asm(IBT_NOSEAL(__stringify(x)))
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index b61f32c3459f..a88b06f1c35e 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -166,8 +166,7 @@ extern void __add_wrong_size(void)
{ \
volatile u8 *__ptr = (volatile u8 *)(_ptr); \
asm_inline volatile(lock "cmpxchgb %[new], %[ptr]" \
- CC_SET(z) \
- : CC_OUT(z) (success), \
+ : "=@ccz" (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "q" (__new) \
@@ -178,8 +177,7 @@ extern void __add_wrong_size(void)
{ \
volatile u16 *__ptr = (volatile u16 *)(_ptr); \
asm_inline volatile(lock "cmpxchgw %[new], %[ptr]" \
- CC_SET(z) \
- : CC_OUT(z) (success), \
+ : "=@ccz" (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
@@ -190,8 +188,7 @@ extern void __add_wrong_size(void)
{ \
volatile u32 *__ptr = (volatile u32 *)(_ptr); \
asm_inline volatile(lock "cmpxchgl %[new], %[ptr]" \
- CC_SET(z) \
- : CC_OUT(z) (success), \
+ : "=@ccz" (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
@@ -202,8 +199,7 @@ extern void __add_wrong_size(void)
{ \
volatile u64 *__ptr = (volatile u64 *)(_ptr); \
asm_inline volatile(lock "cmpxchgq %[new], %[ptr]" \
- CC_SET(z) \
- : CC_OUT(z) (success), \
+ : "=@ccz" (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 371f7906019e..1f80a62be969 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -46,8 +46,7 @@ static __always_inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new
bool ret; \
\
asm_inline volatile(_lock "cmpxchg8b %[ptr]" \
- CC_SET(e) \
- : CC_OUT(e) (ret), \
+ : "=@ccz" (ret), \
[ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high) \
@@ -125,8 +124,7 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64
ALTERNATIVE(_lock_loc \
"call cmpxchg8b_emu", \
_lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
- CC_SET(e) \
- : ALT_OUTPUT_SP(CC_OUT(e) (ret), \
+ : ALT_OUTPUT_SP("=@ccz" (ret), \
"+a" (o.low), "+d" (o.high)) \
: "b" (n.low), "c" (n.high), \
[ptr] "S" (_ptr) \
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 71d1e72ed879..5afea056fb20 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -66,8 +66,7 @@ static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old,
bool ret; \
\
asm_inline volatile(_lock "cmpxchg16b %[ptr]" \
- CC_SET(e) \
- : CC_OUT(e) (ret), \
+ : "=@ccz" (ret), \
[ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high) \
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 893cbca37fe9..3ddc1d33399b 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -30,7 +30,7 @@ enum cpuid_leafs
CPUID_6_EAX,
CPUID_8000_000A_EDX,
CPUID_7_ECX,
- CPUID_8000_0007_EBX,
+ CPUID_LNX_6,
CPUID_7_EDX,
CPUID_8000_001F_EAX,
CPUID_8000_0021_EAX,
@@ -101,6 +101,7 @@ static __always_inline bool _static_cpu_has(u16 bit)
asm goto(ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]")
".pushsection .altinstr_aux,\"ax\"\n"
"6:\n"
+ ANNOTATE_DATA_SPECIAL "\n"
" testb %[bitnum], %a[cap_byte]\n"
" jnz %l[t_yes]\n"
" jmp %l[t_no]\n"
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index ee176236c2be..c3b53beb1300 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -218,6 +218,7 @@
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */
#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */
#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */
+#define X86_FEATURE_COHERENCY_SFW_NO ( 8*32+ 4) /* SNP cache coherency software work around not needed */
#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */
#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */
@@ -313,13 +314,14 @@
#define X86_FEATURE_SM4 (12*32+ 2) /* SM4 instructions */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */
+#define X86_FEATURE_LASS (12*32+ 6) /* "lass" Linear Address Space Separation */
#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */
#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* Intel Architectural PerfMon Extension */
#define X86_FEATURE_FZRM (12*32+10) /* Fast zero-length REP MOVSB */
#define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */
#define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */
#define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */
-#define X86_FEATURE_LKGS (12*32+18) /* Load "kernel" (userspace) GS */
+#define X86_FEATURE_LKGS (12*32+18) /* Like MOV_GS except MSR_KERNEL_GS_BASE = GS.base */
#define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */
#define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */
#define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */
@@ -337,6 +339,7 @@
#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */
#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/
+#define X86_FEATURE_EFER_LMSLE_MBZ (13*32+20) /* EFER.LMSLE must be zero */
#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */
#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */
@@ -406,9 +409,12 @@
#define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */
#define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */
-/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
+/*
+ * Linux-defined word for use with scattered/synthetic bits.
+ */
#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */
#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */
+
#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
@@ -443,6 +449,7 @@
#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */
#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */
#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */
+#define X86_FEATURE_SNP_SECURE_TSC (19*32+ 8) /* SEV-SNP Secure TSC */
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */
@@ -456,10 +463,14 @@
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */
+#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */
#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */
+
#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */
#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */
+#define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */
+
#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */
#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
@@ -487,6 +498,21 @@
#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */
#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */
#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */
+#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */
+#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */
+#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */
+#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */
+#define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */
+#define X86_FEATURE_MSR_IMM (21*32+16) /* MSR immediate form instructions */
+#define X86_FEATURE_SGX_EUPDATESVN (21*32+17) /* Support for ENCLS[EUPDATESVN] instruction */
+
+#define X86_FEATURE_SDCIAE (21*32+18) /* L3 Smart Data Cache Injection Allocation Enforcement */
+#define X86_FEATURE_CLEAR_CPU_BUF_VM_MMIO (21*32+19) /*
+ * Clear CPU buffers before VM-Enter if the vCPU
+ * can access host MMIO (ignored for all intents
+ * and purposes if CLEAR_CPU_BUF_VM is set).
+ */
+#define X86_FEATURE_X2AVIC_EXT (21*32+20) /* AMD SVM x2AVIC support for 4k vCPUs */
/*
* BUG word(s)
@@ -542,5 +568,6 @@
#define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */
#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */
#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
-
+#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */
+#define X86_BUG_VMSCAPE X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h
deleted file mode 100644
index d5749b25fa10..000000000000
--- a/arch/x86/include/asm/cpuid.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef _ASM_X86_CPUID_H
-#define _ASM_X86_CPUID_H
-
-#include <asm/cpuid/api.h>
-
-#endif /* _ASM_X86_CPUID_H */
diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
index 70f6b60ad67b..9df9e9cde670 100644
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_CPUMASK_H
#define _ASM_X86_CPUMASK_H
#ifndef __ASSEMBLER__
+
+#include <linux/compiler.h>
#include <linux/cpumask.h>
extern void setup_cpu_local_masks(void);
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 363110e6b2e3..a2c1f2d24b64 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -9,6 +9,14 @@
#include <asm/cpufeature.h>
#include <asm/msr.h>
+/*
+ * Define bits that are always set to 1 in DR7, only bit 10 is
+ * architecturally reserved to '1'.
+ *
+ * This is also the init/reset value for DR7.
+ */
+#define DR7_FIXED_1 0x00000400
+
DECLARE_PER_CPU(unsigned long, cpu_dr7);
#ifndef CONFIG_PARAVIRT_XXL
@@ -100,8 +108,8 @@ static __always_inline void native_set_debugreg(int regno, unsigned long value)
static inline void hw_breakpoint_disable(void)
{
- /* Zero the control register for HW Breakpoint */
- set_debugreg(0UL, 7);
+ /* Reset the control register for HW Breakpoint */
+ set_debugreg(DR7_FIXED_1, 7);
/* Zero-out the individual HW breakpoint address registers */
set_debugreg(0UL, 0);
@@ -125,9 +133,12 @@ static __always_inline unsigned long local_db_save(void)
return 0;
get_debugreg(dr7, 7);
- dr7 &= ~0x400; /* architecturally set bit */
+
+ /* Architecturally set bit */
+ dr7 &= ~DR7_FIXED_1;
if (dr7)
- set_debugreg(0, 7);
+ set_debugreg(DR7_FIXED_1, 7);
+
/*
* Ensure the compiler doesn't lower the above statements into
* the critical section; disabling breakpoints late would not
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 9931e4c7d73f..30fd06ede751 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -60,6 +60,12 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
}
#define div_u64_rem div_u64_rem
+/*
+ * gcc tends to zero extend 32bit values and do full 64bit maths.
+ * Define asm functions that avoid this.
+ * (clang generates better code for the C versions.)
+ */
+#ifndef __clang__
static inline u64 mul_u32_u32(u32 a, u32 b)
{
u32 high, low;
@@ -71,6 +77,19 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
}
#define mul_u32_u32 mul_u32_u32
+static inline u64 add_u64_u32(u64 a, u32 b)
+{
+ u32 high = a >> 32, low = a;
+
+ asm ("addl %[b], %[low]; adcl $0, %[high]"
+ : [low] "+r" (low), [high] "+r" (high)
+ : [b] "rm" (b) );
+
+ return low | (u64)high << 32;
+}
+#define add_u64_u32 add_u64_u32
+#endif
+
/*
* __div64_32() is never called on x86, so prevent the
* generic definition from getting built.
@@ -84,21 +103,25 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
* Will generate an #DE when the result doesn't fit u64, could fix with an
* __ex_table[] entry when it becomes an issue.
*/
-static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div)
+static inline u64 mul_u64_add_u64_div_u64(u64 rax, u64 mul, u64 add, u64 div)
{
- u64 q;
+ u64 rdx;
+
+ asm ("mulq %[mul]" : "+a" (rax), "=d" (rdx) : [mul] "rm" (mul));
+
+ if (!statically_true(!add))
+ asm ("addq %[add], %[lo]; adcq $0, %[hi]" :
+ [lo] "+r" (rax), [hi] "+r" (rdx) : [add] "irm" (add));
- asm ("mulq %2; divq %3" : "=a" (q)
- : "a" (a), "rm" (mul), "rm" (div)
- : "rdx");
+ asm ("divq %[div]" : "+a" (rax), "+d" (rdx) : [div] "rm" (div));
- return q;
+ return rax;
}
-#define mul_u64_u64_div_u64 mul_u64_u64_div_u64
+#define mul_u64_add_u64_div_u64 mul_u64_add_u64_div_u64
static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
{
- return mul_u64_u64_div_u64(a, mul, div);
+ return mul_u64_add_u64_div_u64(a, mul, 0, div);
}
#define mul_u64_u32_div mul_u64_u32_div
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index d535a97c7284..ce3eb6d5fdf9 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -93,6 +93,13 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
* 8 (ia32) bits.
*/
choose_random_kstack_offset(rdtsc());
+
+ /* Avoid unnecessary reads of 'x86_ibpb_exit_to_user' */
+ if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) &&
+ this_cpu_read(x86_ibpb_exit_to_user)) {
+ indirect_branch_prediction_barrier();
+ this_cpu_write(x86_ibpb_exit_to_user, false);
+ }
}
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h
index 6ec3fc969ad5..e7a244051c62 100644
--- a/arch/x86/include/asm/floppy.h
+++ b/arch/x86/include/asm/floppy.h
@@ -10,6 +10,7 @@
#ifndef _ASM_X86_FLOPPY_H
#define _ASM_X86_FLOPPY_H
+#include <linux/sizes.h>
#include <linux/vmalloc.h>
/*
@@ -22,10 +23,7 @@
*/
#define _CROSS_64KB(a, s, vdma) \
(!(vdma) && \
- ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64))
-
-#define CROSS_64KB(a, s) _CROSS_64KB(a, s, use_virtual_dma & 1)
-
+ ((unsigned long)(a) / SZ_64K != ((unsigned long)(a) + (s) - 1) / SZ_64K))
#define SW fd_routine[use_virtual_dma & 1]
#define CSW fd_routine[can_use_virtual_dma & 1]
@@ -206,7 +204,7 @@ static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io)
static int hard_dma_setup(char *addr, unsigned long size, int mode, int io)
{
#ifdef FLOPPY_SANITY_CHECK
- if (CROSS_64KB(addr, size)) {
+ if (_CROSS_64KB(addr, size, use_virtual_dma & 1)) {
printk("DMA crossing 64-K boundary %p-%p\n", addr, addr+size);
return -1;
}
diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h
index c060549c6c94..89004f4ca208 100644
--- a/arch/x86/include/asm/fpu/sched.h
+++ b/arch/x86/include/asm/fpu/sched.h
@@ -11,7 +11,7 @@
extern void save_fpregs_to_fpstate(struct fpu *fpu);
extern void fpu__drop(struct task_struct *tsk);
-extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
+extern int fpu_clone(struct task_struct *dst, u64 clone_flags, bool minimal,
unsigned long shstk_addr);
extern void fpu_flush_thread(void);
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 1c94121acd3d..93e99d2583d6 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -118,7 +118,7 @@ enum xfeature {
XFEATURE_PKRU,
XFEATURE_PASID,
XFEATURE_CET_USER,
- XFEATURE_CET_KERNEL_UNUSED,
+ XFEATURE_CET_KERNEL,
XFEATURE_RSRVD_COMP_13,
XFEATURE_RSRVD_COMP_14,
XFEATURE_LBR,
@@ -142,7 +142,7 @@ enum xfeature {
#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU)
#define XFEATURE_MASK_PASID (1 << XFEATURE_PASID)
#define XFEATURE_MASK_CET_USER (1 << XFEATURE_CET_USER)
-#define XFEATURE_MASK_CET_KERNEL (1 << XFEATURE_CET_KERNEL_UNUSED)
+#define XFEATURE_MASK_CET_KERNEL (1 << XFEATURE_CET_KERNEL)
#define XFEATURE_MASK_LBR (1 << XFEATURE_LBR)
#define XFEATURE_MASK_XTILE_CFG (1 << XFEATURE_XTILE_CFG)
#define XFEATURE_MASK_XTILE_DATA (1 << XFEATURE_XTILE_DATA)
@@ -269,6 +269,16 @@ struct cet_user_state {
};
/*
+ * State component 12 is Control-flow Enforcement supervisor states.
+ * This state includes SSP pointers for privilege levels 0 through 2.
+ */
+struct cet_supervisor_state {
+ u64 pl0_ssp;
+ u64 pl1_ssp;
+ u64 pl2_ssp;
+} __packed;
+
+/*
* State component 15: Architectural LBR configuration state.
* The size of Arch LBR state depends on the number of LBRs (lbr_depth).
*/
@@ -552,6 +562,31 @@ struct fpu_guest {
};
/*
+ * FPU state configuration data for fpu_guest.
+ * Initialized at boot time. Read only after init.
+ */
+struct vcpu_fpu_config {
+ /*
+ * @size:
+ *
+ * The default size of the register state buffer in guest FPUs.
+ * Includes all supported features except independent managed
+ * features and features which have to be requested by user space
+ * before usage.
+ */
+ unsigned int size;
+
+ /*
+ * @features:
+ *
+ * The default supported features bitmap in guest FPUs. Does not
+ * include independent managed features and features which have to
+ * be requested by user space before usage.
+ */
+ u64 features;
+};
+
+/*
* FPU state configuration data. Initialized at boot time. Read only after init.
*/
struct fpu_state_config {
@@ -567,8 +602,9 @@ struct fpu_state_config {
* @default_size:
*
* The default size of the register state buffer. Includes all
- * supported features except independent managed features and
- * features which have to be requested by user space before usage.
+ * supported features except independent managed features,
+ * guest-only features and features which have to be requested by
+ * user space before usage.
*/
unsigned int default_size;
@@ -584,8 +620,8 @@ struct fpu_state_config {
* @default_features:
*
* The default supported features bitmap. Does not include
- * independent managed features and features which have to
- * be requested by user space before usage.
+ * independent managed features, guest-only features and features
+ * which have to be requested by user space before usage.
*/
u64 default_features;
/*
@@ -606,5 +642,6 @@ struct fpu_state_config {
/* FPU state configuration information */
extern struct fpu_state_config fpu_kernel_cfg, fpu_user_cfg;
+extern struct vcpu_fpu_config guest_default_cfg;
#endif /* _ASM_X86_FPU_TYPES_H */
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index b308a76afbb7..7a7dc9d56027 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -46,9 +46,13 @@
/* Features which are dynamically enabled for a process on request */
#define XFEATURE_MASK_USER_DYNAMIC XFEATURE_MASK_XTILE_DATA
+/* Supervisor features which are enabled only in guest FPUs */
+#define XFEATURE_MASK_GUEST_SUPERVISOR XFEATURE_MASK_CET_KERNEL
+
/* All currently supported supervisor features */
#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID | \
- XFEATURE_MASK_CET_USER)
+ XFEATURE_MASK_CET_USER | \
+ XFEATURE_MASK_GUEST_SUPERVISOR)
/*
* A supervisor state component may not always contain valuable information,
@@ -75,8 +79,7 @@
* Unsupported supervisor features. When a supervisor feature in this mask is
* supported in the future, move it to the supported supervisor feature mask.
*/
-#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT | \
- XFEATURE_MASK_CET_KERNEL)
+#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT)
/* All supervisor states including supported and unsupported states. */
#define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
index 12b34d5b2953..2bb65677c079 100644
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -79,7 +79,7 @@ static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int
.type = type,
.vector = vector,
.nmi = type == EVENT_TYPE_NMI,
- .lm = 1,
+ .l = 1,
};
asm_fred_entry_from_kvm(ss);
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 93156ac4ffe0..b08c95872eed 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -56,6 +56,11 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
return &arch_ftrace_regs(fregs)->regs;
}
+#define arch_ftrace_partial_regs(regs) do { \
+ regs->flags &= ~X86_EFLAGS_FIXED; \
+ regs->cs = __KERNEL_CS; \
+} while (0)
+
#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \
(_regs)->ip = arch_ftrace_regs(fregs)->regs.ip; \
(_regs)->sp = arch_ftrace_regs(fregs)->regs.sp; \
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index 6e2458088800..fe5d9a10d900 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -46,38 +46,31 @@ do { \
} while(0)
static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
- u32 __user *uaddr)
+ u32 __user *uaddr)
{
- if (can_do_masked_user_access())
- uaddr = masked_user_access_begin(uaddr);
- else if (!user_access_begin(uaddr, sizeof(u32)))
- return -EFAULT;
-
- switch (op) {
- case FUTEX_OP_SET:
- unsafe_atomic_op1("xchgl %0, %2", oval, uaddr, oparg, Efault);
- break;
- case FUTEX_OP_ADD:
- unsafe_atomic_op1(LOCK_PREFIX "xaddl %0, %2", oval,
- uaddr, oparg, Efault);
- break;
- case FUTEX_OP_OR:
- unsafe_atomic_op2("orl %4, %3", oval, uaddr, oparg, Efault);
- break;
- case FUTEX_OP_ANDN:
- unsafe_atomic_op2("andl %4, %3", oval, uaddr, ~oparg, Efault);
- break;
- case FUTEX_OP_XOR:
- unsafe_atomic_op2("xorl %4, %3", oval, uaddr, oparg, Efault);
- break;
- default:
- user_access_end();
- return -ENOSYS;
+ scoped_user_rw_access(uaddr, Efault) {
+ switch (op) {
+ case FUTEX_OP_SET:
+ unsafe_atomic_op1("xchgl %0, %2", oval, uaddr, oparg, Efault);
+ break;
+ case FUTEX_OP_ADD:
+ unsafe_atomic_op1(LOCK_PREFIX "xaddl %0, %2", oval, uaddr, oparg, Efault);
+ break;
+ case FUTEX_OP_OR:
+ unsafe_atomic_op2("orl %4, %3", oval, uaddr, oparg, Efault);
+ break;
+ case FUTEX_OP_ANDN:
+ unsafe_atomic_op2("andl %4, %3", oval, uaddr, ~oparg, Efault);
+ break;
+ case FUTEX_OP_XOR:
+ unsafe_atomic_op2("xorl %4, %3", oval, uaddr, oparg, Efault);
+ break;
+ default:
+ return -ENOSYS;
+ }
}
- user_access_end();
return 0;
Efault:
- user_access_end();
return -EFAULT;
}
@@ -86,21 +79,19 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
{
int ret = 0;
- if (can_do_masked_user_access())
- uaddr = masked_user_access_begin(uaddr);
- else if (!user_access_begin(uaddr, sizeof(u32)))
- return -EFAULT;
- asm volatile("\n"
- "1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"
- "2:\n"
- _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %0) \
- : "+r" (ret), "=a" (oldval), "+m" (*uaddr)
- : "r" (newval), "1" (oldval)
- : "memory"
- );
- user_access_end();
- *uval = oldval;
+ scoped_user_rw_access(uaddr, Efault) {
+ asm_inline volatile("\n"
+ "1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"
+ "2:\n"
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %0)
+ : "+r" (ret), "=a" (oldval), "+m" (*uaddr)
+ : "r" (newval), "1" (oldval)
+ : "memory");
+ *uval = oldval;
+ }
return ret;
+Efault:
+ return -EFAULT;
}
#endif
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index f00c09ffe6a9..6b6d472baa0b 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -5,7 +5,7 @@
#include <linux/threads.h>
typedef struct {
-#if IS_ENABLED(CONFIG_KVM_INTEL)
+#if IS_ENABLED(CONFIG_CPU_MITIGATIONS) && IS_ENABLED(CONFIG_KVM_INTEL)
u8 kvm_cpu_l1tf_flush_l1d;
#endif
unsigned int __nmi_count; /* arch dependent */
@@ -68,7 +68,7 @@ extern u64 arch_irq_stat(void);
DECLARE_PER_CPU_CACHE_HOT(u16, __softirq_pending);
#define local_softirq_pending_ref __softirq_pending
-#if IS_ENABLED(CONFIG_KVM_INTEL)
+#if IS_ENABLED(CONFIG_CPU_MITIGATIONS) && IS_ENABLED(CONFIG_KVM_INTEL)
/*
* This function is called from noinstr interrupt contexts
* and must be inlined to not get instrumentation.
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 162ebd73a698..cbe19e669080 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -92,8 +92,6 @@ struct irq_cfg {
extern struct irq_cfg *irq_cfg(unsigned int irq);
extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data);
-extern void lock_vector_lock(void);
-extern void unlock_vector_lock(void);
#ifdef CONFIG_SMP
extern void vector_schedule_cleanup(struct irq_cfg *);
extern void irq_complete_move(struct irq_cfg *cfg);
@@ -101,12 +99,16 @@ extern void irq_complete_move(struct irq_cfg *cfg);
static inline void vector_schedule_cleanup(struct irq_cfg *c) { }
static inline void irq_complete_move(struct irq_cfg *c) { }
#endif
-
extern void apic_ack_edge(struct irq_data *data);
-#else /* CONFIG_IRQ_DOMAIN_HIERARCHY */
+#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+extern void lock_vector_lock(void);
+extern void unlock_vector_lock(void);
+#else
static inline void lock_vector_lock(void) {}
static inline void unlock_vector_lock(void) {}
-#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
+#endif
/* Statistics */
extern atomic_t irq_err_count;
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index e41cbf2ec41d..9ad86a7d13f6 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -30,6 +30,7 @@ enum x86_hypervisor_type {
X86_HYPER_KVM,
X86_HYPER_JAILHOUSE,
X86_HYPER_ACRN,
+ X86_HYPER_BHYVE,
};
#ifdef CONFIG_HYPERVISOR_GUEST
@@ -64,6 +65,7 @@ extern const struct hypervisor_x86 x86_hyper_xen_pv;
extern const struct hypervisor_x86 x86_hyper_kvm;
extern const struct hypervisor_x86 x86_hyper_jailhouse;
extern const struct hypervisor_x86 x86_hyper_acrn;
+extern const struct hypervisor_x86 x86_hyper_bhyve;
extern struct hypervisor_x86 x86_hyper_xen_hvm;
extern bool nopv;
diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h
index 28d845257303..5e45d6424722 100644
--- a/arch/x86/include/asm/ibt.h
+++ b/arch/x86/include/asm/ibt.h
@@ -59,10 +59,10 @@ static __always_inline __attribute_const__ u32 gen_endbr(void)
static __always_inline __attribute_const__ u32 gen_endbr_poison(void)
{
/*
- * 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it
- * will be unique to (former) ENDBR sites.
+ * 4 byte NOP that isn't NOP4, such that it will be unique to (former)
+ * ENDBR sites. Additionally it carries UDB as immediate.
*/
- return 0x001f0f66; /* osp nopl (%rax) */
+ return 0xd6401f0f; /* nopl -42(%rax) */
}
static inline bool __is_endbr(u32 val)
@@ -70,10 +70,6 @@ static inline bool __is_endbr(u32 val)
if (val == gen_endbr_poison())
return true;
- /* See cfi_fineibt_bhi_preamble() */
- if (IS_ENABLED(CONFIG_FINEIBT_BHI) && val == 0x001f0ff5)
- return true;
-
val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */
return val == gen_endbr();
}
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index a4ec27c67988..3218770670d3 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -393,7 +393,7 @@ static __always_inline void __##func(struct pt_regs *regs)
/**
* DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler
- when raised from kernel mode
+ * when raised from kernel mode
* @func: Function name of the entry point
*
* Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
@@ -403,7 +403,7 @@ static __always_inline void __##func(struct pt_regs *regs)
/**
* DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler
- when raised from user mode
+ * when raised from user mode
* @func: Function name of the entry point
*
* Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
@@ -460,17 +460,12 @@ __visible noinstr void func(struct pt_regs *regs, \
#endif
void idt_install_sysvec(unsigned int n, const void *function);
-
-#ifdef CONFIG_X86_FRED
void fred_install_sysvec(unsigned int vector, const idtentry_t function);
-#else
-static inline void fred_install_sysvec(unsigned int vector, const idtentry_t function) { }
-#endif
#define sysvec_install(vector, function) { \
- if (cpu_feature_enabled(X86_FEATURE_FRED)) \
+ if (IS_ENABLED(CONFIG_X86_FRED)) \
fred_install_sysvec(vector, function); \
- else \
+ if (!cpu_feature_enabled(X86_FEATURE_FRED)) \
idt_install_sysvec(vector, asm_##function); \
}
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index 97f341777db5..1b3060a3425c 100644
--- a/arch/x86/include/asm/inat.h
+++ b/arch/x86/include/asm/inat.h
@@ -37,6 +37,8 @@
#define INAT_PFX_EVEX 15 /* EVEX prefix */
/* x86-64 REX2 prefix */
#define INAT_PFX_REX2 16 /* 0xD5 */
+/* AMD XOP prefix */
+#define INAT_PFX_XOP 17 /* 0x8F */
#define INAT_LSTPFX_MAX 3
#define INAT_LGCPFX_MAX 11
@@ -77,6 +79,7 @@
#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3))
#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4))
#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
+#define INAT_XOPOK INAT_VEXOK
#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7))
#define INAT_NO_REX2 (1 << (INAT_FLAG_OFFS + 8))
@@ -111,6 +114,8 @@ extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
insn_byte_t vex_m,
insn_byte_t vex_pp);
+extern insn_attr_t inat_get_xop_attribute(insn_byte_t opcode,
+ insn_byte_t map_select);
/* Attribute checking functions */
static inline int inat_is_legacy_prefix(insn_attr_t attr)
@@ -164,6 +169,11 @@ static inline int inat_is_vex3_prefix(insn_attr_t attr)
return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3;
}
+static inline int inat_is_xop_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_XOP;
+}
+
static inline int inat_is_escape(insn_attr_t attr)
{
return attr & INAT_ESC_MASK;
@@ -229,6 +239,11 @@ static inline int inat_accept_vex(insn_attr_t attr)
return attr & INAT_VEXOK;
}
+static inline int inat_accept_xop(insn_attr_t attr)
+{
+ return attr & INAT_XOPOK;
+}
+
static inline int inat_must_vex(insn_attr_t attr)
{
return attr & (INAT_VEXONLY | INAT_EVEXONLY);
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 8b1b1abcef15..01ccdd168df0 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -2,12 +2,6 @@
#ifndef _ASM_X86_INIT_H
#define _ASM_X86_INIT_H
-#if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 170000
-#define __head __section(".head.text") __no_sanitize_undefined __no_stack_protector
-#else
-#define __head __section(".head.text") __no_sanitize_undefined
-#endif
-
struct x86_mapping_info {
void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
void (*free_pgt_page)(void *, void *); /* free buf for page table */
diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index 54368a43abf6..4733e9064ee5 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -44,4 +44,6 @@ enum insn_mmio_type {
enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes);
+bool insn_is_nop(struct insn *insn);
+
#endif /* _ASM_X86_INSN_EVAL_H */
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 7152ea809e6a..846d21c1a7f8 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -71,7 +71,10 @@ struct insn {
* prefixes.bytes[3]: last prefix
*/
struct insn_field rex_prefix; /* REX prefix */
- struct insn_field vex_prefix; /* VEX prefix */
+ union {
+ struct insn_field vex_prefix; /* VEX prefix */
+ struct insn_field xop_prefix; /* XOP prefix */
+ };
struct insn_field opcode; /*
* opcode.bytes[0]: opcode1
* opcode.bytes[1]: opcode2
@@ -135,6 +138,17 @@ struct insn {
#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
+/* XOP bit fields */
+#define X86_XOP_R(xop) ((xop) & 0x80) /* XOP Byte2 */
+#define X86_XOP_X(xop) ((xop) & 0x40) /* XOP Byte2 */
+#define X86_XOP_B(xop) ((xop) & 0x20) /* XOP Byte2 */
+#define X86_XOP_M(xop) ((xop) & 0x1f) /* XOP Byte2 */
+#define X86_XOP_W(xop) ((xop) & 0x80) /* XOP Byte3 */
+#define X86_XOP_V(xop) ((xop) & 0x78) /* XOP Byte3 */
+#define X86_XOP_L(xop) ((xop) & 0x04) /* XOP Byte3 */
+#define X86_XOP_P(xop) ((xop) & 0x03) /* XOP Byte3 */
+#define X86_XOP_M_MIN 0x08 /* Min of XOP.M */
+#define X86_XOP_M_MAX 0x1f /* Max of XOP.M */
extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
extern int insn_get_prefixes(struct insn *insn);
@@ -178,7 +192,7 @@ static inline insn_byte_t insn_rex2_m_bit(struct insn *insn)
return X86_REX2_M(insn->rex_prefix.bytes[1]);
}
-static inline int insn_is_avx(struct insn *insn)
+static inline int insn_is_avx_or_xop(struct insn *insn)
{
if (!insn->prefixes.got)
insn_get_prefixes(insn);
@@ -192,6 +206,22 @@ static inline int insn_is_evex(struct insn *insn)
return (insn->vex_prefix.nbytes == 4);
}
+/* If we already know this is AVX/XOP encoded */
+static inline int avx_insn_is_xop(struct insn *insn)
+{
+ insn_attr_t attr = inat_get_opcode_attribute(insn->vex_prefix.bytes[0]);
+
+ return inat_is_xop_prefix(attr);
+}
+
+static inline int insn_is_xop(struct insn *insn)
+{
+ if (!insn_is_avx_or_xop(insn))
+ return 0;
+
+ return avx_insn_is_xop(insn);
+}
+
static inline int insn_has_emulate_prefix(struct insn *insn)
{
return !!insn->emulate_prefix_size;
@@ -222,11 +252,26 @@ static inline insn_byte_t insn_vex_w_bit(struct insn *insn)
return X86_VEX_W(insn->vex_prefix.bytes[2]);
}
+static inline insn_byte_t insn_xop_map_bits(struct insn *insn)
+{
+ if (insn->xop_prefix.nbytes < 3) /* XOP is 3 bytes */
+ return 0;
+ return X86_XOP_M(insn->xop_prefix.bytes[1]);
+}
+
+static inline insn_byte_t insn_xop_p_bits(struct insn *insn)
+{
+ return X86_XOP_P(insn->vex_prefix.bytes[2]);
+}
+
/* Get the last prefix id from last prefix or VEX prefix */
static inline int insn_last_prefix_id(struct insn *insn)
{
- if (insn_is_avx(insn))
+ if (insn_is_avx_or_xop(insn)) {
+ if (avx_insn_is_xop(insn))
+ return insn_xop_p_bits(insn);
return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */
+ }
if (insn->prefixes.bytes[3])
return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
@@ -267,7 +312,6 @@ static inline int insn_offset_immediate(struct insn *insn)
/**
* for_each_insn_prefix() -- Iterate prefixes in the instruction
* @insn: Pointer to struct insn.
- * @idx: Index storage.
* @prefix: Prefix byte.
*
* Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix
@@ -276,8 +320,8 @@ static inline int insn_offset_immediate(struct insn *insn)
* Since prefixes.nbytes can be bigger than 4 if some prefixes
* are repeated, it cannot be used for looping over the prefixes.
*/
-#define for_each_insn_prefix(insn, idx, prefix) \
- for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+#define for_each_insn_prefix(insn, prefix) \
+ for (int idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
#define POP_SS_OPCODE 0x1f
#define MOV_SREG_OPCODE 0x8e
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index be10c188614f..950bfd006905 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -51,7 +51,7 @@
#define INTEL_PENTIUM_MMX IFM(5, 0x04) /* P55C */
#define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */
-/* Family 6 */
+/* Family 6, 18, 19 */
#define INTEL_PENTIUM_PRO IFM(6, 0x01)
#define INTEL_PENTIUM_II_KLAMATH IFM(6, 0x03)
#define INTEL_PENTIUM_III_DESCHUTES IFM(6, 0x05)
@@ -126,6 +126,8 @@
#define INTEL_GRANITERAPIDS_X IFM(6, 0xAD) /* Redwood Cove */
#define INTEL_GRANITERAPIDS_D IFM(6, 0xAE)
+#define INTEL_DIAMONDRAPIDS_X IFM(19, 0x01) /* Panther Cove */
+
#define INTEL_BARTLETTLAKE IFM(6, 0xD7) /* Raptor Cove */
/* "Hybrid" Processors (P-Core/E-Core) */
@@ -148,7 +150,12 @@
#define INTEL_LUNARLAKE_M IFM(6, 0xBD) /* Lion Cove / Skymont */
-#define INTEL_PANTHERLAKE_L IFM(6, 0xCC) /* Cougar Cove / Crestmont */
+#define INTEL_PANTHERLAKE_L IFM(6, 0xCC) /* Cougar Cove / Darkmont */
+
+#define INTEL_WILDCATLAKE_L IFM(6, 0xD5)
+
+#define INTEL_NOVALAKE IFM(18, 0x01) /* Coyote Cove / Arctic Wolf */
+#define INTEL_NOVALAKE_L IFM(18, 0x03) /* Coyote Cove / Arctic Wolf */
/* "Small Core" Processors (Atom/E-Core) */
@@ -198,9 +205,6 @@
#define INTEL_P4_PRESCOTT_2M IFM(15, 0x04)
#define INTEL_P4_CEDARMILL IFM(15, 0x06) /* Also Xeon Dempsey */
-/* Family 19 */
-#define INTEL_PANTHERCOVE_X IFM(19, 0x01) /* Diamond Rapids */
-
/*
* Intel CPU core types
*
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
index 5dbeac48a5b9..695f87efbeb8 100644
--- a/arch/x86/include/asm/intel_ds.h
+++ b/arch/x86/include/asm/intel_ds.h
@@ -4,7 +4,15 @@
#include <linux/percpu-defs.h>
#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
+#define PEBS_BUFFER_SHIFT 4
+#define PEBS_BUFFER_SIZE (PAGE_SIZE << PEBS_BUFFER_SHIFT)
+
+/*
+ * The largest PEBS record could consume a page, ensure
+ * a record at least can be written after triggering PMI.
+ */
+#define ARCH_PEBS_THRESH_MULTI ((PEBS_BUFFER_SIZE - PAGE_SIZE) >> PEBS_BUFFER_SHIFT)
+#define ARCH_PEBS_THRESH_SINGLE 1
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS_FMT4 8
diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h
index 43b7657febca..944637a4e6de 100644
--- a/arch/x86/include/asm/intel_telemetry.h
+++ b/arch/x86/include/asm/intel_telemetry.h
@@ -59,18 +59,6 @@ struct telemetry_plt_config {
};
struct telemetry_core_ops {
- int (*get_sampling_period)(u8 *pss_min_period, u8 *pss_max_period,
- u8 *ioss_min_period, u8 *ioss_max_period);
-
- int (*get_eventconfig)(struct telemetry_evtconfig *pss_evtconfig,
- struct telemetry_evtconfig *ioss_evtconfig,
- int pss_len, int ioss_len);
-
- int (*update_events)(struct telemetry_evtconfig pss_evtconfig,
- struct telemetry_evtconfig ioss_evtconfig);
-
- int (*set_sampling_period)(u8 pss_period, u8 ioss_period);
-
int (*get_trace_verbosity)(enum telemetry_unit telem_unit,
u32 *verbosity);
@@ -84,11 +72,6 @@ struct telemetry_core_ops {
int (*read_eventlog)(enum telemetry_unit telem_unit,
struct telemetry_evtlog *evtlog,
int len, int log_all_evts);
-
- int (*add_events)(u8 num_pss_evts, u8 num_ioss_evts,
- u32 *pss_evtmap, u32 *ioss_evtmap);
-
- int (*reset_events)(void);
};
int telemetry_set_pltdata(const struct telemetry_core_ops *ops,
@@ -101,35 +84,15 @@ struct telemetry_plt_config *telemetry_get_pltdata(void);
int telemetry_get_evtname(enum telemetry_unit telem_unit,
const char **name, int len);
-int telemetry_update_events(struct telemetry_evtconfig pss_evtconfig,
- struct telemetry_evtconfig ioss_evtconfig);
-
-int telemetry_add_events(u8 num_pss_evts, u8 num_ioss_evts,
- u32 *pss_evtmap, u32 *ioss_evtmap);
-
-int telemetry_reset_events(void);
-
-int telemetry_get_eventconfig(struct telemetry_evtconfig *pss_config,
- struct telemetry_evtconfig *ioss_config,
- int pss_len, int ioss_len);
-
int telemetry_read_events(enum telemetry_unit telem_unit,
struct telemetry_evtlog *evtlog, int len);
-int telemetry_raw_read_events(enum telemetry_unit telem_unit,
- struct telemetry_evtlog *evtlog, int len);
-
int telemetry_read_eventlog(enum telemetry_unit telem_unit,
struct telemetry_evtlog *evtlog, int len);
int telemetry_raw_read_eventlog(enum telemetry_unit telem_unit,
struct telemetry_evtlog *evtlog, int len);
-int telemetry_get_sampling_period(u8 *pss_min_period, u8 *pss_max_period,
- u8 *ioss_min_period, u8 *ioss_max_period);
-
-int telemetry_set_sampling_period(u8 pss_period, u8 ioss_period);
-
int telemetry_set_trace_verbosity(enum telemetry_unit telem_unit,
u32 verbosity);
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5036f13ab69f..4e55d1755846 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -26,7 +26,22 @@ enum {
IRQ_REMAP_X2APIC_MODE,
};
-struct vcpu_data {
+/*
+ * This is mainly used to communicate information back-and-forth
+ * between SVM and IOMMU for setting up and tearing down posted
+ * interrupt
+ */
+struct amd_iommu_pi_data {
+ u64 vapic_addr; /* Physical address of the vCPU's vAPIC. */
+ u32 ga_tag;
+ u32 vector; /* Guest vector of the interrupt */
+ int cpu;
+ bool ga_log_intr;
+ bool is_guest_mode;
+ void *ir_data;
+};
+
+struct intel_iommu_pi_data {
u64 pi_desc_addr; /* Physical address of PI Descriptor */
u32 vector; /* Guest vector of the interrupt */
};
@@ -72,4 +87,11 @@ static inline void panic_if_irq_remap(const char *msg)
}
#endif /* CONFIG_IRQ_REMAP */
+
+#ifdef CONFIG_X86_POSTED_MSI
+void intel_ack_posted_msi_irq(struct irq_data *irqd);
+#else
+#define intel_ack_posted_msi_irq NULL
+#endif
+
#endif /* __X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
index 735c3a491f60..8325b79f2ac6 100644
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -101,7 +101,7 @@
#define ASM_CALL_ARG0 \
"1: call %c[__func] \n" \
- ANNOTATE_REACHABLE(1b)
+ ANNOTATE_REACHABLE(1b) " \n"
#define ASM_CALL_ARG1 \
"movq %[arg1], %%rdi \n" \
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 9a9b21b78905..a1193e9d65f2 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -25,7 +25,7 @@ extern __always_inline unsigned long native_save_fl(void)
*/
asm volatile("# __raw_save_flags\n\t"
"pushf ; pop %0"
- : "=rm" (flags)
+ : ASM_OUTPUT_RM (flags)
: /* no input */
: "memory");
@@ -44,13 +44,13 @@ static __always_inline void native_irq_enable(void)
static __always_inline void native_safe_halt(void)
{
- mds_idle_clear_cpu_buffers();
+ x86_idle_clear_cpu_buffers();
asm volatile("sti; hlt": : :"memory");
}
static __always_inline void native_halt(void)
{
- mds_idle_clear_cpu_buffers();
+ x86_idle_clear_cpu_buffers();
asm volatile("hlt": : :"memory");
}
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 61dd1dee7812..05b16299588d 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -15,6 +15,7 @@
#define JUMP_TABLE_ENTRY(key, label) \
".pushsection __jump_table, \"aw\" \n\t" \
_ASM_ALIGN "\n\t" \
+ ANNOTATE_DATA_SPECIAL "\n" \
".long 1b - . \n\t" \
".long " label " - . \n\t" \
_ASM_PTR " " key " - . \n\t" \
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index f2ad77929d6e..5cfb27f26583 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -13,6 +13,15 @@
# define KEXEC_DEBUG_EXC_HANDLER_SIZE 6 /* PUSHI, PUSHI, 2-byte JMP */
#endif
+#ifdef CONFIG_X86_64
+
+#include <linux/bits.h>
+
+#define RELOC_KERNEL_PRESERVE_CONTEXT BIT(0)
+#define RELOC_KERNEL_CACHE_INCOHERENT BIT(1)
+
+#endif
+
# define KEXEC_CONTROL_PAGE_SIZE 4096
# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
@@ -121,8 +130,7 @@ typedef unsigned long
relocate_kernel_fn(unsigned long indirection_page,
unsigned long pa_control_page,
unsigned long start_address,
- unsigned int preserve_context,
- unsigned int host_mem_enc_active);
+ unsigned int flags);
#endif
extern relocate_kernel_fn relocate_kernel;
#define ARCH_HAS_KIMAGE_ARCH
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 8d50e3e0a19b..de709fb5bd76 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -49,7 +49,6 @@ KVM_X86_OP(set_idt)
KVM_X86_OP(get_gdt)
KVM_X86_OP(set_gdt)
KVM_X86_OP(sync_dirty_debug_regs)
-KVM_X86_OP(set_dr6)
KVM_X86_OP(set_dr7)
KVM_X86_OP(cache_reg)
KVM_X86_OP(get_rflags)
@@ -112,7 +111,7 @@ KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
KVM_X86_OP_OPTIONAL(vcpu_blocking)
KVM_X86_OP_OPTIONAL(vcpu_unblocking)
KVM_X86_OP_OPTIONAL(pi_update_irte)
-KVM_X86_OP_OPTIONAL(pi_start_assignment)
+KVM_X86_OP_OPTIONAL(pi_start_bypass)
KVM_X86_OP_OPTIONAL(apicv_pre_state_restore)
KVM_X86_OP_OPTIONAL(apicv_post_state_restore)
KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt)
@@ -129,6 +128,7 @@ KVM_X86_OP(enable_smi_window)
KVM_X86_OP_OPTIONAL(dev_get_attr)
KVM_X86_OP_OPTIONAL(mem_enc_ioctl)
KVM_X86_OP_OPTIONAL(vcpu_mem_enc_ioctl)
+KVM_X86_OP_OPTIONAL(vcpu_mem_enc_unlocked_ioctl)
KVM_X86_OP_OPTIONAL(mem_enc_register_region)
KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from)
@@ -139,14 +139,14 @@ KVM_X86_OP(check_emulate_instruction)
KVM_X86_OP(apic_init_signal_blocked)
KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush)
KVM_X86_OP_OPTIONAL(migrate_timers)
-KVM_X86_OP(msr_filter_changed)
+KVM_X86_OP(recalc_intercepts)
KVM_X86_OP(complete_emulated_msr)
KVM_X86_OP(vcpu_deliver_sipi_vector)
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
KVM_X86_OP_OPTIONAL(get_untagged_addr)
KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
KVM_X86_OP_OPTIONAL_RET0(gmem_prepare)
-KVM_X86_OP_OPTIONAL_RET0(private_max_mapping_level)
+KVM_X86_OP_OPTIONAL_RET0(gmem_max_mapping_level)
KVM_X86_OP_OPTIONAL(gmem_invalidate)
#undef KVM_X86_OP
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b4a391929cdb..5a3bfa293e8b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -31,6 +31,7 @@
#include <asm/apic.h>
#include <asm/pvclock-abi.h>
+#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/msr-index.h>
@@ -119,7 +120,7 @@
#define KVM_REQ_TLB_FLUSH_GUEST \
KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_APF_READY KVM_ARCH_REQ(28)
-#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
+#define KVM_REQ_RECALC_INTERCEPTS KVM_ARCH_REQ(29)
#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
@@ -141,7 +142,7 @@
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
| X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
- | X86_CR4_LAM_SUP))
+ | X86_CR4_LAM_SUP | X86_CR4_CET))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -249,7 +250,6 @@ enum x86_intercept_stage;
#define DR7_BP_EN_MASK 0x000000ff
#define DR7_GE (1 << 9)
#define DR7_GD (1 << 13)
-#define DR7_FIXED_1 0x00000400
#define DR7_VOLATILE 0xffff2bff
#define KVM_GUESTDBG_VALID_MASK \
@@ -267,6 +267,7 @@ enum x86_intercept_stage;
#define PFERR_RSVD_MASK BIT(3)
#define PFERR_FETCH_MASK BIT(4)
#define PFERR_PK_MASK BIT(5)
+#define PFERR_SS_MASK BIT(6)
#define PFERR_SGX_MASK BIT(15)
#define PFERR_GUEST_RMP_MASK BIT_ULL(31)
#define PFERR_GUEST_FINAL_MASK BIT_ULL(32)
@@ -297,6 +298,7 @@ enum x86_intercept_stage;
*/
#define KVM_APIC_PV_EOI_PENDING 1
+struct kvm_kernel_irqfd;
struct kvm_kernel_irq_routing_entry;
/*
@@ -544,10 +546,10 @@ struct kvm_pmc {
#define KVM_MAX_NR_GP_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \
KVM_MAX_NR_AMD_GP_COUNTERS)
-#define KVM_MAX_NR_INTEL_FIXED_COUTNERS 3
-#define KVM_MAX_NR_AMD_FIXED_COUTNERS 0
-#define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUTNERS, \
- KVM_MAX_NR_AMD_FIXED_COUTNERS)
+#define KVM_MAX_NR_INTEL_FIXED_COUNTERS 3
+#define KVM_MAX_NR_AMD_FIXED_COUNTERS 0
+#define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUNTERS, \
+ KVM_MAX_NR_AMD_FIXED_COUNTERS)
struct kvm_pmu {
u8 version;
@@ -578,6 +580,9 @@ struct kvm_pmu {
DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(pmc_counting_instructions, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(pmc_counting_branches, X86_PMC_IDX_MAX);
+
u64 ds_area;
u64 pebs_enable;
u64 pebs_enable_rsvd;
@@ -700,8 +705,13 @@ struct kvm_vcpu_hv {
struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS];
- /* Preallocated buffer for handling hypercalls passing sparse vCPU set */
+ /*
+ * Preallocated buffers for handling hypercalls that pass sparse vCPU
+ * sets (for high vCPU counts, they're too large to comfortably fit on
+ * the stack).
+ */
u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS];
+ DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
struct hv_vp_assist_page vp_assist_page;
@@ -764,6 +774,8 @@ enum kvm_only_cpuid_leafs {
CPUID_8000_0022_EAX,
CPUID_7_2_EDX,
CPUID_24_0_EBX,
+ CPUID_8000_0021_ECX,
+ CPUID_7_1_ECX,
NR_KVM_CPU_CAPS,
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
@@ -804,7 +816,6 @@ struct kvm_vcpu_arch {
bool at_instruction_boundary;
bool tpr_access_reporting;
bool xfd_no_write_intercept;
- u64 ia32_xss;
u64 microcode_version;
u64 arch_capabilities;
u64 perf_capabilities;
@@ -865,6 +876,8 @@ struct kvm_vcpu_arch {
u64 xcr0;
u64 guest_supported_xcr0;
+ u64 ia32_xss;
+ u64 guest_supported_xss;
struct kvm_pio_request pio;
void *pio_data;
@@ -919,6 +932,7 @@ struct kvm_vcpu_arch {
bool emulate_regs_need_sync_from_vcpu;
int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
unsigned long cui_linear_rip;
+ int cui_rdmsr_imm_reg;
gpa_t time;
s8 pvclock_tsc_shift;
@@ -1041,9 +1055,6 @@ struct kvm_vcpu_arch {
/* be preempted when it's in kernel-mode(cpl=0) */
bool preempted_in_kernel;
- /* Flush the L1 Data cache for L1TF mitigation on VMENTER */
- bool l1tf_flush_l1d;
-
/* Host CPU on which VM-entry was most recently attempted */
int last_vmentry_cpu;
@@ -1314,6 +1325,12 @@ enum kvm_apicv_inhibit {
*/
APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED,
+ /*
+ * AVIC is disabled because the vCPU's APIC ID is beyond the max
+ * supported by AVIC/x2AVIC, i.e. the vCPU is unaddressable.
+ */
+ APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG,
+
NR_APICV_INHIBIT_REASONS,
};
@@ -1332,20 +1349,10 @@ enum kvm_apicv_inhibit {
__APICV_INHIBIT_REASON(IRQWIN), \
__APICV_INHIBIT_REASON(PIT_REINJ), \
__APICV_INHIBIT_REASON(SEV), \
- __APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED)
+ __APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED), \
+ __APICV_INHIBIT_REASON(PHYSICAL_ID_TOO_BIG)
-struct kvm_arch {
- unsigned long n_used_mmu_pages;
- unsigned long n_requested_mmu_pages;
- unsigned long n_max_mmu_pages;
- unsigned int indirect_shadow_pages;
- u8 mmu_valid_gen;
- u8 vm_type;
- bool has_private_mem;
- bool has_protected_state;
- bool pre_fault_allowed;
- struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
- struct list_head active_mmu_pages;
+struct kvm_possible_nx_huge_pages {
/*
* A list of kvm_mmu_page structs that, if zapped, could possibly be
* replaced by an NX huge page. A shadow page is on this list if its
@@ -1357,7 +1364,32 @@ struct kvm_arch {
* guest attempts to execute from the region then KVM obviously can't
* create an NX huge page (without hanging the guest).
*/
- struct list_head possible_nx_huge_pages;
+ struct list_head pages;
+ u64 nr_pages;
+};
+
+enum kvm_mmu_type {
+ KVM_SHADOW_MMU,
+#ifdef CONFIG_X86_64
+ KVM_TDP_MMU,
+#endif
+ KVM_NR_MMU_TYPES,
+};
+
+struct kvm_arch {
+ unsigned long n_used_mmu_pages;
+ unsigned long n_requested_mmu_pages;
+ unsigned long n_max_mmu_pages;
+ unsigned int indirect_shadow_pages;
+ u8 mmu_valid_gen;
+ u8 vm_type;
+ bool has_private_mem;
+ bool has_protected_state;
+ bool has_protected_eoi;
+ bool pre_fault_allowed;
+ struct hlist_head *mmu_page_hash;
+ struct list_head active_mmu_pages;
+ struct kvm_possible_nx_huge_pages possible_nx_huge_pages[KVM_NR_MMU_TYPES];
#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
struct kvm_page_track_notifier_head track_notifier_head;
#endif
@@ -1373,11 +1405,13 @@ struct kvm_arch {
#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
atomic_t noncoherent_dma_count;
-#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
- atomic_t assigned_device_count;
+ unsigned long nr_possible_bypass_irqs;
+
+#ifdef CONFIG_KVM_IOAPIC
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
+#endif
atomic_t vapics_in_nmi_mode;
struct mutex apic_map_lock;
struct kvm_apic_map __rcu *apic_map;
@@ -1392,12 +1426,8 @@ struct kvm_arch {
gpa_t wall_clock;
- bool mwait_in_guest;
- bool hlt_in_guest;
- bool pause_in_guest;
- bool cstate_in_guest;
+ u64 disabled_exits;
- unsigned long irq_sources_bitmap;
s64 kvmclock_offset;
/*
@@ -1423,11 +1453,6 @@ struct kvm_arch {
bool use_master_clock;
u64 master_kernel_ns;
u64 master_cycle_now;
- struct delayed_work kvmclock_update_work;
- struct delayed_work kvmclock_sync_work;
-
- /* reads protected by irq_srcu, writes by irq_lock */
- struct hlist_head mask_notifier_list;
#ifdef CONFIG_KVM_HYPERV
struct kvm_hv hyperv;
@@ -1451,6 +1476,7 @@ struct kvm_arch {
bool x2apic_format;
bool x2apic_broadcast_quirk_disabled;
+ bool has_mapped_host_mmio;
bool guest_can_read_msr_platform_info;
bool exception_payload_enabled;
@@ -1516,7 +1542,7 @@ struct kvm_arch {
* is held in read mode:
* - tdp_mmu_roots (above)
* - the link field of kvm_mmu_page structs used by the TDP MMU
- * - possible_nx_huge_pages;
+ * - possible_nx_huge_pages[KVM_TDP_MMU];
* - the possible_nx_huge_page_link field of kvm_mmu_page structs used
* by the TDP MMU
* Because the lock is only taken within the MMU lock, strictly
@@ -1674,6 +1700,12 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
}
+enum kvm_x86_run_flags {
+ KVM_RUN_FORCE_IMMEDIATE_EXIT = BIT(0),
+ KVM_RUN_LOAD_GUEST_DR6 = BIT(1),
+ KVM_RUN_LOAD_DEBUGCTL = BIT(2),
+};
+
struct kvm_x86_ops {
const char *name;
@@ -1702,6 +1734,12 @@ struct kvm_x86_ops {
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
+ /*
+ * Mask of DEBUGCTL bits that are owned by the host, i.e. that need to
+ * match the host's value even while the guest is active.
+ */
+ const u64 HOST_OWNED_DEBUGCTL;
+
void (*update_exception_bitmap)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
@@ -1724,7 +1762,6 @@ struct kvm_x86_ops {
void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
- void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
@@ -1755,7 +1792,7 @@ struct kvm_x86_ops {
int (*vcpu_pre_run)(struct kvm_vcpu *vcpu);
enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu,
- bool force_immediate_exit);
+ u64 run_flags);
int (*handle_exit)(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion exit_fastpath);
int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
@@ -1806,15 +1843,15 @@ struct kvm_x86_ops {
void *external_spt);
/* Update the external page table from spte getting set. */
int (*set_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
- kvm_pfn_t pfn_for_gfn);
+ u64 mirror_spte);
/* Update external page tables for page table about to be freed. */
int (*free_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
void *external_spt);
/* Update external page table from spte getting removed, and flush TLB. */
- int (*remove_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
- kvm_pfn_t pfn_for_gfn);
+ void (*remove_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+ u64 mirror_spte);
bool (*has_wbinvd_exit)(void);
@@ -1847,9 +1884,10 @@ struct kvm_x86_ops {
void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
- int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq,
- uint32_t guest_irq, bool set);
- void (*pi_start_assignment)(struct kvm *kvm);
+ int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
+ unsigned int host_irq, uint32_t guest_irq,
+ struct kvm_vcpu *vcpu, u32 vector);
+ void (*pi_start_bypass)(struct kvm *kvm);
void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu);
void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
@@ -1871,6 +1909,7 @@ struct kvm_x86_ops {
int (*dev_get_attr)(u32 group, u64 attr, u64 *val);
int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
int (*vcpu_mem_enc_ioctl)(struct kvm_vcpu *vcpu, void __user *argp);
+ int (*vcpu_mem_enc_unlocked_ioctl)(struct kvm_vcpu *vcpu, void __user *argp);
int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
@@ -1886,7 +1925,7 @@ struct kvm_x86_ops {
int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu);
void (*migrate_timers)(struct kvm_vcpu *vcpu);
- void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
+ void (*recalc_intercepts)(struct kvm_vcpu *vcpu);
int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
@@ -1900,7 +1939,7 @@ struct kvm_x86_ops {
void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order);
void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end);
- int (*private_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn);
+ int (*gmem_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn, bool is_private);
};
struct kvm_x86_nested_ops {
@@ -1944,6 +1983,7 @@ struct kvm_arch_async_pf {
extern u32 __read_mostly kvm_nr_uret_msrs;
extern bool __read_mostly allow_smaller_maxphyaddr;
extern bool __read_mostly enable_apicv;
+extern bool __read_mostly enable_ipiv;
extern bool __read_mostly enable_device_posted_irqs;
extern struct kvm_x86_ops kvm_x86_ops;
@@ -1962,7 +2002,7 @@ void kvm_x86_vendor_exit(void);
#define __KVM_HAVE_ARCH_VM_ALLOC
static inline struct kvm *kvm_arch_alloc_vm(void)
{
- return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ return kvzalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT);
}
#define __KVM_HAVE_ARCH_VM_FREE
@@ -2007,7 +2047,7 @@ void kvm_mmu_vendor_module_exit(void);
void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
-void kvm_mmu_init_vm(struct kvm *kvm);
+int kvm_mmu_init_vm(struct kvm *kvm);
void kvm_mmu_uninit_vm(struct kvm *kvm);
void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm,
@@ -2038,19 +2078,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes);
-struct kvm_irq_mask_notifier {
- void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
- int irq;
- struct hlist_node link;
-};
-
-void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
- struct kvm_irq_mask_notifier *kimn);
-void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
- struct kvm_irq_mask_notifier *kimn);
-void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
- bool mask);
-
extern bool tdp_enabled;
u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
@@ -2112,6 +2139,11 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
* the gfn, i.e. retrying the instruction will hit a
* !PRESENT fault, which results in a new shadow page
* and sends KVM back to square one.
+ *
+ * EMULTYPE_SKIP_SOFT_INT - Set in combination with EMULTYPE_SKIP to only skip
+ * an instruction if it could generate a given software
+ * interrupt, which must be encoded via
+ * EMULTYPE_SET_SOFT_INT_VECTOR().
*/
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
@@ -2122,6 +2154,10 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
#define EMULTYPE_PF (1 << 6)
#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
#define EMULTYPE_WRITE_PF_TO_SP (1 << 8)
+#define EMULTYPE_SKIP_SOFT_INT (1 << 9)
+
+#define EMULTYPE_SET_SOFT_INT_VECTOR(v) ((u32)((v) & 0xff) << 16)
+#define EMULTYPE_GET_SOFT_INT_VECTOR(e) (((e) >> 16) & 0xff)
static inline bool kvm_can_emulate_event_vectoring(int emul_type)
{
@@ -2136,16 +2172,20 @@ void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu,
void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu);
void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa);
+void kvm_prepare_unexpected_reason_exit(struct kvm_vcpu *vcpu, u64 exit_reason);
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
-int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data);
-int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data);
-int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated);
-int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
-int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
+int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data);
+int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data);
+int kvm_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int kvm_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data);
int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
+int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg);
int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);
+int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg);
int kvm_emulate_as_nop(struct kvm_vcpu *vcpu);
int kvm_emulate_invd(struct kvm_vcpu *vcpu);
int kvm_emulate_mwait(struct kvm_vcpu *vcpu);
@@ -2177,6 +2217,7 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr);
unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
+int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);
int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu);
int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
@@ -2209,9 +2250,6 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state,
return !!(*irq_state);
}
-int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
-void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
-
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
@@ -2269,10 +2307,8 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
int tdp_max_root_level, int tdp_huge_page_level);
-#ifdef CONFIG_KVM_PRIVATE_MEM
+#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem)
-#else
-#define kvm_arch_has_private_mem(kvm) false
#endif
#define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state)
@@ -2348,7 +2384,7 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
int kvm_add_user_return_msr(u32 msr);
int kvm_find_user_return_msr(u32 msr);
int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
-void kvm_user_return_msr_update_cache(unsigned int index, u64 val);
+u64 kvm_get_user_return_msr(unsigned int slot);
static inline bool kvm_is_supported_user_return_msr(u32 msr)
{
@@ -2385,12 +2421,6 @@ void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
-bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
- struct kvm_vcpu **dest_vcpu);
-
-void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
- struct kvm_lapic_irq *irq);
-
static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
{
/* We can only post Fixed and LowPrio IRQs */
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 57bc74e112f2..4a47c16e2df8 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -124,7 +124,6 @@ bool kvm_para_available(void);
unsigned int kvm_arch_para_features(void);
unsigned int kvm_arch_para_hints(void);
void kvm_async_pf_task_wait_schedule(u32 token);
-void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_apf_flags(void);
bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);
@@ -148,7 +147,6 @@ static inline void kvm_spinlock_init(void)
#else /* CONFIG_KVM_GUEST */
#define kvm_async_pf_task_wait_schedule(T) do {} while(0)
-#define kvm_async_pf_task_wake(T) do {} while(0)
static inline bool kvm_para_available(void)
{
diff --git a/arch/x86/include/asm/kvm_types.h b/arch/x86/include/asm/kvm_types.h
index 08f1b57d3b62..d7c704ed1be9 100644
--- a/arch/x86/include/asm/kvm_types.h
+++ b/arch/x86/include/asm/kvm_types.h
@@ -2,6 +2,21 @@
#ifndef _ASM_X86_KVM_TYPES_H
#define _ASM_X86_KVM_TYPES_H
+#if IS_MODULE(CONFIG_KVM_AMD) && IS_MODULE(CONFIG_KVM_INTEL)
+#define KVM_SUB_MODULES kvm-amd,kvm-intel
+#elif IS_MODULE(CONFIG_KVM_AMD)
+#define KVM_SUB_MODULES kvm-amd
+#elif IS_MODULE(CONFIG_KVM_INTEL)
+#define KVM_SUB_MODULES kvm-intel
+#else
+#undef KVM_SUB_MODULES
+/*
+ * Don't export symbols for KVM without vendor modules, as kvm.ko is built iff
+ * at least one vendor module is enabled.
+ */
+#define EXPORT_SYMBOL_FOR_KVM(symbol)
+#endif
+
#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40
#endif /* _ASM_X86_KVM_TYPES_H */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 6c77c03139f7..2d98886de09a 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -48,6 +48,7 @@
/* AMD-specific bits */
#define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */
+#define MCI_STATUS_PADDRV BIT_ULL(54) /* Valid System Physical Address */
#define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */
#define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */
#define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */
@@ -62,6 +63,7 @@
*/
#define MCI_CONFIG_MCAX 0x1
#define MCI_CONFIG_FRUTEXT BIT_ULL(9)
+#define MCI_CONFIG_PADDRV BIT_ULL(11)
#define MCI_IPID_MCATYPE 0xFFFF0000
#define MCI_IPID_HWID 0xFFF
@@ -166,6 +168,12 @@
#define MCE_IN_KERNEL_COPYIN BIT_ULL(7)
/*
+ * Indicates that handler should check and clear Deferred error registers
+ * rather than common ones.
+ */
+#define MCE_CHECK_DFR_REGS BIT_ULL(8)
+
+/*
* This structure contains all data related to the MCE log. Also
* carries a signature to make it easier to find from external
* debugging tools. Each entry is only valid when its finished flag
@@ -241,12 +249,14 @@ struct cper_ia_proc_ctx;
#ifdef CONFIG_X86_MCE
int mcheck_init(void);
+void mca_bsp_init(struct cpuinfo_x86 *c);
void mcheck_cpu_init(struct cpuinfo_x86 *c);
void mcheck_cpu_clear(struct cpuinfo_x86 *c);
int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
u64 lapic_id);
#else
static inline int mcheck_init(void) { return 0; }
+static inline void mca_bsp_init(struct cpuinfo_x86 *c) {}
static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {}
static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
@@ -290,8 +300,7 @@ DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
enum mcp_flags {
MCP_TIMESTAMP = BIT(0), /* log time stamp */
MCP_UC = BIT(1), /* log uncorrected errors */
- MCP_DONTLOG = BIT(2), /* only clear, don't log */
- MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */
+ MCP_QUEUE_LOG = BIT(2), /* only queue to genpool */
};
void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
@@ -301,6 +310,12 @@ DECLARE_PER_CPU(struct mce, injectm);
/* Disable CMCI/polling for MCA bank claimed by firmware */
extern void mce_disable_bank(int bank);
+#ifdef CONFIG_X86_MCE_THRESHOLD
+void mce_save_apei_thr_limit(u32 thr_limit);
+#else
+static inline void mce_save_apei_thr_limit(u32 thr_limit) { }
+#endif /* CONFIG_X86_MCE_THRESHOLD */
+
/*
* Exception handler
*/
@@ -371,15 +386,9 @@ enum smca_bank_types {
extern bool amd_mce_is_memory_error(struct mce *m);
-extern int mce_threshold_create_device(unsigned int cpu);
-extern int mce_threshold_remove_device(unsigned int cpu);
-
void mce_amd_feature_init(struct cpuinfo_x86 *c);
enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank);
#else
-
-static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
-static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; };
static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
#endif
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index e988bac0a4a1..3c2de4ce3b10 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -5,12 +5,20 @@
#include <asm-generic/module.h>
#include <asm/orc_types.h>
+struct its_array {
+#ifdef CONFIG_MITIGATION_ITS
+ void **pages;
+ int num;
+#endif
+};
+
struct mod_arch_specific {
#ifdef CONFIG_UNWINDER_ORC
unsigned int num_orcs;
int *orc_unwind_ip;
struct orc_entry *orc_unwind;
#endif
+ struct its_array its_pages;
};
#endif /* _ASM_X86_MODULE_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index e1752ba47e67..eef4c3a5ba28 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -6,10 +6,12 @@
#include <linux/nmi.h>
#include <linux/msi.h>
#include <linux/io.h>
+#include <linux/static_call.h>
#include <asm/nospec-branch.h>
#include <asm/paravirt.h>
#include <asm/msr.h>
#include <hyperv/hvhdk.h>
+#include <asm/fpu/types.h>
/*
* Hyper-V always provides a single IO-APIC at this MMIO address.
@@ -39,16 +41,21 @@ static inline unsigned char hv_get_nmi_reason(void)
return 0;
}
-#if IS_ENABLED(CONFIG_HYPERV)
-extern bool hyperv_paravisor_present;
+extern u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
+extern u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2);
+extern u64 hv_std_hypercall(u64 control, u64 param1, u64 param2);
+#if IS_ENABLED(CONFIG_HYPERV)
extern void *hv_hypercall_pg;
extern union hv_ghcb * __percpu *hv_ghcb_pg;
bool hv_isolation_type_snp(void);
bool hv_isolation_type_tdx(void);
-u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
+
+#ifdef CONFIG_X86_64
+DECLARE_STATIC_CALL(hv_hypercall, hv_std_hypercall);
+#endif
/*
* DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA
@@ -65,37 +72,15 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
{
u64 input_address = input ? virt_to_phys(input) : 0;
u64 output_address = output ? virt_to_phys(output) : 0;
- u64 hv_status;
#ifdef CONFIG_X86_64
- if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
- return hv_tdx_hypercall(control, input_address, output_address);
-
- if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
- __asm__ __volatile__("mov %[output_address], %%r8\n"
- "vmmcall"
- : "=a" (hv_status), ASM_CALL_CONSTRAINT,
- "+c" (control), "+d" (input_address)
- : [output_address] "r" (output_address)
- : "cc", "memory", "r8", "r9", "r10", "r11");
- return hv_status;
- }
-
- if (!hv_hypercall_pg)
- return U64_MAX;
-
- __asm__ __volatile__("mov %[output_address], %%r8\n"
- CALL_NOSPEC
- : "=a" (hv_status), ASM_CALL_CONSTRAINT,
- "+c" (control), "+d" (input_address)
- : [output_address] "r" (output_address),
- THUNK_TARGET(hv_hypercall_pg)
- : "cc", "memory", "r8", "r9", "r10", "r11");
+ return static_call_mod(hv_hypercall)(control, input_address, output_address);
#else
u32 input_address_hi = upper_32_bits(input_address);
u32 input_address_lo = lower_32_bits(input_address);
u32 output_address_hi = upper_32_bits(output_address);
u32 output_address_lo = lower_32_bits(output_address);
+ u64 hv_status;
if (!hv_hypercall_pg)
return U64_MAX;
@@ -108,54 +93,30 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
"D"(output_address_hi), "S"(output_address_lo),
THUNK_TARGET(hv_hypercall_pg)
: "cc", "memory");
-#endif /* !x86_64 */
return hv_status;
-}
-
-/* Hypercall to the L0 hypervisor */
-static inline u64 hv_do_nested_hypercall(u64 control, void *input, void *output)
-{
- return hv_do_hypercall(control | HV_HYPERCALL_NESTED, input, output);
+#endif /* !x86_64 */
}
/* Fast hypercall with 8 bytes of input and no output */
static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
{
- u64 hv_status;
-
#ifdef CONFIG_X86_64
- if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
- return hv_tdx_hypercall(control, input1, 0);
-
- if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
- __asm__ __volatile__(
- "vmmcall"
- : "=a" (hv_status), ASM_CALL_CONSTRAINT,
- "+c" (control), "+d" (input1)
- :: "cc", "r8", "r9", "r10", "r11");
- } else {
- __asm__ __volatile__(CALL_NOSPEC
- : "=a" (hv_status), ASM_CALL_CONSTRAINT,
- "+c" (control), "+d" (input1)
- : THUNK_TARGET(hv_hypercall_pg)
- : "cc", "r8", "r9", "r10", "r11");
- }
+ return static_call_mod(hv_hypercall)(control, input1, 0);
#else
- {
- u32 input1_hi = upper_32_bits(input1);
- u32 input1_lo = lower_32_bits(input1);
-
- __asm__ __volatile__ (CALL_NOSPEC
- : "=A"(hv_status),
- "+c"(input1_lo),
- ASM_CALL_CONSTRAINT
- : "A" (control),
- "b" (input1_hi),
- THUNK_TARGET(hv_hypercall_pg)
- : "cc", "edi", "esi");
- }
-#endif
+ u32 input1_hi = upper_32_bits(input1);
+ u32 input1_lo = lower_32_bits(input1);
+ u64 hv_status;
+
+ __asm__ __volatile__ (CALL_NOSPEC
+ : "=A"(hv_status),
+ "+c"(input1_lo),
+ ASM_CALL_CONSTRAINT
+ : "A" (control),
+ "b" (input1_hi),
+ THUNK_TARGET(hv_hypercall_pg)
+ : "cc", "edi", "esi");
return hv_status;
+#endif
}
static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
@@ -165,55 +126,27 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
return _hv_do_fast_hypercall8(control, input1);
}
-static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1)
-{
- u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
-
- return _hv_do_fast_hypercall8(control, input1);
-}
-
/* Fast hypercall with 16 bytes of input */
static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
{
- u64 hv_status;
-
#ifdef CONFIG_X86_64
- if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
- return hv_tdx_hypercall(control, input1, input2);
-
- if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
- __asm__ __volatile__("mov %[input2], %%r8\n"
- "vmmcall"
- : "=a" (hv_status), ASM_CALL_CONSTRAINT,
- "+c" (control), "+d" (input1)
- : [input2] "r" (input2)
- : "cc", "r8", "r9", "r10", "r11");
- } else {
- __asm__ __volatile__("mov %[input2], %%r8\n"
- CALL_NOSPEC
- : "=a" (hv_status), ASM_CALL_CONSTRAINT,
- "+c" (control), "+d" (input1)
- : [input2] "r" (input2),
- THUNK_TARGET(hv_hypercall_pg)
- : "cc", "r8", "r9", "r10", "r11");
- }
+ return static_call_mod(hv_hypercall)(control, input1, input2);
#else
- {
- u32 input1_hi = upper_32_bits(input1);
- u32 input1_lo = lower_32_bits(input1);
- u32 input2_hi = upper_32_bits(input2);
- u32 input2_lo = lower_32_bits(input2);
-
- __asm__ __volatile__ (CALL_NOSPEC
- : "=A"(hv_status),
- "+c"(input1_lo), ASM_CALL_CONSTRAINT
- : "A" (control), "b" (input1_hi),
- "D"(input2_hi), "S"(input2_lo),
- THUNK_TARGET(hv_hypercall_pg)
- : "cc");
- }
-#endif
+ u32 input1_hi = upper_32_bits(input1);
+ u32 input1_lo = lower_32_bits(input1);
+ u32 input2_hi = upper_32_bits(input2);
+ u32 input2_lo = lower_32_bits(input2);
+ u64 hv_status;
+
+ __asm__ __volatile__ (CALL_NOSPEC
+ : "=A"(hv_status),
+ "+c"(input1_lo), ASM_CALL_CONSTRAINT
+ : "A" (control), "b" (input1_hi),
+ "D"(input2_hi), "S"(input2_lo),
+ THUNK_TARGET(hv_hypercall_pg)
+ : "cc");
return hv_status;
+#endif
}
static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
@@ -223,13 +156,6 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
return _hv_do_fast_hypercall16(control, input1, input2);
}
-static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 input2)
-{
- u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
-
- return _hv_do_fast_hypercall16(control, input1, input2);
-}
-
extern struct hv_vp_assist_page **hv_vp_assist_page;
static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
@@ -251,6 +177,8 @@ int hyperv_flush_guest_mapping_range(u64 as,
int hyperv_fill_flush_guest_mapping_list(
struct hv_guest_mapping_flush_list *flush,
u64 start_gfn, u64 end_gfn);
+void hv_sleep_notifiers_register(void);
+void hv_machine_power_off(void);
#ifdef CONFIG_X86_64
void hv_apic_init(void);
@@ -262,6 +190,8 @@ static inline void hv_apic_init(void) {}
struct irq_domain *hv_create_pci_msi_domain(void);
+int hv_map_msi_interrupt(struct irq_data *data,
+ struct hv_interrupt_entry *out_entry);
int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
struct hv_interrupt_entry *entry);
int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
@@ -310,6 +240,15 @@ static __always_inline u64 hv_raw_get_msr(unsigned int reg)
}
int hv_apicid_to_vp_index(u32 apic_id);
+#if IS_ENABLED(CONFIG_MSHV_ROOT) && IS_ENABLED(CONFIG_CRASH_DUMP)
+void hv_root_crash_init(void);
+void hv_crash_asm32(void);
+void hv_crash_asm64(void);
+void hv_crash_asm_end(void);
+#else /* CONFIG_MSHV_ROOT && CONFIG_CRASH_DUMP */
+static inline void hv_root_crash_init(void) {}
+#endif /* CONFIG_MSHV_ROOT && CONFIG_CRASH_DUMP */
+
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
static inline void hyperv_setup_mmu_ops(void) {}
@@ -333,13 +272,46 @@ static inline u64 hv_get_non_nested_msr(unsigned int reg) { return 0; }
static inline int hv_apicid_to_vp_index(u32 apic_id) { return -EINVAL; }
#endif /* CONFIG_HYPERV */
+struct mshv_vtl_cpu_context {
+ union {
+ struct {
+ u64 rax;
+ u64 rcx;
+ u64 rdx;
+ u64 rbx;
+ u64 cr2;
+ u64 rbp;
+ u64 rsi;
+ u64 rdi;
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
+ };
+ u64 gp_regs[16];
+ };
+
+ struct fxregs_state fx_state;
+};
#ifdef CONFIG_HYPERV_VTL_MODE
void __init hv_vtl_init_platform(void);
int __init hv_vtl_early_init(void);
+void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0);
+void mshv_vtl_return_call_init(u64 vtl_return_offset);
+void mshv_vtl_return_hypercall(void);
+void __mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0);
#else
static inline void __init hv_vtl_init_platform(void) {}
static inline int __init hv_vtl_early_init(void) { return 0; }
+static inline void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0) {}
+static inline void mshv_vtl_return_call_init(u64 vtl_return_offset) {}
+static inline void mshv_vtl_return_hypercall(void) {}
+static inline void __mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0) {}
#endif
#include <asm-generic/mshyperv.h>
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b7dded3c8113..3d0a0950d20a 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -166,6 +166,10 @@
* Processor MMIO stale data
* vulnerabilities.
*/
+#define ARCH_CAP_MCU_ENUM BIT(16) /*
+ * Indicates the presence of microcode update
+ * feature enumeration and status information.
+ */
#define ARCH_CAP_FB_CLEAR BIT(17) /*
* VERW clears CPU fill buffer
* even on MDS_NO CPUs.
@@ -315,12 +319,37 @@
#define PERF_CAP_PT_IDX 16
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
-#define PERF_CAP_PEBS_TRAP BIT_ULL(6)
-#define PERF_CAP_ARCH_REG BIT_ULL(7)
-#define PERF_CAP_PEBS_FORMAT 0xf00
-#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
-#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
- PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE)
+
+#define PERF_CAP_LBR_FMT 0x3f
+#define PERF_CAP_PEBS_TRAP BIT_ULL(6)
+#define PERF_CAP_ARCH_REG BIT_ULL(7)
+#define PERF_CAP_PEBS_FORMAT 0xf00
+#define PERF_CAP_FW_WRITES BIT_ULL(13)
+#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
+#define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17)
+#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
+ PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \
+ PERF_CAP_PEBS_TIMING_INFO)
+
+/* Arch PEBS */
+#define MSR_IA32_PEBS_BASE 0x000003f4
+#define MSR_IA32_PEBS_INDEX 0x000003f5
+#define ARCH_PEBS_OFFSET_MASK 0x7fffff
+#define ARCH_PEBS_INDEX_WR_SHIFT 4
+
+#define ARCH_PEBS_RELOAD 0xffffffff
+#define ARCH_PEBS_CNTR_ALLOW BIT_ULL(35)
+#define ARCH_PEBS_CNTR_GP BIT_ULL(36)
+#define ARCH_PEBS_CNTR_FIXED BIT_ULL(37)
+#define ARCH_PEBS_CNTR_METRICS BIT_ULL(38)
+#define ARCH_PEBS_LBR_SHIFT 40
+#define ARCH_PEBS_LBR (0x3ull << ARCH_PEBS_LBR_SHIFT)
+#define ARCH_PEBS_VECR_XMM BIT_ULL(49)
+#define ARCH_PEBS_GPR BIT_ULL(61)
+#define ARCH_PEBS_AUX BIT_ULL(62)
+#define ARCH_PEBS_EN BIT_ULL(63)
+#define ARCH_PEBS_CNTR_MASK (ARCH_PEBS_CNTR_GP | ARCH_PEBS_CNTR_FIXED | \
+ ARCH_PEBS_CNTR_METRICS)
#define MSR_IA32_RTIT_CTL 0x00000570
#define RTIT_CTL_TRACEEN BIT(0)
@@ -419,6 +448,7 @@
#define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12)
#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
+#define DEBUGCTLMSR_RTM_DEBUG BIT(15)
#define MSR_PEBS_FRONTEND 0x000003f7
@@ -628,7 +658,13 @@
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD_PPIN_CTL 0xc00102f0
#define MSR_AMD_PPIN 0xc00102f1
+#define MSR_AMD64_CPUID_FN_7 0xc0011002
#define MSR_AMD64_CPUID_FN_1 0xc0011004
+
+#define MSR_AMD64_CPUID_EXT_FEAT 0xc0011005
+#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT 54
+#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT BIT_ULL(MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT)
+
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_TW_CFG 0xc0011023
@@ -697,8 +733,15 @@
#define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT)
#define MSR_AMD64_SNP_SMT_PROT_BIT 17
#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
-#define MSR_AMD64_SNP_RESV_BIT 18
+#define MSR_AMD64_SNP_SECURE_AVIC_BIT 18
+#define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT)
+#define MSR_AMD64_SNP_RESV_BIT 19
#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
+#define MSR_AMD64_SAVIC_CONTROL 0xc0010138
+#define MSR_AMD64_SAVIC_EN_BIT 0
+#define MSR_AMD64_SAVIC_EN BIT_ULL(MSR_AMD64_SAVIC_EN_BIT)
+#define MSR_AMD64_SAVIC_ALLOWEDNMI_BIT 1
+#define MSR_AMD64_SAVIC_ALLOWEDNMI BIT_ULL(MSR_AMD64_SAVIC_ALLOWEDNMI_BIT)
#define MSR_AMD64_RMP_BASE 0xc0010132
#define MSR_AMD64_RMP_END 0xc0010133
#define MSR_AMD64_RMP_CFG 0xc0010136
@@ -731,6 +774,12 @@
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET 0xc0000303
+
+/* AMD Hardware Feedback Support MSRs */
+#define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500
+#define MSR_AMD_WORKLOAD_CLASS_ID 0xc0000501
+#define MSR_AMD_WORKLOAD_HRST 0xc0000502
/* AMD Last Branch Record MSRs */
#define MSR_AMD64_LBR_SELECT 0xc000010e
@@ -830,6 +879,7 @@
#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
#define MSR_K7_HWCR_IRPERF_EN_BIT 30
#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
+#define MSR_K7_HWCR_CPUID_USER_DIS_BIT 35
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
#define MSR_K7_HWCR_CPB_DIS_BIT 25
@@ -903,6 +953,10 @@
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
#define MSR_IA32_UCODE_WRITE 0x00000079
+
+#define MSR_IA32_MCU_ENUMERATION 0x0000007b
+#define MCU_STAGING BIT(4)
+
#define MSR_IA32_UCODE_REV 0x0000008b
/* Intel SGX Launch Enclave Public Key Hash MSRs */
@@ -1200,6 +1254,8 @@
#define MSR_IA32_VMX_VMFUNC 0x00000491
#define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492
+#define MSR_IA32_MCU_STAGING_MBOX_ADDR 0x000007a5
+
/* Resctrl MSRs: */
/* - Intel: */
#define MSR_IA32_L3_QOS_CFG 0xc81
@@ -1215,6 +1271,8 @@
/* - AMD: */
#define MSR_IA32_MBA_BW_BASE 0xc0000200
#define MSR_IA32_SMBA_BW_BASE 0xc0000280
+#define MSR_IA32_L3_QOS_ABMC_CFG 0xc00003fd
+#define MSR_IA32_L3_QOS_EXT_CFG 0xc00003ff
#define MSR_IA32_EVT_CFG_BASE 0xc0000400
/* AMD-V MSRs */
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index c69e269937c5..76b95bd1a405 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -1,21 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.0+ */
/* Generic MTRR (Memory Type Range Register) ioctls.
Copyright (C) 1997-1999 Richard Gooch
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with this library; if not, write to the Free
- Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
Richard Gooch may be reached by email at rgooch@atnf.csiro.au
The postal address is:
Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index dd2b129b0418..e4815e15dc9a 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -36,15 +36,11 @@ static __always_inline void __monitor(const void *eax, u32 ecx, u32 edx)
static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx)
{
- /* "monitorx %eax, %ecx, %edx" */
- asm volatile(".byte 0x0f, 0x01, 0xfa"
- :: "a" (eax), "c" (ecx), "d"(edx));
+ asm volatile("monitorx" :: "a" (eax), "c" (ecx), "d"(edx));
}
static __always_inline void __mwait(u32 eax, u32 ecx)
{
- mds_idle_clear_cpu_buffers();
-
/*
* Use the instruction mnemonic with implicit operands, as the LLVM
* assembler fails to assemble the mnemonic with explicit operands:
@@ -80,11 +76,9 @@ static __always_inline void __mwait(u32 eax, u32 ecx)
*/
static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx)
{
- /* No MDS buffer clear as this is AMD/HYGON only */
+ /* No need for TSA buffer clearing on AMD */
- /* "mwaitx %eax, %ebx, %ecx" */
- asm volatile(".byte 0x0f, 0x01, 0xfb"
- :: "a" (eax), "b" (ebx), "c" (ecx));
+ asm volatile("mwaitx" :: "a" (eax), "b" (ebx), "c" (ecx));
}
/*
@@ -98,7 +92,6 @@ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx)
*/
static __always_inline void __sti_mwait(u32 eax, u32 ecx)
{
- mds_idle_clear_cpu_buffers();
asm volatile("sti; mwait" :: "a" (eax), "c" (ecx));
}
@@ -115,21 +108,29 @@ static __always_inline void __sti_mwait(u32 eax, u32 ecx)
*/
static __always_inline void mwait_idle_with_hints(u32 eax, u32 ecx)
{
+ if (need_resched())
+ return;
+
+ x86_idle_clear_cpu_buffers();
+
if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) {
const void *addr = &current_thread_info()->flags;
alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr));
__monitor(addr, 0, 0);
- if (!need_resched()) {
- if (ecx & 1) {
- __mwait(eax, ecx);
- } else {
- __sti_mwait(eax, ecx);
- raw_local_irq_disable();
- }
+ if (need_resched())
+ goto out;
+
+ if (ecx & 1) {
+ __mwait(eax, ecx);
+ } else {
+ __sti_mwait(eax, ecx);
+ raw_local_irq_disable();
}
}
+
+out:
current_clr_polling();
}
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 20d754b98f3f..4f4b5e8a1574 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -302,24 +302,32 @@
.endm
/*
- * Macro to execute VERW instruction that mitigate transient data sampling
- * attacks such as MDS. On affected systems a microcode update overloaded VERW
- * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
- *
+ * Macro to execute VERW insns that mitigate transient data sampling
+ * attacks such as MDS or TSA. On affected systems a microcode update
+ * overloaded VERW insns to also clear the CPU buffers. VERW clobbers
+ * CFLAGS.ZF.
* Note: Only the memory operand variant of VERW clears the CPU buffers.
*/
-.macro CLEAR_CPU_BUFFERS
#ifdef CONFIG_X86_64
- ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF
+#define VERW verw x86_verw_sel(%rip)
#else
- /*
- * In 32bit mode, the memory operand must be a %cs reference. The data
- * segments may not be usable (vm86 mode), and the stack segment may not
- * be flat (ESPFIX32).
- */
- ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF
+/*
+ * In 32bit mode, the memory operand must be a %cs reference. The data segments
+ * may not be usable (vm86 mode), and the stack segment may not be flat (ESPFIX32).
+ */
+#define VERW verw %cs:x86_verw_sel
#endif
-.endm
+
+/*
+ * Provide a stringified VERW macro for simple usage, and a non-stringified
+ * VERW macro for use in more elaborate sequences, e.g. to encode a conditional
+ * VERW within an ALTERNATIVE.
+ */
+#define __CLEAR_CPU_BUFFERS __stringify(VERW)
+
+/* If necessary, emit VERW on exit-to-userspace to clear CPU buffers. */
+#define CLEAR_CPU_BUFFERS \
+ ALTERNATIVE "", __CLEAR_CPU_BUFFERS, X86_FEATURE_CLEAR_CPU_BUF
#ifdef CONFIG_X86_64
.macro CLEAR_BRANCH_HISTORY
@@ -458,7 +466,7 @@ static inline void call_depth_return_thunk(void) {}
*/
# define CALL_NOSPEC \
ALTERNATIVE_2( \
- ANNOTATE_RETPOLINE_SAFE \
+ ANNOTATE_RETPOLINE_SAFE "\n" \
"call *%[thunk_target]\n", \
" jmp 904f;\n" \
" .align 16\n" \
@@ -474,7 +482,7 @@ static inline void call_depth_return_thunk(void) {}
"904: call 901b;\n", \
X86_FEATURE_RETPOLINE, \
"lfence;\n" \
- ANNOTATE_RETPOLINE_SAFE \
+ ANNOTATE_RETPOLINE_SAFE "\n" \
"call *%[thunk_target]\n", \
X86_FEATURE_RETPOLINE_LFENCE)
@@ -508,6 +516,7 @@ enum spectre_v2_user_mitigation {
/* The Speculative Store Bypass disable variants */
enum ssb_mitigation {
SPEC_STORE_BYPASS_NONE,
+ SPEC_STORE_BYPASS_AUTO,
SPEC_STORE_BYPASS_DISABLE,
SPEC_STORE_BYPASS_PRCTL,
SPEC_STORE_BYPASS_SECCOMP,
@@ -524,6 +533,8 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
: "memory");
}
+DECLARE_PER_CPU(bool, x86_ibpb_exit_to_user);
+
static inline void indirect_branch_prediction_barrier(void)
{
asm_inline volatile(ALTERNATIVE("", "call write_ibpb", X86_FEATURE_IBPB)
@@ -567,24 +578,22 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb);
-DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
+DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
-DECLARE_STATIC_KEY_FALSE(cpu_buf_vm_clear);
-
-extern u16 mds_verw_sel;
+extern u16 x86_verw_sel;
#include <asm/segment.h>
/**
- * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
+ * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns
*
* This uses the otherwise unused and obsolete VERW instruction in
* combination with microcode which triggers a CPU buffer flush when the
* instruction is executed.
*/
-static __always_inline void mds_clear_cpu_buffers(void)
+static __always_inline void x86_clear_cpu_buffers(void)
{
static const u16 ds = __KERNEL_DS;
@@ -601,14 +610,15 @@ static __always_inline void mds_clear_cpu_buffers(void)
}
/**
- * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS
+ * and TSA vulnerabilities.
*
* Clear CPU buffers if the corresponding static key is enabled
*/
-static __always_inline void mds_idle_clear_cpu_buffers(void)
+static __always_inline void x86_idle_clear_cpu_buffers(void)
{
- if (static_branch_likely(&mds_idle_clear))
- mds_clear_cpu_buffers();
+ if (static_branch_likely(&cpu_buf_idle_clear))
+ x86_clear_cpu_buffers();
}
#endif /* __ASSEMBLER__ */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 015d23f3e01f..2f0e47be79a4 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -9,6 +9,7 @@
#include <asm/alternative.h>
#include <linux/kmsan-checks.h>
+#include <linux/mmdebug.h>
/* duplicated to the one in bootmem.h */
extern unsigned long max_pfn;
@@ -31,18 +32,28 @@ static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
#ifdef CONFIG_DEBUG_VIRTUAL
extern unsigned long __phys_addr(unsigned long);
-extern unsigned long __phys_addr_symbol(unsigned long);
#else
#define __phys_addr(x) __phys_addr_nodebug(x)
-#define __phys_addr_symbol(x) \
- ((unsigned long)(x) - __START_KERNEL_map + phys_base)
#endif
+static inline unsigned long __phys_addr_symbol(unsigned long x)
+{
+ unsigned long y = x - __START_KERNEL_map;
+
+ /* only check upper bounds since lower bounds will trigger carry */
+ VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);
+
+ return y + phys_base;
+}
+
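Not part of the patch: a minimal standalone sketch of the bounds-checked symbol-to-physical translation added above, assuming a 64-bit build. The map base, image size and phys_base values are placeholders chosen for illustration; the kernel uses its own constants and VIRTUAL_BUG_ON() rather than assert().

#include <assert.h>
#include <stdio.h>

#define START_KERNEL_MAP	0xffffffff80000000UL	/* placeholder for __START_KERNEL_map */
#define KERNEL_IMAGE_SIZE	(512UL * 1024 * 1024)	/* placeholder image size */

static unsigned long phys_base = 0x1000000UL;		/* placeholder relocation offset */

static unsigned long phys_addr_symbol(unsigned long x)
{
	unsigned long y = x - START_KERNEL_MAP;

	/* Addresses below the map underflow and also fail the upper-bound check. */
	assert(y < KERNEL_IMAGE_SIZE);

	return y + phys_base;
}

int main(void)
{
	printf("%#lx\n", phys_addr_symbol(START_KERNEL_MAP + 0x200000));
	return 0;
}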
#define __phys_reloc_hide(x) (x)
void clear_page_orig(void *page);
void clear_page_rep(void *page);
void clear_page_erms(void *page);
+KCFI_REFERENCE(clear_page_orig);
+KCFI_REFERENCE(clear_page_rep);
+KCFI_REFERENCE(clear_page_erms);
static inline void clear_page(void *page)
{
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 37a8627d8277..3502939415ad 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -249,7 +249,7 @@ extern struct paravirt_patch_template pv_ops;
* don't need to bother with CFI prefixes.
*/
#define PARAVIRT_CALL \
- ANNOTATE_RETPOLINE_SAFE \
+ ANNOTATE_RETPOLINE_SAFE "\n\t" \
"call *%[paravirt_opptr];"
/*
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index b0d03b6c279b..725d0eff7acd 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -23,6 +23,7 @@
#else /* !__ASSEMBLY__: */
#include <linux/args.h>
+#include <linux/bits.h>
#include <linux/build_bug.h>
#include <linux/stringify.h>
#include <asm/asm.h>
@@ -309,8 +310,7 @@ do { \
\
asm qual (__pcpu_op_##size("cmpxchg") "%[nval], " \
__percpu_arg([var]) \
- CC_SET(z) \
- : CC_OUT(z) (success), \
+ : "=@ccz" (success), \
[oval] "+a" (pco_old__), \
[var] "+m" (__my_cpu_var(_var)) \
: [nval] __pcpu_reg_##size(, pco_new__) \
@@ -367,8 +367,7 @@ do { \
asm_inline qual ( \
ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
"cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
- CC_SET(z) \
- : ALT_OUTPUT_SP(CC_OUT(z) (success), \
+ : ALT_OUTPUT_SP("=@ccz" (success), \
[var] "+m" (__my_cpu_var(_var)), \
"+a" (old__.low), "+d" (old__.high)) \
: "b" (new__.low), "c" (new__.high), \
@@ -436,8 +435,7 @@ do { \
asm_inline qual ( \
ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
"cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
- CC_SET(z) \
- : ALT_OUTPUT_SP(CC_OUT(z) (success), \
+ : ALT_OUTPUT_SP("=@ccz" (success), \
[var] "+m" (__my_cpu_var(_var)), \
"+a" (old__.low), "+d" (old__.high)) \
: "b" (new__.low), "c" (new__.high), \
@@ -575,9 +573,9 @@ do { \
#define x86_this_cpu_constant_test_bit(_nr, _var) \
({ \
unsigned long __percpu *addr__ = \
- (unsigned long __percpu *)&(_var) + ((_nr) / BITS_PER_LONG); \
+ (unsigned long __percpu *)&(_var) + BIT_WORD(_nr); \
\
- !!((1UL << ((_nr) % BITS_PER_LONG)) & raw_cpu_read(*addr__)); \
+ !!(BIT_MASK(_nr) & raw_cpu_read(*addr__)); \
})
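Not part of the patch: the BIT_WORD()/BIT_MASK() helpers substituted above are equivalent to the open-coded divide/modulo they replace. A standalone sketch with local copies of the two helpers, mirroring their linux/bits.h semantics:

#include <assert.h>
#include <limits.h>
#include <stdio.h>

#define BITS_PER_LONG	(sizeof(long) * CHAR_BIT)
/* Same semantics as the linux/bits.h helpers used in the hunk above. */
#define BIT_WORD(nr)	((nr) / BITS_PER_LONG)
#define BIT_MASK(nr)	(1UL << ((nr) % BITS_PER_LONG))

int main(void)
{
	unsigned long bitmap[2] = { 0 };
	unsigned int nr = 70;	/* lands in word 1, bit 6 on a 64-bit build */

	bitmap[BIT_WORD(nr)] |= BIT_MASK(nr);

	assert(BIT_WORD(nr) == nr / BITS_PER_LONG);
	assert(BIT_MASK(nr) == 1UL << (nr % BITS_PER_LONG));
	printf("word %zu mask %#lx\n", BIT_WORD(nr), BIT_MASK(nr));
	return 0;
}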
#define x86_this_cpu_variable_test_bit(_nr, _var) \
@@ -585,8 +583,7 @@ do { \
bool oldbit; \
\
asm volatile("btl %[nr], " __percpu_arg([var]) \
- CC_SET(c) \
- : CC_OUT(c) (oldbit) \
+ : "=@ccc" (oldbit) \
: [var] "m" (__my_cpu_var(_var)), \
[nr] "rI" (_nr)); \
oldbit; \
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 70d1d94aca7e..7276ba70c88a 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -35,7 +35,6 @@
#define ARCH_PERFMON_EVENTSEL_EQ (1ULL << 36)
#define ARCH_PERFMON_EVENTSEL_UMASK2 (0xFFULL << 40)
-#define INTEL_FIXED_BITS_MASK 0xFULL
#define INTEL_FIXED_BITS_STRIDE 4
#define INTEL_FIXED_0_KERNEL (1ULL << 0)
#define INTEL_FIXED_0_USER (1ULL << 1)
@@ -48,6 +47,11 @@
#define ICL_EVENTSEL_ADAPTIVE (1ULL << 34)
#define ICL_FIXED_0_ADAPTIVE (1ULL << 32)
+#define INTEL_FIXED_BITS_MASK \
+ (INTEL_FIXED_0_KERNEL | INTEL_FIXED_0_USER | \
+ INTEL_FIXED_0_ANYTHREAD | INTEL_FIXED_0_ENABLE_PMI | \
+ ICL_FIXED_0_ADAPTIVE)
+
#define intel_fixed_bits_by_idx(_idx, _bits) \
((_bits) << ((_idx) * INTEL_FIXED_BITS_STRIDE))
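Not part of the patch: a standalone sketch of how intel_fixed_bits_by_idx() packs per-counter control bits at a stride of 4, using values copied from this hunk. The scenario (which counters, which rings) is made up for illustration.

#include <stdio.h>
#include <stdint.h>

/* Values copied from the hunk above, for illustration only. */
#define INTEL_FIXED_BITS_STRIDE	4
#define INTEL_FIXED_0_KERNEL	(1ULL << 0)
#define INTEL_FIXED_0_USER	(1ULL << 1)
#define intel_fixed_bits_by_idx(idx, bits) ((bits) << ((idx) * INTEL_FIXED_BITS_STRIDE))

int main(void)
{
	/* Count fixed counter 0 in user+kernel, and fixed counter 2 in kernel only. */
	uint64_t ctrl = intel_fixed_bits_by_idx(0, INTEL_FIXED_0_KERNEL | INTEL_FIXED_0_USER) |
			intel_fixed_bits_by_idx(2, INTEL_FIXED_0_KERNEL);

	printf("FIXED_CTR_CTRL = %#llx\n", (unsigned long long)ctrl);	/* 0x103 */
	return 0;
}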
@@ -137,16 +141,16 @@
#define ARCH_PERFMON_EVENTS_COUNT 7
#define PEBS_DATACFG_MEMINFO BIT_ULL(0)
-#define PEBS_DATACFG_GP BIT_ULL(1)
+#define PEBS_DATACFG_GP BIT_ULL(1)
#define PEBS_DATACFG_XMMS BIT_ULL(2)
#define PEBS_DATACFG_LBRS BIT_ULL(3)
-#define PEBS_DATACFG_LBR_SHIFT 24
#define PEBS_DATACFG_CNTR BIT_ULL(4)
+#define PEBS_DATACFG_METRICS BIT_ULL(5)
+#define PEBS_DATACFG_LBR_SHIFT 24
#define PEBS_DATACFG_CNTR_SHIFT 32
#define PEBS_DATACFG_CNTR_MASK GENMASK_ULL(15, 0)
#define PEBS_DATACFG_FIX_SHIFT 48
#define PEBS_DATACFG_FIX_MASK GENMASK_ULL(7, 0)
-#define PEBS_DATACFG_METRICS BIT_ULL(5)
/* Steal the highest bit of pebs_data_cfg for SW usage */
#define PEBS_UPDATE_DS_SW BIT_ULL(63)
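Not part of the patch: one plausible way to pack a counter-group bitmap into pebs_data_cfg using the CNTR field layout above. The local GENMASK_ULL() copy and the bitmap value are for illustration only, not the kernel's actual PEBS setup code.

#include <stdio.h>
#include <stdint.h>

/* Local copies of the layout above, for illustration only. */
#define GENMASK_ULL(h, l)	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))
#define PEBS_DATACFG_CNTR	(1ULL << 4)
#define PEBS_DATACFG_CNTR_SHIFT	32
#define PEBS_DATACFG_CNTR_MASK	GENMASK_ULL(15, 0)

int main(void)
{
	uint64_t gp_counters = 0x5;	/* hypothetical bitmap: GP counters 0 and 2 */
	uint64_t cfg = PEBS_DATACFG_CNTR |
		       ((gp_counters & PEBS_DATACFG_CNTR_MASK) << PEBS_DATACFG_CNTR_SHIFT);

	printf("pebs_data_cfg = %#llx\n", (unsigned long long)cfg);	/* 0x500000010 */
	return 0;
}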
@@ -196,6 +200,8 @@ union cpuid10_edx {
#define ARCH_PERFMON_EXT_LEAF 0x00000023
#define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1
#define ARCH_PERFMON_ACR_LEAF 0x2
+#define ARCH_PERFMON_PEBS_CAP_LEAF 0x4
+#define ARCH_PERFMON_PEBS_COUNTER_LEAF 0x5
union cpuid35_eax {
struct {
@@ -206,7 +212,10 @@ union cpuid35_eax {
unsigned int acr_subleaf:1;
/* Events Sub-Leaf */
unsigned int events_subleaf:1;
- unsigned int reserved:28;
+ /* arch-PEBS Sub-Leaves */
+ unsigned int pebs_caps_subleaf:1;
+ unsigned int pebs_cnts_subleaf:1;
+ unsigned int reserved:26;
} split;
unsigned int full;
};
@@ -428,9 +437,11 @@ static inline bool is_topdown_idx(int idx)
#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT)
#define GLOBAL_STATUS_TRACE_TOPAPMI_BIT 55
#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT)
+#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT 54
+#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD BIT_ULL(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT)
#define GLOBAL_STATUS_PERF_METRICS_OVF_BIT 48
-#define GLOBAL_CTRL_EN_PERF_METRICS 48
+#define GLOBAL_CTRL_EN_PERF_METRICS BIT_ULL(48)
/*
* We model guest LBR event tracing as another fixed-mode PMC like BTS.
*
@@ -499,6 +510,107 @@ struct pebs_cntr_header {
#define INTEL_CNTR_METRICS 0x3
/*
+ * Arch PEBS
+ */
+union arch_pebs_index {
+ struct {
+ u64 rsvd:4,
+ wr:23,
+ rsvd2:4,
+ full:1,
+ en:1,
+ rsvd3:3,
+ thresh:23,
+ rsvd4:5;
+ };
+ u64 whole;
+};
+
+struct arch_pebs_header {
+ union {
+ u64 format;
+ struct {
+ u64 size:16, /* Record size */
+ rsvd:14,
+ mode:1, /* 64BIT_MODE */
+ cont:1,
+ rsvd2:3,
+ cntr:5,
+ lbr:2,
+ rsvd3:7,
+ xmm:1,
+ ymmh:1,
+ rsvd4:2,
+ opmask:1,
+ zmmh:1,
+ h16zmm:1,
+ rsvd5:5,
+ gpr:1,
+ aux:1,
+ basic:1;
+ };
+ };
+ u64 rsvd6;
+};
+
+struct arch_pebs_basic {
+ u64 ip;
+ u64 applicable_counters;
+ u64 tsc;
+ u64 retire :16, /* Retire Latency */
+ valid :1,
+ rsvd :47;
+ u64 rsvd2;
+ u64 rsvd3;
+};
+
+struct arch_pebs_aux {
+ u64 address;
+ u64 rsvd;
+ u64 rsvd2;
+ u64 rsvd3;
+ u64 rsvd4;
+ u64 aux;
+ u64 instr_latency :16,
+ pad2 :16,
+ cache_latency :16,
+ pad3 :16;
+ u64 tsx_tuning;
+};
+
+struct arch_pebs_gprs {
+ u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di;
+ u64 r8, r9, r10, r11, r12, r13, r14, r15, ssp;
+ u64 rsvd;
+};
+
+struct arch_pebs_xer_header {
+ u64 xstate;
+ u64 rsvd;
+};
+
+#define ARCH_PEBS_LBR_NAN 0x0
+#define ARCH_PEBS_LBR_NUM_8 0x1
+#define ARCH_PEBS_LBR_NUM_16 0x2
+#define ARCH_PEBS_LBR_NUM_VAR 0x3
+#define ARCH_PEBS_BASE_LBR_ENTRIES 8
+struct arch_pebs_lbr_header {
+ u64 rsvd;
+ u64 ctl;
+ u64 depth;
+ u64 ler_from;
+ u64 ler_to;
+ u64 ler_info;
+};
+
+struct arch_pebs_cntr_header {
+ u32 cntr;
+ u32 fixed;
+ u32 metrics;
+ u32 reserved;
+};
+
+/*
* AMD Extended Performance Monitoring and Debug cpuid feature detection
*/
#define EXT_PERFMON_DEBUG_FEATURES 0x80000022
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 774430c3abff..e33df3da6980 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -301,16 +301,15 @@ static inline bool pmd_leaf(pmd_t pte)
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-/* NOTE: when predicate huge page, consider also pmd_devmap, or use pmd_leaf */
static inline int pmd_trans_huge(pmd_t pmd)
{
- return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
+ return (pmd_val(pmd) & _PAGE_PSE) == _PAGE_PSE;
}
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static inline int pud_trans_huge(pud_t pud)
{
- return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
+ return (pud_val(pud) & _PAGE_PSE) == _PAGE_PSE;
}
#endif
@@ -320,24 +319,6 @@ static inline int has_transparent_hugepage(void)
return boot_cpu_has(X86_FEATURE_PSE);
}
-#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
-static inline int pmd_devmap(pmd_t pmd)
-{
- return !!(pmd_val(pmd) & _PAGE_DEVMAP);
-}
-
-#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-static inline int pud_devmap(pud_t pud)
-{
- return !!(pud_val(pud) & _PAGE_DEVMAP);
-}
-#else
-static inline int pud_devmap(pud_t pud)
-{
- return 0;
-}
-#endif
-
#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
static inline bool pmd_special(pmd_t pmd)
{
@@ -361,12 +342,6 @@ static inline pud_t pud_mkspecial(pud_t pud)
return pud_set_flags(pud, _PAGE_SPECIAL);
}
#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */
-
-static inline int pgd_devmap(pgd_t pgd)
-{
- return 0;
-}
-#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline pte_t pte_set_flags(pte_t pte, pteval_t set)
@@ -527,11 +502,6 @@ static inline pte_t pte_mkspecial(pte_t pte)
return pte_set_flags(pte, _PAGE_SPECIAL);
}
-static inline pte_t pte_mkdevmap(pte_t pte)
-{
- return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP);
-}
-
/* See comments above mksaveddirty_shift() */
static inline pmd_t pmd_mksaveddirty(pmd_t pmd)
{
@@ -603,11 +573,6 @@ static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd)
return pmd_set_flags(pmd, _PAGE_DIRTY);
}
-static inline pmd_t pmd_mkdevmap(pmd_t pmd)
-{
- return pmd_set_flags(pmd, _PAGE_DEVMAP);
-}
-
static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
return pmd_set_flags(pmd, _PAGE_PSE);
@@ -673,11 +638,6 @@ static inline pud_t pud_mkdirty(pud_t pud)
return pud_mksaveddirty(pud);
}
-static inline pud_t pud_mkdevmap(pud_t pud)
-{
- return pud_set_flags(pud, _PAGE_DEVMAP);
-}
-
static inline pud_t pud_mkhuge(pud_t pud)
{
return pud_set_flags(pud, _PAGE_PSE);
@@ -1008,13 +968,6 @@ static inline int pte_present(pte_t a)
return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
}
-#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
-static inline int pte_devmap(pte_t a)
-{
- return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP;
-}
-#endif
-
#define pte_accessible pte_accessible
static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
{
@@ -1561,7 +1514,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte)
return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE);
}
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_flags(pte) & _PAGE_SWP_EXCLUSIVE;
}
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 4604f924d8b8..7eb61ef6a185 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -36,6 +36,9 @@ static inline bool pgtable_l5_enabled(void)
#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
#endif /* USE_EARLY_PGTABLE_L5 */
+#define ARCH_PAGE_TABLE_SYNC_MASK \
+ (pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED)
+
extern unsigned int pgdir_shift;
extern unsigned int ptrs_per_p4d;
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b74ec5c3643b..2ec250ba467e 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -34,7 +34,6 @@
#define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
#define _PAGE_BIT_KERNEL_4K _PAGE_BIT_SOFTW3 /* page must not be converted to large */
-#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4
#ifdef CONFIG_X86_64
#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit (leaf) */
@@ -121,11 +120,9 @@
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
-#define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP)
#define _PAGE_SOFTW4 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW4)
#else
#define _PAGE_NX (_AT(pteval_t, 0))
-#define _PAGE_DEVMAP (_AT(pteval_t, 0))
#define _PAGE_SOFTW4 (_AT(pteval_t, 0))
#endif
@@ -154,7 +151,7 @@
#define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | \
_PAGE_DIRTY_BITS | _PAGE_SOFT_DIRTY | \
- _PAGE_DEVMAP | _PAGE_CC | _PAGE_UFFD_WP)
+ _PAGE_CC | _PAGE_UFFD_WP)
#define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT)
#define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE)
@@ -214,9 +211,6 @@ enum page_cache_mode {
#define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0)
#define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0)
-#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G)
-#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G)
-
/*
* Page tables needs to have Write=1 in order for any lower PTEs to be
* writable. This includes shadow stack memory (Write=0, Dirty=1)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index bde58f6510ac..a24c7805acdb 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -731,6 +731,8 @@ void __noreturn stop_this_cpu(void *dummy);
void microcode_check(struct cpuinfo_x86 *prev_info);
void store_cpu_caps(struct cpuinfo_x86 *info);
+DECLARE_PER_CPU(bool, cache_state_incoherent);
+
enum l1tf_mitigations {
L1TF_MITIGATION_OFF,
L1TF_MITIGATION_AUTO,
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 50f75467f73d..35d062a2e304 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -84,8 +84,8 @@ struct fred_ss {
: 4,
/* Event was incident to enclave execution */
enclave : 1,
- /* CPU was in long mode */
- lm : 1,
+ /* CPU was in 64-bit mode */
+ l : 1,
/*
* Nested exception during FRED delivery, not set
* for #DF.
@@ -187,12 +187,12 @@ convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
extern void send_sigtrap(struct pt_regs *regs, int error_code, int si_code);
-static inline unsigned long regs_return_value(struct pt_regs *regs)
+static __always_inline unsigned long regs_return_value(struct pt_regs *regs)
{
return regs->ax;
}
-static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+static __always_inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
{
regs->ax = rc;
}
@@ -277,34 +277,34 @@ static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs)
}
#endif
-static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+static __always_inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
{
return regs->sp;
}
-static inline unsigned long instruction_pointer(struct pt_regs *regs)
+static __always_inline unsigned long instruction_pointer(struct pt_regs *regs)
{
return regs->ip;
}
-static inline void instruction_pointer_set(struct pt_regs *regs,
- unsigned long val)
+static __always_inline
+void instruction_pointer_set(struct pt_regs *regs, unsigned long val)
{
regs->ip = val;
}
-static inline unsigned long frame_pointer(struct pt_regs *regs)
+static __always_inline unsigned long frame_pointer(struct pt_regs *regs)
{
return regs->bp;
}
-static inline unsigned long user_stack_pointer(struct pt_regs *regs)
+static __always_inline unsigned long user_stack_pointer(struct pt_regs *regs)
{
return regs->sp;
}
-static inline void user_stack_pointer_set(struct pt_regs *regs,
- unsigned long val)
+static __always_inline
+void user_stack_pointer_set(struct pt_regs *regs, unsigned long val)
{
regs->sp = val;
}
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index f607081a022a..e406a1e92c63 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -78,7 +78,7 @@ extern unsigned char secondary_startup_64[];
extern unsigned char secondary_startup_64_no_verify[];
#endif
-static inline size_t real_mode_size_needed(void)
+static __always_inline size_t real_mode_size_needed(void)
{
if (real_mode_header)
return 0; /* already allocated. */
diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index feb93b50e990..575f8408a9e7 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -44,7 +44,6 @@ DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state);
extern bool rdt_alloc_capable;
extern bool rdt_mon_capable;
-extern unsigned int rdt_mon_features;
DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
@@ -84,21 +83,6 @@ static inline void resctrl_arch_disable_mon(void)
static_branch_dec_cpuslocked(&rdt_enable_key);
}
-static inline bool resctrl_arch_is_llc_occupancy_enabled(void)
-{
- return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID));
-}
-
-static inline bool resctrl_arch_is_mbm_total_enabled(void)
-{
- return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID));
-}
-
-static inline bool resctrl_arch_is_mbm_local_enabled(void)
-{
- return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID));
-}
-
/*
* __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
*
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 3821ee3fae35..54c8fc430684 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -6,37 +6,15 @@
#define __CLOBBERS_MEM(clb...) "memory", ## clb
-#ifndef __GCC_ASM_FLAG_OUTPUTS__
-
-/* Use asm goto */
-
-#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \
-({ \
- bool c = false; \
- asm goto (fullop "; j" #cc " %l[cc_label]" \
- : : [var] "m" (_var), ## __VA_ARGS__ \
- : clobbers : cc_label); \
- if (0) { \
-cc_label: c = true; \
- } \
- c; \
-})
-
-#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) */
-
-/* Use flags output or a set instruction */
-
#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \
({ \
bool c; \
- asm_inline volatile (fullop CC_SET(cc) \
- : [var] "+m" (_var), CC_OUT(cc) (c) \
+ asm_inline volatile (fullop \
+ : [var] "+m" (_var), "=@cc" #cc (c) \
: __VA_ARGS__ : clobbers); \
c; \
})
-#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) */
-
#define GEN_UNARY_RMWcc_4(op, var, cc, arg0) \
__GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM())
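Not part of the patch: several hunks in this series (rmwcc, percpu, signal, special_insns, sev) replace the CC_SET()/CC_OUT() pair with the compiler's asm flag-output constraints such as "=@ccz" and "=@ccc". A minimal standalone x86 sketch of that pattern, using a plain decrement-and-test rather than any of the kernel's macros:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Flag-output asm: ZF after "decl" is captured directly into a bool. */
static bool dec_and_test(int *v)
{
	bool zero;

	asm volatile("decl %[var]"
		     : [var] "+m" (*v), "=@ccz" (zero)
		     : : "memory");
	return zero;
}

int main(void)
{
	int v = 1;

	assert(dec_and_test(&v));	/* 1 -> 0 sets ZF */
	assert(!dec_and_test(&v));	/* 0 -> -1 clears ZF */
	printf("v = %d\n", v);
	return 0;
}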
diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h
index 8d983cfd06ea..e5a13dc8816e 100644
--- a/arch/x86/include/asm/runtime-const.h
+++ b/arch/x86/include/asm/runtime-const.h
@@ -2,6 +2,10 @@
#ifndef _ASM_RUNTIME_CONST_H
#define _ASM_RUNTIME_CONST_H
+#ifdef MODULE
+ #error "Cannot use runtime-const infrastructure from modules"
+#endif
+
#ifdef __ASSEMBLY__
.macro RUNTIME_CONST_PTR sym reg
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 77d8f49b92bd..f59ae7186940 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -244,7 +244,7 @@ static inline unsigned long vdso_encode_cpunode(int cpu, unsigned long node)
static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
{
- unsigned int p;
+ unsigned long p;
/*
* Load CPU and node number from the GDT. LSL is faster than RDTSCP
@@ -254,10 +254,10 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
*
* If RDPID is available, use it.
*/
- alternative_io ("lsl %[seg],%[p]",
- ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
+ alternative_io ("lsl %[seg],%k[p]",
+ "rdpid %[p]",
X86_FEATURE_RDPID,
- [p] "=a" (p), [seg] "r" (__CPUNODE_SEG));
+ [p] "=r" (p), [seg] "r" (__CPUNODE_SEG));
if (cpu)
*cpu = (p & VDSO_CPUNODE_MASK);
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 692af46603a1..914eb32581c7 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -53,6 +53,7 @@ extern void i386_reserve_resources(void);
extern unsigned long __startup_64(unsigned long p2v_offset, struct boot_params *bp);
extern void startup_64_setup_gdt_idt(void);
extern void startup_64_load_idt(void *vc_handler);
+extern void __pi_startup_64_load_idt(void *vc_handler);
extern void early_setup_idt(void);
extern void __init do_early_exception(struct pt_regs *regs, int trapnr);
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 0020d77a0800..01a6e4dbe423 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -208,6 +208,7 @@ struct snp_psc_desc {
#define GHCB_TERM_SVSM_CAA 9 /* SVSM is present but CAA is not page aligned */
#define GHCB_TERM_SECURE_TSC 10 /* Secure TSC initialization failed */
#define GHCB_TERM_SVSM_CA_REMAP_FAIL 11 /* SVSM is present but CA could not be remapped */
+#define GHCB_TERM_SAVIC_FAIL 12 /* Secure AVIC-specific failure */
#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK)
diff --git a/arch/x86/include/asm/sev-internal.h b/arch/x86/include/asm/sev-internal.h
index 3dfd306d1c9e..c58c47c68ab6 100644
--- a/arch/x86/include/asm/sev-internal.h
+++ b/arch/x86/include/asm/sev-internal.h
@@ -2,7 +2,6 @@
#define DR7_RESET_VALUE 0x400
-extern struct ghcb boot_ghcb_page;
extern u64 sev_hv_features;
extern u64 sev_secrets_pa;
@@ -56,31 +55,15 @@ DECLARE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
DECLARE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
void early_set_pages_state(unsigned long vaddr, unsigned long paddr,
- unsigned long npages, enum psc_op op);
+ unsigned long npages, const struct psc_desc *desc);
DECLARE_PER_CPU(struct svsm_ca *, svsm_caa);
DECLARE_PER_CPU(u64, svsm_caa_pa);
-extern struct svsm_ca *boot_svsm_caa;
extern u64 boot_svsm_caa_pa;
-static __always_inline struct svsm_ca *svsm_get_caa(void)
-{
- if (sev_cfg.use_cas)
- return this_cpu_read(svsm_caa);
- else
- return boot_svsm_caa;
-}
-
-static __always_inline u64 svsm_get_caa_pa(void)
-{
- if (sev_cfg.use_cas)
- return this_cpu_read(svsm_caa_pa);
- else
- return boot_svsm_caa_pa;
-}
-
-int svsm_perform_call_protocol(struct svsm_call *call);
+enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt);
+void vc_forward_exception(struct es_em_ctxt *ctxt);
static inline u64 sev_es_rd_ghcb_msr(void)
{
@@ -97,9 +80,8 @@ static __always_inline void sev_es_wr_ghcb_msr(u64 val)
native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
}
-void snp_register_ghcb_early(unsigned long paddr);
-bool sev_es_negotiate_protocol(void);
-bool sev_es_check_cpu_features(void);
+enum es_result sev_es_ghcb_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write);
+
u64 get_hv_features(void);
const struct snp_cpuid_table *snp_cpuid_get_table(void);
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 58e028d42e41..0e6c0940100f 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -223,6 +223,18 @@ struct snp_tsc_info_resp {
u8 rsvd2[100];
} __packed;
+/*
+ * Obtain the mean TSC frequency by decreasing the nominal TSC frequency with
+ * TSC_FACTOR as documented in the SNP Firmware ABI specification:
+ *
+ * GUEST_TSC_FREQ * (1 - (TSC_FACTOR * 0.00001))
+ *
+ * which is equivalent to:
+ *
+ * GUEST_TSC_FREQ -= (GUEST_TSC_FREQ * TSC_FACTOR) / 100000;
+ */
+#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - (freq) * (factor) / 100000)
+
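Not part of the patch: the scaling arithmetic above, checked on made-up numbers (a 2 GHz nominal frequency and a TSC_FACTOR of 50, i.e. a 0.05% decrease).

#include <assert.h>
#include <stdio.h>
#include <stdint.h>

/* Same arithmetic as the macro above; the input values below are hypothetical. */
#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - (freq) * (factor) / 100000)

int main(void)
{
	uint64_t nominal_khz = 2000000;	/* hypothetical GUEST_TSC_FREQ, in kHz */
	uint64_t tsc_factor  = 50;	/* hypothetical 0.05% decrease */

	/* 2000000 - 2000000 * 50 / 100000 = 2000000 - 1000 = 1999000 kHz */
	assert(SNP_SCALE_TSC_FREQ(nominal_khz, tsc_factor) == 1999000);
	printf("%llu kHz\n", (unsigned long long)SNP_SCALE_TSC_FREQ(nominal_khz, tsc_factor));
	return 0;
}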
struct snp_guest_req {
void *req_buf;
size_t req_sz;
@@ -231,6 +243,7 @@ struct snp_guest_req {
size_t resp_sz;
u64 exit_code;
+ u64 exitinfo2;
unsigned int vmpck_id;
u8 msg_version;
u8 msg_type;
@@ -282,8 +295,11 @@ struct snp_secrets_page {
u8 svsm_guest_vmpl;
u8 rsvd3[3];
+ /* The percentage decrease from nominal to mean TSC frequency. */
+ u32 tsc_factor;
+
/* Remainder of page */
- u8 rsvd4[3744];
+ u8 rsvd4[3740];
} __packed;
struct snp_msg_desc {
@@ -445,7 +461,7 @@ static __always_inline void sev_es_nmi_complete(void)
cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
__sev_es_nmi_complete();
}
-extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
+extern int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd);
extern void sev_enable(struct boot_params *bp);
/*
@@ -475,8 +491,7 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
/* "pvalidate" mnemonic support in binutils 2.36 and newer */
asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFF\n\t"
- CC_SET(c)
- : CC_OUT(c) (no_rmpupdate), "=a"(rc)
+ : "=@ccc"(no_rmpupdate), "=a"(rc)
: "a"(vaddr), "c"(rmp_psize), "d"(validate)
: "memory", "cc");
@@ -486,9 +501,8 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
return rc;
}
-struct snp_guest_request_ioctl;
-
void setup_ghcb(void);
+void snp_register_ghcb_early(unsigned long paddr);
void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
@@ -497,14 +511,12 @@ void snp_set_memory_shared(unsigned long vaddr, unsigned long npages);
void snp_set_memory_private(unsigned long vaddr, unsigned long npages);
void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
-void __noreturn snp_abort(void);
void snp_dmi_setup(void);
int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
u64 sev_get_status(void);
void sev_show_status(void);
-void snp_update_svsm_ca(void);
int prepare_pte_enc(struct pte_enc_desc *d);
void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot);
void snp_kexec_finish(void);
@@ -513,13 +525,16 @@ void snp_kexec_begin(void);
int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id);
struct snp_msg_desc *snp_msg_alloc(void);
void snp_msg_free(struct snp_msg_desc *mdesc);
-int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
- struct snp_guest_request_ioctl *rio);
+int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req);
int snp_svsm_vtpm_send_command(u8 *buffer);
void __init snp_secure_tsc_prepare(void);
void __init snp_secure_tsc_init(void);
+enum es_result savic_register_gpa(u64 gpa);
+enum es_result savic_unregister_gpa(u64 *gpa);
+u64 savic_ghcb_msr_read(u32 reg);
+void savic_ghcb_msr_write(u32 reg, u64 value);
static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
{
@@ -527,8 +542,6 @@ static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
__builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}
-void vc_forward_exception(struct es_em_ctxt *ctxt);
-
/* I/O parameters for CPUID-related helpers */
struct cpuid_leaf {
u32 fn;
@@ -539,7 +552,13 @@ struct cpuid_leaf {
u32 edx;
};
-int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf);
+int svsm_perform_msr_protocol(struct svsm_call *call);
+int __pi_svsm_perform_msr_protocol(struct svsm_call *call);
+int snp_cpuid(void (*cpuid_fn)(void *ctx, struct cpuid_leaf *leaf),
+ void *ctx, struct cpuid_leaf *leaf);
+
+void svsm_issue_call(struct svsm_call *call, u8 *pending);
+int svsm_process_result_codes(struct svsm_call *call);
void __noreturn sev_es_terminate(unsigned int set, unsigned int reason);
enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
@@ -547,7 +566,36 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
u64 exit_code, u64 exit_info_1,
u64 exit_info_2);
+bool sev_es_negotiate_protocol(void);
+bool sev_es_check_cpu_features(void);
+
+extern u16 ghcb_version;
extern struct ghcb *boot_ghcb;
+extern bool sev_snp_needs_sfw;
+
+struct psc_desc {
+ enum psc_op op;
+ struct svsm_ca *ca;
+ u64 caa_pa;
+};
+
+static inline void sev_evict_cache(void *va, int npages)
+{
+ volatile u8 val __always_unused;
+ u8 *bytes = va;
+ int page_idx;
+
+ /*
+ * For SEV guests, a read from the first/last cache-lines of a 4K page
+ * using the guest key is sufficient to cause a flush of all cache-lines
+ * associated with that 4K page without incurring all the overhead of a
+ * full CLFLUSH sequence.
+ */
+ for (page_idx = 0; page_idx < npages; page_idx++) {
+ val = bytes[page_idx * PAGE_SIZE];
+ val = bytes[page_idx * PAGE_SIZE + PAGE_SIZE - 1];
+ }
+}
#else /* !CONFIG_AMD_MEM_ENCRYPT */
@@ -556,7 +604,7 @@ static inline void sev_es_ist_enter(struct pt_regs *regs) { }
static inline void sev_es_ist_exit(void) { }
static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
static inline void sev_es_nmi_complete(void) { }
-static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
+static inline int sev_es_efi_map_ghcbs_cas(pgd_t *pgd) { return 0; }
static inline void sev_enable(struct boot_params *bp) { }
static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; }
@@ -569,7 +617,6 @@ static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npag
static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_wakeup_secondary_cpu(void) { }
static inline bool snp_init(struct boot_params *bp) { return false; }
-static inline void snp_abort(void) { }
static inline void snp_dmi_setup(void) { }
static inline int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input)
{
@@ -579,7 +626,6 @@ static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
static inline u64 sev_get_status(void) { return 0; }
static inline void sev_show_status(void) { }
-static inline void snp_update_svsm_ca(void) { }
static inline int prepare_pte_enc(struct pte_enc_desc *d) { return 0; }
static inline void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot) { }
static inline void snp_kexec_finish(void) { }
@@ -587,11 +633,16 @@ static inline void snp_kexec_begin(void) { }
static inline int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id) { return -1; }
static inline struct snp_msg_desc *snp_msg_alloc(void) { return NULL; }
static inline void snp_msg_free(struct snp_msg_desc *mdesc) { }
-static inline int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
- struct snp_guest_request_ioctl *rio) { return -ENODEV; }
+static inline int snp_send_guest_request(struct snp_msg_desc *mdesc,
+ struct snp_guest_req *req) { return -ENODEV; }
static inline int snp_svsm_vtpm_send_command(u8 *buffer) { return -ENODEV; }
static inline void __init snp_secure_tsc_prepare(void) { }
static inline void __init snp_secure_tsc_init(void) { }
+static inline void sev_evict_cache(void *va, int npages) {}
+static inline enum es_result savic_register_gpa(u64 gpa) { return ES_UNSUPPORTED; }
+static inline enum es_result savic_unregister_gpa(u64 *gpa) { return ES_UNSUPPORTED; }
+static inline void savic_ghcb_msr_write(u32 reg, u64 value) { }
+static inline u64 savic_ghcb_msr_read(u32 reg) { return 0; }
#endif /* CONFIG_AMD_MEM_ENCRYPT */
@@ -603,9 +654,13 @@ void snp_dump_hva_rmpentry(unsigned long address);
int psmash(u64 pfn);
int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable);
int rmp_make_shared(u64 pfn, enum pg_level level);
-void snp_leak_pages(u64 pfn, unsigned int npages);
+void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp);
void kdump_sev_callback(void);
void snp_fixup_e820_tables(void);
+static inline void snp_leak_pages(u64 pfn, unsigned int pages)
+{
+ __snp_leak_pages(pfn, pages, true);
+}
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
static inline int snp_rmptable_init(void) { return -ENOSYS; }
@@ -618,6 +673,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as
return -ENODEV;
}
static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
+static inline void __snp_leak_pages(u64 pfn, unsigned int npages, bool dump_rmp) {}
static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
static inline void kdump_sev_callback(void) { }
static inline void snp_fixup_e820_tables(void) {}
diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index 6a0069761508..04958459a7ca 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/**
+/*
* Copyright(c) 2016-20 Intel Corporation.
*
* Intel Software Guard Extensions (SGX) support.
@@ -28,21 +28,22 @@
#define SGX_CPUID_EPC_MASK GENMASK(3, 0)
enum sgx_encls_function {
- ECREATE = 0x00,
- EADD = 0x01,
- EINIT = 0x02,
- EREMOVE = 0x03,
- EDGBRD = 0x04,
- EDGBWR = 0x05,
- EEXTEND = 0x06,
- ELDU = 0x08,
- EBLOCK = 0x09,
- EPA = 0x0A,
- EWB = 0x0B,
- ETRACK = 0x0C,
- EAUG = 0x0D,
- EMODPR = 0x0E,
- EMODT = 0x0F,
+ ECREATE = 0x00,
+ EADD = 0x01,
+ EINIT = 0x02,
+ EREMOVE = 0x03,
+ EDGBRD = 0x04,
+ EDGBWR = 0x05,
+ EEXTEND = 0x06,
+ ELDU = 0x08,
+ EBLOCK = 0x09,
+ EPA = 0x0A,
+ EWB = 0x0B,
+ ETRACK = 0x0C,
+ EAUG = 0x0D,
+ EMODPR = 0x0E,
+ EMODT = 0x0F,
+ EUPDATESVN = 0x18,
};
/**
@@ -65,15 +66,19 @@ enum sgx_encls_function {
/**
* enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV
- * %SGX_EPC_PAGE_CONFLICT: Page is being written by other ENCLS function.
- * %SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not
+ * @SGX_EPC_PAGE_CONFLICT: Page is being written by other ENCLS function.
+ * @SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not
* been completed yet.
- * %SGX_CHILD_PRESENT SECS has child pages present in the EPC.
- * %SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's
+ * @SGX_CHILD_PRESENT: SECS has child pages present in the EPC.
+ * @SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's
* public key does not match IA32_SGXLEPUBKEYHASH.
- * %SGX_PAGE_NOT_MODIFIABLE: The EPC page cannot be modified because it
+ * @SGX_PAGE_NOT_MODIFIABLE: The EPC page cannot be modified because it
* is in the PENDING or MODIFIED state.
- * %SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received
+ * @SGX_INSUFFICIENT_ENTROPY: Insufficient entropy in RNG.
+ * @SGX_NO_UPDATE: EUPDATESVN could not update the CPUSVN because the
+ * current SVN was not newer than CPUSVN. This is the most
+ * common error code returned by EUPDATESVN.
+ * @SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received
*/
enum sgx_return_code {
SGX_EPC_PAGE_CONFLICT = 7,
@@ -81,6 +86,8 @@ enum sgx_return_code {
SGX_CHILD_PRESENT = 13,
SGX_INVALID_EINITTOKEN = 16,
SGX_PAGE_NOT_MODIFIABLE = 20,
+ SGX_INSUFFICIENT_ENTROPY = 29,
+ SGX_NO_UPDATE = 31,
SGX_UNMASKED_EVENT = 128,
};
@@ -89,7 +96,7 @@ enum sgx_return_code {
/**
* enum sgx_miscselect - additional information to an SSA frame
- * %SGX_MISC_EXINFO: Report #PF or #GP to the SSA frame.
+ * @SGX_MISC_EXINFO: Report #PF or #GP to the SSA frame.
*
* Save State Area (SSA) is a stack inside the enclave used to store processor
* state when an exception or interrupt occurs. This enum defines additional
@@ -105,17 +112,17 @@ enum sgx_miscselect {
#define SGX_SSA_MISC_EXINFO_SIZE 16
/**
- * enum sgx_attributes - the attributes field in &struct sgx_secs
- * %SGX_ATTR_INIT: Enclave can be entered (is initialized).
- * %SGX_ATTR_DEBUG: Allow ENCLS(EDBGRD) and ENCLS(EDBGWR).
- * %SGX_ATTR_MODE64BIT: Tell that this a 64-bit enclave.
- * %SGX_ATTR_PROVISIONKEY: Allow to use provisioning keys for remote
+ * enum sgx_attribute - the attributes field in &struct sgx_secs
+ * @SGX_ATTR_INIT: Enclave can be entered (is initialized).
+ * @SGX_ATTR_DEBUG: Allow ENCLS(EDBGRD) and ENCLS(EDBGWR).
+ * @SGX_ATTR_MODE64BIT: Tell that this a 64-bit enclave.
+ * @SGX_ATTR_PROVISIONKEY: Allow to use provisioning keys for remote
* attestation.
- * %SGX_ATTR_KSS: Allow to use key separation and sharing (KSS).
- * %SGX_ATTR_EINITTOKENKEY: Allow to use token signing key that is used to
+ * @SGX_ATTR_KSS: Allow to use key separation and sharing (KSS).
+ * @SGX_ATTR_EINITTOKENKEY: Allow to use token signing key that is used to
* sign cryptographic tokens that can be passed to
* EINIT as an authorization to run an enclave.
- * %SGX_ATTR_ASYNC_EXIT_NOTIFY: Allow enclaves to be notified after an
+ * @SGX_ATTR_ASYNC_EXIT_NOTIFY: Allow enclaves to be notified after an
* asynchronous exit has occurred.
*/
enum sgx_attribute {
@@ -188,7 +195,7 @@ struct sgx_secs {
/**
* enum sgx_tcs_flags - execution flags for TCS
- * %SGX_TCS_DBGOPTIN: If enabled allows single-stepping and breakpoints
+ * @SGX_TCS_DBGOPTIN: If enabled allows single-stepping and breakpoints
* inside an enclave. It is cleared by EADD but can
* be set later with EDBGWR.
*/
@@ -253,11 +260,11 @@ struct sgx_pageinfo {
/**
* enum sgx_page_type - bits in the SECINFO flags defining the page type
- * %SGX_PAGE_TYPE_SECS: a SECS page
- * %SGX_PAGE_TYPE_TCS: a TCS page
- * %SGX_PAGE_TYPE_REG: a regular page
- * %SGX_PAGE_TYPE_VA: a VA page
- * %SGX_PAGE_TYPE_TRIM: a page in trimmed state
+ * @SGX_PAGE_TYPE_SECS: a SECS page
+ * @SGX_PAGE_TYPE_TCS: a TCS page
+ * @SGX_PAGE_TYPE_REG: a regular page
+ * @SGX_PAGE_TYPE_VA: a VA page
+ * @SGX_PAGE_TYPE_TRIM: a page in trimmed state
*
* Make sure when making changes to this enum that its values can still fit
* in the bitfield within &struct sgx_encl_page
@@ -275,14 +282,14 @@ enum sgx_page_type {
/**
* enum sgx_secinfo_flags - the flags field in &struct sgx_secinfo
- * %SGX_SECINFO_R: allow read
- * %SGX_SECINFO_W: allow write
- * %SGX_SECINFO_X: allow execution
- * %SGX_SECINFO_SECS: a SECS page
- * %SGX_SECINFO_TCS: a TCS page
- * %SGX_SECINFO_REG: a regular page
- * %SGX_SECINFO_VA: a VA page
- * %SGX_SECINFO_TRIM: a page in trimmed state
+ * @SGX_SECINFO_R: allow read
+ * @SGX_SECINFO_W: allow write
+ * @SGX_SECINFO_X: allow execution
+ * @SGX_SECINFO_SECS: a SECS page
+ * @SGX_SECINFO_TCS: a TCS page
+ * @SGX_SECINFO_REG: a regular page
+ * @SGX_SECINFO_VA: a VA page
+ * @SGX_SECINFO_TRIM: a page in trimmed state
*/
enum sgx_secinfo_flags {
SGX_SECINFO_R = BIT(0),
diff --git a/arch/x86/include/asm/shared/msr.h b/arch/x86/include/asm/shared/msr.h
index 1e6ec10b3a15..a20b1c08c99f 100644
--- a/arch/x86/include/asm/shared/msr.h
+++ b/arch/x86/include/asm/shared/msr.h
@@ -12,4 +12,19 @@ struct msr {
};
};
+/*
+ * The kernel proper already defines rdmsr()/wrmsr(), but they are not usable in
+ * the boot kernel since they rely on tracepoint/exception handling infrastructure
+ * that's not available here.
+ */
+static inline void raw_rdmsr(unsigned int reg, struct msr *m)
+{
+ asm volatile("rdmsr" : "=a" (m->l), "=d" (m->h) : "c" (reg));
+}
+
+static inline void raw_wrmsr(unsigned int reg, const struct msr *m)
+{
+ asm volatile("wrmsr" : : "c" (reg), "a"(m->l), "d" (m->h) : "memory");
+}
+
#endif /* _ASM_X86_SHARED_MSR_H */
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index 2f3820342598..8bc074c8d7c6 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -72,6 +72,7 @@
#define TDVMCALL_MAP_GPA 0x10001
#define TDVMCALL_GET_QUOTE 0x10002
#define TDVMCALL_REPORT_FATAL_ERROR 0x10003
+#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004ULL
/*
* TDG.VP.VMCALL Status Codes (returned in R10)
@@ -80,6 +81,7 @@
#define TDVMCALL_STATUS_RETRY 0x0000000000000001ULL
#define TDVMCALL_STATUS_INVALID_OPERAND 0x8000000000000000ULL
#define TDVMCALL_STATUS_ALIGN_ERROR 0x8000000000000002ULL
+#define TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED 0x8000000000000003ULL
/*
* Bitmasks of exposed registers (with VMM).
diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h
index ba6f2fe43848..fc7dcec58fd4 100644
--- a/arch/x86/include/asm/shstk.h
+++ b/arch/x86/include/asm/shstk.h
@@ -16,25 +16,29 @@ struct thread_shstk {
long shstk_prctl(struct task_struct *task, int option, unsigned long arg2);
void reset_thread_features(void);
-unsigned long shstk_alloc_thread_stack(struct task_struct *p, unsigned long clone_flags,
+unsigned long shstk_alloc_thread_stack(struct task_struct *p, u64 clone_flags,
unsigned long stack_size);
void shstk_free(struct task_struct *p);
int setup_signal_shadow_stack(struct ksignal *ksig);
int restore_signal_shadow_stack(void);
int shstk_update_last_frame(unsigned long val);
bool shstk_is_enabled(void);
+int shstk_pop(u64 *val);
+int shstk_push(u64 val);
#else
static inline long shstk_prctl(struct task_struct *task, int option,
unsigned long arg2) { return -EINVAL; }
static inline void reset_thread_features(void) {}
static inline unsigned long shstk_alloc_thread_stack(struct task_struct *p,
- unsigned long clone_flags,
+ u64 clone_flags,
unsigned long stack_size) { return 0; }
static inline void shstk_free(struct task_struct *p) {}
static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; }
static inline int restore_signal_shadow_stack(void) { return 0; }
static inline int shstk_update_last_frame(unsigned long val) { return 0; }
static inline bool shstk_is_enabled(void) { return false; }
+static inline int shstk_pop(u64 *val) { return -ENOTSUPP; }
+static inline int shstk_push(u64 val) { return -ENOTSUPP; }
#endif /* CONFIG_X86_USER_SHADOW_STACK */
#endif /* __ASSEMBLER__ */
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index e770c4fc47f4..8727c7e21dd1 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -24,4 +24,26 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
+/*
+ * To prevent immediate repeat of single step trap on return from SIGTRAP
+ * handler if the trap flag (TF) is set without an external debugger attached,
+ * clear the software event flag in the augmented SS, ensuring no single-step
+ * trap is pending upon ERETU completion.
+ *
+ * Note, this function should be called in sigreturn() before the original
+ * state is restored to make sure the TF is read from the entry frame.
+ */
+static __always_inline void prevent_single_step_upon_eretu(struct pt_regs *regs)
+{
+ /*
+ * If the trap flag (TF) is set, i.e., the sigreturn() SYSCALL instruction
+ * is being single-stepped, do not clear the software event flag in the
+ * augmented SS, thus a debugger won't skip over the following instruction.
+ */
+#ifdef CONFIG_X86_FRED
+ if (!(regs->flags & X86_EFLAGS_TF))
+ regs->fred_ss.swevent = 0;
+#endif
+}
+
#endif /* _ASM_X86_SIGHANDLING_H */
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index c72d46175374..5c03aaa89014 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -83,8 +83,7 @@ static inline int __const_sigismember(sigset_t *set, int _sig)
static inline int __gen_sigismember(sigset_t *set, int _sig)
{
bool ret;
- asm("btl %2,%1" CC_SET(c)
- : CC_OUT(c) (ret) : "m"(*set), "Ir"(_sig-1));
+ asm("btl %2,%1" : "=@ccc"(ret) : "m"(*set), "Ir"(_sig-1));
return ret;
}
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 4f84d421d1cf..977bef14a0ab 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -23,24 +23,61 @@
#else /* __ASSEMBLER__ */
+/*
+ * The CLAC/STAC instructions toggle the enforcement of
+ * X86_FEATURE_SMAP along with X86_FEATURE_LASS.
+ *
+ * SMAP enforcement is based on the _PAGE_BIT_USER bit in the page
+ * tables. The kernel is not allowed to touch pages with that bit set
+ * unless the AC bit is set.
+ *
+ * Use stac()/clac() when accessing userspace (_PAGE_USER) mappings,
+ * regardless of location.
+ *
+ * Note: a barrier is implicit in alternative().
+ */
+
static __always_inline void clac(void)
{
- /* Note: a barrier is implicit in alternative() */
alternative("", "clac", X86_FEATURE_SMAP);
}
static __always_inline void stac(void)
{
- /* Note: a barrier is implicit in alternative() */
alternative("", "stac", X86_FEATURE_SMAP);
}
+/*
+ * LASS enforcement is based on bit 63 of the virtual address. The
+ * kernel is not allowed to touch memory in the lower half of the
+ * virtual address space.
+ *
+ * Use lass_stac()/lass_clac() to toggle the AC bit for kernel data
+ * accesses (!_PAGE_USER) that are blocked by LASS, but not by SMAP.
+ *
+ * Even with the AC bit set, LASS will continue to block instruction
+ * fetches from the user half of the address space. To allow those,
+ * clear CR4.LASS to disable the LASS mechanism entirely.
+ *
+ * Note: a barrier is implicit in alternative().
+ */
+
+static __always_inline void lass_clac(void)
+{
+ alternative("", "clac", X86_FEATURE_LASS);
+}
+
+static __always_inline void lass_stac(void)
+{
+ alternative("", "stac", X86_FEATURE_LASS);
+}
+
static __always_inline unsigned long smap_save(void)
{
unsigned long flags;
asm volatile ("# smap_save\n\t"
- ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE
+ ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "\n\t"
"", "pushf; pop %0; clac",
X86_FEATURE_SMAP)
: "=rm" (flags) : : "memory", "cc");
@@ -51,7 +88,7 @@ static __always_inline unsigned long smap_save(void)
static __always_inline void smap_restore(unsigned long flags)
{
asm volatile ("# smap_restore\n\t"
- ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE
+ ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "\n\t"
"", "push %0; popf",
X86_FEATURE_SMAP)
: : "g" (flags) : "memory", "cc");
@@ -64,9 +101,9 @@ static __always_inline void smap_restore(unsigned long flags)
ALTERNATIVE("", "stac", X86_FEATURE_SMAP)
#define ASM_CLAC_UNSAFE \
- ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "clac", X86_FEATURE_SMAP)
+ ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "\n\t" "clac", X86_FEATURE_SMAP)
#define ASM_STAC_UNSAFE \
- ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "stac", X86_FEATURE_SMAP)
+ ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "\n\t" "stac", X86_FEATURE_SMAP)
#endif /* __ASSEMBLER__ */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 0c1c68039d6f..84951572ab81 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -109,10 +109,13 @@ int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_kick_ap(unsigned int cpu, struct task_struct *tidle);
int native_cpu_disable(void);
void __noreturn hlt_play_dead(void);
-void native_play_dead(void);
+void __noreturn native_play_dead(void);
void play_dead_common(void);
void wbinvd_on_cpu(int cpu);
-int wbinvd_on_all_cpus(void);
+void wbinvd_on_all_cpus(void);
+void wbinvd_on_cpus_mask(struct cpumask *cpus);
+void wbnoinvd_on_all_cpus(void);
+void wbnoinvd_on_cpus_mask(struct cpumask *cpus);
void smp_kick_mwait_play_dead(void);
void __noreturn mwait_play_dead(unsigned int eax_hint);
@@ -148,10 +151,24 @@ static inline struct cpumask *cpu_l2c_shared_mask(int cpu)
#else /* !CONFIG_SMP */
#define wbinvd_on_cpu(cpu) wbinvd()
-static inline int wbinvd_on_all_cpus(void)
+static inline void wbinvd_on_all_cpus(void)
{
wbinvd();
- return 0;
+}
+
+static inline void wbinvd_on_cpus_mask(struct cpumask *cpus)
+{
+ wbinvd();
+}
+
+static inline void wbnoinvd_on_all_cpus(void)
+{
+ wbnoinvd();
+}
+
+static inline void wbnoinvd_on_cpus_mask(struct cpumask *cpus)
+{
+ wbnoinvd();
}
static inline struct cpumask *cpu_llc_shared_mask(int cpu)
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index ecda17efa042..46aa2c9c1bda 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -75,9 +75,7 @@ static inline u32 rdpkru(void)
* "rdpkru" instruction. Places PKRU contents in to EAX,
* clears EDX and requires that ecx=0.
*/
- asm volatile(".byte 0x0f,0x01,0xee\n\t"
- : "=a" (pkru), "=d" (edx)
- : "c" (ecx));
+ asm volatile("rdpkru" : "=a" (pkru), "=d" (edx) : "c" (ecx));
return pkru;
}
@@ -89,8 +87,7 @@ static inline void wrpkru(u32 pkru)
* "wrpkru" instruction. Loads contents in EAX to PKRU,
* requires that ecx = edx = 0.
*/
- asm volatile(".byte 0x0f,0x01,0xef\n\t"
- : : "a" (pkru), "c"(ecx), "d"(edx));
+ asm volatile("wrpkru" : : "a" (pkru), "c"(ecx), "d"(edx));
}
#else
@@ -104,9 +101,36 @@ static inline void wrpkru(u32 pkru)
}
#endif
+/*
+ * Write back all modified lines in all levels of cache associated with this
+ * logical processor to main memory, and then invalidate all caches. Depending
+ * on the micro-architecture, WBINVD (and WBNOINVD below) may or may not affect
+ * lower level caches associated with another logical processor that shares any
+ * level of this processor's cache hierarchy.
+ */
static __always_inline void wbinvd(void)
{
- asm volatile("wbinvd": : :"memory");
+ asm volatile("wbinvd" : : : "memory");
+}
+
+/* Instruction encoding provided for binutils backwards compatibility. */
+#define ASM_WBNOINVD _ASM_BYTES(0xf3,0x0f,0x09)
+
+/*
+ * Write back all modified lines in all levels of cache associated with this
+ * logical processor to main memory, but do NOT explicitly invalidate caches,
+ * i.e. leave all/most cache lines in the hierarchy in non-modified state.
+ */
+static __always_inline void wbnoinvd(void)
+{
+ /*
+ * Explicitly encode WBINVD if X86_FEATURE_WBNOINVD is unavailable even
+ * though WBNOINVD is backwards compatible (it's simply WBINVD with an
+ * ignored REP prefix), to guarantee that WBNOINVD isn't used if it
+ * needs to be avoided for any reason. For all supported usage in the
+ * kernel, WBINVD is functionally a superset of WBNOINVD.
+ */
+ alternative("wbinvd", ASM_WBNOINVD, X86_FEATURE_WBNOINVD);
}
static inline unsigned long __read_cr4(void)
@@ -260,8 +284,7 @@ static inline int enqcmds(void __iomem *dst, const void *src)
* See movdir64b()'s comment on operand specification.
*/
asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90"
- CC_SET(z)
- : CC_OUT(z) (zf), "+m" (*__dst)
+ : "=@ccz" (zf), "+m" (*__dst)
: "m" (*__src), "a" (__dst), "d" (__src));
/* Submission failure is indicated via EFLAGS.ZF=1 */
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index 41502bd2afd6..4cd725a8fe91 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -36,7 +36,7 @@
".align 4 \n" \
".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
STATIC_CALL_TRAMP_STR(name) ": \n" \
- ANNOTATE_NOENDBR \
+ ANNOTATE_NOENDBR " \n" \
insns " \n" \
".byte 0x0f, 0xb9, 0xcc \n" \
".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h
index c3c2c1914d65..9cb5aae7fba9 100644
--- a/arch/x86/include/asm/string.h
+++ b/arch/x86/include/asm/string.h
@@ -1,6 +1,32 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_STRING_H
+#define _ASM_X86_STRING_H
+
#ifdef CONFIG_X86_32
# include <asm/string_32.h>
#else
# include <asm/string_64.h>
#endif
+
+static __always_inline void *__inline_memcpy(void *to, const void *from, size_t len)
+{
+ void *ret = to;
+
+ asm volatile("rep movsb"
+ : "+D" (to), "+S" (from), "+c" (len)
+ : : "memory");
+ return ret;
+}
+
+static __always_inline void *__inline_memset(void *s, int v, size_t n)
+{
+ void *ret = s;
+
+ asm volatile("rep stosb"
+ : "+D" (s), "+c" (n)
+ : "a" ((uint8_t)v)
+ : "memory");
+ return ret;
+}
+
+#endif /* _ASM_X86_STRING_H */
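__inline_memcpy() and __inline_memset() are always-inlined rep-string helpers, useful where an out-of-line call to memcpy()/memset() must be avoided (e.g. instrumentation-free or early-boot paths). A hedged usage sketch; the wrapper name and its purpose are illustrative assumptions:

static __always_inline void copy_then_wipe_example(void *dst, void *src, size_t len)
{
        /* Copy with REP MOVSB, then clear the source with REP STOSB. */
        __inline_memcpy(dst, src, len);
        __inline_memset(src, 0, len);
}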
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 79e9695dc13e..4635616863f5 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -31,7 +31,7 @@ KCFI_REFERENCE(__memset);
#define __HAVE_ARCH_MEMSET16
static inline void *memset16(uint16_t *s, uint16_t v, size_t n)
{
- const __auto_type s0 = s;
+ const auto s0 = s;
asm volatile (
"rep stosw"
: "+D" (s), "+c" (n)
@@ -44,7 +44,7 @@ static inline void *memset16(uint16_t *s, uint16_t v, size_t n)
#define __HAVE_ARCH_MEMSET32
static inline void *memset32(uint32_t *s, uint32_t v, size_t n)
{
- const __auto_type s0 = s;
+ const auto s0 = s;
asm volatile (
"rep stosl"
: "+D" (s), "+c" (n)
@@ -57,7 +57,7 @@ static inline void *memset32(uint32_t *s, uint32_t v, size_t n)
#define __HAVE_ARCH_MEMSET64
static inline void *memset64(uint64_t *s, uint64_t v, size_t n)
{
- const __auto_type s0 = s;
+ const auto s0 = s;
asm volatile (
"rep stosq"
: "+D" (s), "+c" (n)
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index ad954a1a6656..56aa99503dc4 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -252,16 +252,21 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
+/*
+ * GA_LOG_INTR is a synthetic flag that's never propagated to hardware-visible
+ * tables. GA_LOG_INTR is set if the vCPU needs device posted IRQs to generate
+ * GA log interrupts to wake the vCPU (because it's blocking or about to block).
+ */
+#define AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR BIT_ULL(61)
+
#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
-#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
+#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK GENMASK_ULL(51, 12)
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFFULL)
#define AVIC_DOORBELL_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
-#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
-
#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
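Since AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR is purely synthetic (bit 61 is never written to the hardware-visible table), KVM can fold it into its cached copy of the entry. A tiny sketch of that idea; how KVM actually tracks the entry is an implementation detail not shown in this hunk:

/* Illustrative only: request GA-log wakeups for a blocking vCPU. */
static inline u64 avic_entry_set_ga_log_intr(u64 entry)
{
        return entry | AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR;
}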
@@ -274,7 +279,7 @@ enum avic_ipi_failure_cause {
AVIC_IPI_FAILURE_INVALID_IPI_VECTOR,
};
-#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0)
+#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(11, 0)
/*
* For AVIC, the max index allowed for physical APIC ID table is 0xfe (254), as
@@ -284,18 +289,20 @@ enum avic_ipi_failure_cause {
/*
* For x2AVIC, the max index allowed for physical APIC ID table is 0x1ff (511).
+ * With X86_FEATURE_X2AVIC_EXT, the max index is increased to 0xfff (4095).
*/
#define X2AVIC_MAX_PHYSICAL_ID 0x1FFUL
+#define X2AVIC_4K_MAX_PHYSICAL_ID 0xFFFUL
static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID);
static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
-
-#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
+static_assert((X2AVIC_4K_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_4K_MAX_PHYSICAL_ID);
#define SVM_SEV_FEAT_SNP_ACTIVE BIT(0)
#define SVM_SEV_FEAT_RESTRICTED_INJECTION BIT(3)
#define SVM_SEV_FEAT_ALTERNATE_INJECTION BIT(4)
#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5)
+#define SVM_SEV_FEAT_SECURE_TSC BIT(9)
#define VMCB_ALLOWED_SEV_FEATURES_VALID BIT_ULL(63)
@@ -697,5 +704,6 @@ DEFINE_GHCB_ACCESSORS(sw_exit_info_1)
DEFINE_GHCB_ACCESSORS(sw_exit_info_2)
DEFINE_GHCB_ACCESSORS(sw_scratch)
DEFINE_GHCB_ACCESSORS(xcr0)
+DEFINE_GHCB_ACCESSORS(xss)
#endif
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 8b19294600c4..6b338d7f01b7 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -102,18 +102,41 @@ u64 __seamcall_ret(u64 fn, struct tdx_module_args *args);
u64 __seamcall_saved_ret(u64 fn, struct tdx_module_args *args);
void tdx_init(void);
+#include <linux/preempt.h>
#include <asm/archrandom.h>
+#include <asm/processor.h>
typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args);
-static inline u64 sc_retry(sc_func_t func, u64 fn,
+static __always_inline u64 __seamcall_dirty_cache(sc_func_t func, u64 fn,
+ struct tdx_module_args *args)
+{
+ lockdep_assert_preemption_disabled();
+
+ /*
+ * SEAMCALLs are made to the TDX module and can generate dirty
+ * cachelines of TDX private memory. Mark cache state incoherent
+ * so that the cache can be flushed during kexec.
+ *
+ * This needs to be done before actually making the SEAMCALL,
+ * because the kexec-ing CPU could send an NMI to stop remote CPUs,
+ * in which case even disabling IRQs won't help here.
+ */
+ this_cpu_write(cache_state_incoherent, true);
+
+ return func(fn, args);
+}
+
+static __always_inline u64 sc_retry(sc_func_t func, u64 fn,
struct tdx_module_args *args)
{
int retry = RDRAND_RETRY_LOOPS;
u64 ret;
do {
- ret = func(fn, args);
+ preempt_disable();
+ ret = __seamcall_dirty_cache(func, fn, args);
+ preempt_enable();
} while (ret == TDX_RND_NO_ENTROPY && --retry);
return ret;
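sc_retry() now disables preemption around each attempt so the cache_state_incoherent per-CPU flag is set on the CPU that actually issues the SEAMCALL, and it keeps retrying while the TDX module reports TDX_RND_NO_ENTROPY. A hedged sketch of a caller; the wrapper name is an assumption, though __seamcall_ret() is the low-level entry point declared above:

static inline u64 tdx_seamcall_example(u64 fn, struct tdx_module_args *args)
{
        /* Retries internally on TDX_RND_NO_ENTROPY. */
        return sc_retry(__seamcall_ret, fn, args);
}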
@@ -131,6 +154,8 @@ int tdx_guest_keyid_alloc(void);
u32 tdx_get_nr_guest_keyids(void);
void tdx_guest_keyid_free(unsigned int keyid);
+void tdx_quirk_reset_page(struct page *page);
+
struct tdx_td {
/* TD root structure: */
struct page *tdr_page;
@@ -146,6 +171,8 @@ struct tdx_td {
struct tdx_vp {
/* TDVP root page */
struct page *tdvpr_page;
+ /* precalculated page_to_phys(tdvpr_page) for use in noinstr code */
+ phys_addr_t tdvpr_pa;
/* TD vCPU control structure: */
struct page **tdcx_pages;
@@ -203,5 +230,11 @@ static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; }
static inline const struct tdx_sys_info *tdx_get_sysinfo(void) { return NULL; }
#endif /* CONFIG_INTEL_TDX_HOST */
+#ifdef CONFIG_KEXEC_CORE
+void tdx_cpu_flush_cache_for_kexec(void);
+#else
+static inline void tdx_cpu_flush_cache_for_kexec(void) { }
+#endif
+
#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_TDX_H */
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 5337f1be18f6..f2d142a0a862 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -178,9 +178,9 @@ void int3_emulate_ret(struct pt_regs *regs)
}
static __always_inline
-void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp)
+bool __emulate_cc(unsigned long flags, u8 cc)
{
- static const unsigned long jcc_mask[6] = {
+ static const unsigned long cc_mask[6] = {
[0] = X86_EFLAGS_OF,
[1] = X86_EFLAGS_CF,
[2] = X86_EFLAGS_ZF,
@@ -193,15 +193,21 @@ void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned lo
bool match;
if (cc < 0xc) {
- match = regs->flags & jcc_mask[cc >> 1];
+ match = flags & cc_mask[cc >> 1];
} else {
- match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
- ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
+ match = ((flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
+ ((flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
if (cc >= 0xe)
- match = match || (regs->flags & X86_EFLAGS_ZF);
+ match = match || (flags & X86_EFLAGS_ZF);
}
- if ((match && !invert) || (!match && invert))
+ return (match && !invert) || (!match && invert);
+}
+
+static __always_inline
+void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp)
+{
+ if (__emulate_cc(regs->flags, cc))
ip += disp;
int3_emulate_jmp(regs, ip);
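__emulate_cc() now exposes the condition-code evaluation on a raw EFLAGS image, so other emulation paths can reuse it without a struct pt_regs. A minimal sketch assuming only the helper above; 0x4 is the Jcc condition nibble for JE/JZ:

static __always_inline bool flags_take_je_example(unsigned long flags)
{
        /* True when ZF is set in the supplied flags image. */
        return __emulate_cc(flags, 0x4);
}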
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 9282465eea21..e71e0e8362ed 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -80,56 +80,42 @@ struct thread_info {
#endif
/*
- * thread information flags
- * - these are process state flags that various assembly files
- * may need to access
+ * Tell the generic TIF infrastructure which bits x86 supports
*/
-#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
-#define TIF_SIGPENDING 2 /* signal pending */
-#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
-#define TIF_NEED_RESCHED_LAZY 4 /* Lazy rescheduling needed */
-#define TIF_SINGLESTEP 5 /* reenable singlestep on user return*/
-#define TIF_SSBD 6 /* Speculative store bypass disable */
-#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
-#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */
-#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
-#define TIF_UPROBE 12 /* breakpointed or singlestepping */
-#define TIF_PATCH_PENDING 13 /* pending live patching update */
-#define TIF_NEED_FPU_LOAD 14 /* load FPU on return to userspace */
-#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
-#define TIF_NOTSC 16 /* TSC is not accessible in userland */
-#define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */
-#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
-#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
+#define HAVE_TIF_NEED_RESCHED_LAZY
+#define HAVE_TIF_POLLING_NRFLAG
+#define HAVE_TIF_SINGLESTEP
+
+#include <asm-generic/thread_info_tif.h>
+
+/* Architecture specific TIF space starts at 16 */
+#define TIF_SSBD 16 /* Speculative store bypass disable */
+#define TIF_SPEC_IB 17 /* Indirect branch speculation mitigation */
+#define TIF_SPEC_L1D_FLUSH 18 /* Flush L1D on mm switches (processes) */
+#define TIF_NEED_FPU_LOAD 19 /* load FPU on return to userspace */
+#define TIF_NOCPUID 20 /* CPUID is not accessible in userland */
+#define TIF_NOTSC 21 /* TSC is not accessible in userland */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
#define TIF_SPEC_FORCE_UPDATE 23 /* Force speculation MSR update in context switch */
#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
-#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
+#define TIF_SINGLESTEP 25 /* reenable singlestep on user return */
+#define TIF_BLOCKSTEP 26 /* set when we want DEBUGCTLMSR_BTF */
#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
-#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
-
-#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
-#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
-#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
-#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
-#define _TIF_SSBD (1 << TIF_SSBD)
-#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
-#define _TIF_SPEC_L1D_FLUSH (1 << TIF_SPEC_L1D_FLUSH)
-#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
-#define _TIF_UPROBE (1 << TIF_UPROBE)
-#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
-#define _TIF_NEED_FPU_LOAD (1 << TIF_NEED_FPU_LOAD)
-#define _TIF_NOCPUID (1 << TIF_NOCPUID)
-#define _TIF_NOTSC (1 << TIF_NOTSC)
-#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
-#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
-#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
-#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
-#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
-#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
-#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
-#define _TIF_ADDR32 (1 << TIF_ADDR32)
+#define TIF_ADDR32 28 /* 32-bit address space on 64 bits */
+
+#define _TIF_SSBD BIT(TIF_SSBD)
+#define _TIF_SPEC_IB BIT(TIF_SPEC_IB)
+#define _TIF_SPEC_L1D_FLUSH BIT(TIF_SPEC_L1D_FLUSH)
+#define _TIF_NEED_FPU_LOAD BIT(TIF_NEED_FPU_LOAD)
+#define _TIF_NOCPUID BIT(TIF_NOCPUID)
+#define _TIF_NOTSC BIT(TIF_NOTSC)
+#define _TIF_IO_BITMAP BIT(TIF_IO_BITMAP)
+#define _TIF_SPEC_FORCE_UPDATE BIT(TIF_SPEC_FORCE_UPDATE)
+#define _TIF_FORCED_TF BIT(TIF_FORCED_TF)
+#define _TIF_BLOCKSTEP BIT(TIF_BLOCKSTEP)
+#define _TIF_SINGLESTEP BIT(TIF_SINGLESTEP)
+#define _TIF_LAZY_MMU_UPDATES BIT(TIF_LAZY_MMU_UPDATES)
+#define _TIF_ADDR32 BIT(TIF_ADDR32)
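The generic TIF bits (TIF_SIGPENDING, TIF_NEED_RESCHED, ...) now come from <asm-generic/thread_info_tif.h>, and only the x86-specific flags are renumbered to start at 16; flag users are unaffected. A sketch of unchanged usage, assuming the usual <linux/sched.h> accessors:

/* Illustrative: flag tests look the same after the renumbering. */
static inline bool task_needs_fpu_load_example(struct task_struct *tsk)
{
        return test_tsk_thread_flag(tsk, TIF_NEED_FPU_LOAD);
}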
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW_BASE \
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index e9b81876ebe4..00daedfefc1b 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -356,11 +356,6 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b
mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
-static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
-{
- flush_tlb_mm(mm);
-}
-
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
static inline bool pte_flags_need_flush(unsigned long oldflags,
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6c79ee7c0957..1fadf0cf520c 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -218,6 +218,12 @@ static inline unsigned int topology_amd_nodes_per_pkg(void)
return __amd_nodes_per_pkg;
}
+#else /* CONFIG_SMP */
+static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_max_smt_threads(void) { return 1; }
+static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
+#endif /* !CONFIG_SMP */
+
extern struct cpumask __cpu_primary_thread_mask;
#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
@@ -231,11 +237,15 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
}
#define topology_is_primary_thread topology_is_primary_thread
-#else /* CONFIG_SMP */
-static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
-static inline int topology_max_smt_threads(void) { return 1; }
-static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
-#endif /* !CONFIG_SMP */
+int topology_get_primary_thread(unsigned int cpu);
+
+static inline bool topology_is_core_online(unsigned int cpu)
+{
+ int pcpu = topology_get_primary_thread(cpu);
+
+ return pcpu >= 0 ? cpu_online(pcpu) : false;
+}
+#define topology_is_core_online topology_is_core_online
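topology_is_core_online() resolves a CPU to its core's primary thread via topology_get_primary_thread() and reports whether that thread is online. A hedged sketch of a caller; the counting helper is illustrative, not from the patch:

static inline unsigned int count_online_cores_example(void)
{
        unsigned int cpu, cores = 0;

        /* A core counts as online when its primary thread is online. */
        for_each_possible_cpu(cpu)
                if (topology_is_primary_thread(cpu) && topology_is_core_online(cpu))
                        cores++;
        return cores;
}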
static inline void arch_fix_phys_package_id(int num, u32 slot)
{
@@ -315,4 +325,6 @@ static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled
extern void arch_scale_freq_tick(void);
#define arch_scale_freq_tick arch_scale_freq_tick
+extern int arch_sched_node_distance(int from, int to);
+
#endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 3a7755c1a441..367297b188c3 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -378,7 +378,7 @@ do { \
asm_goto_output("\n" \
"1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
_ASM_EXTABLE_UA(1b, %l[label]) \
- : CC_OUT(z) (success), \
+ : "=@ccz" (success), \
[ptr] "+m" (*_ptr), \
[old] "+a" (__old) \
: [new] ltype (__new) \
@@ -397,7 +397,7 @@ do { \
asm_goto_output("\n" \
"1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
_ASM_EXTABLE_UA(1b, %l[label]) \
- : CC_OUT(z) (success), \
+ : "=@ccz" (success), \
"+A" (__old), \
[ptr] "+m" (*_ptr) \
: "b" ((u32)__new), \
@@ -417,11 +417,10 @@ do { \
__typeof__(*(_ptr)) __new = (_new); \
asm volatile("\n" \
"1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
- CC_SET(z) \
"2:\n" \
_ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \
%[errout]) \
- : CC_OUT(z) (success), \
+ : "=@ccz" (success), \
[errout] "+r" (__err), \
[ptr] "+m" (*_ptr), \
[old] "+a" (__old) \
@@ -529,18 +528,18 @@ static __must_check __always_inline bool user_access_begin(const void __user *pt
#define user_access_save() smap_save()
#define user_access_restore(x) smap_restore(x)
-#define unsafe_put_user(x, ptr, label) \
+#define arch_unsafe_put_user(x, ptr, label) \
__put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label)
#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
-#define unsafe_get_user(x, ptr, err_label) \
+#define arch_unsafe_get_user(x, ptr, err_label) \
do { \
__inttype(*(ptr)) __gu_val; \
__get_user_size(__gu_val, (ptr), sizeof(*(ptr)), err_label); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
} while (0)
#else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT
-#define unsafe_get_user(x, ptr, err_label) \
+#define arch_unsafe_get_user(x, ptr, err_label) \
do { \
int __gu_err; \
__inttype(*(ptr)) __gu_val; \
@@ -619,11 +618,11 @@ do { \
} while (0)
#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
-#define __get_kernel_nofault(dst, src, type, err_label) \
+#define arch_get_kernel_nofault(dst, src, type, err_label) \
__get_user_size(*((type *)(dst)), (__force type __user *)(src), \
sizeof(type), err_label)
#else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT
-#define __get_kernel_nofault(dst, src, type, err_label) \
+#define arch_get_kernel_nofault(dst, src, type, err_label) \
do { \
int __kr_err; \
\
@@ -634,7 +633,7 @@ do { \
} while (0)
#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT
-#define __put_kernel_nofault(dst, src, type, err_label) \
+#define arch_put_kernel_nofault(dst, src, type, err_label) \
__put_user_size(*((type *)(src)), (__force type __user *)(dst), \
sizeof(type), err_label)
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index c8a5ae35c871..915124011c27 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -12,12 +12,12 @@
#include <asm/cpufeatures.h>
#include <asm/page.h>
#include <asm/percpu.h>
-#include <asm/runtime-const.h>
-/*
- * Virtual variable: there's no actual backing store for this,
- * it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)'
- */
+#ifdef MODULE
+ #define runtime_const_ptr(sym) (sym)
+#else
+ #include <asm/runtime-const.h>
+#endif
extern unsigned long USER_PTR_MAX;
#ifdef CONFIG_ADDRESS_MASKING
@@ -72,7 +72,7 @@ static inline void __user *mask_user_address(const void __user *ptr)
return ret;
}
#define masked_user_access_begin(x) ({ \
- __auto_type __masked_ptr = (x); \
+ auto __masked_ptr = (x); \
__masked_ptr = mask_user_address(__masked_ptr); \
__uaccess_begin(); __masked_ptr; })
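masked_user_access_begin() clamps the pointer into the user address range and opens a user-access window in one step. A usage sketch under the usual unsafe_get_user()/user_access_end() pattern; the function name and error label are assumptions for illustration:

static inline int read_user_long_example(const unsigned long __user *uptr,
                                         unsigned long *val)
{
        uptr = masked_user_access_begin(uptr);
        unsafe_get_user(*val, uptr, efault);
        user_access_end();
        return 0;
efault:
        user_access_end();
        return -EFAULT;
}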
diff --git a/arch/x86/include/asm/unwind_user.h b/arch/x86/include/asm/unwind_user.h
new file mode 100644
index 000000000000..12064284bc4e
--- /dev/null
+++ b/arch/x86/include/asm/unwind_user.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_UNWIND_USER_H
+#define _ASM_X86_UNWIND_USER_H
+
+#ifdef CONFIG_HAVE_UNWIND_USER_FP
+
+#include <asm/ptrace.h>
+#include <asm/uprobes.h>
+
+#define ARCH_INIT_USER_FP_FRAME(ws) \
+ .cfa_off = 2*(ws), \
+ .ra_off = -1*(ws), \
+ .fp_off = -2*(ws), \
+ .use_fp = true,
+
+#define ARCH_INIT_USER_FP_ENTRY_FRAME(ws) \
+ .cfa_off = 1*(ws), \
+ .ra_off = -1*(ws), \
+ .fp_off = 0, \
+ .use_fp = false,
+
+static inline int unwind_user_word_size(struct pt_regs *regs)
+{
+ /* We can't unwind VM86 stacks */
+ if (regs->flags & X86_VM_MASK)
+ return 0;
+#ifdef CONFIG_X86_64
+ if (!user_64bit_mode(regs))
+ return sizeof(int);
+#endif
+ return sizeof(long);
+}
+
+static inline bool unwind_user_at_function_start(struct pt_regs *regs)
+{
+ return is_uprobe_at_func_entry(regs);
+}
+
+#endif /* CONFIG_HAVE_UNWIND_USER_FP */
+
+#endif /* _ASM_X86_UNWIND_USER_H */
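For a 64-bit task unwind_user_word_size() returns 8, so ARCH_INIT_USER_FP_FRAME(8) describes the standard frame: CFA = RBP + 16, return address at CFA - 8, previous RBP at CFA - 16. A worked expansion as a sketch; the anonymous struct merely mirrors the initializer fields and is not the real unwinder type:

static const struct {
        int cfa_off, ra_off, fp_off;
        bool use_fp;
} example_fp_frame_64 = {
        ARCH_INIT_USER_FP_FRAME(8)      /* .cfa_off = 16, .ra_off = -8, .fp_off = -16 */
};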
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 678fb546f0a7..362210c79998 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -20,6 +20,11 @@ typedef u8 uprobe_opcode_t;
#define UPROBE_SWBP_INSN 0xcc
#define UPROBE_SWBP_INSN_SIZE 1
+enum {
+ ARCH_UPROBE_FLAG_CAN_OPTIMIZE = 0,
+ ARCH_UPROBE_FLAG_OPTIMIZE_FAIL = 1,
+};
+
struct uprobe_xol_ops;
struct arch_uprobe {
@@ -45,6 +50,8 @@ struct arch_uprobe {
u8 ilen;
} push;
};
+
+ unsigned long flags;
};
struct arch_uprobe_task {
@@ -55,4 +62,13 @@ struct arch_uprobe_task {
unsigned int saved_tf;
};
+#ifdef CONFIG_UPROBES
+extern bool is_uprobe_at_func_entry(struct pt_regs *regs);
+#else
+static inline bool is_uprobe_at_func_entry(struct pt_regs *regs)
+{
+ return false;
+}
+#endif /* CONFIG_UPROBES */
+
#endif /* _ASM_UPROBES_H */
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 6989b824fd32..d0b62e255290 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -122,7 +122,7 @@ struct uv_systab {
struct {
u32 type:8; /* type of entry */
u32 offset:24; /* byte offset from struct start to entry */
- } entry[1]; /* additional entries follow */
+ } entry[]; /* additional entries follow */
};
extern struct uv_systab *uv_systab;
diff --git a/arch/x86/include/asm/video.h b/arch/x86/include/asm/video.h
index 0950c9535fae..08ec328203ef 100644
--- a/arch/x86/include/asm/video.h
+++ b/arch/x86/include/asm/video.h
@@ -13,8 +13,10 @@ pgprot_t pgprot_framebuffer(pgprot_t prot,
unsigned long offset);
#define pgprot_framebuffer pgprot_framebuffer
+#ifdef CONFIG_VIDEO
bool video_is_primary_device(struct device *dev);
#define video_is_primary_device video_is_primary_device
+#endif
#include <asm-generic/video.h>
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index cca7d6641287..c85c50019523 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -106,6 +106,7 @@
#define VM_EXIT_CLEAR_BNDCFGS 0x00800000
#define VM_EXIT_PT_CONCEAL_PIP 0x01000000
#define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000
+#define VM_EXIT_LOAD_CET_STATE 0x10000000
#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
@@ -119,6 +120,7 @@
#define VM_ENTRY_LOAD_BNDCFGS 0x00010000
#define VM_ENTRY_PT_CONCEAL_PIP 0x00020000
#define VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000
+#define VM_ENTRY_LOAD_CET_STATE 0x00100000
#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
@@ -132,6 +134,7 @@
#define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49)
#define VMX_BASIC_INOUT BIT_ULL(54)
#define VMX_BASIC_TRUE_CTLS BIT_ULL(55)
+#define VMX_BASIC_NO_HW_ERROR_CODE_CC BIT_ULL(56)
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
{
@@ -369,6 +372,9 @@ enum vmcs_field {
GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822,
GUEST_SYSENTER_ESP = 0x00006824,
GUEST_SYSENTER_EIP = 0x00006826,
+ GUEST_S_CET = 0x00006828,
+ GUEST_SSP = 0x0000682a,
+ GUEST_INTR_SSP_TABLE = 0x0000682c,
HOST_CR0 = 0x00006c00,
HOST_CR3 = 0x00006c02,
HOST_CR4 = 0x00006c04,
@@ -381,6 +387,9 @@ enum vmcs_field {
HOST_IA32_SYSENTER_EIP = 0x00006c12,
HOST_RSP = 0x00006c14,
HOST_RIP = 0x00006c16,
+ HOST_S_CET = 0x00006c18,
+ HOST_SSP = 0x00006c1a,
+ HOST_INTR_SSP_TABLE = 0x00006c1c
};
/*
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 36698cc9fb44..6c8a6ead84f6 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -79,7 +79,7 @@ struct x86_init_paging {
/**
* struct x86_init_timers - platform specific timer setup
- * @setup_perpcu_clockev: set up the per cpu clock event device for the
+ * @setup_percpu_clockev: set up the per cpu clock event device for the
* boot cpu
* @timer_init: initialize the platform timer (default PIT/HPET)
* @wallclock_init: init the wallclock device
@@ -132,7 +132,7 @@ struct x86_hyper_init {
/**
* struct x86_init_acpi - x86 ACPI init functions
- * @set_root_poitner: set RSDP address
+ * @set_root_pointer: set RSDP address
* @get_root_pointer: get RSDP address
* @reduced_hw_early_init: hardware reduced platform early init
*/
@@ -145,14 +145,14 @@ struct x86_init_acpi {
/**
* struct x86_guest - Functions used by misc guest incarnations like SEV, TDX, etc.
*
- * @enc_status_change_prepare Notify HV before the encryption status of a range is changed
- * @enc_status_change_finish Notify HV after the encryption status of a range is changed
- * @enc_tlb_flush_required Returns true if a TLB flush is needed before changing page encryption status
- * @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status
- * @enc_kexec_begin Begin the two-step process of converting shared memory back
+ * @enc_status_change_prepare: Notify HV before the encryption status of a range is changed
+ * @enc_status_change_finish: Notify HV after the encryption status of a range is changed
+ * @enc_tlb_flush_required: Returns true if a TLB flush is needed before changing page encryption status
+ * @enc_cache_flush_required: Returns true if a cache flush is needed before changing page encryption status
+ * @enc_kexec_begin: Begin the two-step process of converting shared memory back
* to private. It stops the new conversions from being started
and waits for in-flight conversions to finish, if possible.
- * @enc_kexec_finish Finish the two-step process of converting shared memory to
+ * @enc_kexec_finish: Finish the two-step process of converting shared memory to
* private. All memory is private after the call when
* the function returns.
* It is called on only one CPU while the others are shut down
@@ -229,7 +229,7 @@ struct x86_legacy_devices {
* given platform/subarch.
* @X86_LEGACY_I8042_FIRMWARE_ABSENT: firmware reports that the controller
* is absent.
- * @X86_LEGACY_i8042_EXPECTED_PRESENT: the controller is likely to be
+ * @X86_LEGACY_I8042_EXPECTED_PRESENT: the controller is likely to be
* present, the i8042 driver should probe for controller existence.
*/
enum x86_legacy_i8042_state {
@@ -244,6 +244,8 @@ enum x86_legacy_i8042_state {
* @i8042: indicated if we expect the device to have i8042 controller
* present.
* @rtc: this device has a CMOS real-time clock present
+ * @warm_reset: 1 if platform allows warm reset, else 0
+ * @no_vga: 1 if (FADT.boot_flags & ACPI_FADT_NO_VGA) is set, else 0
* @reserve_bios_regions: boot code will search for the EBDA address and the
* start of the 640k - 1M BIOS region. If false, the platform must
* ensure that its memory map correctly reserves sub-1MB regions as needed.
@@ -290,9 +292,10 @@ struct x86_hyper_runtime {
* @calibrate_tsc: calibrate TSC, if different from CPU
* @get_wallclock: get time from HW clock like RTC etc.
* @set_wallclock: set time back to HW clock
- * @is_untracked_pat_range exclude from PAT logic
- * @nmi_init enable NMI on cpus
- * @get_nmi_reason get the reason an NMI was received
+ * @iommu_shutdown: set by an IOMMU driver for shutdown if necessary
+ * @is_untracked_pat_range: exclude from PAT logic
+ * @nmi_init: enable NMI on cpus
+ * @get_nmi_reason: get the reason an NMI was received
* @save_sched_clock_state: save state for sched_clock() on suspend
* @restore_sched_clock_state: restore state for sched_clock() on resume
* @apic_post_init: adjust apic if needed
@@ -307,6 +310,7 @@ struct x86_hyper_runtime {
* @realmode_reserve: reserve memory for realmode trampoline
* @realmode_init: initialize realmode trampoline
* @hyper: x86 hypervisor specific runtime callbacks
+ * @guest: guest incarnations callbacks
*/
struct x86_platform_ops {
unsigned long (*calibrate_cpu)(void);
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 59a62c3780a2..a16d4631547c 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -94,12 +94,13 @@ DECLARE_STATIC_CALL(xen_hypercall, xen_hypercall_func);
#ifdef MODULE
#define __ADDRESSABLE_xen_hypercall
#else
-#define __ADDRESSABLE_xen_hypercall __ADDRESSABLE_ASM_STR(__SCK__xen_hypercall)
+#define __ADDRESSABLE_xen_hypercall \
+ __stringify(.global STATIC_CALL_KEY(xen_hypercall);)
#endif
#define __HYPERCALL \
__ADDRESSABLE_xen_hypercall \
- "call __SCT__xen_hypercall"
+ __stringify(call STATIC_CALL_TRAMP(xen_hypercall))
#define __HYPERCALL_ENTRY(x) "a" (x)
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 85e63d58c074..59f642a94b9d 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -12,9 +12,9 @@
#include <asm/extable.h>
#include <asm/page.h>
+#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/interface/grant_table.h>
-#include <xen/features.h>
/* Xen machine address */
typedef struct xmaddr {
@@ -162,7 +162,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
* pfn_to_mfn. This will have to be removed when we figure
* out which call.
*/
- if (xen_feature(XENFEAT_auto_translated_physmap))
+ if (!xen_pv_domain())
return pfn;
mfn = __pfn_to_mfn(pfn);
@@ -175,7 +175,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
static inline int phys_to_machine_mapping_valid(unsigned long pfn)
{
- if (xen_feature(XENFEAT_auto_translated_physmap))
+ if (!xen_pv_domain())
return 1;
return __pfn_to_mfn(pfn) != INVALID_P2M_ENTRY;
@@ -210,7 +210,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
* gfn_to_pfn. This will have to be removed when we figure
* out which call.
*/
- if (xen_feature(XENFEAT_auto_translated_physmap))
+ if (!xen_pv_domain())
return mfn;
pfn = mfn_to_pfn_no_overrides(mfn);
@@ -242,7 +242,7 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
/* Pseudo-physical <-> Guest conversion */
static inline unsigned long pfn_to_gfn(unsigned long pfn)
{
- if (xen_feature(XENFEAT_auto_translated_physmap))
+ if (!xen_pv_domain())
return pfn;
else
return pfn_to_mfn(pfn);
@@ -250,7 +250,7 @@ static inline unsigned long pfn_to_gfn(unsigned long pfn)
static inline unsigned long gfn_to_pfn(unsigned long gfn)
{
- if (xen_feature(XENFEAT_auto_translated_physmap))
+ if (!xen_pv_domain())
return gfn;
else
return mfn_to_pfn(gfn);
@@ -284,7 +284,7 @@ static inline unsigned long bfn_to_local_pfn(unsigned long mfn)
{
unsigned long pfn;
- if (xen_feature(XENFEAT_auto_translated_physmap))
+ if (!xen_pv_domain())
return mfn;
pfn = mfn_to_pfn(mfn);