Diffstat (limited to 'arch/x86/kernel/cpu/common.c')
 -rw-r--r--  arch/x86/kernel/cpu/common.c | 468
 1 file changed, 305 insertions(+), 163 deletions(-)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7cce91b19fb2..8feb8fd2957a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -29,7 +29,7 @@
#include <asm/alternative.h>
#include <asm/cmdline.h>
-#include <asm/cpuid.h>
+#include <asm/cpuid/api.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/doublefault.h>
@@ -148,7 +148,7 @@ static void ppin_init(struct cpuinfo_x86 *c)
*/
info = (struct ppin_info *)id->driver_data;
- if (rdmsrl_safe(info->msr_ppin_ctl, &val))
+ if (rdmsrq_safe(info->msr_ppin_ctl, &val))
goto clear_ppin;
if ((val & 3UL) == 1UL) {
@@ -158,13 +158,13 @@ static void ppin_init(struct cpuinfo_x86 *c)
/* If PPIN is disabled, try to enable */
if (!(val & 2UL)) {
- wrmsrl_safe(info->msr_ppin_ctl, val | 2UL);
- rdmsrl_safe(info->msr_ppin_ctl, &val);
+ wrmsrq_safe(info->msr_ppin_ctl, val | 2UL);
+ rdmsrq_safe(info->msr_ppin_ctl, &val);
}
/* Is the enable bit set? */
if (val & 2UL) {
- c->ppin = __rdmsr(info->msr_ppin);
+ c->ppin = native_rdmsrq(info->msr_ppin);
set_cpu_cap(c, info->feature);
return;
}
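The rdmsrl()/wrmsrl() family becomes rdmsrq()/wrmsrq() throughout, matching the assembler's 'q' (quadword) suffix convention. A minimal sketch of the probe-and-enable pattern used in ppin_init() above, with a hypothetical helper name; the _safe variants return non-zero when the MSR access faults, so unimplemented MSRs can be probed without taking an unhandled exception:

    static bool ppin_ctl_usable(u32 msr_ctl)
    {
        u64 val;

        if (rdmsrq_safe(msr_ctl, &val))     /* faulted: MSR not implemented */
            return false;

        if (!(val & 2UL))                   /* enable bit clear: try to set it */
            wrmsrq_safe(msr_ctl, val | 2UL);

        return !rdmsrq_safe(msr_ctl, &val) && (val & 2UL);
    }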
@@ -242,6 +242,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
#endif
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
+SYM_PIC_ALIAS(gdt_page);
#ifdef CONFIG_X86_64
static int __init x86_nopcid_setup(char *s)
@@ -321,7 +322,7 @@ static int __init cachesize_setup(char *str)
__setup("cachesize=", cachesize_setup);
/* Probe for the CPUID instruction */
-bool have_cpuid_p(void)
+bool cpuid_feature(void)
{
return flag_is_changeable_p(X86_EFLAGS_ID);
}
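The have_cpuid_p() -> cpuid_feature() rename accompanies the <asm/cpuid/api.h> move. flag_is_changeable_p() behind it implements the classic probe: the CPUID instruction is present iff EFLAGS bit 21 (the ID flag) can be toggled. A simplified sketch of that check, modeled on the 32-bit implementation in this file:

    static bool eflags_id_changeable(void)
    {
        unsigned long f1, f2;

        /* Read EFLAGS, flip the ID bit, write it back, read it again. */
        asm volatile ("pushf\n\t"
                      "pushf\n\t"
                      "pop %0\n\t"
                      "mov %0, %1\n\t"
                      "xor %2, %0\n\t"
                      "push %0\n\t"
                      "popf\n\t"
                      "pushf\n\t"
                      "pop %0\n\t"
                      "popf\n\t"
                      : "=&r" (f1), "=&r" (f2)
                      : "ir" (X86_EFLAGS_ID));

        return !!((f1 ^ f2) & X86_EFLAGS_ID);
    }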
@@ -562,9 +563,9 @@ __noendbr u64 ibt_save(bool disable)
u64 msr = 0;
if (cpu_feature_enabled(X86_FEATURE_IBT)) {
- rdmsrl(MSR_IA32_S_CET, msr);
+ rdmsrq(MSR_IA32_S_CET, msr);
if (disable)
- wrmsrl(MSR_IA32_S_CET, msr & ~CET_ENDBR_EN);
+ wrmsrq(MSR_IA32_S_CET, msr & ~CET_ENDBR_EN);
}
return msr;
@@ -575,10 +576,10 @@ __noendbr void ibt_restore(u64 save)
u64 msr;
if (cpu_feature_enabled(X86_FEATURE_IBT)) {
- rdmsrl(MSR_IA32_S_CET, msr);
+ rdmsrq(MSR_IA32_S_CET, msr);
msr &= ~CET_ENDBR_EN;
msr |= (save & CET_ENDBR_EN);
- wrmsrl(MSR_IA32_S_CET, msr);
+ wrmsrq(MSR_IA32_S_CET, msr);
}
}
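ibt_save() and ibt_restore() are meant to bracket stretches of code that may execute without ENDBR landing pads. A hedged usage sketch (the firmware-call scenario is illustrative):

    u64 cet;

    cet = ibt_save(true);   /* returns prior MSR_IA32_S_CET, clears CET_ENDBR_EN */
    /* ... run code lacking ENDBR instrumentation, e.g. an EFI runtime call ... */
    ibt_restore(cet);       /* put CET_ENDBR_EN back exactly as it was */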
@@ -602,15 +603,15 @@ static __always_inline void setup_cet(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_USER_SHSTK);
if (kernel_ibt)
- wrmsrl(MSR_IA32_S_CET, CET_ENDBR_EN);
+ wrmsrq(MSR_IA32_S_CET, CET_ENDBR_EN);
else
- wrmsrl(MSR_IA32_S_CET, 0);
+ wrmsrq(MSR_IA32_S_CET, 0);
cr4_set_bits(X86_CR4_CET);
if (kernel_ibt && ibt_selftest()) {
pr_err("IBT selftest: Failed!\n");
- wrmsrl(MSR_IA32_S_CET, 0);
+ wrmsrq(MSR_IA32_S_CET, 0);
setup_clear_cpu_cap(X86_FEATURE_IBT);
}
}
@@ -621,8 +622,8 @@ __noendbr void cet_disable(void)
cpu_feature_enabled(X86_FEATURE_SHSTK)))
return;
- wrmsrl(MSR_IA32_S_CET, 0);
- wrmsrl(MSR_IA32_U_CET, 0);
+ wrmsrq(MSR_IA32_S_CET, 0);
+ wrmsrq(MSR_IA32_U_CET, 0);
}
/*
@@ -667,8 +668,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
if (!warn)
continue;
- pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
- x86_cap_flag(df->feature), df->level);
+ pr_warn("CPU: CPU feature %s disabled, no CPUID level 0x%x\n",
+ x86_cap_flags[df->feature], df->level);
}
}
@@ -751,9 +752,9 @@ void __init switch_gdt_and_percpu_base(int cpu)
* No need to load %gs. It is already correct.
*
* Writing %gs on 64bit would zero GSBASE which would make any per
- * CPU operation up to the point of the wrmsrl() fault.
+ * CPU operation up to the point of the wrmsrq() fault.
*
- * Set GSBASE to the new offset. Until the wrmsrl() happens the
+ * Set GSBASE to the new offset. Until the wrmsrq() happens the
* early mapping is still valid. That means the GSBASE update will
* lose any prior per CPU data which was not copied over in
* setup_per_cpu_areas().
@@ -761,7 +762,7 @@ void __init switch_gdt_and_percpu_base(int cpu)
* This works even with stackprotector enabled because the
* per CPU stack canary is 0 in both per CPU areas.
*/
- wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
+ wrmsrq(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
#else
/*
* %fs is already set to __KERNEL_PERCPU, but after switching GDT
@@ -846,13 +847,13 @@ void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
c->x86_cache_size = l2size;
}
-u16 __read_mostly tlb_lli_4k[NR_INFO];
-u16 __read_mostly tlb_lli_2m[NR_INFO];
-u16 __read_mostly tlb_lli_4m[NR_INFO];
-u16 __read_mostly tlb_lld_4k[NR_INFO];
-u16 __read_mostly tlb_lld_2m[NR_INFO];
-u16 __read_mostly tlb_lld_4m[NR_INFO];
-u16 __read_mostly tlb_lld_1g[NR_INFO];
+u16 __read_mostly tlb_lli_4k;
+u16 __read_mostly tlb_lli_2m;
+u16 __read_mostly tlb_lli_4m;
+u16 __read_mostly tlb_lld_4k;
+u16 __read_mostly tlb_lld_2m;
+u16 __read_mostly tlb_lld_4m;
+u16 __read_mostly tlb_lld_1g;
static void cpu_detect_tlb(struct cpuinfo_x86 *c)
{
@@ -860,12 +861,10 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c)
this_cpu->c_detect_tlb(c);
pr_info("Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n",
- tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
- tlb_lli_4m[ENTRIES]);
+ tlb_lli_4k, tlb_lli_2m, tlb_lli_4m);
pr_info("Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n",
- tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES],
- tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]);
+ tlb_lld_4k, tlb_lld_2m, tlb_lld_4m, tlb_lld_1g);
}
void get_cpu_vendor(struct cpuinfo_x86 *c)
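The tlb_ll*[NR_INFO] arrays only ever used their single ENTRIES slot, so they collapse to plain scalars. A vendor c_detect_tlb() callback now assigns them directly; the values below are illustrative:

    tlb_lli_4k = 64;    /* was: tlb_lli_4k[ENTRIES] = 64; */
    tlb_lld_4k = 128;   /* was: tlb_lld_4k[ENTRIES] = 128; */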
@@ -1007,17 +1006,18 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[CPUID_D_1_EAX] = eax;
}
- /* AMD-defined flags: level 0x80000001 */
+ /*
+ * Check if extended CPUID leaves are implemented: Max extended
+ * CPUID leaf must be in the 0x80000001-0x8000ffff range.
+ */
eax = cpuid_eax(0x80000000);
- c->extended_cpuid_level = eax;
+ c->extended_cpuid_level = ((eax & 0xffff0000) == 0x80000000) ? eax : 0;
- if ((eax & 0xffff0000) == 0x80000000) {
- if (eax >= 0x80000001) {
- cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+ if (c->extended_cpuid_level >= 0x80000001) {
+ cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
- c->x86_capability[CPUID_8000_0001_ECX] = ecx;
- c->x86_capability[CPUID_8000_0001_EDX] = edx;
- }
+ c->x86_capability[CPUID_8000_0001_ECX] = ecx;
+ c->x86_capability[CPUID_8000_0001_EDX] = edx;
}
if (c->extended_cpuid_level >= 0x80000007) {
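The rework folds the sanity check into extended_cpuid_level itself: the maximum extended leaf reported by CPUID 0x80000000 is only trusted when it lands in the 0x8000xxxx range; parts that do not implement extended CPUID echo garbage there. An equivalent standalone sketch:

    u32 max_ext = cpuid_eax(0x80000000);

    if ((max_ext & 0xffff0000) != 0x80000000)
        max_ext = 0;    /* no extended leaves implemented */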
@@ -1164,7 +1164,7 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_INTEL(INTEL_CORE_YONAH, NO_SSB),
- VULNWL_INTEL(INTEL_ATOM_AIRMONT_MID, NO_SSB | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | MSBDS_ONLY),
+ VULNWL_INTEL(INTEL_ATOM_SILVERMONT_MID2,NO_SSB | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | MSBDS_ONLY),
VULNWL_INTEL(INTEL_ATOM_AIRMONT_NP, NO_SSB | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_INTEL(INTEL_ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
@@ -1205,6 +1205,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define VULNBL_INTEL_STEPS(vfm, max_stepping, issues) \
X86_MATCH_VFM_STEPS(vfm, X86_STEP_MIN, max_stepping, issues)
+#define VULNBL_INTEL_TYPE(vfm, cpu_type, issues) \
+ X86_MATCH_VFM_CPU_TYPE(vfm, INTEL_CPU_TYPE_##cpu_type, issues)
+
#define VULNBL_AMD(family, blacklist) \
VULNBL(AMD, family, X86_MODEL_ANY, blacklist)
@@ -1226,6 +1229,10 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define GDS BIT(6)
/* CPU is affected by Register File Data Sampling */
#define RFDS BIT(7)
+/* CPU is affected by Indirect Target Selection */
+#define ITS BIT(8)
+/* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */
+#define ITS_NATIVE_ONLY BIT(9)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS),
@@ -1237,25 +1244,28 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS),
VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO),
VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS),
- VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS),
+ VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS),
+ VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS),
VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS),
VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS),
- VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS),
- VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS),
+ VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS),
+ VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS),
+ VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS),
+ VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS),
VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED),
- VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS),
- VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS),
- VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS),
- VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS),
- VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED),
- VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS),
- VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS),
- VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS),
+ VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
+ VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
+ VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
+ VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS),
+ VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS),
+ VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS),
+ VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY),
+ VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY),
VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED),
- VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS),
- VULNBL_INTEL_STEPS(INTEL_ALDERLAKE, X86_STEP_MAX, RFDS),
+ VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
+ VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS),
VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS),
- VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE, X86_STEP_MAX, RFDS),
+ VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS),
VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS),
VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS),
VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS),
@@ -1287,7 +1297,7 @@ u64 x86_read_arch_cap_msr(void)
u64 x86_arch_cap_msr = 0;
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr);
+ rdmsrq(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr);
return x86_arch_cap_msr;
}
@@ -1317,10 +1327,78 @@ static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr)
return cpu_matches(cpu_vuln_blacklist, RFDS);
}
+static bool __init vulnerable_to_its(u64 x86_arch_cap_msr)
+{
+ /* The "immunity" bit trumps everything else: */
+ if (x86_arch_cap_msr & ARCH_CAP_ITS_NO)
+ return false;
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return false;
+
+ /* None of the affected CPUs have BHI_CTRL */
+ if (boot_cpu_has(X86_FEATURE_BHI_CTRL))
+ return false;
+
+ /*
+ * If a VMM did not expose ITS_NO, assume that a guest could
+ * be running on vulnerable hardware or may migrate to such
+ * hardware.
+ */
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return true;
+
+ if (cpu_matches(cpu_vuln_blacklist, ITS))
+ return true;
+
+ return false;
+}
+
+static struct x86_cpu_id cpu_latest_microcode[] = {
+#include "microcode/intel-ucode-defs.h"
+ {}
+};
+
+static bool __init cpu_has_old_microcode(void)
+{
+ const struct x86_cpu_id *m = x86_match_cpu(cpu_latest_microcode);
+
+ /* Give unknown CPUs a pass: */
+ if (!m) {
+ /* Intel CPUs should be in the list. Warn if not: */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ pr_info("x86/CPU: Model not found in latest microcode list\n");
+ return false;
+ }
+
+ /*
+ * Hosts usually lie to guests with a super high microcode
+ * version. Just ignore what hosts tell guests:
+ */
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return false;
+
+ /* Consider all debug microcode to be old: */
+ if (boot_cpu_data.microcode & BIT(31))
+ return true;
+
+ /* Give new microcode a pass: */
+ if (boot_cpu_data.microcode >= m->driver_data)
+ return false;
+
+ /* Uh oh, too old: */
+ return true;
+}
+
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 x86_arch_cap_msr = x86_read_arch_cap_msr();
+ if (cpu_has_old_microcode()) {
+ pr_warn("x86/CPU: Running old microcode\n");
+ setup_force_cpu_bug(X86_BUG_OLD_MICROCODE);
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+ }
+
/* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) &&
!(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO))
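cpu_has_old_microcode() compares boot_cpu_data.microcode against a table generated into microcode/intel-ucode-defs.h. The entries are assumed to pair a model/stepping match with the latest released revision in .driver_data, along these lines (the values are purely illustrative, not from the real file):

    /* hypothetical entry: Skylake-X stepping 4, latest known revision 0x2007006 */
    X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 0x4, 0x4, 0x2007006),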
@@ -1331,8 +1409,10 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
- if (!cpu_matches(cpu_vuln_whitelist, NO_SPECTRE_V2))
+ if (!cpu_matches(cpu_vuln_whitelist, NO_SPECTRE_V2)) {
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2_USER);
+ }
if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
!(x86_arch_cap_msr & ARCH_CAP_SSB_NO) &&
@@ -1399,15 +1479,10 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* Affected CPU list is generally enough to enumerate the vulnerability,
* but for virtualization case check for ARCH_CAP MSR bits also, VMM may
* not want the guest to enumerate the bug.
- *
- * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
- * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
*/
if (!arch_cap_mmio_immune(x86_arch_cap_msr)) {
if (cpu_matches(cpu_vuln_blacklist, MMIO))
setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
- else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
- setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
}
if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
@@ -1436,9 +1511,12 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
if (vulnerable_to_rfds(x86_arch_cap_msr))
setup_force_cpu_bug(X86_BUG_RFDS);
- /* When virtualized, eIBRS could be hidden, assume vulnerable */
- if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) &&
- !cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
+ /*
+ * Intel parts with eIBRS are vulnerable to BHI attacks. Parts with
+ * BHI_NO still need to use the BHI mitigation to prevent Intra-mode
+ * attacks. When virtualized, eIBRS could be hidden, assume vulnerable.
+ */
+ if (!cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
(boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
boot_cpu_has(X86_FEATURE_HYPERVISOR)))
setup_force_cpu_bug(X86_BUG_BHI);
@@ -1446,6 +1524,12 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET))
setup_force_cpu_bug(X86_BUG_IBPB_NO_RET);
+ if (vulnerable_to_its(x86_arch_cap_msr)) {
+ setup_force_cpu_bug(X86_BUG_ITS);
+ if (cpu_matches(cpu_vuln_blacklist, ITS_NATIVE_ONLY))
+ setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY);
+ }
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
@@ -1479,15 +1563,96 @@ static void detect_nopl(void)
#endif
}
+static bool __init parse_set_clear_cpuid(char *arg, bool set)
+{
+ char *opt;
+ int taint = 0;
+
+ while (arg) {
+ bool found __maybe_unused = false;
+ unsigned int bit;
+
+ opt = strsep(&arg, ",");
+
+ /*
+ * Handle naked numbers first for feature flags which don't
+ * have names. It doesn't make sense for a bug not to have a
+ * name so don't handle bug flags here.
+ */
+ if (!kstrtouint(opt, 10, &bit)) {
+ if (bit < NCAPINTS * 32) {
+
+ if (set) {
+ pr_warn("setcpuid: force-enabling CPU feature flag:");
+ setup_force_cpu_cap(bit);
+ } else {
+ pr_warn("clearcpuid: force-disabling CPU feature flag:");
+ setup_clear_cpu_cap(bit);
+ }
+ /* empty-string, i.e., ""-defined feature flags */
+ if (!x86_cap_flags[bit])
+ pr_cont(" %d:%d\n", bit >> 5, bit & 31);
+ else
+ pr_cont(" %s\n", x86_cap_flags[bit]);
+
+ taint++;
+ }
+ /*
+ * The assumption is that there are no feature names with only
+ * numbers in the name, thus go to the next argument.
+ */
+ continue;
+ }
+
+ for (bit = 0; bit < 32 * (NCAPINTS + NBUGINTS); bit++) {
+ const char *flag;
+ const char *kind;
+
+ if (bit < 32 * NCAPINTS) {
+ flag = x86_cap_flags[bit];
+ kind = "feature";
+ } else {
+ kind = "bug";
+ flag = x86_bug_flags[bit - (32 * NCAPINTS)];
+ }
+
+ if (!flag)
+ continue;
+
+ if (strcmp(flag, opt))
+ continue;
+
+ if (set) {
+ pr_warn("setcpuid: force-enabling CPU %s flag: %s\n",
+ kind, flag);
+ setup_force_cpu_cap(bit);
+ } else {
+ pr_warn("clearcpuid: force-disabling CPU %s flag: %s\n",
+ kind, flag);
+ setup_clear_cpu_cap(bit);
+ }
+ taint++;
+ found = true;
+ break;
+ }
+
+ if (!found)
+ pr_warn("%s: unknown CPU flag: %s", set ? "setcpuid" : "clearcpuid", opt);
+ }
+
+ return taint;
+}
+
+
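parse_set_clear_cpuid() now backs both clearcpuid= and setcpuid= (for example, clearcpuid=smap,123 — the flag name is illustrative). For naked numbers, the word:bit pair printed above decodes the capability index as follows:

    unsigned int word = bit >> 5;   /* 32 feature bits per x86_capability word */
    unsigned int pos  = bit & 31;   /* position within that word */
    /* e.g. bit 123 is printed as "3:27" */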
/*
* We parse cpu parameters early because fpu__init_system() is executed
* before parse_early_param().
*/
static void __init cpu_parse_early_param(void)
{
+ bool cpuid_taint = false;
char arg[128];
- char *argptr = arg, *opt;
- int arglen, taint = 0;
+ int arglen;
#ifdef CONFIG_X86_32
if (cmdline_find_option_bool(boot_command_line, "no387"))
@@ -1519,61 +1684,17 @@ static void __init cpu_parse_early_param(void)
setup_clear_cpu_cap(X86_FEATURE_FRED);
arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
- if (arglen <= 0)
- return;
-
- pr_info("Clearing CPUID bits:");
-
- while (argptr) {
- bool found __maybe_unused = false;
- unsigned int bit;
-
- opt = strsep(&argptr, ",");
-
- /*
- * Handle naked numbers first for feature flags which don't
- * have names.
- */
- if (!kstrtouint(opt, 10, &bit)) {
- if (bit < NCAPINTS * 32) {
+ if (arglen > 0)
+ cpuid_taint |= parse_set_clear_cpuid(arg, false);
- /* empty-string, i.e., ""-defined feature flags */
- if (!x86_cap_flags[bit])
- pr_cont(" " X86_CAP_FMT_NUM, x86_cap_flag_num(bit));
- else
- pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
+ arglen = cmdline_find_option(boot_command_line, "setcpuid", arg, sizeof(arg));
+ if (arglen > 0)
+ cpuid_taint |= parse_set_clear_cpuid(arg, true);
- setup_clear_cpu_cap(bit);
- taint++;
- }
- /*
- * The assumption is that there are no feature names with only
- * numbers in the name thus go to the next argument.
- */
- continue;
- }
-
- for (bit = 0; bit < 32 * NCAPINTS; bit++) {
- if (!x86_cap_flag(bit))
- continue;
-
- if (strcmp(x86_cap_flag(bit), opt))
- continue;
-
- pr_cont(" %s", opt);
- setup_clear_cpu_cap(bit);
- taint++;
- found = true;
- break;
- }
-
- if (!found)
- pr_cont(" (unknown: %s)", opt);
- }
- pr_cont("\n");
-
- if (taint)
+ if (cpuid_taint) {
+ pr_warn("!!! setcpuid=/clearcpuid= in use, this is for TESTING ONLY, may break things horribly. Tainting kernel.\n");
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+ }
}
/*
@@ -1590,11 +1711,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
memset(&c->x86_capability, 0, sizeof(c->x86_capability));
c->extended_cpuid_level = 0;
- if (!have_cpuid_p())
+ if (!cpuid_feature())
identify_cpu_without_cpuid(c);
/* cyrix could have cpuid enabled via c_identify()*/
- if (have_cpuid_p()) {
+ if (cpuid_feature()) {
cpu_detect(c);
get_cpu_vendor(c);
intel_unlock_cpuid_leafs(c);
@@ -1610,6 +1731,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
c->cpu_index = 0;
filter_cpuid_features(c, false);
+ check_cpufeature_deps(c);
if (this_cpu->c_bsp_init)
this_cpu->c_bsp_init(c);
@@ -1708,11 +1830,11 @@ static bool detect_null_seg_behavior(void)
*/
unsigned long old_base, tmp;
- rdmsrl(MSR_FS_BASE, old_base);
- wrmsrl(MSR_FS_BASE, 1);
+ rdmsrq(MSR_FS_BASE, old_base);
+ wrmsrq(MSR_FS_BASE, 1);
loadsegment(fs, 0);
- rdmsrl(MSR_FS_BASE, tmp);
- wrmsrl(MSR_FS_BASE, old_base);
+ rdmsrq(MSR_FS_BASE, tmp);
+ wrmsrq(MSR_FS_BASE, old_base);
return tmp == 0;
}
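detect_null_seg_behavior() gets the same q-suffix treatment for the FS base save/restore around its probe. For context, its result is consumed along these lines elsewhere in this file (the pattern of check_null_seg_clears_base()):

    if (detect_null_seg_behavior())
        set_cpu_cap(c, X86_FEATURE_NULL_SEL_CLR_BASE);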
@@ -1753,11 +1875,11 @@ static void generic_identify(struct cpuinfo_x86 *c)
{
c->extended_cpuid_level = 0;
- if (!have_cpuid_p())
+ if (!cpuid_feature())
identify_cpu_without_cpuid(c);
/* cyrix could have cpuid enabled via c_identify()*/
- if (!have_cpuid_p())
+ if (!cpuid_feature())
return;
cpu_detect(c);
@@ -1870,6 +1992,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
/* Filter out anything that depends on CPUID levels we don't have */
filter_cpuid_features(c, true);
+ /* Check for unmet dependencies based on the CPUID dependency table */
+ check_cpufeature_deps(c);
+
/* If the model name is still unset, do table lookup. */
if (!c->x86_model_id[0]) {
const char *p;
@@ -1938,9 +2063,9 @@ void enable_sep_cpu(void)
*/
tss->x86_tss.ss1 = __KERNEL_CS;
- wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
- wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
- wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
+ wrmsrq(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1);
+ wrmsrq(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
+ wrmsrq(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32);
put_cpu();
}
@@ -1962,9 +2087,15 @@ static __init void identify_boot_cpu(void)
lkgs_init();
}
-void identify_secondary_cpu(struct cpuinfo_x86 *c)
+void identify_secondary_cpu(unsigned int cpu)
{
- BUG_ON(c == &boot_cpu_data);
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ /* Copy boot_cpu_data only on the first bringup */
+ if (!c->initialized)
+ *c = boot_cpu_data;
+ c->cpu_index = cpu;
+
identify_cpu(c);
#ifdef CONFIG_X86_32
enable_sep_cpu();
@@ -1975,6 +2106,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
update_gds_msr();
tsx_ap_init();
+ c->initialized = true;
}
void print_cpu_info(struct cpuinfo_x86 *c)
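identify_secondary_cpu() now takes a CPU number and copies boot_cpu_data itself on first bringup, rather than relying on the caller to prepare the cpuinfo. A hedged sketch of the caller side in the AP startup path (the exact call site is assumed for illustration):

    identify_secondary_cpu(raw_smp_processor_id());    /* was: identify_secondary_cpu(c) */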
@@ -2005,29 +2137,42 @@ void print_cpu_info(struct cpuinfo_x86 *c)
}
/*
- * clearcpuid= was already parsed in cpu_parse_early_param(). This dummy
- * function prevents it from becoming an environment variable for init.
+ * clearcpuid= and setcpuid= were already parsed in cpu_parse_early_param().
+ * These dummy functions prevent them from becoming environment variables for
+ * init.
*/
+
static __init int setup_clearcpuid(char *arg)
{
return 1;
}
__setup("clearcpuid=", setup_clearcpuid);
-DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
- .current_task = &init_task,
- .preempt_count = INIT_PREEMPT_COUNT,
- .top_of_stack = TOP_OF_INIT_STACK,
-};
-EXPORT_PER_CPU_SYMBOL(pcpu_hot);
-EXPORT_PER_CPU_SYMBOL(const_pcpu_hot);
+static __init int setup_setcpuid(char *arg)
+{
+ return 1;
+}
+__setup("setcpuid=", setup_setcpuid);
+
+DEFINE_PER_CPU_CACHE_HOT(struct task_struct *, current_task) = &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+EXPORT_PER_CPU_SYMBOL(const_current_task);
+
+DEFINE_PER_CPU_CACHE_HOT(int, __preempt_count) = INIT_PREEMPT_COUNT;
+EXPORT_PER_CPU_SYMBOL(__preempt_count);
+
+DEFINE_PER_CPU_CACHE_HOT(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK;
#ifdef CONFIG_X86_64
-DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
- fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
-EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
+/*
+ * Note: Do not make this dependent on CONFIG_MITIGATION_CALL_DEPTH_TRACKING
+ * so that this space is reserved in the hot cache section even when the
+ * mitigation is disabled.
+ */
+DEFINE_PER_CPU_CACHE_HOT(u64, __x86_call_depth);
+EXPORT_PER_CPU_SYMBOL(__x86_call_depth);
-static void wrmsrl_cstar(unsigned long val)
+static void wrmsrq_cstar(unsigned long val)
{
/*
* Intel CPUs do not support 32-bit SYSCALL. Writing to MSR_CSTAR
@@ -2035,37 +2180,37 @@ static void wrmsrl_cstar(unsigned long val)
* guest. Avoid the pointless write on all Intel CPUs.
*/
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
- wrmsrl(MSR_CSTAR, val);
+ wrmsrq(MSR_CSTAR, val);
}
static inline void idt_syscall_init(void)
{
- wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+ wrmsrq(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
if (ia32_enabled()) {
- wrmsrl_cstar((unsigned long)entry_SYSCALL_compat);
+ wrmsrq_cstar((unsigned long)entry_SYSCALL_compat);
/*
* This only works on Intel CPUs.
* On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
* This does not cause SYSENTER to jump to the wrong location, because
* AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
*/
- wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
- wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
+ wrmsrq_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+ wrmsrq_safe(MSR_IA32_SYSENTER_ESP,
(unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
- wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
+ wrmsrq_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
} else {
- wrmsrl_cstar((unsigned long)entry_SYSCALL32_ignore);
- wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
- wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
- wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
+ wrmsrq_cstar((unsigned long)entry_SYSCALL32_ignore);
+ wrmsrq_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
+ wrmsrq_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+ wrmsrq_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
}
/*
* Flags to clear on syscall; clear as much as possible
* to minimize user space-kernel interference.
*/
- wrmsrl(MSR_SYSCALL_MASK,
+ wrmsrq(MSR_SYSCALL_MASK,
X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF|
X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_TF|
X86_EFLAGS_IF|X86_EFLAGS_DF|X86_EFLAGS_OF|
@@ -2089,18 +2234,15 @@ void syscall_init(void)
if (!cpu_feature_enabled(X86_FEATURE_FRED))
idt_syscall_init();
}
-
-#else /* CONFIG_X86_64 */
+#endif /* CONFIG_X86_64 */
#ifdef CONFIG_STACKPROTECTOR
-DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
+DEFINE_PER_CPU_CACHE_HOT(unsigned long, __stack_chk_guard);
#ifndef CONFIG_SMP
EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
#endif
#endif
-#endif /* CONFIG_X86_64 */
-
/*
* Clear all 6 debug registers:
*/
@@ -2137,7 +2279,7 @@ static inline void setup_getcpu(int cpu)
struct desc_struct d = { };
if (boot_cpu_has(X86_FEATURE_RDTSCP) || boot_cpu_has(X86_FEATURE_RDPID))
- wrmsr(MSR_TSC_AUX, cpudata, 0);
+ wrmsrq(MSR_TSC_AUX, cpudata);
/* Store CPU and node number in limit. */
d.limit0 = cpudata;
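setup_getcpu() moves the MSR_TSC_AUX write to wrmsrq(). The value written packs the CPU and node numbers so RDPID/RDTSCP can hand both back to the vDSO; a sketch of the encoding, assuming the 12-bit CPU field used by vdso_encode_cpunode():

    unsigned long cpudata = ((unsigned long)node << 12) | cpu;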
@@ -2252,8 +2394,8 @@ void cpu_init(void)
memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
syscall_init();
- wrmsrl(MSR_FS_BASE, 0);
- wrmsrl(MSR_KERNEL_GS_BASE, 0);
+ wrmsrq(MSR_FS_BASE, 0);
+ wrmsrq(MSR_KERNEL_GS_BASE, 0);
barrier();
x2apic_setup();