diff options
Diffstat (limited to 'arch/x86/kernel/cpu/common.c')
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 468 |
1 files changed, 305 insertions, 163 deletions
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7cce91b19fb2..8feb8fd2957a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -29,7 +29,7 @@ #include <asm/alternative.h> #include <asm/cmdline.h> -#include <asm/cpuid.h> +#include <asm/cpuid/api.h> #include <asm/perf_event.h> #include <asm/mmu_context.h> #include <asm/doublefault.h> @@ -148,7 +148,7 @@ static void ppin_init(struct cpuinfo_x86 *c) */ info = (struct ppin_info *)id->driver_data; - if (rdmsrl_safe(info->msr_ppin_ctl, &val)) + if (rdmsrq_safe(info->msr_ppin_ctl, &val)) goto clear_ppin; if ((val & 3UL) == 1UL) { @@ -158,13 +158,13 @@ static void ppin_init(struct cpuinfo_x86 *c) /* If PPIN is disabled, try to enable */ if (!(val & 2UL)) { - wrmsrl_safe(info->msr_ppin_ctl, val | 2UL); - rdmsrl_safe(info->msr_ppin_ctl, &val); + wrmsrq_safe(info->msr_ppin_ctl, val | 2UL); + rdmsrq_safe(info->msr_ppin_ctl, &val); } /* Is the enable bit set? */ if (val & 2UL) { - c->ppin = __rdmsr(info->msr_ppin); + c->ppin = native_rdmsrq(info->msr_ppin); set_cpu_cap(c, info->feature); return; } @@ -242,6 +242,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #endif } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +SYM_PIC_ALIAS(gdt_page); #ifdef CONFIG_X86_64 static int __init x86_nopcid_setup(char *s) @@ -321,7 +322,7 @@ static int __init cachesize_setup(char *str) __setup("cachesize=", cachesize_setup); /* Probe for the CPUID instruction */ -bool have_cpuid_p(void) +bool cpuid_feature(void) { return flag_is_changeable_p(X86_EFLAGS_ID); } @@ -562,9 +563,9 @@ __noendbr u64 ibt_save(bool disable) u64 msr = 0; if (cpu_feature_enabled(X86_FEATURE_IBT)) { - rdmsrl(MSR_IA32_S_CET, msr); + rdmsrq(MSR_IA32_S_CET, msr); if (disable) - wrmsrl(MSR_IA32_S_CET, msr & ~CET_ENDBR_EN); + wrmsrq(MSR_IA32_S_CET, msr & ~CET_ENDBR_EN); } return msr; @@ -575,10 +576,10 @@ __noendbr void ibt_restore(u64 save) u64 msr; if (cpu_feature_enabled(X86_FEATURE_IBT)) { - rdmsrl(MSR_IA32_S_CET, msr); + rdmsrq(MSR_IA32_S_CET, msr); msr &= ~CET_ENDBR_EN; msr |= (save & CET_ENDBR_EN); - wrmsrl(MSR_IA32_S_CET, msr); + wrmsrq(MSR_IA32_S_CET, msr); } } @@ -602,15 +603,15 @@ static __always_inline void setup_cet(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_USER_SHSTK); if (kernel_ibt) - wrmsrl(MSR_IA32_S_CET, CET_ENDBR_EN); + wrmsrq(MSR_IA32_S_CET, CET_ENDBR_EN); else - wrmsrl(MSR_IA32_S_CET, 0); + wrmsrq(MSR_IA32_S_CET, 0); cr4_set_bits(X86_CR4_CET); if (kernel_ibt && ibt_selftest()) { pr_err("IBT selftest: Failed!\n"); - wrmsrl(MSR_IA32_S_CET, 0); + wrmsrq(MSR_IA32_S_CET, 0); setup_clear_cpu_cap(X86_FEATURE_IBT); } } @@ -621,8 +622,8 @@ __noendbr void cet_disable(void) cpu_feature_enabled(X86_FEATURE_SHSTK))) return; - wrmsrl(MSR_IA32_S_CET, 0); - wrmsrl(MSR_IA32_U_CET, 0); + wrmsrq(MSR_IA32_S_CET, 0); + wrmsrq(MSR_IA32_U_CET, 0); } /* @@ -667,8 +668,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) if (!warn) continue; - pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n", - x86_cap_flag(df->feature), df->level); + pr_warn("CPU: CPU feature %s disabled, no CPUID level 0x%x\n", + x86_cap_flags[df->feature], df->level); } } @@ -751,9 +752,9 @@ void __init switch_gdt_and_percpu_base(int cpu) * No need to load %gs. It is already correct. * * Writing %gs on 64bit would zero GSBASE which would make any per - * CPU operation up to the point of the wrmsrl() fault. + * CPU operation up to the point of the wrmsrq() fault. * - * Set GSBASE to the new offset. Until the wrmsrl() happens the + * Set GSBASE to the new offset. Until the wrmsrq() happens the * early mapping is still valid. That means the GSBASE update will * lose any prior per CPU data which was not copied over in * setup_per_cpu_areas(). @@ -761,7 +762,7 @@ void __init switch_gdt_and_percpu_base(int cpu) * This works even with stackprotector enabled because the * per CPU stack canary is 0 in both per CPU areas. */ - wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); + wrmsrq(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); #else /* * %fs is already set to __KERNEL_PERCPU, but after switching GDT @@ -846,13 +847,13 @@ void cpu_detect_cache_sizes(struct cpuinfo_x86 *c) c->x86_cache_size = l2size; } -u16 __read_mostly tlb_lli_4k[NR_INFO]; -u16 __read_mostly tlb_lli_2m[NR_INFO]; -u16 __read_mostly tlb_lli_4m[NR_INFO]; -u16 __read_mostly tlb_lld_4k[NR_INFO]; -u16 __read_mostly tlb_lld_2m[NR_INFO]; -u16 __read_mostly tlb_lld_4m[NR_INFO]; -u16 __read_mostly tlb_lld_1g[NR_INFO]; +u16 __read_mostly tlb_lli_4k; +u16 __read_mostly tlb_lli_2m; +u16 __read_mostly tlb_lli_4m; +u16 __read_mostly tlb_lld_4k; +u16 __read_mostly tlb_lld_2m; +u16 __read_mostly tlb_lld_4m; +u16 __read_mostly tlb_lld_1g; static void cpu_detect_tlb(struct cpuinfo_x86 *c) { @@ -860,12 +861,10 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c) this_cpu->c_detect_tlb(c); pr_info("Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n", - tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], - tlb_lli_4m[ENTRIES]); + tlb_lli_4k, tlb_lli_2m, tlb_lli_4m); pr_info("Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n", - tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES], - tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]); + tlb_lld_4k, tlb_lld_2m, tlb_lld_4m, tlb_lld_1g); } void get_cpu_vendor(struct cpuinfo_x86 *c) @@ -1007,17 +1006,18 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_D_1_EAX] = eax; } - /* AMD-defined flags: level 0x80000001 */ + /* + * Check if extended CPUID leaves are implemented: Max extended + * CPUID leaf must be in the 0x80000001-0x8000ffff range. + */ eax = cpuid_eax(0x80000000); - c->extended_cpuid_level = eax; + c->extended_cpuid_level = ((eax & 0xffff0000) == 0x80000000) ? eax : 0; - if ((eax & 0xffff0000) == 0x80000000) { - if (eax >= 0x80000001) { - cpuid(0x80000001, &eax, &ebx, &ecx, &edx); + if (c->extended_cpuid_level >= 0x80000001) { + cpuid(0x80000001, &eax, &ebx, &ecx, &edx); - c->x86_capability[CPUID_8000_0001_ECX] = ecx; - c->x86_capability[CPUID_8000_0001_EDX] = edx; - } + c->x86_capability[CPUID_8000_0001_ECX] = ecx; + c->x86_capability[CPUID_8000_0001_EDX] = edx; } if (c->extended_cpuid_level >= 0x80000007) { @@ -1164,7 +1164,7 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(INTEL_CORE_YONAH, NO_SSB), - VULNWL_INTEL(INTEL_ATOM_AIRMONT_MID, NO_SSB | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | MSBDS_ONLY), + VULNWL_INTEL(INTEL_ATOM_SILVERMONT_MID2,NO_SSB | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | MSBDS_ONLY), VULNWL_INTEL(INTEL_ATOM_AIRMONT_NP, NO_SSB | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_INTEL(INTEL_ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), @@ -1205,6 +1205,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define VULNBL_INTEL_STEPS(vfm, max_stepping, issues) \ X86_MATCH_VFM_STEPS(vfm, X86_STEP_MIN, max_stepping, issues) +#define VULNBL_INTEL_TYPE(vfm, cpu_type, issues) \ + X86_MATCH_VFM_CPU_TYPE(vfm, INTEL_CPU_TYPE_##cpu_type, issues) + #define VULNBL_AMD(family, blacklist) \ VULNBL(AMD, family, X86_MODEL_ANY, blacklist) @@ -1226,6 +1229,10 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define GDS BIT(6) /* CPU is affected by Register File Data Sampling */ #define RFDS BIT(7) +/* CPU is affected by Indirect Target Selection */ +#define ITS BIT(8) +/* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */ +#define ITS_NATIVE_ONLY BIT(9) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), @@ -1237,25 +1244,28 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS), VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO), VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS), - VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS), VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS), VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED), - VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS), - VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS), - VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED), - VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS), - VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS), + VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY), VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPS(INTEL_ALDERLAKE, X86_STEP_MAX, RFDS), + VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS), VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS), - VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE, X86_STEP_MAX, RFDS), + VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS), VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS), VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS), VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS), @@ -1287,7 +1297,7 @@ u64 x86_read_arch_cap_msr(void) u64 x86_arch_cap_msr = 0; if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr); + rdmsrq(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr); return x86_arch_cap_msr; } @@ -1317,10 +1327,78 @@ static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr) return cpu_matches(cpu_vuln_blacklist, RFDS); } +static bool __init vulnerable_to_its(u64 x86_arch_cap_msr) +{ + /* The "immunity" bit trumps everything else: */ + if (x86_arch_cap_msr & ARCH_CAP_ITS_NO) + return false; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + /* None of the affected CPUs have BHI_CTRL */ + if (boot_cpu_has(X86_FEATURE_BHI_CTRL)) + return false; + + /* + * If a VMM did not expose ITS_NO, assume that a guest could + * be running on a vulnerable hardware or may migrate to such + * hardware. + */ + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return true; + + if (cpu_matches(cpu_vuln_blacklist, ITS)) + return true; + + return false; +} + +static struct x86_cpu_id cpu_latest_microcode[] = { +#include "microcode/intel-ucode-defs.h" + {} +}; + +static bool __init cpu_has_old_microcode(void) +{ + const struct x86_cpu_id *m = x86_match_cpu(cpu_latest_microcode); + + /* Give unknown CPUs a pass: */ + if (!m) { + /* Intel CPUs should be in the list. Warn if not: */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + pr_info("x86/CPU: Model not found in latest microcode list\n"); + return false; + } + + /* + * Hosts usually lie to guests with a super high microcode + * version. Just ignore what hosts tell guests: + */ + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return false; + + /* Consider all debug microcode to be old: */ + if (boot_cpu_data.microcode & BIT(31)) + return true; + + /* Give new microcode a pass: */ + if (boot_cpu_data.microcode >= m->driver_data) + return false; + + /* Uh oh, too old: */ + return true; +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 x86_arch_cap_msr = x86_read_arch_cap_msr(); + if (cpu_has_old_microcode()) { + pr_warn("x86/CPU: Running old microcode\n"); + setup_force_cpu_bug(X86_BUG_OLD_MICROCODE); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + } + /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) && !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO)) @@ -1331,8 +1409,10 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - if (!cpu_matches(cpu_vuln_whitelist, NO_SPECTRE_V2)) + if (!cpu_matches(cpu_vuln_whitelist, NO_SPECTRE_V2)) { setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2_USER); + } if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) && @@ -1399,15 +1479,10 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Affected CPU list is generally enough to enumerate the vulnerability, * but for virtualization case check for ARCH_CAP MSR bits also, VMM may * not want the guest to enumerate the bug. - * - * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, - * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. */ if (!arch_cap_mmio_immune(x86_arch_cap_msr)) { if (cpu_matches(cpu_vuln_blacklist, MMIO)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); - else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) - setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN); } if (!cpu_has(c, X86_FEATURE_BTC_NO)) { @@ -1436,9 +1511,12 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (vulnerable_to_rfds(x86_arch_cap_msr)) setup_force_cpu_bug(X86_BUG_RFDS); - /* When virtualized, eIBRS could be hidden, assume vulnerable */ - if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) && - !cpu_matches(cpu_vuln_whitelist, NO_BHI) && + /* + * Intel parts with eIBRS are vulnerable to BHI attacks. Parts with + * BHI_NO still need to use the BHI mitigation to prevent Intra-mode + * attacks. When virtualized, eIBRS could be hidden, assume vulnerable. + */ + if (!cpu_matches(cpu_vuln_whitelist, NO_BHI) && (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || boot_cpu_has(X86_FEATURE_HYPERVISOR))) setup_force_cpu_bug(X86_BUG_BHI); @@ -1446,6 +1524,12 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET)) setup_force_cpu_bug(X86_BUG_IBPB_NO_RET); + if (vulnerable_to_its(x86_arch_cap_msr)) { + setup_force_cpu_bug(X86_BUG_ITS); + if (cpu_matches(cpu_vuln_blacklist, ITS_NATIVE_ONLY)) + setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY); + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; @@ -1479,15 +1563,96 @@ static void detect_nopl(void) #endif } +static inline bool parse_set_clear_cpuid(char *arg, bool set) +{ + char *opt; + int taint = 0; + + while (arg) { + bool found __maybe_unused = false; + unsigned int bit; + + opt = strsep(&arg, ","); + + /* + * Handle naked numbers first for feature flags which don't + * have names. It doesn't make sense for a bug not to have a + * name so don't handle bug flags here. + */ + if (!kstrtouint(opt, 10, &bit)) { + if (bit < NCAPINTS * 32) { + + if (set) { + pr_warn("setcpuid: force-enabling CPU feature flag:"); + setup_force_cpu_cap(bit); + } else { + pr_warn("clearcpuid: force-disabling CPU feature flag:"); + setup_clear_cpu_cap(bit); + } + /* empty-string, i.e., ""-defined feature flags */ + if (!x86_cap_flags[bit]) + pr_cont(" %d:%d\n", bit >> 5, bit & 31); + else + pr_cont(" %s\n", x86_cap_flags[bit]); + + taint++; + } + /* + * The assumption is that there are no feature names with only + * numbers in the name thus go to the next argument. + */ + continue; + } + + for (bit = 0; bit < 32 * (NCAPINTS + NBUGINTS); bit++) { + const char *flag; + const char *kind; + + if (bit < 32 * NCAPINTS) { + flag = x86_cap_flags[bit]; + kind = "feature"; + } else { + kind = "bug"; + flag = x86_bug_flags[bit - (32 * NCAPINTS)]; + } + + if (!flag) + continue; + + if (strcmp(flag, opt)) + continue; + + if (set) { + pr_warn("setcpuid: force-enabling CPU %s flag: %s\n", + kind, flag); + setup_force_cpu_cap(bit); + } else { + pr_warn("clearcpuid: force-disabling CPU %s flag: %s\n", + kind, flag); + setup_clear_cpu_cap(bit); + } + taint++; + found = true; + break; + } + + if (!found) + pr_warn("%s: unknown CPU flag: %s", set ? "setcpuid" : "clearcpuid", opt); + } + + return taint; +} + + /* * We parse cpu parameters early because fpu__init_system() is executed * before parse_early_param(). */ static void __init cpu_parse_early_param(void) { + bool cpuid_taint = false; char arg[128]; - char *argptr = arg, *opt; - int arglen, taint = 0; + int arglen; #ifdef CONFIG_X86_32 if (cmdline_find_option_bool(boot_command_line, "no387")) @@ -1519,61 +1684,17 @@ static void __init cpu_parse_early_param(void) setup_clear_cpu_cap(X86_FEATURE_FRED); arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg)); - if (arglen <= 0) - return; - - pr_info("Clearing CPUID bits:"); - - while (argptr) { - bool found __maybe_unused = false; - unsigned int bit; - - opt = strsep(&argptr, ","); - - /* - * Handle naked numbers first for feature flags which don't - * have names. - */ - if (!kstrtouint(opt, 10, &bit)) { - if (bit < NCAPINTS * 32) { + if (arglen > 0) + cpuid_taint |= parse_set_clear_cpuid(arg, false); - /* empty-string, i.e., ""-defined feature flags */ - if (!x86_cap_flags[bit]) - pr_cont(" " X86_CAP_FMT_NUM, x86_cap_flag_num(bit)); - else - pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit)); + arglen = cmdline_find_option(boot_command_line, "setcpuid", arg, sizeof(arg)); + if (arglen > 0) + cpuid_taint |= parse_set_clear_cpuid(arg, true); - setup_clear_cpu_cap(bit); - taint++; - } - /* - * The assumption is that there are no feature names with only - * numbers in the name thus go to the next argument. - */ - continue; - } - - for (bit = 0; bit < 32 * NCAPINTS; bit++) { - if (!x86_cap_flag(bit)) - continue; - - if (strcmp(x86_cap_flag(bit), opt)) - continue; - - pr_cont(" %s", opt); - setup_clear_cpu_cap(bit); - taint++; - found = true; - break; - } - - if (!found) - pr_cont(" (unknown: %s)", opt); - } - pr_cont("\n"); - - if (taint) + if (cpuid_taint) { + pr_warn("!!! setcpuid=/clearcpuid= in use, this is for TESTING ONLY, may break things horribly. Tainting kernel.\n"); add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + } } /* @@ -1590,11 +1711,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) memset(&c->x86_capability, 0, sizeof(c->x86_capability)); c->extended_cpuid_level = 0; - if (!have_cpuid_p()) + if (!cpuid_feature()) identify_cpu_without_cpuid(c); /* cyrix could have cpuid enabled via c_identify()*/ - if (have_cpuid_p()) { + if (cpuid_feature()) { cpu_detect(c); get_cpu_vendor(c); intel_unlock_cpuid_leafs(c); @@ -1610,6 +1731,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) c->cpu_index = 0; filter_cpuid_features(c, false); + check_cpufeature_deps(c); if (this_cpu->c_bsp_init) this_cpu->c_bsp_init(c); @@ -1708,11 +1830,11 @@ static bool detect_null_seg_behavior(void) */ unsigned long old_base, tmp; - rdmsrl(MSR_FS_BASE, old_base); - wrmsrl(MSR_FS_BASE, 1); + rdmsrq(MSR_FS_BASE, old_base); + wrmsrq(MSR_FS_BASE, 1); loadsegment(fs, 0); - rdmsrl(MSR_FS_BASE, tmp); - wrmsrl(MSR_FS_BASE, old_base); + rdmsrq(MSR_FS_BASE, tmp); + wrmsrq(MSR_FS_BASE, old_base); return tmp == 0; } @@ -1753,11 +1875,11 @@ static void generic_identify(struct cpuinfo_x86 *c) { c->extended_cpuid_level = 0; - if (!have_cpuid_p()) + if (!cpuid_feature()) identify_cpu_without_cpuid(c); /* cyrix could have cpuid enabled via c_identify()*/ - if (!have_cpuid_p()) + if (!cpuid_feature()) return; cpu_detect(c); @@ -1870,6 +1992,9 @@ static void identify_cpu(struct cpuinfo_x86 *c) /* Filter out anything that depends on CPUID levels we don't have */ filter_cpuid_features(c, true); + /* Check for unmet dependencies based on the CPUID dependency table */ + check_cpufeature_deps(c); + /* If the model name is still unset, do table lookup. */ if (!c->x86_model_id[0]) { const char *p; @@ -1938,9 +2063,9 @@ void enable_sep_cpu(void) */ tss->x86_tss.ss1 = __KERNEL_CS; - wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); - wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0); - wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); + wrmsrq(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1); + wrmsrq(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1)); + wrmsrq(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32); put_cpu(); } @@ -1962,9 +2087,15 @@ static __init void identify_boot_cpu(void) lkgs_init(); } -void identify_secondary_cpu(struct cpuinfo_x86 *c) +void identify_secondary_cpu(unsigned int cpu) { - BUG_ON(c == &boot_cpu_data); + struct cpuinfo_x86 *c = &cpu_data(cpu); + + /* Copy boot_cpu_data only on the first bringup */ + if (!c->initialized) + *c = boot_cpu_data; + c->cpu_index = cpu; + identify_cpu(c); #ifdef CONFIG_X86_32 enable_sep_cpu(); @@ -1975,6 +2106,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) update_gds_msr(); tsx_ap_init(); + c->initialized = true; } void print_cpu_info(struct cpuinfo_x86 *c) @@ -2005,29 +2137,42 @@ void print_cpu_info(struct cpuinfo_x86 *c) } /* - * clearcpuid= was already parsed in cpu_parse_early_param(). This dummy - * function prevents it from becoming an environment variable for init. + * clearcpuid= and setcpuid= were already parsed in cpu_parse_early_param(). + * These dummy functions prevent them from becoming an environment variable for + * init. */ + static __init int setup_clearcpuid(char *arg) { return 1; } __setup("clearcpuid=", setup_clearcpuid); -DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = { - .current_task = &init_task, - .preempt_count = INIT_PREEMPT_COUNT, - .top_of_stack = TOP_OF_INIT_STACK, -}; -EXPORT_PER_CPU_SYMBOL(pcpu_hot); -EXPORT_PER_CPU_SYMBOL(const_pcpu_hot); +static __init int setup_setcpuid(char *arg) +{ + return 1; +} +__setup("setcpuid=", setup_setcpuid); + +DEFINE_PER_CPU_CACHE_HOT(struct task_struct *, current_task) = &init_task; +EXPORT_PER_CPU_SYMBOL(current_task); +EXPORT_PER_CPU_SYMBOL(const_current_task); + +DEFINE_PER_CPU_CACHE_HOT(int, __preempt_count) = INIT_PREEMPT_COUNT; +EXPORT_PER_CPU_SYMBOL(__preempt_count); + +DEFINE_PER_CPU_CACHE_HOT(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK; #ifdef CONFIG_X86_64 -DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, - fixed_percpu_data) __aligned(PAGE_SIZE) __visible; -EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data); +/* + * Note: Do not make this dependant on CONFIG_MITIGATION_CALL_DEPTH_TRACKING + * so that this space is reserved in the hot cache section even when the + * mitigation is disabled. + */ +DEFINE_PER_CPU_CACHE_HOT(u64, __x86_call_depth); +EXPORT_PER_CPU_SYMBOL(__x86_call_depth); -static void wrmsrl_cstar(unsigned long val) +static void wrmsrq_cstar(unsigned long val) { /* * Intel CPUs do not support 32-bit SYSCALL. Writing to MSR_CSTAR @@ -2035,37 +2180,37 @@ static void wrmsrl_cstar(unsigned long val) * guest. Avoid the pointless write on all Intel CPUs. */ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) - wrmsrl(MSR_CSTAR, val); + wrmsrq(MSR_CSTAR, val); } static inline void idt_syscall_init(void) { - wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); + wrmsrq(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); if (ia32_enabled()) { - wrmsrl_cstar((unsigned long)entry_SYSCALL_compat); + wrmsrq_cstar((unsigned long)entry_SYSCALL_compat); /* * This only works on Intel CPUs. * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP. * This does not cause SYSENTER to jump to the wrong location, because * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). */ - wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, + wrmsrq_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); + wrmsrq_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1)); - wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); + wrmsrq_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); } else { - wrmsrl_cstar((unsigned long)entry_SYSCALL32_ignore); - wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); - wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL); + wrmsrq_cstar((unsigned long)entry_SYSCALL32_ignore); + wrmsrq_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); + wrmsrq_safe(MSR_IA32_SYSENTER_ESP, 0ULL); + wrmsrq_safe(MSR_IA32_SYSENTER_EIP, 0ULL); } /* * Flags to clear on syscall; clear as much as possible * to minimize user space-kernel interference. */ - wrmsrl(MSR_SYSCALL_MASK, + wrmsrq(MSR_SYSCALL_MASK, X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF| X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_TF| X86_EFLAGS_IF|X86_EFLAGS_DF|X86_EFLAGS_OF| @@ -2089,18 +2234,15 @@ void syscall_init(void) if (!cpu_feature_enabled(X86_FEATURE_FRED)) idt_syscall_init(); } - -#else /* CONFIG_X86_64 */ +#endif /* CONFIG_X86_64 */ #ifdef CONFIG_STACKPROTECTOR -DEFINE_PER_CPU(unsigned long, __stack_chk_guard); +DEFINE_PER_CPU_CACHE_HOT(unsigned long, __stack_chk_guard); #ifndef CONFIG_SMP EXPORT_PER_CPU_SYMBOL(__stack_chk_guard); #endif #endif -#endif /* CONFIG_X86_64 */ - /* * Clear all 6 debug registers: */ @@ -2137,7 +2279,7 @@ static inline void setup_getcpu(int cpu) struct desc_struct d = { }; if (boot_cpu_has(X86_FEATURE_RDTSCP) || boot_cpu_has(X86_FEATURE_RDPID)) - wrmsr(MSR_TSC_AUX, cpudata, 0); + wrmsrq(MSR_TSC_AUX, cpudata); /* Store CPU and node number in limit. */ d.limit0 = cpudata; @@ -2252,8 +2394,8 @@ void cpu_init(void) memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); syscall_init(); - wrmsrl(MSR_FS_BASE, 0); - wrmsrl(MSR_KERNEL_GS_BASE, 0); + wrmsrq(MSR_FS_BASE, 0); + wrmsrq(MSR_KERNEL_GS_BASE, 0); barrier(); x2apic_setup(); |