summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/svm/svm.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/svm/svm.c')
-rw-r--r--arch/x86/kvm/svm/svm.c228
1 files changed, 178 insertions, 50 deletions
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index a713c803a3a3..67fee545d42a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -31,6 +31,7 @@
#include <linux/string_choices.h>
#include <asm/apic.h>
+#include <asm/msr.h>
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
@@ -475,24 +476,18 @@ static void svm_inject_exception(struct kvm_vcpu *vcpu)
static void svm_init_erratum_383(void)
{
- u32 low, high;
- int err;
u64 val;
if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH))
return;
/* Use _safe variants to not break nested virtualization */
- val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
- if (err)
+ if (native_read_msr_safe(MSR_AMD64_DC_CFG, &val))
return;
val |= (1ULL << 47);
- low = lower_32_bits(val);
- high = upper_32_bits(val);
-
- native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
+ native_write_msr_safe(MSR_AMD64_DC_CFG, val);
erratum_383_found = true;
}
@@ -566,7 +561,7 @@ static void __svm_write_tsc_multiplier(u64 multiplier)
if (multiplier == __this_cpu_read(current_tsc_ratio))
return;
- wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
+ wrmsrq(MSR_AMD64_TSC_RATIO, multiplier);
__this_cpu_write(current_tsc_ratio, multiplier);
}
@@ -579,15 +574,15 @@ static inline void kvm_cpu_svm_disable(void)
{
uint64_t efer;
- wrmsrl(MSR_VM_HSAVE_PA, 0);
- rdmsrl(MSR_EFER, efer);
+ wrmsrq(MSR_VM_HSAVE_PA, 0);
+ rdmsrq(MSR_EFER, efer);
if (efer & EFER_SVME) {
/*
* Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and
* NMI aren't blocked.
*/
stgi();
- wrmsrl(MSR_EFER, efer & ~EFER_SVME);
+ wrmsrq(MSR_EFER, efer & ~EFER_SVME);
}
}
@@ -616,7 +611,7 @@ static int svm_enable_virtualization_cpu(void)
uint64_t efer;
int me = raw_smp_processor_id();
- rdmsrl(MSR_EFER, efer);
+ rdmsrq(MSR_EFER, efer);
if (efer & EFER_SVME)
return -EBUSY;
@@ -626,9 +621,9 @@ static int svm_enable_virtualization_cpu(void)
sd->next_asid = sd->max_asid + 1;
sd->min_asid = max_sev_asid + 1;
- wrmsrl(MSR_EFER, efer | EFER_SVME);
+ wrmsrq(MSR_EFER, efer | EFER_SVME);
- wrmsrl(MSR_VM_HSAVE_PA, sd->save_area_pa);
+ wrmsrq(MSR_VM_HSAVE_PA, sd->save_area_pa);
if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
/*
@@ -649,13 +644,12 @@ static int svm_enable_virtualization_cpu(void)
* erratum is present everywhere).
*/
if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
- uint64_t len, status = 0;
+ u64 len, status = 0;
int err;
- len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
+ err = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &len);
if (!err)
- status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
- &err);
+ err = native_read_msr_safe(MSR_AMD64_OSVW_STATUS, &status);
if (err)
osvw_status = osvw_len = 0;
@@ -1297,8 +1291,12 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm_set_intercept(svm, INTERCEPT_MWAIT);
}
- if (!kvm_hlt_in_guest(vcpu->kvm))
- svm_set_intercept(svm, INTERCEPT_HLT);
+ if (!kvm_hlt_in_guest(vcpu->kvm)) {
+ if (cpu_feature_enabled(X86_FEATURE_IDLE_HLT))
+ svm_set_intercept(svm, INTERCEPT_IDLE_HLT);
+ else
+ svm_set_intercept(svm, INTERCEPT_HLT);
+ }
control->iopm_base_pa = iopm_base;
control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
@@ -1508,6 +1506,63 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
__free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE));
}
+#ifdef CONFIG_CPU_MITIGATIONS
+static DEFINE_SPINLOCK(srso_lock);
+static atomic_t srso_nr_vms;
+
+static void svm_srso_clear_bp_spec_reduce(void *ign)
+{
+ struct svm_cpu_data *sd = this_cpu_ptr(&svm_data);
+
+ if (!sd->bp_spec_reduce_set)
+ return;
+
+ msr_clear_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT);
+ sd->bp_spec_reduce_set = false;
+}
+
+static void svm_srso_vm_destroy(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE))
+ return;
+
+ if (atomic_dec_return(&srso_nr_vms))
+ return;
+
+ guard(spinlock)(&srso_lock);
+
+ /*
+ * Verify a new VM didn't come along, acquire the lock, and increment
+ * the count before this task acquired the lock.
+ */
+ if (atomic_read(&srso_nr_vms))
+ return;
+
+ on_each_cpu(svm_srso_clear_bp_spec_reduce, NULL, 1);
+}
+
+static void svm_srso_vm_init(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE))
+ return;
+
+ /*
+ * Acquire the lock on 0 => 1 transitions to ensure a potential 1 => 0
+ * transition, i.e. destroying the last VM, is fully complete, e.g. so
+ * that a delayed IPI doesn't clear BP_SPEC_REDUCE after a vCPU runs.
+ */
+ if (atomic_inc_not_zero(&srso_nr_vms))
+ return;
+
+ guard(spinlock)(&srso_lock);
+
+ atomic_inc(&srso_nr_vms);
+}
+#else
+static void svm_srso_vm_init(void) { }
+static void svm_srso_vm_destroy(void) { }
+#endif
+
static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1540,6 +1595,11 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
(!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm)))
kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
+ if (cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE) &&
+ !sd->bp_spec_reduce_set) {
+ sd->bp_spec_reduce_set = true;
+ msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT);
+ }
svm->guest_state_loaded = true;
}
@@ -1559,7 +1619,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (sd->current_vmcb != svm->vmcb) {
sd->current_vmcb = svm->vmcb;
- if (!cpu_feature_enabled(X86_FEATURE_IBPB_ON_VMEXIT))
+ if (!cpu_feature_enabled(X86_FEATURE_IBPB_ON_VMEXIT) &&
+ static_branch_likely(&switch_vcpu_ibpb))
indirect_branch_prediction_barrier();
}
if (kvm_vcpu_apicv_active(vcpu))
@@ -1932,7 +1993,7 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
- kvm_update_cpuid_runtime(vcpu);
+ vcpu->arch.cpuid_dynamic_bits_dirty = true;
}
static void svm_set_segment(struct kvm_vcpu *vcpu,
@@ -2138,14 +2199,13 @@ static int ac_interception(struct kvm_vcpu *vcpu)
static bool is_erratum_383(void)
{
- int err, i;
+ int i;
u64 value;
if (!erratum_383_found)
return false;
- value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
- if (err)
+ if (native_read_msr_safe(MSR_IA32_MC0_STATUS, &value))
return false;
/* Bit 62 may or may not be set for this mce */
@@ -2156,17 +2216,11 @@ static bool is_erratum_383(void)
/* Clear MCi_STATUS registers */
for (i = 0; i < 6; ++i)
- native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
-
- value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
- if (!err) {
- u32 low, high;
+ native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0);
+ if (!native_read_msr_safe(MSR_IA32_MCG_STATUS, &value)) {
value &= ~(1ULL << 2);
- low = lower_32_bits(value);
- high = upper_32_bits(value);
-
- native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
+ native_write_msr_safe(MSR_IA32_MCG_STATUS, value);
}
/* Flush tlb to evict multi-match entries */
@@ -2220,6 +2274,10 @@ static int shutdown_interception(struct kvm_vcpu *vcpu)
*/
if (!sev_es_guest(vcpu->kvm)) {
clear_page(svm->vmcb);
+#ifdef CONFIG_KVM_SMM
+ if (is_smm(vcpu))
+ kvm_smm_changed(vcpu, false);
+#endif
kvm_vcpu_reset(vcpu, true);
}
@@ -2973,11 +3031,7 @@ static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->sev_es.ghcb))
return kvm_complete_insn_gp(vcpu, err);
- ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 1);
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb,
- X86_TRAP_GP |
- SVM_EVTINJ_TYPE_EXEPT |
- SVM_EVTINJ_VALID);
+ svm_vmgexit_inject_exception(svm, X86_TRAP_GP);
return 1;
}
@@ -3165,6 +3219,27 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
kvm_pr_unimpl_wrmsr(vcpu, ecx, data);
break;
}
+
+ /*
+ * AMD changed the architectural behavior of bits 5:2. On CPUs
+ * without BusLockTrap, bits 5:2 control "external pins", but
+ * on CPUs that support BusLockDetect, bit 2 enables BusLockTrap
+ * and bits 5:3 are reserved-to-zero. Sadly, old KVM allowed
+ * the guest to set bits 5:2 despite not actually virtualizing
+ * Performance-Monitoring/Breakpoint external pins. Drop bits
+ * 5:2 for backwards compatibility.
+ */
+ data &= ~GENMASK(5, 2);
+
+ /*
+ * Suppress BTF as KVM doesn't virtualize BTF, but there's no
+ * way to communicate lack of support to the guest.
+ */
+ if (data & DEBUGCTLMSR_BTF) {
+ kvm_pr_unimpl_wrmsr(vcpu, MSR_IA32_DEBUGCTLMSR, data);
+ data &= ~DEBUGCTLMSR_BTF;
+ }
+
if (data & DEBUGCTL_RESERVED_BITS)
return 1;
@@ -3272,6 +3347,17 @@ static int invpcid_interception(struct kvm_vcpu *vcpu)
type = svm->vmcb->control.exit_info_2;
gva = svm->vmcb->control.exit_info_1;
+ /*
+ * FIXME: Perform segment checks for 32-bit mode, and inject #SS if the
+ * stack segment is used. The intercept takes priority over all
+ * #GP checks except CPL>0, but somehow still generates a linear
+ * address? The APM is sorely lacking.
+ */
+ if (is_noncanonical_address(gva, vcpu, 0)) {
+ kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+ return 1;
+ }
+
return kvm_handle_invpcid(vcpu, type, gva);
}
@@ -3342,6 +3428,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[SVM_EXIT_CR4_WRITE_TRAP] = cr_trap,
[SVM_EXIT_CR8_WRITE_TRAP] = cr_trap,
[SVM_EXIT_INVPCID] = invpcid_interception,
+ [SVM_EXIT_IDLE_HLT] = kvm_emulate_halt,
[SVM_EXIT_NPF] = npf_interception,
[SVM_EXIT_RSM] = rsm_interception,
[SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
@@ -3504,7 +3591,7 @@ int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code)
return interrupt_window_interception(vcpu);
else if (exit_code == SVM_EXIT_INTR)
return intr_interception(vcpu);
- else if (exit_code == SVM_EXIT_HLT)
+ else if (exit_code == SVM_EXIT_HLT || exit_code == SVM_EXIT_IDLE_HLT)
return kvm_emulate_halt(vcpu);
else if (exit_code == SVM_EXIT_NPF)
return npf_interception(vcpu);
@@ -3587,7 +3674,7 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
return svm_invoke_exit_handler(vcpu, exit_code);
}
-static void pre_svm_run(struct kvm_vcpu *vcpu)
+static int pre_svm_run(struct kvm_vcpu *vcpu)
{
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3609,6 +3696,8 @@ static void pre_svm_run(struct kvm_vcpu *vcpu)
/* FIXME: handle wraparound of asid_generation */
if (svm->current_vmcb->asid_generation != sd->asid_generation)
new_asid(svm, sd);
+
+ return 0;
}
static void svm_inject_nmi(struct kvm_vcpu *vcpu)
@@ -4116,20 +4205,23 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
vcpu->arch.nmi_injected = true;
svm->nmi_l1_to_l2 = nmi_l1_to_l2;
break;
- case SVM_EXITINTINFO_TYPE_EXEPT:
+ case SVM_EXITINTINFO_TYPE_EXEPT: {
+ u32 error_code = 0;
+
/*
* Never re-inject a #VC exception.
*/
if (vector == X86_TRAP_VC)
break;
- if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
- u32 err = svm->vmcb->control.exit_int_info_err;
- kvm_requeue_exception_e(vcpu, vector, err);
+ if (exitintinfo & SVM_EXITINTINFO_VALID_ERR)
+ error_code = svm->vmcb->control.exit_int_info_err;
- } else
- kvm_requeue_exception(vcpu, vector);
+ kvm_requeue_exception(vcpu, vector,
+ exitintinfo & SVM_EXITINTINFO_VALID_ERR,
+ error_code);
break;
+ }
case SVM_EXITINTINFO_TYPE_INTR:
kvm_queue_interrupt(vcpu, vector, false);
break;
@@ -4189,6 +4281,18 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
guest_state_enter_irqoff();
+ /*
+ * Set RFLAGS.IF prior to VMRUN, as the host's RFLAGS.IF at the time of
+ * VMRUN controls whether or not physical IRQs are masked (KVM always
+ * runs with V_INTR_MASKING_MASK). Toggle RFLAGS.IF here to avoid the
+ * temptation to do STI+VMRUN+CLI, as AMD CPUs bleed the STI shadow
+ * into guest state if delivery of an event during VMRUN triggers a
+ * #VMEXIT, and the guest_state transitions already tell lockdep that
+ * IRQs are being enabled/disabled. Note! GIF=0 for the entirety of
+ * this path, so IRQs aren't actually unmasked while running host code.
+ */
+ raw_local_irq_enable();
+
amd_clear_divider();
if (sev_es_guest(vcpu->kvm))
@@ -4197,6 +4301,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
else
__svm_vcpu_run(svm, spec_ctrl_intercepted);
+ raw_local_irq_disable();
+
guest_state_exit_irqoff();
}
@@ -4231,7 +4337,12 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu,
if (force_immediate_exit)
smp_send_reschedule(vcpu->cpu);
- pre_svm_run(vcpu);
+ if (pre_svm_run(vcpu)) {
+ vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ vcpu->run->fail_entry.hardware_entry_failure_reason = SVM_EXIT_ERR;
+ vcpu->run->fail_entry.cpu = vcpu->cpu;
+ return EXIT_FASTPATH_EXIT_USERSPACE;
+ }
sync_lapic_to_cr8(vcpu);
@@ -4253,6 +4364,16 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu,
clgi();
kvm_load_guest_xsave_state(vcpu);
+ /*
+ * Hardware only context switches DEBUGCTL if LBR virtualization is
+ * enabled. Manually load DEBUGCTL if necessary (and restore it after
+ * VM-Exit), as running with the host's DEBUGCTL can negatively affect
+ * guest state and can even be fatal, e.g. due to Bus Lock Detect.
+ */
+ if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) &&
+ vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl)
+ update_debugctlmsr(svm->vmcb->save.dbgctl);
+
kvm_wait_lapic_expire(vcpu);
/*
@@ -4280,6 +4401,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu,
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
+ if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) &&
+ vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl)
+ update_debugctlmsr(vcpu->arch.host_debugctl);
+
kvm_load_host_xsave_state(vcpu);
stgi();
@@ -4958,6 +5083,8 @@ static void svm_vm_destroy(struct kvm *kvm)
{
avic_vm_destroy(kvm);
sev_vm_destroy(kvm);
+
+ svm_srso_vm_destroy();
}
static int svm_vm_init(struct kvm *kvm)
@@ -4983,6 +5110,7 @@ static int svm_vm_init(struct kvm *kvm)
return ret;
}
+ svm_srso_vm_init();
return 0;
}
@@ -5154,7 +5282,7 @@ static __init void svm_adjust_mmio_mask(void)
return;
/* If memory encryption is not enabled, use existing mask */
- rdmsrl(MSR_AMD64_SYSCFG, msr);
+ rdmsrq(MSR_AMD64_SYSCFG, msr);
if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
return;