Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 137
1 file changed, 87 insertions, 50 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index be7bb6d20129..570e7f8cbf64 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -90,7 +90,6 @@
 #include "trace.h"
 
 #define MAX_IO_MSRS 256
-#define KVM_MAX_MCE_BANKS 32
 
 /*
  * Note, kvm_caps fields should *never* have default values, all fields must be
@@ -578,7 +577,7 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
 	for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) {
 		values = &msrs->values[slot];
 		if (values->host != values->curr) {
-			wrmsrl(kvm_uret_msrs_list[slot], values->host);
+			wrmsrq(kvm_uret_msrs_list[slot], values->host);
 			values->curr = values->host;
 		}
 	}
@@ -590,10 +589,10 @@ static int kvm_probe_user_return_msr(u32 msr)
 	int ret;
 
 	preempt_disable();
-	ret = rdmsrl_safe(msr, &val);
+	ret = rdmsrq_safe(msr, &val);
 	if (ret)
 		goto out;
-	ret = wrmsrl_safe(msr, val);
+	ret = wrmsrq_safe(msr, val);
 out:
 	preempt_enable();
 	return ret;
@@ -630,12 +629,21 @@ static void kvm_user_return_msr_cpu_online(void)
 	int i;
 
 	for (i = 0; i < kvm_nr_uret_msrs; ++i) {
-		rdmsrl_safe(kvm_uret_msrs_list[i], &value);
+		rdmsrq_safe(kvm_uret_msrs_list[i], &value);
 		msrs->values[i].host = value;
 		msrs->values[i].curr = value;
 	}
 }
 
+static void kvm_user_return_register_notifier(struct kvm_user_return_msrs *msrs)
+{
+	if (!msrs->registered) {
+		msrs->urn.on_user_return = kvm_on_user_return;
+		user_return_notifier_register(&msrs->urn);
+		msrs->registered = true;
+	}
+}
+
 int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
 {
 	struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs);
@@ -644,20 +652,25 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
 	value = (value & mask) | (msrs->values[slot].host & ~mask);
 	if (value == msrs->values[slot].curr)
 		return 0;
-	err = wrmsrl_safe(kvm_uret_msrs_list[slot], value);
+	err = wrmsrq_safe(kvm_uret_msrs_list[slot], value);
 	if (err)
 		return 1;
 
 	msrs->values[slot].curr = value;
-	if (!msrs->registered) {
-		msrs->urn.on_user_return = kvm_on_user_return;
-		user_return_notifier_register(&msrs->urn);
-		msrs->registered = true;
-	}
+	kvm_user_return_register_notifier(msrs);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_user_return_msr);
 
+void kvm_user_return_msr_update_cache(unsigned int slot, u64 value)
+{
+	struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs);
+
+	msrs->values[slot].curr = value;
+	kvm_user_return_register_notifier(msrs);
+}
+EXPORT_SYMBOL_GPL(kvm_user_return_msr_update_cache);
+
 static void drop_user_return_notifiers(void)
 {
 	struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs);
@@ -1174,7 +1187,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
 
 		if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) &&
 		    vcpu->arch.ia32_xss != kvm_host.xss)
-			wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
+			wrmsrq(MSR_IA32_XSS, vcpu->arch.ia32_xss);
 	}
 
 	if (cpu_feature_enabled(X86_FEATURE_PKU) &&
@@ -1205,7 +1218,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
 
 		if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) &&
 		    vcpu->arch.ia32_xss != kvm_host.xss)
-			wrmsrl(MSR_IA32_XSS, kvm_host.xss);
+			wrmsrq(MSR_IA32_XSS, kvm_host.xss);
 	}
 }
 
@@ -1662,7 +1675,7 @@ static int kvm_get_feature_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
 		*data = MSR_PLATFORM_INFO_CPUID_FAULT;
 		break;
 	case MSR_IA32_UCODE_REV:
-		rdmsrl_safe(index, data);
+		rdmsrq_safe(index, data);
 		break;
 	default:
 		return kvm_x86_call(get_feature_msr)(index, data);
@@ -3829,7 +3842,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (!data)
 			break;
 
-		wrmsrl(MSR_IA32_PRED_CMD, data);
+		wrmsrq(MSR_IA32_PRED_CMD, data);
 		break;
 	}
 	case MSR_IA32_FLUSH_CMD:
@@ -3842,7 +3855,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (!data)
 			break;
 
-		wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+		wrmsrq(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
 		break;
 	case MSR_EFER:
 		return set_efer(vcpu, msr_info);
@@ -4739,6 +4752,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		break;
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
+		if (kvm)
+			r = kvm->max_vcpus;
 		break;
 	case KVM_CAP_MAX_VCPU_ID:
 		r = KVM_MAX_VCPU_IDS;
@@ -4794,7 +4809,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = enable_pmu ? KVM_CAP_PMU_VALID_MASK : 0;
 		break;
 	case KVM_CAP_DISABLE_QUIRKS2:
-		r = KVM_X86_VALID_QUIRKS;
+		r = kvm_caps.supported_quirks;
 		break;
 	case KVM_CAP_X86_NOTIFY_VMEXIT:
 		r = kvm_caps.has_notify_vmexit;
@@ -5117,6 +5132,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 				    struct kvm_lapic_state *s)
 {
+	if (vcpu->arch.apic->guest_apic_protected)
+		return -EINVAL;
+
 	kvm_x86_call(sync_pir_to_irr)(vcpu);
 
 	return kvm_apic_get_state(vcpu, s);
@@ -5127,6 +5145,9 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
 {
 	int r;
 
+	if (vcpu->arch.apic->guest_apic_protected)
+		return -EINVAL;
+
 	r = kvm_apic_set_state(vcpu, s);
 	if (r)
 		return r;
@@ -6304,6 +6325,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	case KVM_SET_DEVICE_ATTR:
 		r = kvm_vcpu_ioctl_device_attr(vcpu, ioctl, argp);
 		break;
+	case KVM_MEMORY_ENCRYPT_OP:
+		r = -ENOTTY;
+		if (!kvm_x86_ops.vcpu_mem_enc_ioctl)
+			goto out;
+		r = kvm_x86_ops.vcpu_mem_enc_ioctl(vcpu, argp);
+		break;
 	default:
 		r = -EINVAL;
 	}
@@ -6491,7 +6518,7 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 	struct kvm_vcpu *vcpu;
 	unsigned long i;
 
-	if (!kvm_x86_ops.cpu_dirty_log_size)
+	if (!kvm->arch.cpu_dirty_log_size)
 		return;
 
 	kvm_for_each_vcpu(i, vcpu, kvm)
@@ -6521,11 +6548,11 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 	switch (cap->cap) {
 	case KVM_CAP_DISABLE_QUIRKS2:
 		r = -EINVAL;
-		if (cap->args[0] & ~KVM_X86_VALID_QUIRKS)
+		if (cap->args[0] & ~kvm_caps.supported_quirks)
 			break;
 		fallthrough;
 	case KVM_CAP_DISABLE_QUIRKS:
-		kvm->arch.disabled_quirks = cap->args[0];
+		kvm->arch.disabled_quirks |= cap->args[0] & kvm_caps.supported_quirks;
 		r = 0;
 		break;
 	case KVM_CAP_SPLIT_IRQCHIP: {
@@ -7300,10 +7327,6 @@ set_pit2_out:
 		goto out;
 	}
 	case KVM_MEMORY_ENCRYPT_OP: {
-		r = -ENOTTY;
-		if (!kvm_x86_ops.mem_enc_ioctl)
-			goto out;
-
 		r = kvm_x86_call(mem_enc_ioctl)(kvm, argp);
 		break;
 	}
@@ -9738,7 +9761,7 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 	 * with an exception. PAT[0] is set to WB on RESET and also by the
	 * kernel, i.e. failure indicates a kernel bug or broken firmware.
 	 */
-	if (rdmsrl_safe(MSR_IA32_CR_PAT, &host_pat) ||
+	if (rdmsrq_safe(MSR_IA32_CR_PAT, &host_pat) ||
 	    (host_pat & GENMASK(2, 0)) != 6) {
 		pr_err("host PAT[0] is not WB\n");
 		return -EIO;
@@ -9771,16 +9794,18 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 		kvm_host.xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 		kvm_caps.supported_xcr0 = kvm_host.xcr0 & KVM_SUPPORTED_XCR0;
 	}
+	kvm_caps.supported_quirks = KVM_X86_VALID_QUIRKS;
+	kvm_caps.inapplicable_quirks = KVM_X86_CONDITIONAL_QUIRKS;
 
-	rdmsrl_safe(MSR_EFER, &kvm_host.efer);
+	rdmsrq_safe(MSR_EFER, &kvm_host.efer);
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		rdmsrl(MSR_IA32_XSS, kvm_host.xss);
+		rdmsrq(MSR_IA32_XSS, kvm_host.xss);
 
 	kvm_init_pmu_capability(ops->pmu_ops);
 
 	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
-		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, kvm_host.arch_capabilities);
+		rdmsrq(MSR_IA32_ARCH_CAPABILITIES, kvm_host.arch_capabilities);
 
 	r = ops->hardware_setup();
 	if (r != 0)
@@ -9815,6 +9840,10 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 	if (IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_mmu_enabled)
 		kvm_caps.supported_vm_types |= BIT(KVM_X86_SW_PROTECTED_VM);
 
+	/* KVM always ignores guest PAT for shadow paging. */
+	if (!tdp_enabled)
+		kvm_caps.supported_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT;
+
 	if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
 		kvm_caps.supported_xss = 0;
 
@@ -10023,13 +10052,16 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
 	return kvm_skip_emulated_instruction(vcpu);
 }
 
-int ____kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
-			      unsigned long a0, unsigned long a1,
-			      unsigned long a2, unsigned long a3,
-			      int op_64_bit, int cpl,
+int ____kvm_emulate_hypercall(struct kvm_vcpu *vcpu, int cpl,
 			      int (*complete_hypercall)(struct kvm_vcpu *))
 {
 	unsigned long ret;
+	unsigned long nr = kvm_rax_read(vcpu);
+	unsigned long a0 = kvm_rbx_read(vcpu);
+	unsigned long a1 = kvm_rcx_read(vcpu);
+	unsigned long a2 = kvm_rdx_read(vcpu);
+	unsigned long a3 = kvm_rsi_read(vcpu);
+	int op_64_bit = is_64_bit_hypercall(vcpu);
 
 	++vcpu->stat.hypercalls;
 
@@ -10132,9 +10164,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	if (kvm_hv_hypercall_enabled(vcpu))
 		return kvm_hv_hypercall(vcpu);
 
-	return __kvm_emulate_hypercall(vcpu, rax, rbx, rcx, rdx, rsi,
-				       is_64_bit_hypercall(vcpu),
-				       kvm_x86_call(get_cpl)(vcpu),
+	return __kvm_emulate_hypercall(vcpu, kvm_x86_call(get_cpl)(vcpu),
 				       complete_hypercall_exit);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
@@ -10976,9 +11006,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		switch_fpu_return();
 
 	if (vcpu->arch.guest_fpu.xfd_err)
-		wrmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
+		wrmsrq(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
 
-	if (unlikely(vcpu->arch.switch_db_regs)) {
+	if (unlikely(vcpu->arch.switch_db_regs &&
+		     !(vcpu->arch.switch_db_regs & KVM_DEBUGREG_AUTO_SWITCH))) {
 		set_debugreg(0, 7);
 		set_debugreg(vcpu->arch.eff_db[0], 0);
 		set_debugreg(vcpu->arch.eff_db[1], 1);
@@ -11030,6 +11061,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	 */
 	if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
 		WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
+		WARN_ON(vcpu->arch.switch_db_regs & KVM_DEBUGREG_AUTO_SWITCH);
 		kvm_x86_call(sync_dirty_debug_regs)(vcpu);
 		kvm_update_dr0123(vcpu);
 		kvm_update_dr7(vcpu);
@@ -11062,7 +11094,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	kvm_x86_call(handle_exit_irqoff)(vcpu);
 
 	if (vcpu->arch.guest_fpu.xfd_err)
-		wrmsrl(MSR_IA32_XFD_ERR, 0);
+		wrmsrq(MSR_IA32_XFD_ERR, 0);
 
 	/*
	 * Consume any pending interrupts, including the possible source of
@@ -11134,7 +11166,7 @@ static bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
 		!vcpu->arch.apf.halted);
 }
 
-static bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
+bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
 {
 	if (!list_empty_careful(&vcpu->async_pf.done))
 		return true;
@@ -11143,9 +11175,6 @@ static bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
 	    kvm_apic_init_sipi_allowed(vcpu))
 		return true;
 
-	if (vcpu->arch.pv.pv_unhalted)
-		return true;
-
 	if (kvm_is_exception_pending(vcpu))
 		return true;
 
@@ -11183,10 +11212,12 @@ static bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
 
 	return false;
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_has_events);
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
-	return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
+	return kvm_vcpu_running(vcpu) || vcpu->arch.pv.pv_unhalted ||
+	       kvm_vcpu_has_events(vcpu);
 }
 
 /* Called within kvm->srcu read side. */
@@ -11320,7 +11351,7 @@ static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason)
 	 */
 	++vcpu->stat.halt_exits;
 	if (lapic_in_kernel(vcpu)) {
-		if (kvm_vcpu_has_events(vcpu))
+		if (kvm_vcpu_has_events(vcpu) || vcpu->arch.pv.pv_unhalted)
 			state = KVM_MP_STATE_RUNNABLE;
 		kvm_set_mp_state(vcpu, state);
 		return 1;
@@ -12694,6 +12725,7 @@ bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
 {
 	return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
 
 bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
 {
@@ -12723,6 +12755,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	/* Decided by the vendor code for other VM types. */
 	kvm->arch.pre_fault_allowed = type == KVM_X86_DEFAULT_VM ||
 				      type == KVM_X86_SW_PROTECTED_VM;
+	kvm->arch.disabled_quirks = kvm_caps.inapplicable_quirks & kvm_caps.supported_quirks;
 
 	ret = kvm_page_track_init(kvm);
 	if (ret)
@@ -12876,6 +12909,7 @@ void kvm_arch_pre_destroy_vm(struct kvm *kvm)
 	kvm_free_pit(kvm);
 
 	kvm_mmu_pre_destroy_vm(kvm);
+	static_call_cond(kvm_x86_vm_pre_destroy)(kvm);
 }
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
@@ -13073,7 +13107,7 @@ static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable)
 {
 	int nr_slots;
 
-	if (!kvm_x86_ops.cpu_dirty_log_size)
+	if (!kvm->arch.cpu_dirty_log_size)
 		return;
 
 	nr_slots = atomic_read(&kvm->nr_memslots_dirty_logging);
@@ -13145,7 +13179,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 		if (READ_ONCE(eager_page_split))
 			kvm_mmu_slot_try_split_huge_pages(kvm, new, PG_LEVEL_4K);
 
-		if (kvm_x86_ops.cpu_dirty_log_size) {
+		if (kvm->arch.cpu_dirty_log_size) {
 			kvm_mmu_slot_leaf_clear_dirty(kvm, new);
 			kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_2M);
 		} else {
@@ -13534,8 +13568,10 @@ static void kvm_noncoherent_dma_assignment_start_or_stop(struct kvm *kvm)
 	 * due to toggling the "ignore PAT" bit. Zap all SPTEs when the first
	 * (or last) non-coherent device is (un)registered to so that new SPTEs
	 * with the correct "ignore guest PAT" setting are created.
+	 *
+	 * If KVM always honors guest PAT, however, there is nothing to do.
 	 */
-	if (kvm_mmu_may_ignore_guest_pat())
+	if (kvm_check_has_quirk(kvm, KVM_X86_QUIRK_IGNORE_GUEST_PAT))
 		kvm_zap_gfn_range(kvm, gpa_to_gfn(0), gpa_to_gfn(~0ULL));
 }
 
@@ -13668,12 +13704,12 @@ int kvm_spec_ctrl_test_value(u64 value)
 
 	local_irq_save(flags);
 
-	if (rdmsrl_safe(MSR_IA32_SPEC_CTRL, &saved_value))
+	if (rdmsrq_safe(MSR_IA32_SPEC_CTRL, &saved_value))
 		ret = 1;
-	else if (wrmsrl_safe(MSR_IA32_SPEC_CTRL, value))
+	else if (wrmsrq_safe(MSR_IA32_SPEC_CTRL, value))
 		ret = 1;
 	else
-		wrmsrl(MSR_IA32_SPEC_CTRL, saved_value);
+		wrmsrq(MSR_IA32_SPEC_CTRL, saved_value);
 
 	local_irq_restore(flags);
 
@@ -14012,6 +14048,7 @@ EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_mmio);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
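Note on the user-return MSR changes above: the patch factors the notifier registration out into kvm_user_return_register_notifier() and adds kvm_user_return_msr_update_cache(), which lets a caller refresh the cached "current" value of a user-return MSR without issuing another WRMSR, while still arming the return-to-userspace notifier that restores the host value. The following is a minimal, standalone C sketch of that pattern only; the struct, function names, and printf stand-ins are simplified assumptions for illustration, not the kernel's per-CPU implementation.

/* Illustrative sketch of the register-once notifier + cache-update pattern. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_URET_MSRS 4

struct uret_msr_value {
	uint64_t host;	/* value the host expects after returning to userspace */
	uint64_t curr;	/* value believed to be loaded in the (simulated) MSR  */
};

struct uret_msrs {
	struct uret_msr_value values[NR_URET_MSRS];
	bool registered;	/* has the return notifier been armed? */
};

/* Stand-in for kvm_user_return_register_notifier(): arm the callback once. */
static void register_notifier(struct uret_msrs *msrs)
{
	if (!msrs->registered) {
		printf("return notifier registered\n");
		msrs->registered = true;
	}
}

/* Mirrors kvm_set_user_return_msr(): write the value, then track it. */
static void set_uret_msr(struct uret_msrs *msrs, unsigned int slot, uint64_t value)
{
	if (value == msrs->values[slot].curr)
		return;
	/* real code: wrmsrq_safe(kvm_uret_msrs_list[slot], value); */
	msrs->values[slot].curr = value;
	register_notifier(msrs);
}

/*
 * Mirrors the new kvm_user_return_msr_update_cache(): hardware already holds
 * the value, so only the cache is refreshed, but the notifier is still armed
 * so the host value gets restored on return to userspace.
 */
static void update_uret_msr_cache(struct uret_msrs *msrs, unsigned int slot, uint64_t value)
{
	msrs->values[slot].curr = value;
	register_notifier(msrs);
}

int main(void)
{
	struct uret_msrs msrs = { .values = { [0] = { .host = 1, .curr = 1 } } };

	set_uret_msr(&msrs, 0, 2);		/* explicit write path    */
	update_uret_msr_cache(&msrs, 0, 3);	/* cache-only update path */
	printf("slot 0: host=%llu curr=%llu\n",
	       (unsigned long long)msrs.values[0].host,
	       (unsigned long long)msrs.values[0].curr);
	return 0;
}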