-rw-r--r--  Documentation/virt/kvm/api.rst                          6
-rw-r--r--  Documentation/virt/kvm/x86/hypercalls.rst               6
-rw-r--r--  arch/x86/include/asm/cpufeatures.h                      1
-rw-r--r--  arch/x86/include/asm/kvm-x86-ops.h                      2
-rw-r--r--  arch/x86/include/asm/kvm_host.h                        31
-rw-r--r--  arch/x86/include/asm/msr-index.h                       16
-rw-r--r--  arch/x86/include/uapi/asm/vmx.h                         6
-rw-r--r--  arch/x86/kernel/cpu/scattered.c                         1
-rw-r--r--  arch/x86/kvm/cpuid.c                                   13
-rw-r--r--  arch/x86/kvm/emulate.c                                 13
-rw-r--r--  arch/x86/kvm/hyperv.c                                  12
-rw-r--r--  arch/x86/kvm/ioapic.c                                  15
-rw-r--r--  arch/x86/kvm/irq.c                                     57
-rw-r--r--  arch/x86/kvm/irq.h                                      4
-rw-r--r--  arch/x86/kvm/kvm_emulate.h                              3
-rw-r--r--  arch/x86/kvm/lapic.c                                  169
-rw-r--r--  arch/x86/kvm/lapic.h                                   15
-rw-r--r--  arch/x86/kvm/pmu.c                                    169
-rw-r--r--  arch/x86/kvm/pmu.h                                     60
-rw-r--r--  arch/x86/kvm/reverse_cpuid.h                            5
-rw-r--r--  arch/x86/kvm/smm.c                                      4
-rw-r--r--  arch/x86/kvm/svm/pmu.c                                  8
-rw-r--r--  arch/x86/kvm/svm/svm.c                                 30
-rw-r--r--  arch/x86/kvm/vmx/capabilities.h                         3
-rw-r--r--  arch/x86/kvm/vmx/main.c                                14
-rw-r--r--  arch/x86/kvm/vmx/nested.c                              29
-rw-r--r--  arch/x86/kvm/vmx/pmu_intel.c                           81
-rw-r--r--  arch/x86/kvm/vmx/tdx.c                                  5
-rw-r--r--  arch/x86/kvm/vmx/vmx.c                                 91
-rw-r--r--  arch/x86/kvm/vmx/vmx.h                                 13
-rw-r--r--  arch/x86/kvm/vmx/x86_ops.h                              2
-rw-r--r--  arch/x86/kvm/x86.c                                    330
-rw-r--r--  arch/x86/kvm/x86.h                                      5
-rw-r--r--  tools/testing/selftests/kvm/x86/pmu_counters_test.c     8
34 files changed, 711 insertions, 516 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index c17a87a0a5ac..ffc350b649ad 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -3075,6 +3075,12 @@ This IOCTL replaces the obsolete KVM_GET_PIT.
Sets the state of the in-kernel PIT model. Only valid after KVM_CREATE_PIT2.
See KVM_GET_PIT2 for details on struct kvm_pit_state2.
+.. Tip::
+ ``KVM_SET_PIT2`` strictly adheres to the Intel 8254 PIT specification. For example,
+ a ``count`` value of 0 in ``struct kvm_pit_channel_state`` is interpreted as
+ 65536, which is the maximum count value. Refer to `Intel 8254 programmable
+ interval timer <https://www.scs.stanford.edu/10wi-cs140/pintos/specs/8254.pdf>`_.
+
This IOCTL replaces the obsolete KVM_SET_PIT.
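For illustration, the behaviour described in the tip can be exercised from a
userspace VMM roughly as follows; this is a minimal sketch assuming a VM file
descriptor on which ``KVM_CREATE_PIT2`` has already succeeded, with error
handling elided:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/*
 * Program PIT channel 0 with a count of 0, which the in-kernel 8254 model
 * interprets as 65536, the maximum count.  vm_fd is assumed to be a VM fd
 * with an in-kernel PIT already created via KVM_CREATE_PIT2.
 */
int pit_set_max_count(int vm_fd)
{
	struct kvm_pit_state2 state;

	if (ioctl(vm_fd, KVM_GET_PIT2, &state) < 0)
		return -1;

	state.channels[0].mode  = 2;	/* rate generator */
	state.channels[0].count = 0;	/* interpreted as 65536 */

	return ioctl(vm_fd, KVM_SET_PIT2, &state);
}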
diff --git a/Documentation/virt/kvm/x86/hypercalls.rst b/Documentation/virt/kvm/x86/hypercalls.rst
index 10db7924720f..521ecf9a8a36 100644
--- a/Documentation/virt/kvm/x86/hypercalls.rst
+++ b/Documentation/virt/kvm/x86/hypercalls.rst
@@ -137,7 +137,7 @@ compute the CLOCK_REALTIME for its clock, at the same instant.
Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
-6. KVM_HC_SEND_IPI
+7. KVM_HC_SEND_IPI
------------------
:Architecture: x86
@@ -158,7 +158,7 @@ corresponds to the APIC ID a2+1, and so on.
Returns the number of CPUs to which the IPIs were delivered successfully.
-7. KVM_HC_SCHED_YIELD
+8. KVM_HC_SCHED_YIELD
---------------------
:Architecture: x86
@@ -170,7 +170,7 @@ a0: destination APIC ID
:Usage example: When sending a call-function IPI-many to vCPUs, yield if
any of the IPI target vCPUs was preempted.
-8. KVM_HC_MAP_GPA_RANGE
+9. KVM_HC_MAP_GPA_RANGE
-------------------------
:Architecture: x86
:Status: active
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index ec68aba2fa7e..f1a9f40622cd 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -497,6 +497,7 @@
#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */
#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */
#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */
+#define X86_FEATURE_MSR_IMM (21*32+15) /* MSR immediate form instructions */
/*
* BUG word(s)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 62c3e4de3303..fdf178443f85 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -138,7 +138,7 @@ KVM_X86_OP(check_emulate_instruction)
KVM_X86_OP(apic_init_signal_blocked)
KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush)
KVM_X86_OP_OPTIONAL(migrate_timers)
-KVM_X86_OP(recalc_msr_intercepts)
+KVM_X86_OP(recalc_intercepts)
KVM_X86_OP(complete_emulated_msr)
KVM_X86_OP(vcpu_deliver_sipi_vector)
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0fb3ecf51166..9e611901d310 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -120,7 +120,7 @@
#define KVM_REQ_TLB_FLUSH_GUEST \
KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_APF_READY KVM_ARCH_REQ(28)
-#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
+#define KVM_REQ_RECALC_INTERCEPTS KVM_ARCH_REQ(29)
#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
@@ -545,10 +545,10 @@ struct kvm_pmc {
#define KVM_MAX_NR_GP_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \
KVM_MAX_NR_AMD_GP_COUNTERS)
-#define KVM_MAX_NR_INTEL_FIXED_COUTNERS 3
-#define KVM_MAX_NR_AMD_FIXED_COUTNERS 0
-#define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUTNERS, \
- KVM_MAX_NR_AMD_FIXED_COUTNERS)
+#define KVM_MAX_NR_INTEL_FIXED_COUNTERS 3
+#define KVM_MAX_NR_AMD_FIXED_COUNTERS 0
+#define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUNTERS, \
+ KVM_MAX_NR_AMD_FIXED_COUNTERS)
struct kvm_pmu {
u8 version;
@@ -579,6 +579,9 @@ struct kvm_pmu {
DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(pmc_counting_instructions, X86_PMC_IDX_MAX);
+ DECLARE_BITMAP(pmc_counting_branches, X86_PMC_IDX_MAX);
+
u64 ds_area;
u64 pebs_enable;
u64 pebs_enable_rsvd;
@@ -771,6 +774,7 @@ enum kvm_only_cpuid_leafs {
CPUID_7_2_EDX,
CPUID_24_0_EBX,
CPUID_8000_0021_ECX,
+ CPUID_7_1_ECX,
NR_KVM_CPU_CAPS,
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
@@ -926,6 +930,7 @@ struct kvm_vcpu_arch {
bool emulate_regs_need_sync_from_vcpu;
int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
unsigned long cui_linear_rip;
+ int cui_rdmsr_imm_reg;
gpa_t time;
s8 pvclock_tsc_shift;
@@ -1381,6 +1386,7 @@ struct kvm_arch {
u8 vm_type;
bool has_private_mem;
bool has_protected_state;
+ bool has_protected_eoi;
bool pre_fault_allowed;
struct hlist_head *mmu_page_hash;
struct list_head active_mmu_pages;
@@ -1921,7 +1927,7 @@ struct kvm_x86_ops {
int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu);
void (*migrate_timers)(struct kvm_vcpu *vcpu);
- void (*recalc_msr_intercepts)(struct kvm_vcpu *vcpu);
+ void (*recalc_intercepts)(struct kvm_vcpu *vcpu);
int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
@@ -2162,13 +2168,16 @@ void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa);
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
-int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data);
-int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data);
-int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated);
-int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
-int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
+int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data);
+int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data);
+int kvm_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int kvm_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data);
int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
+int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg);
int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);
+int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg);
int kvm_emulate_as_nop(struct kvm_vcpu *vcpu);
int kvm_emulate_invd(struct kvm_vcpu *vcpu);
int kvm_emulate_mwait(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b65c3ba5fa14..717baeba6db3 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -315,12 +315,15 @@
#define PERF_CAP_PT_IDX 16
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
-#define PERF_CAP_PEBS_TRAP BIT_ULL(6)
-#define PERF_CAP_ARCH_REG BIT_ULL(7)
-#define PERF_CAP_PEBS_FORMAT 0xf00
-#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
-#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
- PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE)
+
+#define PERF_CAP_LBR_FMT 0x3f
+#define PERF_CAP_PEBS_TRAP BIT_ULL(6)
+#define PERF_CAP_ARCH_REG BIT_ULL(7)
+#define PERF_CAP_PEBS_FORMAT 0xf00
+#define PERF_CAP_FW_WRITES BIT_ULL(13)
+#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
+#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
+ PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE)
#define MSR_IA32_RTIT_CTL 0x00000570
#define RTIT_CTL_TRACEEN BIT(0)
@@ -733,6 +736,7 @@
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET 0xc0000303
/* AMD Hardware Feedback Support MSRs */
#define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index f0f4a4cf84a7..9792e329343e 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -94,6 +94,8 @@
#define EXIT_REASON_BUS_LOCK 74
#define EXIT_REASON_NOTIFY 75
#define EXIT_REASON_TDCALL 77
+#define EXIT_REASON_MSR_READ_IMM 84
+#define EXIT_REASON_MSR_WRITE_IMM 85
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -158,7 +160,9 @@
{ EXIT_REASON_TPAUSE, "TPAUSE" }, \
{ EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \
{ EXIT_REASON_NOTIFY, "NOTIFY" }, \
- { EXIT_REASON_TDCALL, "TDCALL" }
+ { EXIT_REASON_TDCALL, "TDCALL" }, \
+ { EXIT_REASON_MSR_READ_IMM, "MSR_READ_IMM" }, \
+ { EXIT_REASON_MSR_WRITE_IMM, "MSR_WRITE_IMM" }
#define VMX_EXIT_REASON_FLAGS \
{ VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 6b868afb26c3..cf4ae822bcc0 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
+ { X86_FEATURE_MSR_IMM, CPUID_ECX, 5, 0x00000007, 1 },
{ X86_FEATURE_APX, CPUID_EDX, 21, 0x00000007, 1 },
{ X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
{ X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e2836a255b16..efee08fad72e 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -448,6 +448,8 @@ void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
* adjustments to the reserved GPA bits.
*/
kvm_mmu_after_set_cpuid(vcpu);
+
+ kvm_make_request(KVM_REQ_RECALC_INTERCEPTS, vcpu);
}
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
@@ -985,6 +987,10 @@ void kvm_set_cpu_caps(void)
F(LAM),
);
+ kvm_cpu_cap_init(CPUID_7_1_ECX,
+ SCATTERED_F(MSR_IMM),
+ );
+
kvm_cpu_cap_init(CPUID_7_1_EDX,
F(AVX_VNNI_INT8),
F(AVX_NE_CONVERT),
@@ -1411,9 +1417,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
goto out;
cpuid_entry_override(entry, CPUID_7_1_EAX);
+ cpuid_entry_override(entry, CPUID_7_1_ECX);
cpuid_entry_override(entry, CPUID_7_1_EDX);
entry->ebx = 0;
- entry->ecx = 0;
}
if (max_idx >= 2) {
entry = do_host_cpuid(array, function, 2);
@@ -1820,7 +1826,8 @@ static int get_cpuid_func(struct kvm_cpuid_array *array, u32 func,
int r;
if (func == CENTAUR_CPUID_SIGNATURE &&
- boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR)
+ boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
return 0;
r = do_cpuid_func(array, func, type);
@@ -2001,7 +2008,7 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
if (function == 7 && index == 0) {
u64 data;
if ((*ebx & (feature_bit(RTM) | feature_bit(HLE))) &&
- !__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
+ !kvm_msr_read(vcpu, MSR_IA32_TSX_CTRL, &data) &&
(data & TSX_CTRL_CPUID_CLEAR))
*ebx &= ~(feature_bit(RTM) | feature_bit(HLE));
} else if (function == 0x80000007) {
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 1349e278cd2a..23929151a5b8 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4330,8 +4330,8 @@ static const struct opcode opcode_table[256] = {
I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
G(ByteOp, group11), G(0, group11),
/* 0xC8 - 0xCF */
- I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter),
- I(Stack | IsBranch, em_leave),
+ I(Stack | SrcImmU16 | Src2ImmByte, em_enter),
+ I(Stack, em_leave),
I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm),
I(ImplicitOps | IsBranch, em_ret_far),
D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn),
@@ -5107,12 +5107,11 @@ void init_decode_cache(struct x86_emulate_ctxt *ctxt)
ctxt->mem_read.end = 0;
}
-int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
+int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts)
{
const struct x86_emulate_ops *ops = ctxt->ops;
int rc = X86EMUL_CONTINUE;
int saved_dst_type = ctxt->dst.type;
- bool is_guest_mode = ctxt->ops->is_guest_mode(ctxt);
ctxt->mem_read.pos = 0;
@@ -5160,7 +5159,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
fetch_possible_mmx_operand(&ctxt->dst);
}
- if (unlikely(is_guest_mode) && ctxt->intercept) {
+ if (unlikely(check_intercepts) && ctxt->intercept) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_PRE_EXCEPT);
if (rc != X86EMUL_CONTINUE)
@@ -5189,7 +5188,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
goto done;
}
- if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) {
+ if (unlikely(check_intercepts) && (ctxt->d & Intercept)) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_POST_EXCEPT);
if (rc != X86EMUL_CONTINUE)
@@ -5243,7 +5242,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
special_insn:
- if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) {
+ if (unlikely(check_intercepts) && (ctxt->d & Intercept)) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_POST_MEMACCESS);
if (rc != X86EMUL_CONTINUE)
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 72b19a88a776..a471900c7325 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1168,15 +1168,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0);
- mutex_lock(&hv->hv_lock);
+ guard(mutex)(&hv->hv_lock);
if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
hv->hv_tsc_page_status == HV_TSC_PAGE_SET ||
hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET)
- goto out_unlock;
+ return;
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
- goto out_unlock;
+ return;
gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
/*
@@ -1192,7 +1192,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
goto out_err;
hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
- goto out_unlock;
+ return;
}
/*
@@ -1228,12 +1228,10 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
goto out_err;
hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
- goto out_unlock;
+ return;
out_err:
hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
-out_unlock:
- mutex_unlock(&hv->hv_lock);
}
void kvm_hv_request_tsc_page_update(struct kvm *kvm)
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 2b5d389bca5f..2c2783296aed 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
/*
* Copyright (C) 2001 MandrakeSoft S.A.
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
@@ -8,20 +9,6 @@
* http://www.linux-mandrake.com/
* http://www.mandrakesoft.com/
*
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
* Yunhong Jiang <yunhong.jiang@intel.com>
* Yaozu (Eddie) Dong <eddie.dong@intel.com>
* Based on Xen 3.1 code.
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 16da89259011..a6b122f732be 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -195,63 +195,6 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
return irqchip_in_kernel(kvm);
}
-int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
- struct kvm_lapic_irq *irq, struct dest_map *dest_map)
-{
- int r = -1;
- struct kvm_vcpu *vcpu, *lowest = NULL;
- unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
- unsigned int dest_vcpus = 0;
-
- if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
- return r;
-
- if (irq->dest_mode == APIC_DEST_PHYSICAL &&
- irq->dest_id == 0xff && kvm_lowest_prio_delivery(irq)) {
- pr_info("apic: phys broadcast and lowest prio\n");
- irq->delivery_mode = APIC_DM_FIXED;
- }
-
- memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (!kvm_apic_present(vcpu))
- continue;
-
- if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
- irq->dest_id, irq->dest_mode))
- continue;
-
- if (!kvm_lowest_prio_delivery(irq)) {
- if (r < 0)
- r = 0;
- r += kvm_apic_set_irq(vcpu, irq, dest_map);
- } else if (kvm_apic_sw_enabled(vcpu->arch.apic)) {
- if (!kvm_vector_hashing_enabled()) {
- if (!lowest)
- lowest = vcpu;
- else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
- lowest = vcpu;
- } else {
- __set_bit(i, dest_vcpu_bitmap);
- dest_vcpus++;
- }
- }
- }
-
- if (dest_vcpus != 0) {
- int idx = kvm_vector_to_index(irq->vector, dest_vcpus,
- dest_vcpu_bitmap, KVM_MAX_VCPUS);
-
- lowest = kvm_get_vcpu(kvm, idx);
- }
-
- if (lowest)
- r = kvm_apic_set_irq(lowest, irq, dest_map);
-
- return r;
-}
-
static void kvm_msi_to_lapic_irq(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq *irq)
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 5e62c1f79ce6..34f4a78a7a01 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -121,8 +121,4 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
int apic_has_pending_timer(struct kvm_vcpu *vcpu);
-int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
- struct kvm_lapic_irq *irq,
- struct dest_map *dest_map);
-
#endif
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index c1df5acfacaf..7b5ddb787a25 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -235,7 +235,6 @@ struct x86_emulate_ops {
void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
bool (*is_smm)(struct x86_emulate_ctxt *ctxt);
- bool (*is_guest_mode)(struct x86_emulate_ctxt *ctxt);
int (*leave_smm)(struct x86_emulate_ctxt *ctxt);
void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
@@ -521,7 +520,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
#define EMULATION_RESTART 1
#define EMULATION_INTERCEPTED 2
void init_decode_cache(struct x86_emulate_ctxt *ctxt);
-int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
+int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts);
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
u16 tss_selector, int idt_index, int reason,
bool has_error_code, u32 error_code);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 5fc437341e03..3b76192b24e9 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -74,6 +74,10 @@ module_param(lapic_timer_advance, bool, 0444);
#define LAPIC_TIMER_ADVANCE_NS_MAX 5000
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
+
+static bool __read_mostly vector_hashing_enabled = true;
+module_param_named(vector_hashing, vector_hashing_enabled, bool, 0444);
+
static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data);
static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data);
@@ -130,7 +134,7 @@ static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
(kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
}
-bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
+static bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
{
return kvm_x86_ops.set_hv_timer
&& !(kvm_mwait_in_guest(vcpu->kvm) ||
@@ -1063,19 +1067,12 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
}
EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
-int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
- const unsigned long *bitmap, u32 bitmap_size)
+static int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
+ const unsigned long *bitmap, u32 bitmap_size)
{
- u32 mod;
- int i, idx = -1;
-
- mod = vector % dest_vcpus;
-
- for (i = 0; i <= mod; i++) {
- idx = find_next_bit(bitmap, bitmap_size, idx + 1);
- BUG_ON(idx == bitmap_size);
- }
+ int idx = find_nth_bit(bitmap, bitmap_size, vector % dest_vcpus);
+ BUG_ON(idx >= bitmap_size);
return idx;
}
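For reference, find_nth_bit(bitmap, size, n) returns the index of the n-th set
bit (0-based), or a value >= size when fewer than n + 1 bits are set, which is
what the removed loop computed. A small stand-alone sketch of that semantic in
plain C, without the kernel helpers (nth_set_bit_loop is a hypothetical name
used only for illustration):

#include <assert.h>

/* Index of the (n+1)-th set bit in a 64-bit mask, mirroring the old loop. */
int nth_set_bit_loop(unsigned long long mask, unsigned int n)
{
	int idx = -1;
	unsigned int i;

	for (i = 0; i <= n; i++) {
		do {
			idx++;
		} while (idx < 64 && !(mask & (1ULL << idx)));
	}
	return idx;	/* >= 64 if the mask has fewer than n + 1 set bits */
}

int main(void)
{
	unsigned long long mask = 0xA5;	/* bits 0, 2, 5 and 7 set */

	assert(nth_set_bit_loop(mask, 0) == 0);	/* first set bit */
	assert(nth_set_bit_loop(mask, 2) == 5);	/* third set bit */
	return 0;
}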
@@ -1106,6 +1103,16 @@ static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
return false;
}
+static bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
+{
+ return (irq->delivery_mode == APIC_DM_LOWEST || irq->msi_redir_hint);
+}
+
+static int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
+{
+ return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
+}
+
/* Return true if the interrupt can be handled by using *bitmap as index mask
* for valid destinations in *dst array.
* Return false if kvm_apic_map_get_dest_lapic did nothing useful.
@@ -1149,7 +1156,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
if (!kvm_lowest_prio_delivery(irq))
return true;
- if (!kvm_vector_hashing_enabled()) {
+ if (!vector_hashing_enabled) {
lowest = -1;
for_each_set_bit(i, bitmap, 16) {
if (!(*dst)[i])
@@ -1258,6 +1265,63 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
return ret;
}
+int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+ struct kvm_lapic_irq *irq, struct dest_map *dest_map)
+{
+ int r = -1;
+ struct kvm_vcpu *vcpu, *lowest = NULL;
+ unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+ unsigned int dest_vcpus = 0;
+
+ if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
+ return r;
+
+ if (irq->dest_mode == APIC_DEST_PHYSICAL &&
+ irq->dest_id == 0xff && kvm_lowest_prio_delivery(irq)) {
+ pr_info("apic: phys broadcast and lowest prio\n");
+ irq->delivery_mode = APIC_DM_FIXED;
+ }
+
+ memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!kvm_apic_present(vcpu))
+ continue;
+
+ if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
+ irq->dest_id, irq->dest_mode))
+ continue;
+
+ if (!kvm_lowest_prio_delivery(irq)) {
+ if (r < 0)
+ r = 0;
+ r += kvm_apic_set_irq(vcpu, irq, dest_map);
+ } else if (kvm_apic_sw_enabled(vcpu->arch.apic)) {
+ if (!vector_hashing_enabled) {
+ if (!lowest)
+ lowest = vcpu;
+ else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
+ lowest = vcpu;
+ } else {
+ __set_bit(i, dest_vcpu_bitmap);
+ dest_vcpus++;
+ }
+ }
+ }
+
+ if (dest_vcpus != 0) {
+ int idx = kvm_vector_to_index(irq->vector, dest_vcpus,
+ dest_vcpu_bitmap, KVM_MAX_VCPUS);
+
+ lowest = kvm_get_vcpu(kvm, idx);
+ }
+
+ if (lowest)
+ r = kvm_apic_set_irq(lowest, irq, dest_map);
+
+ return r;
+}
+
/*
* Add a pending IRQ into lapic.
* Return 1 if successfully added and 0 if discarded.
@@ -1401,11 +1465,6 @@ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
rcu_read_unlock();
}
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
-{
- return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
-}
-
static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
{
return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
@@ -1483,24 +1542,30 @@ void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
}
EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
-void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
+static void kvm_icr_to_lapic_irq(struct kvm_lapic *apic, u32 icr_low,
+ u32 icr_high, struct kvm_lapic_irq *irq)
{
- struct kvm_lapic_irq irq;
-
/* KVM has no delay and should always clear the BUSY/PENDING flag. */
WARN_ON_ONCE(icr_low & APIC_ICR_BUSY);
- irq.vector = icr_low & APIC_VECTOR_MASK;
- irq.delivery_mode = icr_low & APIC_MODE_MASK;
- irq.dest_mode = icr_low & APIC_DEST_MASK;
- irq.level = (icr_low & APIC_INT_ASSERT) != 0;
- irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
- irq.shorthand = icr_low & APIC_SHORT_MASK;
- irq.msi_redir_hint = false;
+ irq->vector = icr_low & APIC_VECTOR_MASK;
+ irq->delivery_mode = icr_low & APIC_MODE_MASK;
+ irq->dest_mode = icr_low & APIC_DEST_MASK;
+ irq->level = (icr_low & APIC_INT_ASSERT) != 0;
+ irq->trig_mode = icr_low & APIC_INT_LEVELTRIG;
+ irq->shorthand = icr_low & APIC_SHORT_MASK;
+ irq->msi_redir_hint = false;
if (apic_x2apic_mode(apic))
- irq.dest_id = icr_high;
+ irq->dest_id = icr_high;
else
- irq.dest_id = GET_XAPIC_DEST_FIELD(icr_high);
+ irq->dest_id = GET_XAPIC_DEST_FIELD(icr_high);
+}
+
+void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
+{
+ struct kvm_lapic_irq irq;
+
+ kvm_icr_to_lapic_irq(apic, icr_low, icr_high, &irq);
trace_kvm_apic_ipi(icr_low, irq.dest_id);
@@ -2435,7 +2500,7 @@ EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
#define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13))
-int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
+static int __kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data, bool fast)
{
if (data & X2APIC_ICR_RESERVED_BITS)
return 1;
@@ -2450,7 +2515,20 @@ int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
*/
data &= ~APIC_ICR_BUSY;
- kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
+ if (fast) {
+ struct kvm_lapic_irq irq;
+ int ignored;
+
+ kvm_icr_to_lapic_irq(apic, (u32)data, (u32)(data >> 32), &irq);
+
+ if (!kvm_irq_delivery_to_apic_fast(apic->vcpu->kvm, apic, &irq,
+ &ignored, NULL))
+ return -EWOULDBLOCK;
+
+ trace_kvm_apic_ipi((u32)data, irq.dest_id);
+ } else {
+ kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
+ }
if (kvm_x86_ops.x2apic_icr_is_split) {
kvm_lapic_set_reg(apic, APIC_ICR, data);
kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32);
@@ -2461,6 +2539,16 @@ int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
return 0;
}
+static int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
+{
+ return __kvm_x2apic_icr_write(apic, data, false);
+}
+
+int kvm_x2apic_icr_write_fast(struct kvm_lapic *apic, u64 data)
+{
+ return __kvm_x2apic_icr_write(apic, data, true);
+}
+
static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic)
{
if (kvm_x86_ops.x2apic_icr_is_split)
@@ -2661,24 +2749,21 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
int kvm_alloc_apic_access_page(struct kvm *kvm)
{
void __user *hva;
- int ret = 0;
- mutex_lock(&kvm->slots_lock);
+ guard(mutex)(&kvm->slots_lock);
+
if (kvm->arch.apic_access_memslot_enabled ||
kvm->arch.apic_access_memslot_inhibited)
- goto out;
+ return 0;
hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
- if (IS_ERR(hva)) {
- ret = PTR_ERR(hva);
- goto out;
- }
+ if (IS_ERR(hva))
+ return PTR_ERR(hva);
kvm->arch.apic_access_memslot_enabled = true;
-out:
- mutex_unlock(&kvm->slots_lock);
- return ret;
+
+ return 0;
}
EXPORT_SYMBOL_GPL(kvm_alloc_apic_access_page);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 72de14527698..50123fe7f58f 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -105,7 +105,6 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu);
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int shorthand, unsigned int dest, int dest_mode);
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec);
bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr);
bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr);
@@ -119,6 +118,9 @@ void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu);
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map);
+int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+ struct kvm_lapic_irq *irq,
+ struct dest_map *dest_map);
void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high);
int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated);
@@ -137,7 +139,7 @@ int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
-int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data);
+int kvm_x2apic_icr_write_fast(struct kvm_lapic *apic, u64 data);
int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
@@ -222,12 +224,6 @@ static inline bool kvm_apic_init_sipi_allowed(struct kvm_vcpu *vcpu)
!kvm_x86_call(apic_init_signal_blocked)(vcpu);
}
-static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
-{
- return (irq->delivery_mode == APIC_DM_LOWEST ||
- irq->msi_redir_hint);
-}
-
static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
{
return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
@@ -242,14 +238,11 @@ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);
-int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
- const unsigned long *bitmap, u32 bitmap_size);
void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu);
void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu);
void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu);
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu);
void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu);
-bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu);
static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
{
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 75e9cfc689f8..b7dc5bd981ba 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -26,11 +26,18 @@
/* This is enough to filter the vast majority of currently defined events. */
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
+/* Unadulterated PMU capabilities of the host, i.e. of hardware. */
+static struct x86_pmu_capability __read_mostly kvm_host_pmu;
+
+/* KVM's PMU capabilities, i.e. the intersection of KVM and hardware support. */
struct x86_pmu_capability __read_mostly kvm_pmu_cap;
EXPORT_SYMBOL_GPL(kvm_pmu_cap);
-struct kvm_pmu_emulated_event_selectors __read_mostly kvm_pmu_eventsel;
-EXPORT_SYMBOL_GPL(kvm_pmu_eventsel);
+struct kvm_pmu_emulated_event_selectors {
+ u64 INSTRUCTIONS_RETIRED;
+ u64 BRANCH_INSTRUCTIONS_RETIRED;
+};
+static struct kvm_pmu_emulated_event_selectors __read_mostly kvm_pmu_eventsel;
/* Precise Distribution of Instructions Retired (PDIR) */
static const struct x86_cpu_id vmx_pebs_pdir_cpu[] = {
@@ -96,6 +103,54 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
#undef __KVM_X86_PMU_OP
}
+void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
+{
+ bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL;
+ int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS;
+
+ perf_get_x86_pmu_capability(&kvm_host_pmu);
+
+ /*
+ * Hybrid PMUs don't play nice with virtualization without careful
+ * configuration by userspace, and KVM's APIs for reporting supported
+ * vPMU features do not account for hybrid PMUs. Disable vPMU support
+ * for hybrid PMUs until KVM gains a way to let userspace opt-in.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
+ enable_pmu = false;
+
+ if (enable_pmu) {
+ /*
+ * WARN if perf did NOT disable hardware PMU if the number of
+ * architecturally required GP counters aren't present, i.e. if
+ * there are a non-zero number of counters, but fewer than what
+ * is architecturally required.
+ */
+ if (!kvm_host_pmu.num_counters_gp ||
+ WARN_ON_ONCE(kvm_host_pmu.num_counters_gp < min_nr_gp_ctrs))
+ enable_pmu = false;
+ else if (is_intel && !kvm_host_pmu.version)
+ enable_pmu = false;
+ }
+
+ if (!enable_pmu) {
+ memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap));
+ return;
+ }
+
+ memcpy(&kvm_pmu_cap, &kvm_host_pmu, sizeof(kvm_host_pmu));
+ kvm_pmu_cap.version = min(kvm_pmu_cap.version, 2);
+ kvm_pmu_cap.num_counters_gp = min(kvm_pmu_cap.num_counters_gp,
+ pmu_ops->MAX_NR_GP_COUNTERS);
+ kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
+ KVM_MAX_NR_FIXED_COUNTERS);
+
+ kvm_pmu_eventsel.INSTRUCTIONS_RETIRED =
+ perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS);
+ kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED =
+ perf_get_hw_event_config(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+}
+
static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -426,7 +481,7 @@ static bool is_fixed_event_allowed(struct kvm_x86_pmu_event_filter *filter,
return true;
}
-static bool check_pmu_event_filter(struct kvm_pmc *pmc)
+static bool pmc_is_event_allowed(struct kvm_pmc *pmc)
{
struct kvm_x86_pmu_event_filter *filter;
struct kvm *kvm = pmc->vcpu->kvm;
@@ -441,12 +496,6 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
return is_fixed_event_allowed(filter, pmc->idx);
}
-static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
-{
- return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
- check_pmu_event_filter(pmc);
-}
-
static int reprogram_counter(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -457,7 +506,8 @@ static int reprogram_counter(struct kvm_pmc *pmc)
emulate_overflow = pmc_pause_counter(pmc);
- if (!pmc_event_is_allowed(pmc))
+ if (!pmc_is_globally_enabled(pmc) || !pmc_is_locally_enabled(pmc) ||
+ !pmc_is_event_allowed(pmc))
return 0;
if (emulate_overflow)
@@ -492,6 +542,47 @@ static int reprogram_counter(struct kvm_pmc *pmc)
eventsel & ARCH_PERFMON_EVENTSEL_INT);
}
+static bool pmc_is_event_match(struct kvm_pmc *pmc, u64 eventsel)
+{
+ /*
+ * Ignore checks for edge detect (all events currently emulated by KVM
+ * are always rising edges), pin control (unsupported by modern CPUs),
+ * and counter mask and its invert flag (KVM doesn't emulate multiple
+ * events in a single clock cycle).
+ *
+ * Note, the uppermost nibble of AMD's mask overlaps Intel's IN_TX (bit
+ * 32) and IN_TXCP (bit 33), as well as two reserved bits (bits 35:34).
+ * Checking the "in HLE/RTM transaction" flags is correct as the vCPU
+ * can't be in a transaction if KVM is emulating an instruction.
+ *
+ * Checking the reserved bits might be wrong if they are defined in the
+ * future, but so could ignoring them, so do the simple thing for now.
+ */
+ return !((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB);
+}
+
+void kvm_pmu_recalc_pmc_emulation(struct kvm_pmu *pmu, struct kvm_pmc *pmc)
+{
+ bitmap_clear(pmu->pmc_counting_instructions, pmc->idx, 1);
+ bitmap_clear(pmu->pmc_counting_branches, pmc->idx, 1);
+
+ /*
+ * Do NOT consult the PMU event filters, as the filters must be checked
+ * at the time of emulation to ensure KVM uses fresh information, e.g.
+ * omitting a PMC from a bitmap could result in a missed event if the
+ * filter is changed to allow counting the event.
+ */
+ if (!pmc_is_locally_enabled(pmc))
+ return;
+
+ if (pmc_is_event_match(pmc, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED))
+ bitmap_set(pmu->pmc_counting_instructions, pmc->idx, 1);
+
+ if (pmc_is_event_match(pmc, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED))
+ bitmap_set(pmu->pmc_counting_branches, pmc->idx, 1);
+}
+EXPORT_SYMBOL_GPL(kvm_pmu_recalc_pmc_emulation);
+
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
@@ -527,6 +618,9 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
*/
if (unlikely(pmu->need_cleanup))
kvm_pmu_cleanup(vcpu);
+
+ kvm_for_each_pmc(pmu, pmc, bit, bitmap)
+ kvm_pmu_recalc_pmc_emulation(pmu, pmc);
}
int kvm_pmu_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx)
@@ -650,6 +744,7 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = pmu->global_ctrl;
break;
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET:
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
msr_info->data = 0;
break;
@@ -711,6 +806,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!msr_info->host_initiated)
pmu->global_status &= ~data;
break;
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET:
+ if (!msr_info->host_initiated)
+ pmu->global_status |= data & ~pmu->global_status_rsvd;
+ break;
default:
kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
return kvm_pmu_call(set_msr)(vcpu, msr_info);
@@ -789,6 +888,10 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
*/
if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters)
pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0);
+
+ bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters);
+ bitmap_set(pmu->all_valid_pmc_idx, KVM_FIXED_PMC_BASE_IDX,
+ pmu->nr_arch_fixed_counters);
}
void kvm_pmu_init(struct kvm_vcpu *vcpu)
@@ -813,7 +916,7 @@ void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
pmu->pmc_in_use, X86_PMC_IDX_MAX);
kvm_for_each_pmc(pmu, pmc, i, bitmask) {
- if (pmc->perf_event && !pmc_speculative_in_use(pmc))
+ if (pmc->perf_event && !pmc_is_locally_enabled(pmc))
pmc_stop_counter(pmc);
}
@@ -860,44 +963,46 @@ static inline bool cpl_is_matched(struct kvm_pmc *pmc)
select_user;
}
-void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel)
+static void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu,
+ const unsigned long *event_pmcs)
{
DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;
- int i;
+ int i, idx;
BUILD_BUG_ON(sizeof(pmu->global_ctrl) * BITS_PER_BYTE != X86_PMC_IDX_MAX);
+ if (bitmap_empty(event_pmcs, X86_PMC_IDX_MAX))
+ return;
+
if (!kvm_pmu_has_perf_global_ctrl(pmu))
- bitmap_copy(bitmap, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
- else if (!bitmap_and(bitmap, pmu->all_valid_pmc_idx,
+ bitmap_copy(bitmap, event_pmcs, X86_PMC_IDX_MAX);
+ else if (!bitmap_and(bitmap, event_pmcs,
(unsigned long *)&pmu->global_ctrl, X86_PMC_IDX_MAX))
return;
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_for_each_pmc(pmu, pmc, i, bitmap) {
- /*
- * Ignore checks for edge detect (all events currently emulated
- * but KVM are always rising edges), pin control (unsupported
- * by modern CPUs), and counter mask and its invert flag (KVM
- * doesn't emulate multiple events in a single clock cycle).
- *
- * Note, the uppermost nibble of AMD's mask overlaps Intel's
- * IN_TX (bit 32) and IN_TXCP (bit 33), as well as two reserved
- * bits (bits 35:34). Checking the "in HLE/RTM transaction"
- * flags is correct as the vCPU can't be in a transaction if
- * KVM is emulating an instruction. Checking the reserved bits
- * might be wrong if they are defined in the future, but so
- * could ignoring them, so do the simple thing for now.
- */
- if (((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB) ||
- !pmc_event_is_allowed(pmc) || !cpl_is_matched(pmc))
+ if (!pmc_is_event_allowed(pmc) || !cpl_is_matched(pmc))
continue;
kvm_pmu_incr_counter(pmc);
}
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+}
+
+void kvm_pmu_instruction_retired(struct kvm_vcpu *vcpu)
+{
+ kvm_pmu_trigger_event(vcpu, vcpu_to_pmu(vcpu)->pmc_counting_instructions);
+}
+EXPORT_SYMBOL_GPL(kvm_pmu_instruction_retired);
+
+void kvm_pmu_branch_retired(struct kvm_vcpu *vcpu)
+{
+ kvm_pmu_trigger_event(vcpu, vcpu_to_pmu(vcpu)->pmc_counting_branches);
}
-EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
+EXPORT_SYMBOL_GPL(kvm_pmu_branch_retired);
static bool is_masked_filter_valid(const struct kvm_x86_pmu_event_filter *filter)
{
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index ad89d0bd6005..08ae644db00e 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -23,11 +23,6 @@
#define KVM_FIXED_PMC_BASE_IDX INTEL_PMC_IDX_FIXED
-struct kvm_pmu_emulated_event_selectors {
- u64 INSTRUCTIONS_RETIRED;
- u64 BRANCH_INSTRUCTIONS_RETIRED;
-};
-
struct kvm_pmu_ops {
struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
unsigned int idx, u64 *mask);
@@ -165,7 +160,7 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
return NULL;
}
-static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
+static inline bool pmc_is_locally_enabled(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -178,57 +173,15 @@ static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
}
extern struct x86_pmu_capability kvm_pmu_cap;
-extern struct kvm_pmu_emulated_event_selectors kvm_pmu_eventsel;
-static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
-{
- bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL;
- int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS;
+void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops);
- /*
- * Hybrid PMUs don't play nice with virtualization without careful
- * configuration by userspace, and KVM's APIs for reporting supported
- * vPMU features do not account for hybrid PMUs. Disable vPMU support
- * for hybrid PMUs until KVM gains a way to let userspace opt-in.
- */
- if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
- enable_pmu = false;
-
- if (enable_pmu) {
- perf_get_x86_pmu_capability(&kvm_pmu_cap);
-
- /*
- * WARN if perf did NOT disable hardware PMU if the number of
- * architecturally required GP counters aren't present, i.e. if
- * there are a non-zero number of counters, but fewer than what
- * is architecturally required.
- */
- if (!kvm_pmu_cap.num_counters_gp ||
- WARN_ON_ONCE(kvm_pmu_cap.num_counters_gp < min_nr_gp_ctrs))
- enable_pmu = false;
- else if (is_intel && !kvm_pmu_cap.version)
- enable_pmu = false;
- }
-
- if (!enable_pmu) {
- memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap));
- return;
- }
-
- kvm_pmu_cap.version = min(kvm_pmu_cap.version, 2);
- kvm_pmu_cap.num_counters_gp = min(kvm_pmu_cap.num_counters_gp,
- pmu_ops->MAX_NR_GP_COUNTERS);
- kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
- KVM_MAX_NR_FIXED_COUNTERS);
-
- kvm_pmu_eventsel.INSTRUCTIONS_RETIRED =
- perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS);
- kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED =
- perf_get_hw_event_config(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
-}
+void kvm_pmu_recalc_pmc_emulation(struct kvm_pmu *pmu, struct kvm_pmc *pmc);
static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc)
{
+ kvm_pmu_recalc_pmc_emulation(pmc_to_pmu(pmc), pmc);
+
set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
}
@@ -272,7 +225,8 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu);
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu);
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
-void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel);
+void kvm_pmu_instruction_retired(struct kvm_vcpu *vcpu);
+void kvm_pmu_branch_retired(struct kvm_vcpu *vcpu);
bool is_vmware_backdoor_pmc(u32 pmc_idx);
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index c53b92379e6e..743ab25ba787 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -25,6 +25,9 @@
#define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1)
#define KVM_X86_FEATURE_SGX_EDECCSSA KVM_X86_FEATURE(CPUID_12_EAX, 11)
+/* Intel-defined sub-features, CPUID level 0x00000007:1 (ECX) */
+#define KVM_X86_FEATURE_MSR_IMM KVM_X86_FEATURE(CPUID_7_1_ECX, 5)
+
/* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */
#define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
#define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5)
@@ -87,6 +90,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_7_2_EDX] = { 7, 2, CPUID_EDX},
[CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX},
[CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX},
+ [CPUID_7_1_ECX] = { 7, 1, CPUID_ECX},
};
/*
@@ -128,6 +132,7 @@ static __always_inline u32 __feature_translate(int x86_feature)
KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO);
KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO);
+ KVM_X86_TRANSLATE_FEATURE(MSR_IMM);
default:
return x86_feature;
}
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 9864c057187d..5dd8a1646800 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -529,7 +529,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
vcpu->arch.smbase = smstate->smbase;
- if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
+ if (__kvm_emulate_msr_write(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
return X86EMUL_UNHANDLEABLE;
rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR);
@@ -620,7 +620,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
/* And finally go back to 32-bit mode. */
efer = 0;
- kvm_set_msr(vcpu, MSR_EFER, efer);
+ __kvm_emulate_msr_write(vcpu, MSR_EFER, efer);
}
#endif
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 288f7f2a46f2..bc062285fbf5 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -41,7 +41,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
unsigned int idx;
- if (!vcpu->kvm->arch.enable_pmu)
+ if (!pmu->version)
return NULL;
switch (msr) {
@@ -113,6 +113,7 @@ static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET:
return pmu->version > 1;
default:
if (msr > MSR_F15H_PERF_CTR5 &&
@@ -199,17 +200,16 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
kvm_pmu_cap.num_counters_gp);
if (pmu->version > 1) {
- pmu->global_ctrl_rsvd = ~((1ull << pmu->nr_arch_gp_counters) - 1);
+ pmu->global_ctrl_rsvd = ~(BIT_ULL(pmu->nr_arch_gp_counters) - 1);
pmu->global_status_rsvd = pmu->global_ctrl_rsvd;
}
- pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
+ pmu->counter_bitmask[KVM_PMC_GP] = BIT_ULL(48) - 1;
pmu->reserved_bits = 0xfffffff000280000ull;
pmu->raw_event_mask = AMD64_RAW_EVENT_MASK;
/* not applicable to AMD; but clean them to prevent any fall out */
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
pmu->nr_arch_fixed_counters = 0;
- bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters);
}
static void amd_pmu_init(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b8504f1fe4ef..81db26b2a1bd 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1008,7 +1008,7 @@ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu)
}
}
-static void svm_recalc_intercepts_after_set_cpuid(struct kvm_vcpu *vcpu)
+static void svm_recalc_intercepts(struct kvm_vcpu *vcpu)
{
svm_recalc_instruction_intercepts(vcpu);
svm_recalc_msr_intercepts(vcpu);
@@ -1156,7 +1156,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu, bool init_event)
svm_hv_init_vmcb(vmcb);
- svm_recalc_intercepts_after_set_cpuid(vcpu);
+ kvm_make_request(KVM_REQ_RECALC_INTERCEPTS, vcpu);
vmcb_mark_all_dirty(vmcb);
@@ -4093,17 +4093,27 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb_control_area *control = &svm->vmcb->control;
+
+ /*
+ * Next RIP must be provided as IRQs are disabled, and accessing guest
+ * memory to decode the instruction might fault, i.e. might sleep.
+ */
+ if (!nrips || !control->next_rip)
+ return EXIT_FASTPATH_NONE;
if (is_guest_mode(vcpu))
return EXIT_FASTPATH_NONE;
- switch (svm->vmcb->control.exit_code) {
+ switch (control->exit_code) {
case SVM_EXIT_MSR:
- if (!svm->vmcb->control.exit_info_1)
+ if (!control->exit_info_1)
break;
- return handle_fastpath_set_msr_irqoff(vcpu);
+ return handle_fastpath_wrmsr(vcpu);
case SVM_EXIT_HLT:
return handle_fastpath_hlt(vcpu);
+ case SVM_EXIT_INVD:
+ return handle_fastpath_invd(vcpu);
default:
break;
}
@@ -4380,8 +4390,6 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
if (sev_guest(vcpu->kvm))
sev_vcpu_after_set_cpuid(svm);
-
- svm_recalc_intercepts_after_set_cpuid(vcpu);
}
static bool svm_has_wbinvd_exit(void)
@@ -5083,7 +5091,7 @@ struct kvm_x86_ops svm_x86_ops __initdata = {
.apic_init_signal_blocked = svm_apic_init_signal_blocked,
- .recalc_msr_intercepts = svm_recalc_msr_intercepts,
+ .recalc_intercepts = svm_recalc_intercepts,
.complete_emulated_msr = svm_complete_emulated_msr,
.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
@@ -5213,8 +5221,12 @@ static __init void svm_set_cpu_caps(void)
/* CPUID 0x8000001F (SME/SEV features) */
sev_set_cpu_caps();
- /* Don't advertise Bus Lock Detect to guest if SVM support is absent */
+ /*
+ * Clear capabilities that are automatically configured by common code,
+ * but that require explicit SVM support (that isn't yet implemented).
+ */
kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
+ kvm_cpu_cap_clear(X86_FEATURE_MSR_IMM);
}
static __init int svm_hardware_setup(void)
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 5316c27f6099..f614428dbeda 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -20,9 +20,6 @@ extern int __read_mostly pt_mode;
#define PT_MODE_SYSTEM 0
#define PT_MODE_HOST_GUEST 1
-#define PMU_CAP_FW_WRITES (1ULL << 13)
-#define PMU_CAP_LBR_FMT 0x3f
-
struct nested_vmx_msrs {
/*
* We only store the "true" versions of the VMX capability MSRs. We
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index bb5f182f6788..0eb2773b2ae2 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -188,18 +188,18 @@ static int vt_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return vmx_get_msr(vcpu, msr_info);
}
-static void vt_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
+static void vt_recalc_intercepts(struct kvm_vcpu *vcpu)
{
/*
- * TDX doesn't allow VMM to configure interception of MSR accesses.
- * TDX guest requests MSR accesses by calling TDVMCALL. The MSR
- * filters will be applied when handling the TDVMCALL for RDMSR/WRMSR
- * if the userspace has set any.
+ * TDX doesn't allow VMM to configure interception of instructions or
+ * MSR accesses. TDX guest requests MSR accesses by calling TDVMCALL.
+ * The MSR filters will be applied when handling the TDVMCALL for
+ * RDMSR/WRMSR if the userspace has set any.
*/
if (is_td_vcpu(vcpu))
return;
- vmx_recalc_msr_intercepts(vcpu);
+ vmx_recalc_intercepts(vcpu);
}
static int vt_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
@@ -996,7 +996,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.apic_init_signal_blocked = vt_op(apic_init_signal_blocked),
.migrate_timers = vmx_migrate_timers,
- .recalc_msr_intercepts = vt_op(recalc_msr_intercepts),
+ .recalc_intercepts = vt_op(recalc_intercepts),
.complete_emulated_msr = vt_op(complete_emulated_msr),
.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b8ea1969113d..2156c9a854f4 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -997,7 +997,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
__func__, i, e.index, e.reserved);
goto fail;
}
- if (kvm_set_msr_with_filter(vcpu, e.index, e.value)) {
+ if (kvm_emulate_msr_write(vcpu, e.index, e.value)) {
pr_debug_ratelimited(
"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
__func__, i, e.index, e.value);
@@ -1033,7 +1033,7 @@ static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
}
}
- if (kvm_get_msr_with_filter(vcpu, msr_index, data)) {
+ if (kvm_emulate_msr_read(vcpu, msr_index, data)) {
pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__,
msr_index);
return false;
@@ -2770,8 +2770,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) &&
- WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
- vmcs12->guest_ia32_perf_global_ctrl))) {
+ WARN_ON_ONCE(__kvm_emulate_msr_write(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+ vmcs12->guest_ia32_perf_global_ctrl))) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
return -EINVAL;
}
@@ -3690,7 +3690,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
return 1;
}
- kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED);
+ kvm_pmu_branch_retired(vcpu);
if (CC(evmptrld_status == EVMPTRLD_VMFAIL))
return nested_vmx_failInvalid(vcpu);
@@ -4758,8 +4758,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
}
if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)))
- WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
- vmcs12->host_ia32_perf_global_ctrl));
+ WARN_ON_ONCE(__kvm_emulate_msr_write(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+ vmcs12->host_ia32_perf_global_ctrl));
/* Set L1 segment info according to Intel SDM
27.5.2 Loading Host Segment and Descriptor-Table Registers */
@@ -4937,7 +4937,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
goto vmabort;
}
- if (kvm_set_msr_with_filter(vcpu, h.index, h.value)) {
+ if (kvm_emulate_msr_write(vcpu, h.index, h.value)) {
pr_debug_ratelimited(
"%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
__func__, j, h.index, h.value);
@@ -6216,19 +6216,26 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12,
union vmx_exit_reason exit_reason)
{
- u32 msr_index = kvm_rcx_read(vcpu);
+ u32 msr_index;
gpa_t bitmap;
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
return true;
+ if (exit_reason.basic == EXIT_REASON_MSR_READ_IMM ||
+ exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM)
+ msr_index = vmx_get_exit_qual(vcpu);
+ else
+ msr_index = kvm_rcx_read(vcpu);
+
/*
* The MSR_BITMAP page is divided into four 1024-byte bitmaps,
* for the four combinations of read/write and low/high MSR numbers.
* First we need to figure out which of the four to use:
*/
bitmap = vmcs12->msr_bitmap;
- if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
+ if (exit_reason.basic == EXIT_REASON_MSR_WRITE ||
+ exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM)
bitmap += 2048;
if (msr_index >= 0xc0000000) {
msr_index -= 0xc0000000;
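The lookup the hunk above describes can be sketched outside the kernel as
follows; msr_bitmap_wants_exit is a hypothetical illustrative helper, not a
KVM function, but it mirrors the documented layout: read-low, read-high,
write-low and write-high bitmaps of 1024 bytes each, one bit per MSR, with
MSRs outside the two architectural ranges always causing an exit.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/*
 * bitmap[] is the 4 KiB MSR-bitmap page; 'write' selects WRMSR vs. RDMSR.
 * msr_index comes from RCX for the legacy exits, or from the exit
 * qualification for the immediate-form exits handled above.
 */
bool msr_bitmap_wants_exit(const uint8_t bitmap[4096],
			   uint32_t msr_index, bool write)
{
	size_t base = write ? 2048 : 0;		/* write bitmaps start at +2048 */

	if (msr_index >= 0xc0000000) {		/* high MSRs use the second 1024 bytes */
		msr_index -= 0xc0000000;
		base += 1024;
	}
	if (msr_index >= 8 * 1024)		/* outside both ranges: always exit */
		return true;

	return bitmap[base + msr_index / 8] & (1u << (msr_index % 8));
}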
@@ -6527,6 +6534,8 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
case EXIT_REASON_MSR_READ:
case EXIT_REASON_MSR_WRITE:
+ case EXIT_REASON_MSR_READ_IMM:
+ case EXIT_REASON_MSR_WRITE_IMM:
return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
case EXIT_REASON_INVALID_STATE:
return true;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 0b173602821b..de1d9785c01f 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -138,7 +138,7 @@ static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu)
static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu)
{
- return (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_FW_WRITES) != 0;
+ return (vcpu_get_perf_capabilities(vcpu) & PERF_CAP_FW_WRITES) != 0;
}
static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
@@ -478,8 +478,8 @@ static __always_inline u64 intel_get_fixed_pmc_eventsel(unsigned int index)
};
u64 eventsel;
- BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_perf_ids) != KVM_MAX_NR_INTEL_FIXED_COUTNERS);
- BUILD_BUG_ON(index >= KVM_MAX_NR_INTEL_FIXED_COUTNERS);
+ BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_perf_ids) != KVM_MAX_NR_INTEL_FIXED_COUNTERS);
+ BUILD_BUG_ON(index >= KVM_MAX_NR_INTEL_FIXED_COUNTERS);
/*
* Yell if perf reports support for a fixed counter but perf doesn't
@@ -536,29 +536,44 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
kvm_pmu_cap.num_counters_gp);
eax.split.bit_width = min_t(int, eax.split.bit_width,
kvm_pmu_cap.bit_width_gp);
- pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
+ pmu->counter_bitmask[KVM_PMC_GP] = BIT_ULL(eax.split.bit_width) - 1;
eax.split.mask_length = min_t(int, eax.split.mask_length,
kvm_pmu_cap.events_mask_len);
- pmu->available_event_types = ~entry->ebx &
- ((1ull << eax.split.mask_length) - 1);
-
- if (pmu->version == 1) {
- pmu->nr_arch_fixed_counters = 0;
- } else {
- pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed,
- kvm_pmu_cap.num_counters_fixed);
- edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed,
- kvm_pmu_cap.bit_width_fixed);
- pmu->counter_bitmask[KVM_PMC_FIXED] =
- ((u64)1 << edx.split.bit_width_fixed) - 1;
+ pmu->available_event_types = ~entry->ebx & (BIT_ULL(eax.split.mask_length) - 1);
+
+ entry = kvm_find_cpuid_entry_index(vcpu, 7, 0);
+ if (entry &&
+ (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
+ (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) {
+ pmu->reserved_bits ^= HSW_IN_TX;
+ pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
}
+ perf_capabilities = vcpu_get_perf_capabilities(vcpu);
+ if (intel_pmu_lbr_is_compatible(vcpu) &&
+ (perf_capabilities & PERF_CAP_LBR_FMT))
+ memcpy(&lbr_desc->records, &vmx_lbr_caps, sizeof(vmx_lbr_caps));
+ else
+ lbr_desc->records.nr = 0;
+
+ if (lbr_desc->records.nr)
+ bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1);
+
+ if (pmu->version == 1)
+ return;
+
+ pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed,
+ kvm_pmu_cap.num_counters_fixed);
+ edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed,
+ kvm_pmu_cap.bit_width_fixed);
+ pmu->counter_bitmask[KVM_PMC_FIXED] = BIT_ULL(edx.split.bit_width_fixed) - 1;
+
intel_pmu_enable_fixed_counter_bits(pmu, INTEL_FIXED_0_KERNEL |
INTEL_FIXED_0_USER |
INTEL_FIXED_0_ENABLE_PMI);
- counter_rsvd = ~(((1ull << pmu->nr_arch_gp_counters) - 1) |
- (((1ull << pmu->nr_arch_fixed_counters) - 1) << KVM_FIXED_PMC_BASE_IDX));
+ counter_rsvd = ~((BIT_ULL(pmu->nr_arch_gp_counters) - 1) |
+ ((BIT_ULL(pmu->nr_arch_fixed_counters) - 1) << KVM_FIXED_PMC_BASE_IDX));
pmu->global_ctrl_rsvd = counter_rsvd;
/*
@@ -573,29 +588,6 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->global_status_rsvd &=
~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;
- entry = kvm_find_cpuid_entry_index(vcpu, 7, 0);
- if (entry &&
- (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
- (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) {
- pmu->reserved_bits ^= HSW_IN_TX;
- pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
- }
-
- bitmap_set(pmu->all_valid_pmc_idx,
- 0, pmu->nr_arch_gp_counters);
- bitmap_set(pmu->all_valid_pmc_idx,
- INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);
-
- perf_capabilities = vcpu_get_perf_capabilities(vcpu);
- if (intel_pmu_lbr_is_compatible(vcpu) &&
- (perf_capabilities & PMU_CAP_LBR_FMT))
- memcpy(&lbr_desc->records, &vmx_lbr_caps, sizeof(vmx_lbr_caps));
- else
- lbr_desc->records.nr = 0;
-
- if (lbr_desc->records.nr)
- bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1);
-
if (perf_capabilities & PERF_CAP_PEBS_FORMAT) {
if (perf_capabilities & PERF_CAP_PEBS_BASELINE) {
pmu->pebs_enable_rsvd = counter_rsvd;
@@ -603,8 +595,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->pebs_data_cfg_rsvd = ~0xff00000full;
intel_pmu_enable_fixed_counter_bits(pmu, ICL_FIXED_0_ADAPTIVE);
} else {
- pmu->pebs_enable_rsvd =
- ~((1ull << pmu->nr_arch_gp_counters) - 1);
+ pmu->pebs_enable_rsvd = ~(BIT_ULL(pmu->nr_arch_gp_counters) - 1);
}
}
}
@@ -625,7 +616,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
pmu->gp_counters[i].current_config = 0;
}
- for (i = 0; i < KVM_MAX_NR_INTEL_FIXED_COUTNERS; i++) {
+ for (i = 0; i < KVM_MAX_NR_INTEL_FIXED_COUNTERS; i++) {
pmu->fixed_counters[i].type = KVM_PMC_FIXED;
pmu->fixed_counters[i].vcpu = vcpu;
pmu->fixed_counters[i].idx = i + KVM_FIXED_PMC_BASE_IDX;
@@ -762,7 +753,7 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
int bit, hw_idx;
kvm_for_each_pmc(pmu, pmc, bit, (unsigned long *)&pmu->global_ctrl) {
- if (!pmc_speculative_in_use(pmc) ||
+ if (!pmc_is_locally_enabled(pmc) ||
!pmc_is_globally_enabled(pmc) || !pmc->perf_event)
continue;
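
For reference, the BIT_ULL()-based mask math introduced above boils down to the following standalone sketch; the counter widths/counts and the fixed-counter base index of 32 are assumed example values, not taken from a real CPUID dump:

/*
 * Toy calculation of a counter value mask and the reserved-bit mask covering
 * GP plus fixed counters; BIT_ULL() is open-coded for a userspace build.
 */
#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(n)		(1ULL << (n))
#define FIXED_PMC_BASE_IDX	32	/* assumed value, for illustration only */

int main(void)
{
	unsigned int bit_width = 48, nr_gp = 8, nr_fixed = 3;
	uint64_t counter_mask = BIT_ULL(bit_width) - 1;
	uint64_t counter_rsvd = ~((BIT_ULL(nr_gp) - 1) |
				  ((BIT_ULL(nr_fixed) - 1) << FIXED_PMC_BASE_IDX));

	printf("counter value mask:        0x%llx\n", (unsigned long long)counter_mask);
	printf("global_ctrl reserved bits: 0x%llx\n", (unsigned long long)counter_rsvd);
	return 0;
}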
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index be3516eaa513..097304bf1e1d 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -629,6 +629,11 @@ int tdx_vm_init(struct kvm *kvm)
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
kvm->arch.has_protected_state = true;
+ /*
+ * TDX Module doesn't allow the hypervisor to modify the EOI-bitmap,
+ * i.e. all EOIs are accelerated and never trigger exits.
+ */
+ kvm->arch.has_protected_eoi = true;
kvm->arch.has_private_mem = true;
kvm->arch.disabled_quirks |= KVM_X86_QUIRK_IGNORE_GUEST_PAT;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d5ddf33be8c0..089d38c047a7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2140,7 +2140,7 @@ u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated)
(host_initiated || guest_cpu_cap_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)))
debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
- if ((kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT) &&
+ if ((kvm_caps.supported_perf_cap & PERF_CAP_LBR_FMT) &&
(host_initiated || intel_pmu_lbr_is_enabled(vcpu)))
debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
@@ -2425,9 +2425,9 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmx->pt_desc.guest.addr_a[index / 2] = data;
break;
case MSR_IA32_PERF_CAPABILITIES:
- if (data & PMU_CAP_LBR_FMT) {
- if ((data & PMU_CAP_LBR_FMT) !=
- (kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT))
+ if (data & PERF_CAP_LBR_FMT) {
+ if ((data & PERF_CAP_LBR_FMT) !=
+ (kvm_caps.supported_perf_cap & PERF_CAP_LBR_FMT))
return 1;
if (!cpuid_model_is_consistent(vcpu))
return 1;
@@ -4081,7 +4081,7 @@ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
}
}
-void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
+static void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
{
if (!cpu_has_vmx_msr_bitmap())
return;
@@ -4134,6 +4134,11 @@ void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
*/
}
+void vmx_recalc_intercepts(struct kvm_vcpu *vcpu)
+{
+ vmx_recalc_msr_intercepts(vcpu);
+}
+
static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
int vector)
{
@@ -4317,7 +4322,7 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
return pin_based_exec_ctrl;
}
-static u32 vmx_vmentry_ctrl(void)
+static u32 vmx_get_initial_vmentry_ctrl(void)
{
u32 vmentry_ctrl = vmcs_config.vmentry_ctrl;
@@ -4334,7 +4339,7 @@ static u32 vmx_vmentry_ctrl(void)
return vmentry_ctrl;
}
-static u32 vmx_vmexit_ctrl(void)
+static u32 vmx_get_initial_vmexit_ctrl(void)
{
u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
@@ -4364,19 +4369,13 @@ void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
- if (kvm_vcpu_apicv_active(vcpu)) {
- secondary_exec_controls_setbit(vmx,
- SECONDARY_EXEC_APIC_REGISTER_VIRT |
- SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
- if (enable_ipiv)
- tertiary_exec_controls_setbit(vmx, TERTIARY_EXEC_IPI_VIRT);
- } else {
- secondary_exec_controls_clearbit(vmx,
- SECONDARY_EXEC_APIC_REGISTER_VIRT |
- SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
- if (enable_ipiv)
- tertiary_exec_controls_clearbit(vmx, TERTIARY_EXEC_IPI_VIRT);
- }
+ secondary_exec_controls_changebit(vmx,
+ SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY,
+ kvm_vcpu_apicv_active(vcpu));
+ if (enable_ipiv)
+ tertiary_exec_controls_changebit(vmx, TERTIARY_EXEC_IPI_VIRT,
+ kvm_vcpu_apicv_active(vcpu));
vmx_update_msr_bitmap_x2apic(vcpu);
}
@@ -4699,10 +4698,10 @@ static void init_vmcs(struct vcpu_vmx *vmx)
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
- vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
+ vm_exit_controls_set(vmx, vmx_get_initial_vmexit_ctrl());
/* 22.2.1, 20.8.1 */
- vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
+ vm_entry_controls_set(vmx, vmx_get_initial_vmentry_ctrl());
vmx->vcpu.arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits);
@@ -6023,6 +6022,23 @@ static int handle_notify(struct kvm_vcpu *vcpu)
return 1;
}
+static int vmx_get_msr_imm_reg(struct kvm_vcpu *vcpu)
+{
+ return vmx_get_instr_info_reg(vmcs_read32(VMX_INSTRUCTION_INFO));
+}
+
+static int handle_rdmsr_imm(struct kvm_vcpu *vcpu)
+{
+ return kvm_emulate_rdmsr_imm(vcpu, vmx_get_exit_qual(vcpu),
+ vmx_get_msr_imm_reg(vcpu));
+}
+
+static int handle_wrmsr_imm(struct kvm_vcpu *vcpu)
+{
+ return kvm_emulate_wrmsr_imm(vcpu, vmx_get_exit_qual(vcpu),
+ vmx_get_msr_imm_reg(vcpu));
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -6081,6 +6097,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_ENCLS] = handle_encls,
[EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit,
[EXIT_REASON_NOTIFY] = handle_notify,
+ [EXIT_REASON_MSR_READ_IMM] = handle_rdmsr_imm,
+ [EXIT_REASON_MSR_WRITE_IMM] = handle_wrmsr_imm,
};
static const int kvm_vmx_max_exit_handlers =
@@ -6515,6 +6533,8 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
#ifdef CONFIG_MITIGATION_RETPOLINE
if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
return kvm_emulate_wrmsr(vcpu);
+ else if (exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM)
+ return handle_wrmsr_imm(vcpu);
else if (exit_reason.basic == EXIT_REASON_PREEMPTION_TIMER)
return handle_preemption_timer(vcpu);
else if (exit_reason.basic == EXIT_REASON_INTERRUPT_WINDOW)
@@ -7190,11 +7210,16 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu,
switch (vmx_get_exit_reason(vcpu).basic) {
case EXIT_REASON_MSR_WRITE:
- return handle_fastpath_set_msr_irqoff(vcpu);
+ return handle_fastpath_wrmsr(vcpu);
+ case EXIT_REASON_MSR_WRITE_IMM:
+ return handle_fastpath_wrmsr_imm(vcpu, vmx_get_exit_qual(vcpu),
+ vmx_get_msr_imm_reg(vcpu));
case EXIT_REASON_PREEMPTION_TIMER:
return handle_fastpath_preemption_timer(vcpu, force_immediate_exit);
case EXIT_REASON_HLT:
return handle_fastpath_hlt(vcpu);
+ case EXIT_REASON_INVD:
+ return handle_fastpath_invd(vcpu);
default:
return EXIT_FASTPATH_NONE;
}
@@ -7795,16 +7820,13 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vmx->msr_ia32_feature_control_valid_bits &=
~FEAT_CTL_SGX_LC_ENABLED;
- /* Recalc MSR interception to account for feature changes. */
- vmx_recalc_msr_intercepts(vcpu);
-
/* Refresh #PF interception to account for MAXPHYADDR changes. */
vmx_update_exception_bitmap(vcpu);
}
static __init u64 vmx_get_perf_capabilities(void)
{
- u64 perf_cap = PMU_CAP_FW_WRITES;
+ u64 perf_cap = PERF_CAP_FW_WRITES;
u64 host_perf_cap = 0;
if (!enable_pmu)
@@ -7824,7 +7846,7 @@ static __init u64 vmx_get_perf_capabilities(void)
if (!vmx_lbr_caps.has_callstack)
memset(&vmx_lbr_caps, 0, sizeof(vmx_lbr_caps));
else if (vmx_lbr_caps.nr)
- perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+ perf_cap |= host_perf_cap & PERF_CAP_LBR_FMT;
}
if (vmx_pebs_supported()) {
@@ -8353,8 +8375,6 @@ __init int vmx_hardware_setup(void)
vmx_setup_user_return_msrs();
- if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
- return -EIO;
if (boot_cpu_has(X86_FEATURE_NX))
kvm_enable_efer_bits(EFER_NX);
@@ -8580,11 +8600,18 @@ int __init vmx_init(void)
return -EOPNOTSUPP;
/*
- * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
- * to unwind if a later step fails.
+ * Note, VMCS and eVMCS configuration only touch VMX knobs/variables,
+ * i.e. there's nothing to unwind if a later step fails.
*/
hv_init_evmcs();
+ /*
+ * Parse the VMCS config and VMX capabilities before anything else, so
+ * that the information is available to all setup flows.
+ */
+ if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
+ return -EIO;
+
r = kvm_x86_vendor_init(&vt_init_ops);
if (r)
return r;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index d3389baf3ab3..23d6e89b96f2 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -608,6 +608,14 @@ static __always_inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u##b
{ \
BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname))); \
lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \
+} \
+static __always_inline void lname##_controls_changebit(struct vcpu_vmx *vmx, u##bits val, \
+ bool set) \
+{ \
+ if (set) \
+ lname##_controls_setbit(vmx, val); \
+ else \
+ lname##_controls_clearbit(vmx, val); \
}
BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS, 32)
BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS, 32)
@@ -706,6 +714,11 @@ static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu)
void dump_vmcs(struct kvm_vcpu *vcpu);
+static inline int vmx_get_instr_info_reg(u32 vmx_instr_info)
+{
+ return (vmx_instr_info >> 3) & 0xf;
+}
+
static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info)
{
return (vmx_instr_info >> 28) & 0xf;
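
A standalone sketch of the two helpers added to vmx.h above: a changebit-style wrapper that sets or clears a control mask based on a bool, and extraction of the register operand from bits 6:3 of the VMX instruction-information field. Plain C for illustration only; it does not touch real VMCS control shadows:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Set or clear @mask in @ctrl depending on @set, mirroring *_controls_changebit(). */
static uint32_t controls_changebit(uint32_t ctrl, uint32_t mask, bool set)
{
	return set ? ctrl | mask : ctrl & ~mask;
}

/* Register operand of an instruction-information field lives in bits 6:3. */
static int instr_info_reg(uint32_t instr_info)
{
	return (instr_info >> 3) & 0xf;
}

int main(void)
{
	uint32_t secondary = 0;

	secondary = controls_changebit(secondary, 1u << 8, true);	/* enable a control */
	secondary = controls_changebit(secondary, 1u << 8, false);	/* disable it again */

	printf("controls after toggle: 0x%x\n", secondary);
	printf("reg for instr_info 0x58: %d\n", instr_info_reg(0x58));
	return 0;
}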
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 4c70f56c57c8..9697368d65b3 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -52,7 +52,7 @@ void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
int trig_mode, int vector);
void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu);
bool vmx_has_emulated_msr(struct kvm *kvm, u32 index);
-void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu);
+void vmx_recalc_intercepts(struct kvm_vcpu *vcpu);
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
int vmx_get_feature_msr(u32 msr, u64 *data);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 77bfee41e273..c34981d28e9c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -164,9 +164,6 @@ module_param(kvmclock_periodic_sync, bool, 0444);
static u32 __read_mostly tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, 0644);
-static bool __read_mostly vector_hashing = true;
-module_param(vector_hashing, bool, 0444);
-
bool __read_mostly enable_vmware_backdoor = false;
module_param(enable_vmware_backdoor, bool, 0444);
EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
@@ -367,6 +364,7 @@ static const u32 msrs_to_save_pmu[] = {
MSR_AMD64_PERF_CNTR_GLOBAL_CTL,
MSR_AMD64_PERF_CNTR_GLOBAL_STATUS,
MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
+ MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET,
};
static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) +
@@ -1579,10 +1577,10 @@ EXPORT_SYMBOL_GPL(kvm_get_dr);
int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
{
- u32 ecx = kvm_rcx_read(vcpu);
+ u32 pmc = kvm_rcx_read(vcpu);
u64 data;
- if (kvm_pmu_rdpmc(vcpu, ecx, &data)) {
+ if (kvm_pmu_rdpmc(vcpu, pmc, &data)) {
kvm_inject_gp(vcpu, 0);
return 1;
}
@@ -1905,8 +1903,8 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
*/
-int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
- bool host_initiated)
+static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
+ bool host_initiated)
{
struct msr_data msr;
int ret;
@@ -1932,6 +1930,16 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
return ret;
}
+int kvm_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data)
+{
+ return __kvm_set_msr(vcpu, index, data, true);
+}
+
+int kvm_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data)
+{
+ return __kvm_get_msr(vcpu, index, data, true);
+}
+
static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
u32 index, u64 *data, bool host_initiated)
{
@@ -1939,33 +1947,36 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
__kvm_get_msr);
}
-int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data)
+int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
- if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
- return KVM_MSR_RET_FILTERED;
return kvm_get_msr_ignored_check(vcpu, index, data, false);
}
-EXPORT_SYMBOL_GPL(kvm_get_msr_with_filter);
+EXPORT_SYMBOL_GPL(__kvm_emulate_msr_read);
-int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data)
+int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data)
{
- if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
- return KVM_MSR_RET_FILTERED;
return kvm_set_msr_ignored_check(vcpu, index, data, false);
}
-EXPORT_SYMBOL_GPL(kvm_set_msr_with_filter);
+EXPORT_SYMBOL_GPL(__kvm_emulate_msr_write);
-int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
+int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
- return kvm_get_msr_ignored_check(vcpu, index, data, false);
+ if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
+ return KVM_MSR_RET_FILTERED;
+
+ return __kvm_emulate_msr_read(vcpu, index, data);
}
-EXPORT_SYMBOL_GPL(kvm_get_msr);
+EXPORT_SYMBOL_GPL(kvm_emulate_msr_read);
-int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
+int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data)
{
- return kvm_set_msr_ignored_check(vcpu, index, data, false);
+ if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
+ return KVM_MSR_RET_FILTERED;
+
+ return __kvm_emulate_msr_write(vcpu, index, data);
}
-EXPORT_SYMBOL_GPL(kvm_set_msr);
+EXPORT_SYMBOL_GPL(kvm_emulate_msr_write);
+
static void complete_userspace_rdmsr(struct kvm_vcpu *vcpu)
{
@@ -1997,6 +2008,15 @@ static int complete_fast_rdmsr(struct kvm_vcpu *vcpu)
return complete_fast_msr_access(vcpu);
}
+static int complete_fast_rdmsr_imm(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu->run->msr.error)
+ kvm_register_write(vcpu, vcpu->arch.cui_rdmsr_imm_reg,
+ vcpu->run->msr.data);
+
+ return complete_fast_msr_access(vcpu);
+}
+
static u64 kvm_msr_reason(int r)
{
switch (r) {
@@ -2031,56 +2051,83 @@ static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index,
return 1;
}
-int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
+static int __kvm_emulate_rdmsr(struct kvm_vcpu *vcpu, u32 msr, int reg,
+ int (*complete_rdmsr)(struct kvm_vcpu *))
{
- u32 ecx = kvm_rcx_read(vcpu);
u64 data;
int r;
- r = kvm_get_msr_with_filter(vcpu, ecx, &data);
+ r = kvm_emulate_msr_read(vcpu, msr, &data);
if (!r) {
- trace_kvm_msr_read(ecx, data);
+ trace_kvm_msr_read(msr, data);
- kvm_rax_write(vcpu, data & -1u);
- kvm_rdx_write(vcpu, (data >> 32) & -1u);
+ if (reg < 0) {
+ kvm_rax_write(vcpu, data & -1u);
+ kvm_rdx_write(vcpu, (data >> 32) & -1u);
+ } else {
+ kvm_register_write(vcpu, reg, data);
+ }
} else {
/* MSR read failed? See if we should ask user space */
- if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_RDMSR, 0,
- complete_fast_rdmsr, r))
+ if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_RDMSR, 0,
+ complete_rdmsr, r))
return 0;
- trace_kvm_msr_read_ex(ecx);
+ trace_kvm_msr_read_ex(msr);
}
return kvm_x86_call(complete_emulated_msr)(vcpu, r);
}
+
+int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
+{
+ return __kvm_emulate_rdmsr(vcpu, kvm_rcx_read(vcpu), -1,
+ complete_fast_rdmsr);
+}
EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
-int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
+int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg)
{
- u32 ecx = kvm_rcx_read(vcpu);
- u64 data = kvm_read_edx_eax(vcpu);
- int r;
+ vcpu->arch.cui_rdmsr_imm_reg = reg;
+
+ return __kvm_emulate_rdmsr(vcpu, msr, reg, complete_fast_rdmsr_imm);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr_imm);
- r = kvm_set_msr_with_filter(vcpu, ecx, data);
+static int __kvm_emulate_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+ int r;
+ r = kvm_emulate_msr_write(vcpu, msr, data);
if (!r) {
- trace_kvm_msr_write(ecx, data);
+ trace_kvm_msr_write(msr, data);
} else {
/* MSR write failed? See if we should ask user space */
- if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_WRMSR, data,
+ if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_WRMSR, data,
complete_fast_msr_access, r))
return 0;
/* Signal all other negative errors to userspace */
if (r < 0)
return r;
- trace_kvm_msr_write_ex(ecx, data);
+ trace_kvm_msr_write_ex(msr, data);
}
return kvm_x86_call(complete_emulated_msr)(vcpu, r);
}
+
+int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
+{
+ return __kvm_emulate_wrmsr(vcpu, kvm_rcx_read(vcpu),
+ kvm_read_edx_eax(vcpu));
+}
EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
+int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg)
+{
+ return __kvm_emulate_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg));
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr_imm);
+
int kvm_emulate_as_nop(struct kvm_vcpu *vcpu)
{
return kvm_skip_emulated_instruction(vcpu);
@@ -2093,6 +2140,15 @@ int kvm_emulate_invd(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_emulate_invd);
+fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_emulate_invd(vcpu))
+ return EXIT_FASTPATH_EXIT_USERSPACE;
+
+ return EXIT_FASTPATH_REENTER_GUEST;
+}
+EXPORT_SYMBOL_GPL(handle_fastpath_invd);
+
int kvm_handle_invalid_op(struct kvm_vcpu *vcpu)
{
kvm_queue_exception(vcpu, UD_VECTOR);
@@ -2140,74 +2196,41 @@ static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending();
}
-/*
- * The fast path for frequent and performance sensitive wrmsr emulation,
- * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces
- * the latency of virtual IPI by avoiding the expensive bits of transitioning
- * from guest to host, e.g. reacquiring KVM's SRCU lock. In contrast to the
- * other cases which must be called after interrupts are enabled on the host.
- */
-static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
-{
- if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic))
- return 1;
-
- if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) &&
- ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
- ((data & APIC_MODE_MASK) == APIC_DM_FIXED) &&
- ((u32)(data >> 32) != X2APIC_BROADCAST))
- return kvm_x2apic_icr_write(vcpu->arch.apic, data);
-
- return 1;
-}
-
-static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data)
-{
- if (!kvm_can_use_hv_timer(vcpu))
- return 1;
-
- kvm_set_lapic_tscdeadline_msr(vcpu, data);
- return 0;
-}
-
-fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
+static fastpath_t __handle_fastpath_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
- u32 msr = kvm_rcx_read(vcpu);
- u64 data;
- fastpath_t ret;
- bool handled;
-
- kvm_vcpu_srcu_read_lock(vcpu);
-
switch (msr) {
case APIC_BASE_MSR + (APIC_ICR >> 4):
- data = kvm_read_edx_eax(vcpu);
- handled = !handle_fastpath_set_x2apic_icr_irqoff(vcpu, data);
+ if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic) ||
+ kvm_x2apic_icr_write_fast(vcpu->arch.apic, data))
+ return EXIT_FASTPATH_NONE;
break;
case MSR_IA32_TSC_DEADLINE:
- data = kvm_read_edx_eax(vcpu);
- handled = !handle_fastpath_set_tscdeadline(vcpu, data);
+ kvm_set_lapic_tscdeadline_msr(vcpu, data);
break;
default:
- handled = false;
- break;
+ return EXIT_FASTPATH_NONE;
}
- if (handled) {
- if (!kvm_skip_emulated_instruction(vcpu))
- ret = EXIT_FASTPATH_EXIT_USERSPACE;
- else
- ret = EXIT_FASTPATH_REENTER_GUEST;
- trace_kvm_msr_write(msr, data);
- } else {
- ret = EXIT_FASTPATH_NONE;
- }
+ trace_kvm_msr_write(msr, data);
- kvm_vcpu_srcu_read_unlock(vcpu);
+ if (!kvm_skip_emulated_instruction(vcpu))
+ return EXIT_FASTPATH_EXIT_USERSPACE;
- return ret;
+ return EXIT_FASTPATH_REENTER_GUEST;
}
-EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
+
+fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu)
+{
+ return __handle_fastpath_wrmsr(vcpu, kvm_rcx_read(vcpu),
+ kvm_read_edx_eax(vcpu));
+}
+EXPORT_SYMBOL_GPL(handle_fastpath_wrmsr);
+
+fastpath_t handle_fastpath_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg)
+{
+ return __handle_fastpath_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg));
+}
+EXPORT_SYMBOL_GPL(handle_fastpath_wrmsr_imm);
/*
* Adapt set_msr() to msr_io()'s calling convention
@@ -6778,7 +6801,11 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm,
kvm_free_msr_filter(old_filter);
- kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
+ /*
+ * Recalc MSR intercepts as userspace may want to intercept accesses to
+ * MSRs that KVM would otherwise pass through to the guest.
+ */
+ kvm_make_all_cpus_request(kvm, KVM_REQ_RECALC_INTERCEPTS);
return 0;
}
@@ -6973,6 +7000,15 @@ set_identity_unlock:
if (irqchip_in_kernel(kvm))
goto create_irqchip_unlock;
+ /*
+ * Disallow an in-kernel I/O APIC if the VM has protected EOIs,
+ * i.e. if KVM can't intercept EOIs and thus can't properly
+ * emulate level-triggered interrupts.
+ */
+ r = -ENOTTY;
+ if (kvm->arch.has_protected_eoi)
+ goto create_irqchip_unlock;
+
r = -EINVAL;
if (kvm->created_vcpus)
goto create_irqchip_unlock;
@@ -7360,6 +7396,7 @@ static void kvm_probe_msr_to_save(u32 msr_index)
case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET:
if (!kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2))
return;
break;
@@ -8360,7 +8397,7 @@ static int emulator_get_msr_with_filter(struct x86_emulate_ctxt *ctxt,
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
int r;
- r = kvm_get_msr_with_filter(vcpu, msr_index, pdata);
+ r = kvm_emulate_msr_read(vcpu, msr_index, pdata);
if (r < 0)
return X86EMUL_UNHANDLEABLE;
@@ -8383,7 +8420,7 @@ static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt,
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
int r;
- r = kvm_set_msr_with_filter(vcpu, msr_index, data);
+ r = kvm_emulate_msr_write(vcpu, msr_index, data);
if (r < 0)
return X86EMUL_UNHANDLEABLE;
@@ -8403,7 +8440,7 @@ static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt,
static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
u32 msr_index, u64 *pdata)
{
- return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
+ return __kvm_emulate_msr_read(emul_to_vcpu(ctxt), msr_index, pdata);
}
static int emulator_check_rdpmc_early(struct x86_emulate_ctxt *ctxt, u32 pmc)
@@ -8477,11 +8514,6 @@ static bool emulator_is_smm(struct x86_emulate_ctxt *ctxt)
return is_smm(emul_to_vcpu(ctxt));
}
-static bool emulator_is_guest_mode(struct x86_emulate_ctxt *ctxt)
-{
- return is_guest_mode(emul_to_vcpu(ctxt));
-}
-
#ifndef CONFIG_KVM_SMM
static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
{
@@ -8565,7 +8597,6 @@ static const struct x86_emulate_ops emulate_ops = {
.guest_cpuid_is_intel_compatible = emulator_guest_cpuid_is_intel_compatible,
.set_nmi_mask = emulator_set_nmi_mask,
.is_smm = emulator_is_smm,
- .is_guest_mode = emulator_is_guest_mode,
.leave_smm = emulator_leave_smm,
.triple_fault = emulator_triple_fault,
.set_xcr = emulator_set_xcr,
@@ -8871,7 +8902,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
if (unlikely(!r))
return 0;
- kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED);
+ kvm_pmu_instruction_retired(vcpu);
/*
* rflags is the old, "raw" value of the flags. The new value has
@@ -9150,7 +9181,14 @@ restart:
ctxt->exception.address = 0;
}
- r = x86_emulate_insn(ctxt);
+ /*
+ * Check L1's instruction intercepts when emulating instructions for
+ * L2, unless KVM is re-emulating a previously decoded instruction,
+ * e.g. to complete userspace I/O, in which case KVM has already
+ * checked the intercepts.
+ */
+ r = x86_emulate_insn(ctxt, is_guest_mode(vcpu) &&
+ !(emulation_type & EMULTYPE_NO_DECODE));
if (r == EMULATION_INTERCEPTED)
return 1;
@@ -9205,9 +9243,9 @@ writeback:
*/
if (!ctxt->have_exception ||
exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
- kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED);
+ kvm_pmu_instruction_retired(vcpu);
if (ctxt->is_branch)
- kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED);
+ kvm_pmu_branch_retired(vcpu);
kvm_rip_write(vcpu, ctxt->eip);
if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
r = kvm_vcpu_do_singlestep(vcpu);
@@ -10803,13 +10841,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
kvm_check_async_pf_completion(vcpu);
- /*
- * Recalc MSR intercepts as userspace may want to intercept
- * accesses to MSRs that KVM would otherwise pass through to
- * the guest.
- */
- if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
- kvm_x86_call(recalc_msr_intercepts)(vcpu);
+ if (kvm_check_request(KVM_REQ_RECALC_INTERCEPTS, vcpu))
+ kvm_x86_call(recalc_intercepts)(vcpu);
if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
kvm_x86_call(update_cpu_dirty_logging)(vcpu);
@@ -11310,13 +11343,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_halt);
fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu)
{
- int ret;
-
- kvm_vcpu_srcu_read_lock(vcpu);
- ret = kvm_emulate_halt(vcpu);
- kvm_vcpu_srcu_read_unlock(vcpu);
-
- if (!ret)
+ if (!kvm_emulate_halt(vcpu))
return EXIT_FASTPATH_EXIT_USERSPACE;
if (kvm_vcpu_running(vcpu))
@@ -12395,6 +12422,41 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvfree(vcpu->arch.cpuid_entries);
}
+static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event)
+{
+ struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
+ u64 xfeatures_mask;
+ int i;
+
+ /*
+ * Guest FPU state is zero allocated and so doesn't need to be manually
+ * cleared on RESET, i.e. during vCPU creation.
+ */
+ if (!init_event || !fpstate)
+ return;
+
+ /*
+ * On INIT, only select XSTATE components are zeroed, most components
+ * are unchanged. Currently, the only components that are zeroed and
+ * supported by KVM are MPX related.
+ */
+ xfeatures_mask = (kvm_caps.supported_xcr0 | kvm_caps.supported_xss) &
+ (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
+ if (!xfeatures_mask)
+ return;
+
+ BUILD_BUG_ON(sizeof(xfeatures_mask) * BITS_PER_BYTE <= XFEATURE_MAX);
+
+ /*
+ * All paths that lead to INIT are required to load the guest's FPU
+ * state (because most paths are buried in KVM_RUN).
+ */
+ kvm_put_guest_fpu(vcpu);
+ for_each_set_bit(i, (unsigned long *)&xfeatures_mask, XFEATURE_MAX)
+ fpstate_clear_xstate_component(fpstate, i);
+ kvm_load_guest_fpu(vcpu);
+}
+
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
struct kvm_cpuid_entry2 *cpuid_0x1;
@@ -12452,22 +12514,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_async_pf_hash_reset(vcpu);
vcpu->arch.apf.halted = false;
- if (vcpu->arch.guest_fpu.fpstate && kvm_mpx_supported()) {
- struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
-
- /*
- * All paths that lead to INIT are required to load the guest's
- * FPU state (because most paths are buried in KVM_RUN).
- */
- if (init_event)
- kvm_put_guest_fpu(vcpu);
-
- fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS);
- fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR);
-
- if (init_event)
- kvm_load_guest_fpu(vcpu);
- }
+ kvm_xstate_reset(vcpu, init_event);
if (!init_event) {
vcpu->arch.smbase = 0x30000;
@@ -12479,7 +12526,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
__kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP);
- __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true);
+ kvm_msr_write(vcpu, MSR_IA32_XSS, 0);
}
/* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */
@@ -13526,11 +13573,6 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
}
EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
-bool kvm_vector_hashing_enabled(void)
-{
- return vector_hashing;
-}
-
bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
{
return (vcpu->arch.msr_kvm_poll_control & 1) == 0;
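
The x86.c refactor above splits WRMSR emulation so that the legacy path (MSR index in ECX, value in EDX:EAX) and the new immediate form (MSR index taken from the exit qualification, value in a single GPR) feed one common handler. A toy userspace model of that split, with a made-up register file and example values (MSR 0x6e0 is IA32_TSC_DEADLINE); this is a sketch of the control flow, not KVM's implementation:

#include <stdint.h>
#include <stdio.h>

struct vcpu_regs {
	uint64_t gpr[16];	/* 0 = RAX, 1 = RCX, 2 = RDX, ... */
};

static void handle_wrmsr_common(uint32_t msr, uint64_t data)
{
	printf("WRMSR 0x%x <- 0x%llx\n", msr, (unsigned long long)data);
}

/* Legacy WRMSR: index in ECX, 64-bit value assembled from EDX:EAX. */
static void handle_wrmsr_legacy(const struct vcpu_regs *regs)
{
	uint32_t msr = (uint32_t)regs->gpr[1];
	uint64_t data = (regs->gpr[2] << 32) | (uint32_t)regs->gpr[0];

	handle_wrmsr_common(msr, data);
}

/* Immediate form: index comes from the exit qualification, value from one GPR. */
static void handle_wrmsr_imm(const struct vcpu_regs *regs, uint32_t exit_qual, int reg)
{
	handle_wrmsr_common(exit_qual, regs->gpr[reg]);
}

int main(void)
{
	struct vcpu_regs regs = { .gpr = { 0xdeadbeef, 0x6e0, 0x1, [8] = 0x100000000ULL } };

	handle_wrmsr_legacy(&regs);		/* WRMSR 0x6e0 <- 0x1deadbeef */
	handle_wrmsr_imm(&regs, 0x6e0, 8);	/* WRMSR 0x6e0 <- 0x100000000 */
	return 0;
}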
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index bcfd9b719ada..786e36fcd0fb 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -431,14 +431,15 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu,
int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
-bool kvm_vector_hashing_enabled(void);
void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code);
int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
void *insn, int insn_len);
int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len);
-fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
+fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu);
+fastpath_t handle_fastpath_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg);
fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu);
+fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu);
extern struct kvm_caps kvm_caps;
extern struct kvm_host_values kvm_host;
diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
index bb215230cc8a..3eaa216b96c0 100644
--- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
@@ -14,10 +14,10 @@
#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS)
/*
- * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
- * 1 LOOP.
+ * Number of instructions in each loop. 1 ENTER, 1 CLFLUSH/CLFLUSHOPT/NOP,
+ * 1 MFENCE, 1 MOV, 1 LEAVE, 1 LOOP.
*/
-#define NUM_INSNS_PER_LOOP 4
+#define NUM_INSNS_PER_LOOP 6
/*
* Number of "extra" instructions that will be counted, i.e. the number of
@@ -226,9 +226,11 @@ do { \
__asm__ __volatile__("wrmsr\n\t" \
" mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \
"1:\n\t" \
+ FEP "enter $0, $0\n\t" \
clflush "\n\t" \
"mfence\n\t" \
"mov %[m], %%eax\n\t" \
+ FEP "leave\n\t" \
FEP "loop 1b\n\t" \
FEP "mov %%edi, %%ecx\n\t" \
FEP "xor %%eax, %%eax\n\t" \
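
As a quick sanity check of the selftest change above, the measured loop body now retires six instructions per iteration (ENTER, CLFLUSH/CLFLUSHOPT/NOP, MFENCE, MOV, LEAVE, LOOP) and one branch (the LOOP) per iteration. A standalone calculation; NUM_LOOPS is assumed here, since its real value is defined elsewhere in the test:

#include <stdio.h>

#define NUM_LOOPS		10	/* assumed for illustration */
#define NUM_INSNS_PER_LOOP	6	/* enter, clflush/clflushopt/nop, mfence, mov, leave, loop */

int main(void)
{
	printf("instructions retired by the loop body: %d\n", NUM_LOOPS * NUM_INSNS_PER_LOOP);
	printf("branches retired by the loop body:     %d\n", NUM_LOOPS);
	return 0;
}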