diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2024-11-08 04:01:38 -0500 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2024-11-08 05:57:00 -0500 |
commit | 9893deb08b0f08b114458fe4966b049786fb876f (patch) | |
tree | e1449ed0149d11147322d8c089d77083223f99c4 | |
parent | 59b723cd2adbac2a34fc8e12c74ae26ae45bf230 (diff) | |
parent | e5d253c60e9627a22940e00a05a6115d722f07ed (diff) |
Merge tag 'kvm-x86-fixes-6.12-rcN' of https://github.com/kvm-x86/linux into HEAD
KVM x86 and selftests fixes for 6.12:
- Increase the timeout for the memslot performance selftest to avoid false
failures on arm64 and nested x86 platforms.
- Fix a goof in the guest_memfd selftest where a for-loop initialized a
bit mask to zero instead of BIT(0).
- Disable strict aliasing when building KVM selftests to prevent the
compiler from treating things like "u64 *" to "uint64_t *" cases as
undefined behavior, which can lead to nasty, hard to debug failures.
- Force -march=x86-64-v2 for KVM x86 selftests if and only if the uarch
is supported by the compiler.
- When emulating a guest TLB flush for a nested guest, flush vpid01, not
vpid02, if L2 is active but VPID is disabled in vmcs12, i.e. if L2 and
L1 are sharing VPID '0' (from L1's perspective).
- Fix a bug in the SNP initialization flow where KVM would return '0' to
userspace instead of -errno on failure.
-rw-r--r-- | arch/x86/kvm/svm/sev.c | 7 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 30 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 2 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/Makefile | 10 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/guest_memfd_test.c | 2 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/memslot_perf_test.c | 2 |
6 files changed, 39 insertions, 14 deletions
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0b851ef937f2..03c7db3b4cc3 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -450,8 +450,11 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, goto e_free; /* This needs to happen after SEV/SNP firmware initialization. */ - if (vm_type == KVM_X86_SNP_VM && snp_guest_req_init(kvm)) - goto e_free; + if (vm_type == KVM_X86_SNP_VM) { + ret = snp_guest_req_init(kvm); + if (ret) + goto e_free; + } INIT_LIST_HEAD(&sev->regions_list); INIT_LIST_HEAD(&sev->mirror_vms); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index a8e7bc04d9bf..931a7361c30f 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -1197,11 +1197,14 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu, kvm_hv_nested_transtion_tlb_flush(vcpu, enable_ept); /* - * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings - * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a - * full TLB flush from the guest's perspective. This is required even - * if VPID is disabled in the host as KVM may need to synchronize the - * MMU in response to the guest TLB flush. + * If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the + * same VPID as the host, and so architecturally, linear and combined + * mappings for VPID=0 must be flushed at VM-Enter and VM-Exit. KVM + * emulates L2 sharing L1's VPID=0 by using vpid01 while running L2, + * and so KVM must also emulate TLB flush of VPID=0, i.e. vpid01. This + * is required if VPID is disabled in KVM, as a TLB flush (there are no + * VPIDs) still occurs from L1's perspective, and KVM may need to + * synchronize the MMU in response to the guest TLB flush. * * Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use. * EPT is a special snowflake, as guest-physical mappings aren't @@ -2315,6 +2318,17 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx, vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA); + /* + * If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the + * same VPID as the host. Emulate this behavior by using vpid01 for L2 + * if VPID is disabled in vmcs12. Note, if VPID is disabled, VM-Enter + * and VM-Exit are architecturally required to flush VPID=0, but *only* + * VPID=0. I.e. using vpid02 would be ok (so long as KVM emulates the + * required flushes), but doing so would cause KVM to over-flush. E.g. + * if L1 runs L2 X with VPID12=1, then runs L2 Y with VPID12 disabled, + * and then runs L2 X again, then KVM can and should retain TLB entries + * for VPID12=1. + */ if (enable_vpid) { if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02); @@ -5950,6 +5964,12 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) return nested_vmx_fail(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); + /* + * Always flush the effective vpid02, i.e. never flush the current VPID + * and never explicitly flush vpid01. INVVPID targets a VPID, not a + * VMCS, and so whether or not the current vmcs12 has VPID enabled is + * irrelevant (and there may not be a loaded vmcs12). + */ vpid02 = nested_get_vpid02(vcpu); switch (type) { case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 81ed596e4454..9886d67d9512 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3216,7 +3216,7 @@ void vmx_flush_tlb_all(struct kvm_vcpu *vcpu) static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu)) + if (is_guest_mode(vcpu) && nested_cpu_has_vpid(get_vmcs12(vcpu))) return nested_get_vpid02(vcpu); return to_vmx(vcpu)->vpid; } diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 156fbfae940f..48645a2e29da 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -241,16 +241,18 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ -fno-builtin-memcmp -fno-builtin-memcpy \ -fno-builtin-memset -fno-builtin-strnlen \ - -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ - -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ - -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \ - $(KHDR_INCLUDES) + -fno-stack-protector -fno-PIE -fno-strict-aliasing \ + -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \ + -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH_DIR) \ + -I ../rseq -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES) ifeq ($(ARCH),s390) CFLAGS += -march=z10 endif ifeq ($(ARCH),x86) +ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0) CFLAGS += -march=x86-64-v2 endif +endif ifeq ($(ARCH),arm64) tools_dir := $(top_srcdir)/tools arm64_tools_dir := $(tools_dir)/arch/arm64/tools/ diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index ba0c8e996035..ce687f8d248f 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -134,7 +134,7 @@ static void test_create_guest_memfd_invalid(struct kvm_vm *vm) size); } - for (flag = 0; flag; flag <<= 1) { + for (flag = BIT(0); flag; flag <<= 1) { fd = __vm_create_guest_memfd(vm, page_size, flag); TEST_ASSERT(fd == -1 && errno == EINVAL, "guest_memfd() with flag '0x%lx' should fail with EINVAL", diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 989ffe0d047f..e3711beff7f3 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -417,7 +417,7 @@ static bool _guest_should_exit(void) */ static noinline void host_perform_sync(struct sync_area *sync) { - alarm(2); + alarm(10); atomic_store_explicit(&sync->sync_flag, true, memory_order_release); while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire)) |