diff options
| author | Paolo Bonzini <pbonzini@redhat.com> | 2025-07-28 11:41:06 -0400 |
|---|---|---|
| committer | Paolo Bonzini <pbonzini@redhat.com> | 2025-07-29 08:36:46 -0400 |
| commit | beafd7ecf2255e8b62a42dc04f54843033db3d24 (patch) | |
| tree | 89f6235968ea1413f96be213442c57f4c238ad33 | |
| parent | a10accaef40f1947cf8ad5ec01d306c37aa85897 (diff) | |
| parent | 6f38f8c574642a822f2e85f079fa29a49176c49c (diff) | |
Merge tag 'kvm-x86-sev-6.17' of https://github.com/kvm-x86/linux into HEAD
KVM SEV cache maintenance changes for 6.17
- Drop a superfluous WBINVD (on all CPUs!) when destroying a VM.
- Use WBNOINVD instead of WBINVD when possible, for SEV cache maintenance,
e.g. to minimize collateral damage when reclaiming memory from an SEV guest.
- When reclaiming memory from an SEV guest, only do cache flushes on CPUs that
have ever run a vCPU for the guest, i.e. don't flush the caches for CPUs
that can't possibly have cache lines with dirty, encrypted data.
| -rw-r--r-- | arch/x86/kvm/svm/sev.c | 110 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/svm.h | 1 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.c | 8 |
3 files changed, 84 insertions, 35 deletions
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index fde328ed3f78..2fbdebf79fbb 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -117,6 +117,7 @@ static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid) */ down_write(&sev_deactivate_lock); + /* SNP firmware requires use of WBINVD for ASID recycling. */ wbinvd_on_all_cpus(); if (sev_snp_enabled) @@ -446,7 +447,12 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, init_args.probe = false; ret = sev_platform_init(&init_args); if (ret) - goto e_free; + goto e_free_asid; + + if (!zalloc_cpumask_var(&sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) { + ret = -ENOMEM; + goto e_free_asid; + } /* This needs to happen after SEV/SNP firmware initialization. */ if (vm_type == KVM_X86_SNP_VM) { @@ -464,6 +470,8 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, return 0; e_free: + free_cpumask_var(sev->have_run_cpus); +e_free_asid: argp->error = init_args.error; sev_asid_free(sev); sev->asid = 0; @@ -708,6 +716,33 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages) } } +static void sev_writeback_caches(struct kvm *kvm) +{ + /* + * Note, the caller is responsible for ensuring correctness if the mask + * can be modified, e.g. if a CPU could be doing VMRUN. + */ + if (cpumask_empty(to_kvm_sev_info(kvm)->have_run_cpus)) + return; + + /* + * Ensure that all dirty guest tagged cache entries are written back + * before releasing the pages back to the system for use. CLFLUSH will + * not do this without SME_COHERENT, and flushing many cache lines + * individually is slower than blasting WBINVD for large VMs, so issue + * WBNOINVD (or WBINVD if the "no invalidate" variant is unsupported) + * on CPUs that have done VMRUN, i.e. may have dirtied data using the + * VM's ASID. + * + * For simplicity, never remove CPUs from the bitmap. Ideally, KVM + * would clear the mask when flushing caches, but doing so requires + * serializing multiple calls and having responding CPUs (to the IPI) + * mark themselves as still running if they are running (or about to + * run) a vCPU for the VM. + */ + wbnoinvd_on_cpus_mask(to_kvm_sev_info(kvm)->have_run_cpus); +} + static unsigned long get_num_contig_pages(unsigned long idx, struct page **inpages, unsigned long npages) { @@ -2037,6 +2072,17 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) if (ret) goto out_source_vcpu; + /* + * Allocate a new have_run_cpus for the destination, i.e. don't copy + * the set of CPUs from the source. If a CPU was used to run a vCPU in + * the source VM but is never used for the destination VM, then the CPU + * can only have cached memory that was accessible to the source VM. + */ + if (!zalloc_cpumask_var(&dst_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) { + ret = -ENOMEM; + goto out_source_vcpu; + } + sev_migrate_from(kvm, source_kvm); kvm_vm_dead(source_kvm); cg_cleanup_sev = src_sev; @@ -2694,12 +2740,7 @@ int sev_mem_enc_unregister_region(struct kvm *kvm, goto failed; } - /* - * Ensure that all guest tagged cache entries are flushed before - * releasing the pages back to the system for use. CLFLUSH will - * not do this, so issue a WBINVD. - */ - wbinvd_on_all_cpus(); + sev_writeback_caches(kvm); __unregister_enc_region_locked(kvm, region); @@ -2741,13 +2782,18 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd) goto e_unlock; } + mirror_sev = to_kvm_sev_info(kvm); + if (!zalloc_cpumask_var(&mirror_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) { + ret = -ENOMEM; + goto e_unlock; + } + /* * The mirror kvm holds an enc_context_owner ref so its asid can't * disappear until we're done with it */ source_sev = to_kvm_sev_info(source_kvm); kvm_get_kvm(source_kvm); - mirror_sev = to_kvm_sev_info(kvm); list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms); /* Set enc_context_owner and copy its encryption context over */ @@ -2809,7 +2855,13 @@ void sev_vm_destroy(struct kvm *kvm) WARN_ON(!list_empty(&sev->mirror_vms)); - /* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */ + free_cpumask_var(sev->have_run_cpus); + + /* + * If this is a mirror VM, remove it from the owner's list of a mirrors + * and skip ASID cleanup (the ASID is tied to the lifetime of the owner). + * Note, mirror VMs don't support registering encrypted regions. + */ if (is_mirroring_enc_context(kvm)) { struct kvm *owner_kvm = sev->enc_context_owner; @@ -2820,12 +2872,6 @@ void sev_vm_destroy(struct kvm *kvm) return; } - /* - * Ensure that all guest tagged cache entries are flushed before - * releasing the pages back to the system for use. CLFLUSH will - * not do this, so issue a WBINVD. - */ - wbinvd_on_all_cpus(); /* * if userspace was terminated before unregistering the memory regions @@ -3095,30 +3141,29 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va) /* * VM Page Flush takes a host virtual address and a guest ASID. Fall - * back to WBINVD if this faults so as not to make any problems worse - * by leaving stale encrypted data in the cache. + * back to full writeback of caches if this faults so as not to make + * any problems worse by leaving stale encrypted data in the cache. */ if (WARN_ON_ONCE(wrmsrq_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid))) - goto do_wbinvd; + goto do_sev_writeback_caches; return; -do_wbinvd: - wbinvd_on_all_cpus(); +do_sev_writeback_caches: + sev_writeback_caches(vcpu->kvm); } void sev_guest_memory_reclaimed(struct kvm *kvm) { /* * With SNP+gmem, private/encrypted memory is unreachable via the - * hva-based mmu notifiers, so these events are only actually - * pertaining to shared pages where there is no need to perform - * the WBINVD to flush associated caches. + * hva-based mmu notifiers, i.e. these events are explicitly scoped to + * shared pages, where there's no need to flush caches. */ if (!sev_guest(kvm) || sev_snp_guest(kvm)) return; - wbinvd_on_all_cpus(); + sev_writeback_caches(kvm); } void sev_free_vcpu(struct kvm_vcpu *vcpu) @@ -3450,6 +3495,15 @@ int pre_sev_run(struct vcpu_svm *svm, int cpu) if (sev_es_guest(kvm) && !VALID_PAGE(svm->vmcb->control.vmsa_pa)) return -EINVAL; + /* + * To optimize cache flushes when memory is reclaimed from an SEV VM, + * track physical CPUs that enter the guest for SEV VMs and thus can + * have encrypted, dirty data in the cache, and flush caches only for + * CPUs that have entered the guest. + */ + if (!cpumask_test_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus)) + cpumask_set_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus); + /* Assign the asid allocated with this SEV guest */ svm->asid = asid; @@ -3882,9 +3936,9 @@ void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu) * From this point forward, the VMSA will always be a guest-mapped page * rather than the initial one allocated by KVM in svm->sev_es.vmsa. In * theory, svm->sev_es.vmsa could be free'd and cleaned up here, but - * that involves cleanups like wbinvd_on_all_cpus() which would ideally - * be handled during teardown rather than guest boot. Deferring that - * also allows the existing logic for SEV-ES VMSAs to be re-used with + * that involves cleanups like flushing caches, which would ideally be + * handled during teardown rather than guest boot. Deferring that also + * allows the existing logic for SEV-ES VMSAs to be re-used with * minimal SNP-specific changes. */ svm->sev_es.snp_has_guest_vmsa = true; @@ -4875,7 +4929,7 @@ void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) /* * SEV-ES avoids host/guest cache coherency issues through - * WBINVD hooks issued via MMU notifiers during run-time, and + * WBNOINVD hooks issued via MMU notifiers during run-time, and * KVM's VM destroy path at shutdown. Those MMU notifier events * don't cover gmem since there is no requirement to map pages * to a HVA in order to use them for a running guest. While the diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 81c7b147b731..58b9d168e0c8 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -110,6 +110,7 @@ struct kvm_sev_info { void *guest_req_buf; /* Bounce buffer for SNP Guest Request input */ void *guest_resp_buf; /* Bounce buffer for SNP Guest Request output */ struct mutex guest_req_mutex; /* Must acquire before using bounce buffers */ + cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */ }; #define SEV_POLICY_NODBG BIT_ULL(0) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 71314746ddd1..a1c49bc681c4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4994,11 +4994,6 @@ out: return r; } -static void wbinvd_ipi(void *garbage) -{ - wbinvd(); -} - static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) { return kvm_arch_has_noncoherent_dma(vcpu->kvm); @@ -5022,8 +5017,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (kvm_x86_call(has_wbinvd_exit)()) cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); else if (vcpu->cpu != -1 && vcpu->cpu != cpu) - smp_call_function_single(vcpu->cpu, - wbinvd_ipi, NULL, 1); + wbinvd_on_cpu(vcpu->cpu); } kvm_x86_call(vcpu_load)(vcpu, cpu); |
