From e750f85391286a4c8100275516973324b621a269 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 15 Jul 2025 12:06:38 -0700 Subject: KVM: x86: Don't (re)check L1 intercepts when completing userspace I/O When completing emulation of instruction that generated a userspace exit for I/O, don't recheck L1 intercepts as KVM has already finished that phase of instruction execution, i.e. has already committed to allowing L2 to perform I/O. If L1 (or host userspace) modifies the I/O permission bitmaps during the exit to userspace, KVM will treat the access as being intercepted despite already having emulated the I/O access. Pivot on EMULTYPE_NO_DECODE to detect that KVM is completing emulation. Of the three users of EMULTYPE_NO_DECODE, only complete_emulated_io() (the intended "recipient") can reach the code in question. gp_interception()'s use is mutually exclusive with is_guest_mode(), and complete_emulated_insn_gp() unconditionally pairs EMULTYPE_NO_DECODE with EMULTYPE_SKIP. The bad behavior was detected by a syzkaller program that toggles port I/O interception during the userspace I/O exit, ultimately resulting in a WARN on vcpu->arch.pio.count being non-zero due to KVM no completing emulation of the I/O instruction. WARNING: CPU: 23 PID: 1083 at arch/x86/kvm/x86.c:8039 emulator_pio_in_out+0x154/0x170 [kvm] Modules linked in: kvm_intel kvm irqbypass CPU: 23 UID: 1000 PID: 1083 Comm: repro Not tainted 6.16.0-rc5-c1610d2d66b1-next-vm #74 NONE Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:emulator_pio_in_out+0x154/0x170 [kvm] PKRU: 55555554 Call Trace: kvm_fast_pio+0xd6/0x1d0 [kvm] vmx_handle_exit+0x149/0x610 [kvm_intel] kvm_arch_vcpu_ioctl_run+0xda8/0x1ac0 [kvm] kvm_vcpu_ioctl+0x244/0x8c0 [kvm] __x64_sys_ioctl+0x8a/0xd0 do_syscall_64+0x5d/0xc60 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Reported-by: syzbot+cc2032ba16cc2018ca25@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68790db4.a00a0220.3af5df.0020.GAE@google.com Fixes: 8a76d7f25f8f ("KVM: x86: Add x86 callback for intercept check") Cc: stable@vger.kernel.org Cc: Jim Mattson Link: https://lore.kernel.org/r/20250715190638.1899116-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a1c49bc681c4..79057622fa76 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8470,11 +8470,6 @@ static bool emulator_is_smm(struct x86_emulate_ctxt *ctxt) return is_smm(emul_to_vcpu(ctxt)); } -static bool emulator_is_guest_mode(struct x86_emulate_ctxt *ctxt) -{ - return is_guest_mode(emul_to_vcpu(ctxt)); -} - #ifndef CONFIG_KVM_SMM static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) { @@ -8558,7 +8553,6 @@ static const struct x86_emulate_ops emulate_ops = { .guest_cpuid_is_intel_compatible = emulator_guest_cpuid_is_intel_compatible, .set_nmi_mask = emulator_set_nmi_mask, .is_smm = emulator_is_smm, - .is_guest_mode = emulator_is_guest_mode, .leave_smm = emulator_leave_smm, .triple_fault = emulator_triple_fault, .set_xcr = emulator_set_xcr, @@ -9143,7 +9137,14 @@ restart: ctxt->exception.address = 0; } - r = x86_emulate_insn(ctxt); + /* + * Check L1's instruction intercepts when emulating instructions for + * L2, unless KVM is re-emulating a previously decoded instruction, + * e.g. to complete userspace I/O, in which case KVM has already + * checked the intercepts. 
+ */ + r = x86_emulate_insn(ctxt, is_guest_mode(vcpu) && + !(emulation_type & EMULTYPE_NO_DECODE)); if (r == EMULATION_INTERCEPTED) return 1; -- cgit From 68e61f6fd65610e73b17882f86fedfd784d99229 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 11 Jul 2025 10:27:46 -0700 Subject: KVM: SVM: Emulate PERF_CNTR_GLOBAL_STATUS_SET for PerfMonV2 Emulate PERF_CNTR_GLOBAL_STATUS_SET when PerfMonV2 is enumerated to the guest, as the MSR is supposed to exist in all AMD v2 PMUs. Fixes: 4a2771895ca6 ("KVM: x86/svm/pmu: Add AMD PerfMonV2 support") Cc: stable@vger.kernel.org Cc: Sandipan Das Link: https://lore.kernel.org/r/20250711172746.1579423-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 79057622fa76..5dc32f2fe391 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -367,6 +367,7 @@ static const u32 msrs_to_save_pmu[] = { MSR_AMD64_PERF_CNTR_GLOBAL_CTL, MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, + MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET, }; static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) + @@ -7353,6 +7354,7 @@ static void kvm_probe_msr_to_save(u32 msr_index) case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: if (!kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) return; break; -- cgit From 777414340085711cafd3807a72c531107c0ff7f6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 Aug 2025 12:05:11 -0700 Subject: KVM: x86: Only allow "fast" IPIs in fastpath WRMSR(X2APIC_ICR) handler Explicitly restrict fastpath ICR writes to IPIs that are "fast", i.e. can be delivered without having to walk all vCPUs, and that target at most 16 vCPUs. Artificially restricting ICR writes to physical mode guarantees at most one vCPU will receive in IPI (because x2APIC IDs are read-only), but that delivery might not be "fast". E.g. even if the vCPU exists, KVM might have to iterate over 4096 vCPUs to find the right one. Limiting delivery to fast IPIs aligns the WRMSR fastpath with kvm_arch_set_irq_inatomic() (which also runs with IRQs disabled), and will allow dropping the semi-arbitrary restrictions on delivery mode and type. Link: https://lore.kernel.org/r/20250805190526.1453366-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5dc32f2fe391..1b64c71458a2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2150,7 +2150,7 @@ static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) && ((data & APIC_MODE_MASK) == APIC_DM_FIXED) && ((u32)(data >> 32) != X2APIC_BROADCAST)) - return kvm_x2apic_icr_write(vcpu->arch.apic, data); + return kvm_x2apic_icr_write_fast(vcpu->arch.apic, data); return 1; } -- cgit From aeeb4c7fff525e0fd71ec28162b713b8cb1ec943 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 Aug 2025 12:05:12 -0700 Subject: KVM: x86: Drop semi-arbitrary restrictions on IPI type in fastpath Drop the restrictions on fastpath IPIs only working for fixed IRQs with a physical destination now that the fastpath is explicitly limited to "fast" delivery. 
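As an aside (illustration only, not part of the patch), the condition being relaxed here can be seen in a standalone sketch; the masks follow the x2APIC ICR layout (delivery mode in bits 10:8, destination mode in bit 11, destination shorthand in bits 19:18, destination in bits 63:32):

  #include <stdbool.h>
  #include <stdint.h>

  /* x2APIC ICR fields relevant to the old fastpath filter. */
  #define ICR_DELIVERY_MODE(icr)   (((icr) >> 8) & 0x7)    /* 0 == fixed        */
  #define ICR_LOGICAL_DEST(icr)    (((icr) >> 11) & 0x1)   /* 0 == physical     */
  #define ICR_DEST_SHORTHAND(icr)  (((icr) >> 18) & 0x3)   /* 0 == no shorthand */
  #define ICR_X2APIC_DEST(icr)     ((uint32_t)((icr) >> 32))
  #define X2APIC_BROADCAST_DEST    0xffffffffu

  /* Returns true if the pre-series fastpath would have handled this IPI:
   * fixed delivery mode, physical destination, no shorthand, not broadcast. */
  static bool old_fastpath_would_handle(uint64_t icr)
  {
          return !ICR_DEST_SHORTHAND(icr) &&
                 !ICR_LOGICAL_DEST(icr) &&
                 !ICR_DELIVERY_MODE(icr) &&
                 ICR_X2APIC_DEST(icr) != X2APIC_BROADCAST_DEST;
  }
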
Limiting delivery to a single physical APIC ID guarantees only one vCPU will receive the event, but that isn't necessarily "fast", e.g. if the targeted vCPU is the last of 4096 vCPUs. And logical destination mode or shorthand (to self) can also be fast, e.g. if only a few vCPUs are being targeted. Lastly, there's nothing inherently slow about delivering an NMI, INIT, SIPI, SMI, etc., i.e. there's no reason to artificially limit fastpath delivery to fixed vector IRQs. Link: https://lore.kernel.org/r/20250805190526.1453366-5-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1b64c71458a2..6d93547526e6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2146,13 +2146,7 @@ static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic)) return 1; - if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) && - ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) && - ((data & APIC_MODE_MASK) == APIC_DM_FIXED) && - ((u32)(data >> 32) != X2APIC_BROADCAST)) - return kvm_x2apic_icr_write_fast(vcpu->arch.apic, data); - - return 1; + return kvm_x2apic_icr_write_fast(vcpu->arch.apic, data); } static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data) -- cgit From 0a94b2042419f7896f5d362465731506e43bc319 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 Aug 2025 12:05:13 -0700 Subject: KVM: x86: Unconditionally handle MSR_IA32_TSC_DEADLINE in fastpath exits Drop the fastpath VM-Exit requirement that KVM can use the hypervisor timer to emulate the APIC timer in TSC deadline mode. I.e. unconditionally handle MSR_IA32_TSC_DEADLINE WRMSRs in the fastpath. Restricting the fastpath to *maybe* using the VMX preemption timer is ineffective and unnecessary. If the requested deadline can't be programmed into the VMX preemption timer, KVM will fall back to hrtimers, i.e. the restriction is ineffective as far as preventing any kind of worst case scenario. But guarding against a worst case scenario is completely unnecessary as the "slow" path, start_sw_tscdeadline() => hrtimer_start(), explicitly disables IRQs. In fact, the worst case scenario is when KVM thinks it can use the VMX preemption timer, as KVM will eat the overhead of calling into vmx_set_hv_timer() and falling back to hrtimers. Opportunistically limit kvm_can_use_hv_timer() to lapic.c as the fastpath code was the only external user. Stating the obvious, this allows handling MSR_IA32_TSC_DEADLINE writes in the fastpath on AMD CPUs.
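For reference, a rough sketch of the conversion the software fallback performs, i.e. turning an absolute guest TSC deadline into a relative hrtimer expiration (the real code also deals with TSC scaling and offsets, so treat this as an approximation):

  #include <stdint.h>

  /* ns = ticks * 1e6 / tsc_khz, since tsc_khz is ticks per millisecond.
   * A deadline in the past maps to an immediate (0 ns) expiration. */
  static uint64_t tsc_deadline_to_ns(uint64_t deadline, uint64_t guest_tsc,
                                     uint64_t tsc_khz)
  {
          uint64_t ticks = deadline > guest_tsc ? deadline - guest_tsc : 0;

          return ticks * 1000000ULL / tsc_khz;
  }
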
Link: https://lore.kernel.org/r/20250805190526.1453366-6-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6d93547526e6..366c8c7f2e43 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2151,9 +2151,6 @@ static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data) { - if (!kvm_can_use_hv_timer(vcpu)) - return 1; - kvm_set_lapic_tscdeadline_msr(vcpu, data); return 0; } -- cgit From aebcbb60977323c21c8d89eb4298e454f8e89299 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 Aug 2025 12:05:14 -0700 Subject: KVM: x86: Acquire SRCU in WRMSR fastpath iff instruction needs to be skipped Acquire SRCU in the WRMSR fastpath if and only if an instruction needs to be skipped, i.e. only if the fastpath succeeds. The reasoning in commit 3f2739bd1e0b ("KVM: x86: Acquire SRCU read lock when handling fastpath MSR writes") about "avoid having to play whack-a-mole" seems sound, but in hindsight unconditionally acquiring SRCU does more harm than good. While acquiring/releasing SRCU isn't slow per se, the things that are _protected_ by kvm->srcu are generally safe to access only in the "slow" VM-Exit path. E.g. accessing memslots in generic helpers is never safe, because accessing guest memory with IRQs disabled is unless unsafe (except when kvm_vcpu_read_guest_atomic() is used, but that API should never be used in emulation helpers). In other words, playing whack-a-mole is actually desirable in this case, because every access to an asset protected by kvm->srcu warrants further scrutiny. Link: https://lore.kernel.org/r/20250805190526.1453366-7-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 366c8c7f2e43..a5d7ab23d432 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2159,10 +2159,8 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) { u32 msr = kvm_rcx_read(vcpu); u64 data; - fastpath_t ret; bool handled; - - kvm_vcpu_srcu_read_lock(vcpu); + int r; switch (msr) { case APIC_BASE_MSR + (APIC_ICR >> 4): @@ -2178,19 +2176,16 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) break; } - if (handled) { - if (!kvm_skip_emulated_instruction(vcpu)) - ret = EXIT_FASTPATH_EXIT_USERSPACE; - else - ret = EXIT_FASTPATH_REENTER_GUEST; - trace_kvm_msr_write(msr, data); - } else { - ret = EXIT_FASTPATH_NONE; - } + if (!handled) + return EXIT_FASTPATH_NONE; + kvm_vcpu_srcu_read_lock(vcpu); + r = kvm_skip_emulated_instruction(vcpu); kvm_vcpu_srcu_read_unlock(vcpu); - return ret; + trace_kvm_msr_write(msr, data); + + return r ? EXIT_FASTPATH_REENTER_GUEST : EXIT_FASTPATH_EXIT_USERSPACE; } EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff); -- cgit From aa2e4f029341c0b56645d49cd5959946cdab31b9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 Aug 2025 12:05:15 -0700 Subject: KVM: x86: Unconditionally grab data from EDX:EAX in WRMSR fastpath Always grab EDX:EAX in the WRMSR fastpath to deduplicate and simplify the case statements, and to prepare for handling immediate variants of WRMSRNS in the fastpath (the data register is explicitly provided in that case). 
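As a trivial illustration of the EDX:EAX convention (and of why reading the registers unconditionally is harmless):

  #include <stdint.h>

  /* WRMSR takes the high half of the data in EDX and the low half in EAX;
   * RDMSR returns the value split the same way. */
  static inline uint64_t msr_data_from_edx_eax(uint32_t eax, uint32_t edx)
  {
          return ((uint64_t)edx << 32) | eax;
  }
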
There's no harm in reading the registers, as their values are always available, i.e. don't require VMREADs (or similarly slow operations). No real functional change intended. Cc: Xin Li Link: https://lore.kernel.org/r/20250805190526.1453366-8-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a5d7ab23d432..343bb38840dc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2157,18 +2157,16 @@ static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data) fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) { + u64 data = kvm_read_edx_eax(vcpu); u32 msr = kvm_rcx_read(vcpu); - u64 data; bool handled; int r; switch (msr) { case APIC_BASE_MSR + (APIC_ICR >> 4): - data = kvm_read_edx_eax(vcpu); handled = !handle_fastpath_set_x2apic_icr_irqoff(vcpu, data); break; case MSR_IA32_TSC_DEADLINE: - data = kvm_read_edx_eax(vcpu); handled = !handle_fastpath_set_tscdeadline(vcpu, data); break; default: -- cgit From d618fb4e43a0287a54551aa01be58c75ac668671 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 Aug 2025 12:05:16 -0700 Subject: KVM: x86: Fold WRMSR fastpath helpers into the main handler Fold the per-MSR WRMSR fastpath helpers into the main handler now that the IPI path in particular is relatively tiny. In addition to eliminating a decent amount of boilerplate, this removes the ugly -errno/1/0 => bool conversion (which is "necessitated" by kvm_x2apic_icr_write_fast()). Opportunistically drop the comment about IPIs, as the purpose of the fastpath is hopefully self-evident, and _if_ it needs more documentation, the documentation (and rules!) should be placed in a more central location. No functional change intended. Link: https://lore.kernel.org/r/20250805190526.1453366-9-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 34 +++++----------------------------- 1 file changed, 5 insertions(+), 29 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 343bb38840dc..f5e933f0e21a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2134,48 +2134,24 @@ static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu) kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending(); } -/* - * The fast path for frequent and performance sensitive wrmsr emulation, - * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces - * the latency of virtual IPI by avoiding the expensive bits of transitioning - * from guest to host, e.g. reacquiring KVM's SRCU lock. In contrast to the - * other cases which must be called after interrupts are enabled on the host. 
- */ -static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data) -{ - if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic)) - return 1; - - return kvm_x2apic_icr_write_fast(vcpu->arch.apic, data); -} - -static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data) -{ - kvm_set_lapic_tscdeadline_msr(vcpu, data); - return 0; -} - fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) { u64 data = kvm_read_edx_eax(vcpu); u32 msr = kvm_rcx_read(vcpu); - bool handled; int r; switch (msr) { case APIC_BASE_MSR + (APIC_ICR >> 4): - handled = !handle_fastpath_set_x2apic_icr_irqoff(vcpu, data); + if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic) || + kvm_x2apic_icr_write_fast(vcpu->arch.apic, data)) + return EXIT_FASTPATH_NONE; break; case MSR_IA32_TSC_DEADLINE: - handled = !handle_fastpath_set_tscdeadline(vcpu, data); + kvm_set_lapic_tscdeadline_msr(vcpu, data); break; default: - handled = false; - break; - } - - if (!handled) return EXIT_FASTPATH_NONE; + } kvm_vcpu_srcu_read_lock(vcpu); r = kvm_skip_emulated_instruction(vcpu); -- cgit From 43f5bea2639ccca59541f33d62342543b4461937 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 Aug 2025 12:05:18 -0700 Subject: KVM: x86/pmu: Add wrappers for counting emulated instructions/branches Add wrappers for triggering instruction retired and branch retired PMU events in anticipation of reworking the internal mechanisms to track which PMCs need to be evaluated, e.g. to avoid having to walk and check every PMC. Opportunistically bury "struct kvm_pmu_emulated_event_selectors" in pmu.c. No functional change intended. Link: https://lore.kernel.org/r/20250805190526.1453366-11-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f5e933f0e21a..c44d3d64270b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8820,7 +8820,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) if (unlikely(!r)) return 0; - kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED); + kvm_pmu_instruction_retired(vcpu); /* * rflags is the old, "raw" value of the flags. The new value has @@ -9161,9 +9161,9 @@ writeback: */ if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) { - kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED); + kvm_pmu_instruction_retired(vcpu); if (ctxt->is_branch) - kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED); + kvm_pmu_branch_retired(vcpu); kvm_rip_write(vcpu, ctxt->eip); if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) r = kvm_vcpu_do_singlestep(vcpu); -- cgit From 8bb8b60c95c55c13f9924f3f090232e14d035d43 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 Aug 2025 12:05:25 -0700 Subject: KVM: x86: Push acquisition of SRCU in fastpath into kvm_pmu_trigger_event() Acquire SRCU in the VM-Exit fastpath if and only if KVM needs to check the PMU event filter, to further trim the amount of code that is executed with SRCU protection in the fastpath. Counter-intuitively, holding SRCU can do more harm than good due to masking potential bugs, and introducing a new SRCU-protected asset to code reachable via kvm_skip_emulated_instruction() would be quite notable, i.e. definitely worth auditing. E.g. 
the primary user of kvm->srcu is KVM's memslots, accessing memslots all but guarantees guest memory may be accessed, accessing guest memory can fault, and page faults might sleep, which isn't allowed while IRQs are disabled. Not acquiring SRCU means the (hypothetical) illegal sleep would be flagged when running with PROVE_RCU=y, even if DEBUG_ATOMIC_SLEEP=n. Note, performance is NOT a motivating factor, as SRCU lock/unlock only adds ~15 cycles of latency to fastpath VM-Exits. I.e. overhead isn't a concern _if_ SRCU protection needs to be extended beyond PMU events, e.g. to honor userspace MSR filters. Reviewed-by: Dapeng Mi Link: https://lore.kernel.org/r/20250805190526.1453366-18-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c44d3d64270b..093bfc8d00b3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2138,7 +2138,6 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) { u64 data = kvm_read_edx_eax(vcpu); u32 msr = kvm_rcx_read(vcpu); - int r; switch (msr) { case APIC_BASE_MSR + (APIC_ICR >> 4): @@ -2153,13 +2152,12 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) return EXIT_FASTPATH_NONE; } - kvm_vcpu_srcu_read_lock(vcpu); - r = kvm_skip_emulated_instruction(vcpu); - kvm_vcpu_srcu_read_unlock(vcpu); - trace_kvm_msr_write(msr, data); - return r ? EXIT_FASTPATH_REENTER_GUEST : EXIT_FASTPATH_EXIT_USERSPACE; + if (!kvm_skip_emulated_instruction(vcpu)) + return EXIT_FASTPATH_EXIT_USERSPACE; + + return EXIT_FASTPATH_REENTER_GUEST; } EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff); @@ -11254,13 +11252,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_halt); fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu) { - int ret; - - kvm_vcpu_srcu_read_lock(vcpu); - ret = kvm_emulate_halt(vcpu); - kvm_vcpu_srcu_read_unlock(vcpu); - - if (!ret) + if (!kvm_emulate_halt(vcpu)) return EXIT_FASTPATH_EXIT_USERSPACE; if (kvm_vcpu_running(vcpu)) -- cgit From 6c3d4b917995a17f515943ccd39ba11b81753b0d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 Aug 2025 12:05:26 -0700 Subject: KVM: x86: Add a fastpath handler for INVD Add a fastpath handler for INVD so that the common fastpath logic can be trivially tested on both Intel and AMD. Under KVM, INVD is always: (a) intercepted, (b) available to the guest, and (c) emulated as a nop, with no side effects. Combined with INVD not having any inputs or outputs, i.e. no register constraints, INVD is the perfect instruction for exercising KVM's fastpath as it can be inserted into practically any guest-side code stream. 
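For example, a hypothetical guest-side snippet along these lines (e.g. in a KVM selftest, running at CPL0 in the guest) is enough to hammer the new fastpath:

  /* Each INVD unconditionally traps to KVM and is emulated as a nop, so a
   * tight loop measures little more than round-trip VM-Exit cost. */
  static void guest_invd_loop(void)
  {
          int i;

          for (i = 0; i < 1000000; i++)
                  asm volatile("invd");
  }
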
Link: https://lore.kernel.org/r/20250805190526.1453366-19-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 093bfc8d00b3..6e56d5cff44d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2087,6 +2087,15 @@ int kvm_emulate_invd(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_invd); +fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu) +{ + if (!kvm_emulate_invd(vcpu)) + return EXIT_FASTPATH_EXIT_USERSPACE; + + return EXIT_FASTPATH_REENTER_GUEST; +} +EXPORT_SYMBOL_GPL(handle_fastpath_invd); + int kvm_handle_invalid_op(struct kvm_vcpu *vcpu) { kvm_queue_exception(vcpu, UD_VECTOR); -- cgit From ec400f6c2f2703cb6c698dd00b28cfdb8ee5cdcc Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 Aug 2025 13:22:20 -0700 Subject: KVM: x86: Rename local "ecx" variables to "msr" and "pmc" as appropriate Rename "ecx" variables in {RD,WR}MSR and RDPMC helpers to "msr" and "pmc" respectively, in anticipation of adding support for the immediate variants of RDMSR and WRMSRNS, and to better document what the variables hold (versus where the data originated). No functional change intended. Link: https://lore.kernel.org/r/20250805202224.1475590-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6e56d5cff44d..f7c5db3d2652 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1573,10 +1573,10 @@ EXPORT_SYMBOL_GPL(kvm_get_dr); int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu) { - u32 ecx = kvm_rcx_read(vcpu); + u32 pmc = kvm_rcx_read(vcpu); u64 data; - if (kvm_pmu_rdpmc(vcpu, ecx, &data)) { + if (kvm_pmu_rdpmc(vcpu, pmc, &data)) { kvm_inject_gp(vcpu, 0); return 1; } @@ -2027,23 +2027,23 @@ static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index, int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) { - u32 ecx = kvm_rcx_read(vcpu); + u32 msr = kvm_rcx_read(vcpu); u64 data; int r; - r = kvm_get_msr_with_filter(vcpu, ecx, &data); + r = kvm_get_msr_with_filter(vcpu, msr, &data); if (!r) { - trace_kvm_msr_read(ecx, data); + trace_kvm_msr_read(msr, data); kvm_rax_write(vcpu, data & -1u); kvm_rdx_write(vcpu, (data >> 32) & -1u); } else { /* MSR read failed? See if we should ask user space */ - if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_RDMSR, 0, + if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_RDMSR, 0, complete_fast_rdmsr, r)) return 0; - trace_kvm_msr_read_ex(ecx); + trace_kvm_msr_read_ex(msr); } return kvm_x86_call(complete_emulated_msr)(vcpu, r); @@ -2052,23 +2052,23 @@ EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr); int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) { - u32 ecx = kvm_rcx_read(vcpu); + u32 msr = kvm_rcx_read(vcpu); u64 data = kvm_read_edx_eax(vcpu); int r; - r = kvm_set_msr_with_filter(vcpu, ecx, data); + r = kvm_set_msr_with_filter(vcpu, msr, data); if (!r) { - trace_kvm_msr_write(ecx, data); + trace_kvm_msr_write(msr, data); } else { /* MSR write failed? 
See if we should ask user space */ - if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_WRMSR, data, + if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_WRMSR, data, complete_fast_msr_access, r)) return 0; /* Signal all other negative errors to userspace */ if (r < 0) return r; - trace_kvm_msr_write_ex(ecx, data); + trace_kvm_msr_write_ex(msr, data); } return kvm_x86_call(complete_emulated_msr)(vcpu, r); -- cgit From 87a877de367d835b527d1086f75727123ef85fc4 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Tue, 5 Aug 2025 13:22:21 -0700 Subject: KVM: x86: Rename handle_fastpath_set_msr_irqoff() to handle_fastpath_wrmsr() Rename the WRMSR fastpath API to drop "irqoff", as that information is redundant (the fastpath always runs with IRQs disabled), and to prepare for adding a fastpath for the immediate variant of WRMSRNS. No functional change intended. Signed-off-by: Xin Li (Intel) [sean: split to separate patch, write changelog] Link: https://lore.kernel.org/r/20250805202224.1475590-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f7c5db3d2652..85e40d61d18b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2143,7 +2143,7 @@ static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu) kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending(); } -fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) +fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu) { u64 data = kvm_read_edx_eax(vcpu); u32 msr = kvm_rcx_read(vcpu); @@ -2168,7 +2168,7 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) return EXIT_FASTPATH_REENTER_GUEST; } -EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff); +EXPORT_SYMBOL_GPL(handle_fastpath_wrmsr); /* * Adapt set_msr() to msr_io()'s calling convention -- cgit From 885df2d2109a60f84d84639ce6d95a91045f6c45 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Tue, 5 Aug 2025 13:22:22 -0700 Subject: KVM: x86: Add support for RDMSR/WRMSRNS w/ immediate on Intel Add support for the immediate forms of RDMSR and WRMSRNS (currently Intel-only). The immediate variants are only valid in 64-bit mode, and use a single general purpose register for the data (the register is also encoded in the instruction, i.e. not implicit like regular RDMSR/WRMSR). The immediate variants are primarily motivated by performance, not code size: by having the MSR index in an immediate, it is available *much* earlier in the CPU pipeline, which allows hardware much more leeway about how a particular MSR is handled. Intel VMX support for the immediate forms of MSR accesses communicates exit information to the host as follows: 1) The immediate form of RDMSR uses VM-Exit Reason 84. 2) The immediate form of WRMSRNS uses VM-Exit Reason 85. 3) For both VM-Exit reasons 84 and 85, the Exit Qualification field is set to the MSR index that triggered the VM-Exit. 4) Bits 3 ~ 6 of the VM-Exit Instruction Information field are set to the register encoding used by the immediate form of the instruction, i.e. the destination register for RDMSR, and the source for WRMSRNS. 5) The VM-Exit Instruction Length field records the size of the immediate form of the MSR instruction. To deal with userspace RDMSR exits, stash the destination register in a new kvm_vcpu_arch field, similar to cui_linear_rip, pio, etc. 
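A small decode sketch using the layout described above; the exit-reason macro names here are placeholders for illustration, the SDM and KVM's headers are authoritative:

  #include <stdint.h>

  #define EXIT_REASON_MSR_READ_IMM    84  /* immediate-form RDMSR   */
  #define EXIT_REASON_MSR_WRITE_IMM   85  /* immediate-form WRMSRNS */

  /* The MSR index is delivered in the Exit Qualification field. */
  static inline uint32_t msr_imm_exit_index(uint64_t exit_qual)
  {
          return (uint32_t)exit_qual;
  }

  /* Bits 3:6 of the VM-Exit Instruction Information field encode the GPR
   * used for the data (destination for RDMSR, source for WRMSRNS). */
  static inline int msr_imm_exit_gpr(uint32_t insn_info)
  {
          return (insn_info >> 3) & 0xf;
  }
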
Alternatively, the register could be saved in kvm_run.msr or re-retrieved from the VMCS, but the former would require sanitizing the value to ensure userspace doesn't clobber the value to an out-of-bounds index, and the latter would require a new one-off kvm_x86_ops hook. Don't bother adding support for the instructions in KVM's emulator, as the only way for RDMSR/WRMSR to be encountered is if KVM is emulating large swaths of code due to invalid guest state, and a vCPU cannot have invalid guest state while in 64-bit mode. Signed-off-by: Xin Li (Intel) [sean: minor tweaks, massage and expand changelog] Link: https://lore.kernel.org/r/20250805202224.1475590-5-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 55 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 10 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 85e40d61d18b..efd45b2e8f45 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1991,6 +1991,15 @@ static int complete_fast_rdmsr(struct kvm_vcpu *vcpu) return complete_fast_msr_access(vcpu); } +static int complete_fast_rdmsr_imm(struct kvm_vcpu *vcpu) +{ + if (!vcpu->run->msr.error) + kvm_register_write(vcpu, vcpu->arch.cui_rdmsr_imm_reg, + vcpu->run->msr.data); + + return complete_fast_msr_access(vcpu); +} + static u64 kvm_msr_reason(int r) { switch (r) { @@ -2025,39 +2034,53 @@ static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index, return 1; } -int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) +static int __kvm_emulate_rdmsr(struct kvm_vcpu *vcpu, u32 msr, int reg, + int (*complete_rdmsr)(struct kvm_vcpu *)) { - u32 msr = kvm_rcx_read(vcpu); u64 data; int r; r = kvm_get_msr_with_filter(vcpu, msr, &data); - if (!r) { trace_kvm_msr_read(msr, data); - kvm_rax_write(vcpu, data & -1u); - kvm_rdx_write(vcpu, (data >> 32) & -1u); + if (reg < 0) { + kvm_rax_write(vcpu, data & -1u); + kvm_rdx_write(vcpu, (data >> 32) & -1u); + } else { + kvm_register_write(vcpu, reg, data); + } } else { /* MSR read failed? 
See if we should ask user space */ if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_RDMSR, 0, - complete_fast_rdmsr, r)) + complete_rdmsr, r)) return 0; trace_kvm_msr_read_ex(msr); } return kvm_x86_call(complete_emulated_msr)(vcpu, r); } + +int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) +{ + return __kvm_emulate_rdmsr(vcpu, kvm_rcx_read(vcpu), -1, + complete_fast_rdmsr); +} EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr); -int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) +int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) +{ + vcpu->arch.cui_rdmsr_imm_reg = reg; + + return __kvm_emulate_rdmsr(vcpu, msr, reg, complete_fast_rdmsr_imm); +} +EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr_imm); + +static int __kvm_emulate_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { - u32 msr = kvm_rcx_read(vcpu); - u64 data = kvm_read_edx_eax(vcpu); int r; r = kvm_set_msr_with_filter(vcpu, msr, data); - if (!r) { trace_kvm_msr_write(msr, data); } else { @@ -2073,8 +2096,20 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) return kvm_x86_call(complete_emulated_msr)(vcpu, r); } + +int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) +{ + return __kvm_emulate_wrmsr(vcpu, kvm_rcx_read(vcpu), + kvm_read_edx_eax(vcpu)); +} EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); +int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) +{ + return __kvm_emulate_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg)); +} +EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr_imm); + int kvm_emulate_as_nop(struct kvm_vcpu *vcpu) { return kvm_skip_emulated_instruction(vcpu); -- cgit From ec93675a325191c95bc322a4471c2f194f7211fc Mon Sep 17 00:00:00 2001 From: Xin Li Date: Tue, 5 Aug 2025 13:22:23 -0700 Subject: KVM: VMX: Support the immediate form of WRMSRNS in the VM-Exit fastpath Add support for handling "WRMSRNS with an immediate" VM-Exits in KVM's fastpath. On Intel, all writes to the x2APIC ICR and to the TSC Deadline MSR are non-serializing, i.e. it's highly likely guest kernels will switch to using WRMSRNS when possible. And in general, any MSR written via WRMSRNS is probably worth handling in the fastpath, as the entire point of WRMSRNS is to shave cycles in hot paths. 
Signed-off-by: Xin Li (Intel) [sean: rewrite changelog, split rename to separate patch] Link: https://lore.kernel.org/r/20250805202224.1475590-6-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index efd45b2e8f45..b47a6a4ced15 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2178,11 +2178,8 @@ static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu) kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending(); } -fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu) +static fastpath_t __handle_fastpath_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { - u64 data = kvm_read_edx_eax(vcpu); - u32 msr = kvm_rcx_read(vcpu); - switch (msr) { case APIC_BASE_MSR + (APIC_ICR >> 4): if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic) || @@ -2203,8 +2200,20 @@ fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu) return EXIT_FASTPATH_REENTER_GUEST; } + +fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu) +{ + return __handle_fastpath_wrmsr(vcpu, kvm_rcx_read(vcpu), + kvm_read_edx_eax(vcpu)); +} EXPORT_SYMBOL_GPL(handle_fastpath_wrmsr); +fastpath_t handle_fastpath_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) +{ + return __handle_fastpath_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg)); +} +EXPORT_SYMBOL_GPL(handle_fastpath_wrmsr_imm); + /* * Adapt set_msr() to msr_io()'s calling convention */ -- cgit From d2dcf25a4cf2d9058a866c2237884287209b8d19 Mon Sep 17 00:00:00 2001 From: Yang Weijiang Date: Mon, 11 Aug 2025 19:55:09 -0700 Subject: KVM: x86: Rename kvm_{g,s}et_msr()* to show that they emulate guest accesses Rename kvm_{g,s}et_msr_with_filter() kvm_{g,s}et_msr() to kvm_emulate_msr_{read,write} __kvm_emulate_msr_{read,write} to make it more obvious that KVM uses these helpers to emulate guest behaviors, i.e., host_initiated == false in these helpers. 
Suggested-by: Sean Christopherson Suggested-by: Chao Gao Signed-off-by: Yang Weijiang Reviewed-by: Maxim Levitsky Reviewed-by: Chao Gao Tested-by: Mathias Krause Tested-by: John Allen Signed-off-by: Chao Gao Tested-by: Rick Edgecombe Link: https://lore.kernel.org/r/20250812025606.74625-2-chao.gao@intel.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b47a6a4ced15..e1eff02f37c7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1933,33 +1933,33 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, __kvm_get_msr); } -int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data) +int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) { if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) return KVM_MSR_RET_FILTERED; return kvm_get_msr_ignored_check(vcpu, index, data, false); } -EXPORT_SYMBOL_GPL(kvm_get_msr_with_filter); +EXPORT_SYMBOL_GPL(kvm_emulate_msr_read); -int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data) +int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) { if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) return KVM_MSR_RET_FILTERED; return kvm_set_msr_ignored_check(vcpu, index, data, false); } -EXPORT_SYMBOL_GPL(kvm_set_msr_with_filter); +EXPORT_SYMBOL_GPL(kvm_emulate_msr_write); -int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) +int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) { return kvm_get_msr_ignored_check(vcpu, index, data, false); } -EXPORT_SYMBOL_GPL(kvm_get_msr); +EXPORT_SYMBOL_GPL(__kvm_emulate_msr_read); -int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) +int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) { return kvm_set_msr_ignored_check(vcpu, index, data, false); } -EXPORT_SYMBOL_GPL(kvm_set_msr); +EXPORT_SYMBOL_GPL(__kvm_emulate_msr_write); static void complete_userspace_rdmsr(struct kvm_vcpu *vcpu) { @@ -2040,7 +2040,8 @@ static int __kvm_emulate_rdmsr(struct kvm_vcpu *vcpu, u32 msr, int reg, u64 data; int r; - r = kvm_get_msr_with_filter(vcpu, msr, &data); + r = kvm_emulate_msr_read(vcpu, msr, &data); + if (!r) { trace_kvm_msr_read(msr, data); @@ -2080,7 +2081,7 @@ static int __kvm_emulate_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { int r; - r = kvm_set_msr_with_filter(vcpu, msr, data); + r = kvm_emulate_msr_write(vcpu, msr, data); if (!r) { trace_kvm_msr_write(msr, data); } else { @@ -8366,7 +8367,7 @@ static int emulator_get_msr_with_filter(struct x86_emulate_ctxt *ctxt, struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); int r; - r = kvm_get_msr_with_filter(vcpu, msr_index, pdata); + r = kvm_emulate_msr_read(vcpu, msr_index, pdata); if (r < 0) return X86EMUL_UNHANDLEABLE; @@ -8389,7 +8390,7 @@ static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt, struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); int r; - r = kvm_set_msr_with_filter(vcpu, msr_index, data); + r = kvm_emulate_msr_write(vcpu, msr_index, data); if (r < 0) return X86EMUL_UNHANDLEABLE; @@ -8409,7 +8410,7 @@ static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt, static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata) { - return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); + return __kvm_emulate_msr_read(emul_to_vcpu(ctxt), msr_index, pdata); } static int emulator_check_rdpmc_early(struct 
x86_emulate_ctxt *ctxt, u32 pmc) -- cgit From db07f3d0eb19663d8fb61b40c19b26703c9a1b1a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 11 Aug 2025 19:55:10 -0700 Subject: KVM: x86: Use double-underscore read/write MSR helpers as appropriate Use the double-underscore helpers for emulating MSR reads and writes in he no-underscore versions to better capture the relationship between the two sets of APIs (the double-underscore versions don't honor userspace MSR filters). No functional change intended. Signed-off-by: Chao Gao Tested-by: Rick Edgecombe Link: https://lore.kernel.org/r/20250812025606.74625-3-chao.gao@intel.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e1eff02f37c7..52ad2d2f41cb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1933,11 +1933,24 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, __kvm_get_msr); } +int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) +{ + return kvm_get_msr_ignored_check(vcpu, index, data, false); +} +EXPORT_SYMBOL_GPL(__kvm_emulate_msr_read); + +int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) +{ + return kvm_set_msr_ignored_check(vcpu, index, data, false); +} +EXPORT_SYMBOL_GPL(__kvm_emulate_msr_write); + int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) { if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) return KVM_MSR_RET_FILTERED; - return kvm_get_msr_ignored_check(vcpu, index, data, false); + + return __kvm_emulate_msr_read(vcpu, index, data); } EXPORT_SYMBOL_GPL(kvm_emulate_msr_read); @@ -1945,21 +1958,11 @@ int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) { if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) return KVM_MSR_RET_FILTERED; - return kvm_set_msr_ignored_check(vcpu, index, data, false); -} -EXPORT_SYMBOL_GPL(kvm_emulate_msr_write); -int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) -{ - return kvm_get_msr_ignored_check(vcpu, index, data, false); + return __kvm_emulate_msr_write(vcpu, index, data); } -EXPORT_SYMBOL_GPL(__kvm_emulate_msr_read); +EXPORT_SYMBOL_GPL(kvm_emulate_msr_write); -int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) -{ - return kvm_set_msr_ignored_check(vcpu, index, data, false); -} -EXPORT_SYMBOL_GPL(__kvm_emulate_msr_write); static void complete_userspace_rdmsr(struct kvm_vcpu *vcpu) { -- cgit From c2aa58b226abf5ac6d355fb1f3b7c4284a7b5cab Mon Sep 17 00:00:00 2001 From: Yang Weijiang Date: Mon, 11 Aug 2025 19:55:11 -0700 Subject: KVM: x86: Add kvm_msr_{read,write}() helpers Wrap __kvm_{get,set}_msr() into two new helpers for KVM usage and use the helpers to replace existing usage of the raw functions. kvm_msr_{read,write}() are KVM-internal helpers, i.e. used when KVM needs to get/set a MSR value for emulating CPU behavior, i.e., host_initiated == %true in the helpers. 
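To make the host_initiated distinction concrete, a toy example (not KVM code): host-initiated accesses exist so userspace can save/restore state regardless of guest CPUID, whereas the same access from the guest must fail:

  #include <stdbool.h>
  #include <stdint.h>

  struct toy_vcpu {
          bool guest_has_feature;
          uint64_t feature_msr;
  };

  /* Returns 0 on success, non-zero if the caller should inject #GP. */
  static int toy_set_feature_msr(struct toy_vcpu *v, uint64_t data,
                                 bool host_initiated)
  {
          if (!host_initiated && !v->guest_has_feature)
                  return 1;

          v->feature_msr = data;
          return 0;
  }
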
Suggested-by: Sean Christopherson Signed-off-by: Yang Weijiang Reviewed-by: Maxim Levitsky Tested-by: Mathias Krause Tested-by: John Allen Signed-off-by: Chao Gao Tested-by: Rick Edgecombe Link: https://lore.kernel.org/r/20250812025606.74625-4-chao.gao@intel.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 52ad2d2f41cb..59a431dce015 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1899,8 +1899,8 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu, * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. */ -int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, - bool host_initiated) +static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, + bool host_initiated) { struct msr_data msr; int ret; @@ -1926,6 +1926,16 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, return ret; } +int kvm_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) +{ + return __kvm_set_msr(vcpu, index, data, true); +} + +int kvm_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) +{ + return __kvm_get_msr(vcpu, index, data, true); +} + static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated) { @@ -12472,7 +12482,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) MSR_IA32_MISC_ENABLE_BTS_UNAVAIL; __kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP); - __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true); + kvm_msr_write(vcpu, MSR_IA32_XSS, 0); } /* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */ -- cgit From 41f6710f99f4337924e3929e8e7a51c74f800b91 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 11 Aug 2025 19:55:12 -0700 Subject: KVM: x86: Manually clear MPX state only on INIT Don't manually clear/zero MPX state on RESET, as the guest FPU state is zero allocated and KVM only does RESET during vCPU creation, i.e. the relevant state is guaranteed to be all zeroes. Opportunistically move the relevant code into a helper in anticipation of adding support for CET shadow stacks, which also has state that is zeroed on INIT. Signed-off-by: Yang Weijiang Tested-by: Mathias Krause Tested-by: John Allen Signed-off-by: Chao Gao Tested-by: Rick Edgecombe Link: https://lore.kernel.org/r/20250812025606.74625-5-chao.gao@intel.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 59a431dce015..d398ee84c8f3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12398,6 +12398,35 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvfree(vcpu->arch.cpuid_entries); } +static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event) +{ + struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate; + + /* + * Guest FPU state is zero allocated and so doesn't need to be manually + * cleared on RESET, i.e. during vCPU creation. + */ + if (!init_event || !fpstate) + return; + + /* + * On INIT, only select XSTATE components are zeroed, most components + * are unchanged. Currently, the only components that are zeroed and + * supported by KVM are MPX related. 
+ */ + if (!kvm_mpx_supported()) + return; + + /* + * All paths that lead to INIT are required to load the guest's FPU + * state (because most paths are buried in KVM_RUN). + */ + kvm_put_guest_fpu(vcpu); + fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS); + fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR); + kvm_load_guest_fpu(vcpu); +} + void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct kvm_cpuid_entry2 *cpuid_0x1; @@ -12455,22 +12484,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_async_pf_hash_reset(vcpu); vcpu->arch.apf.halted = false; - if (vcpu->arch.guest_fpu.fpstate && kvm_mpx_supported()) { - struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate; - - /* - * All paths that lead to INIT are required to load the guest's - * FPU state (because most paths are buried in KVM_RUN). - */ - if (init_event) - kvm_put_guest_fpu(vcpu); - - fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS); - fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR); - - if (init_event) - kvm_load_guest_fpu(vcpu); - } + kvm_xstate_reset(vcpu, init_event); if (!init_event) { vcpu->arch.smbase = 0x30000; -- cgit From c26675447faff8c4ddc1dc5d2cd28326b8181aaf Mon Sep 17 00:00:00 2001 From: Chao Gao Date: Mon, 11 Aug 2025 19:55:13 -0700 Subject: KVM: x86: Zero XSTATE components on INIT by iterating over supported features Tweak the code a bit to facilitate resetting more xstate components in the future, e.g., CET's xstate-managed MSRs. No functional change intended. Suggested-by: Sean Christopherson Tested-by: Mathias Krause Tested-by: John Allen Signed-off-by: Chao Gao Tested-by: Rick Edgecombe Link: https://lore.kernel.org/r/20250812025606.74625-6-chao.gao@intel.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d398ee84c8f3..8bfba7d8f750 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12401,6 +12401,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event) { struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate; + u64 xfeatures_mask; + int i; /* * Guest FPU state is zero allocated and so doesn't need to be manually @@ -12414,16 +12416,20 @@ static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event) * are unchanged. Currently, the only components that are zeroed and * supported by KVM are MPX related. */ - if (!kvm_mpx_supported()) + xfeatures_mask = (kvm_caps.supported_xcr0 | kvm_caps.supported_xss) & + (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); + if (!xfeatures_mask) return; + BUILD_BUG_ON(sizeof(xfeatures_mask) * BITS_PER_BYTE <= XFEATURE_MAX); + /* * All paths that lead to INIT are required to load the guest's FPU * state (because most paths are buried in KVM_RUN). */ kvm_put_guest_fpu(vcpu); - fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS); - fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR); + for_each_set_bit(i, (unsigned long *)&xfeatures_mask, XFEATURE_MAX) + fpstate_clear_xstate_component(fpstate, i); kvm_load_guest_fpu(vcpu); } -- cgit From aac057dd623132a1776be37b471e30b4589fdf76 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 21 Aug 2025 14:42:09 -0700 Subject: KVM: x86: Move vector_hashing into lapic.c Move the vector_hashing module param into lapic.c now that all usage is contained within the local APIC emulation code. 
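For background on what the param controls, a simplified sketch of vector-hashed lowest-priority delivery (KVM's actual implementation works on the optimized APIC map):

  #include <stdint.h>

  /* With vector hashing enabled, the destination of a lowest-priority
   * interrupt is chosen by hashing the vector across the N candidate
   * destinations instead of arbitrating by APIC priority. */
  static int pick_dest_by_vector_hash(uint8_t vector, const int *dest_cpus,
                                      int nr_dest)
  {
          return dest_cpus[vector % nr_dest];
  }
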
Opportunistically drop the accessor and append "_enabled" to the variable to help capture that it's a boolean module param. No functional change intended. Link: https://lore.kernel.org/r/20250821214209.3463350-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8bfba7d8f750..bf386f2ebba3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -164,9 +164,6 @@ module_param(kvmclock_periodic_sync, bool, 0444); static u32 __read_mostly tsc_tolerance_ppm = 250; module_param(tsc_tolerance_ppm, uint, 0644); -static bool __read_mostly vector_hashing = true; -module_param(vector_hashing, bool, 0444); - bool __read_mostly enable_vmware_backdoor = false; module_param(enable_vmware_backdoor, bool, 0444); EXPORT_SYMBOL_GPL(enable_vmware_backdoor); @@ -13549,11 +13546,6 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); -bool kvm_vector_hashing_enabled(void) -{ - return vector_hashing; -} - bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) { return (vcpu->arch.msr_kvm_poll_control & 1) == 0; -- cgit From b3a37bff8daf50cdd6fa9ebe4a503d4261d99796 Mon Sep 17 00:00:00 2001 From: Sagi Shahar Date: Tue, 26 Aug 2025 18:17:26 -0700 Subject: KVM: TDX: Reject fully in-kernel irqchip if EOIs are protected, i.e. for TDX VMs Reject KVM_CREATE_IRQCHIP if the VM type has protected EOIs, i.e. if KVM can't intercept EOI and thus can't faithfully emulate level-triggered interrupts that are routed through the I/O APIC. For TDX VMs, the TDX-Module owns the VMX EOI-bitmap and configures all IRQ vectors to have the CPU accelerate EOIs, i.e. doesn't allow KVM to intercept any EOIs. KVM already requires a split irqchip[1], but does so during vCPU creation, which is both too late to allow userspace to fallback to a split irqchip and a less-than-stellar experience for userspace since an -EINVAL on KVM_VCPU_CREATE is far harder to debug/triage than failure exactly on KVM_CREATE_IRQCHIP. And of course, allowing an action that ultimately fails is arguably a bug regardless of the impact on userspace. Link: https://lore.kernel.org/lkml/20250222014757.897978-11-binbin.wu@linux.intel.com [1] Link: https://lore.kernel.org/lkml/aK3vZ5HuKKeFuuM4@google.com Suggested-by: Sean Christopherson Signed-off-by: Sagi Shahar Reviewed-by: Xiaoyao Li Reviewed-by: Binbin Wu Acked-by: Kai Huang Link: https://lore.kernel.org/r/20250827011726.2451115-1-sagis@google.com [sean: massage shortlog+changelog, relocate setting has_protected_eoi] Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bf386f2ebba3..99f2a150ca78 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6989,6 +6989,15 @@ set_identity_unlock: if (irqchip_in_kernel(kvm)) goto create_irqchip_unlock; + /* + * Disallow an in-kernel I/O APIC if the VM has protected EOIs, + * i.e. if KVM can't intercept EOIs and thus can't properly + * emulate level-triggered interrupts. 
+ */ + r = -ENOTTY; + if (kvm->arch.has_protected_eoi) + goto create_irqchip_unlock; + r = -EINVAL; if (kvm->created_vcpus) goto create_irqchip_unlock; -- cgit From 6057497336bbfabd3a2f632bba2cd2bfbcb7b304 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 6 Aug 2025 12:56:46 -0700 Subject: KVM: x86: Rework KVM_REQ_MSR_FILTER_CHANGED into a generic RECALC_INTERCEPTS Rework the MSR_FILTER_CHANGED request into a more generic RECALC_INTERCEPTS request, and expand the responsibilities of vendor code to recalculate all intercepts that vary based on userspace input, e.g. instruction intercepts that are tied to guest CPUID. Providing a generic recalc request will allow the upcoming mediated PMU support to trigger a recalc when PMU features, e.g. PERF_CAPABILITIES, are set by userspace, without having to make multiple calls to/from PMU code. As a bonus, using a request will effectively coalesce recalcs, e.g. will reduce the number of recalcs for normal usage from 3+ to 1 (vCPU create, set CPUID, set PERF_CAPABILITIES (Intel only), set filter). The downside is that MSR filter changes that are done in isolation will do a small amount of unnecessary work, but that's already a relatively slow path, and the cost of recalculating instruction intercepts is negligible. Tested-by: Xudong Hao Link: https://lore.kernel.org/r/20250806195706.1650976-25-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 99f2a150ca78..64e08148909b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6794,7 +6794,11 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, kvm_free_msr_filter(old_filter); - kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED); + /* + * Recalc MSR intercepts as userspace may want to intercept accesses to + * MSRs that KVM would otherwise pass through to the guest. + */ + kvm_make_all_cpus_request(kvm, KVM_REQ_RECALC_INTERCEPTS); return 0; } @@ -10827,13 +10831,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_APF_READY, vcpu)) kvm_check_async_pf_completion(vcpu); - /* - * Recalc MSR intercepts as userspace may want to intercept - * accesses to MSRs that KVM would otherwise pass through to - * the guest. - */ - if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu)) - kvm_x86_call(recalc_msr_intercepts)(vcpu); + if (kvm_check_request(KVM_REQ_RECALC_INTERCEPTS, vcpu)) + kvm_x86_call(recalc_intercepts)(vcpu); if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu)) kvm_x86_call(update_cpu_dirty_logging)(vcpu); -- cgit From 4135a9a8ccba2b685f2301429ea765fa0f78eb89 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Sep 2025 15:32:10 -0700 Subject: KVM: SEV: Validate XCR0 provided by guest in GHCB Use __kvm_set_xcr() to propagate XCR0 changes from the GHCB to KVM's software model in order to validate the new XCR0 against KVM's view of the supported XCR0. Allowing garbage is thankfully mostly benign, as kvm_load_{guest,host}_xsave_state() bail early for vCPUs with protected state, xstate_required_size() will simply provide garbage back to the guest, and attempting to save/restore the bad value via KVM_{G,S}ET_XCRS will only harm the guest (setting XCR0 will fail). However, allowing the guest to put junk into a field that KVM assumes is valid is a CVE waiting to happen. 
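For context, a standalone sketch of the kind of consistency checks __kvm_set_xcr() enforces before accepting a new XCR0 (illustrative only, not the full set):

  #include <stdbool.h>
  #include <stdint.h>

  #define XF_FP        (1ull << 0)
  #define XF_SSE       (1ull << 1)
  #define XF_YMM       (1ull << 2)
  #define XF_BNDREGS   (1ull << 3)
  #define XF_BNDCSR    (1ull << 4)
  #define XF_AVX512    (7ull << 5)

  static bool xcr0_is_consistent(uint64_t xcr0, uint64_t supported)
  {
          if (!(xcr0 & XF_FP))
                  return false;           /* x87 can never be cleared */
          if ((xcr0 & XF_YMM) && !(xcr0 & XF_SSE))
                  return false;           /* AVX requires SSE */
          if (!!(xcr0 & XF_BNDREGS) != !!(xcr0 & XF_BNDCSR))
                  return false;           /* MPX components go together */
          if ((xcr0 & XF_AVX512) &&
              ((xcr0 & XF_AVX512) != XF_AVX512 || !(xcr0 & XF_YMM)))
                  return false;           /* AVX-512 is all-or-nothing, needs AVX */

          return xcr0 == (xcr0 & supported);  /* no unsupported bits */
  }
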
And as a bonus, using the proper API eliminates the ugly open coding of setting arch.cpuid_dynamic_bits_dirty. Simply ignore bad values, as either the guest managed to get an unsupported value into hardware, or the guest is misbehaving and providing pure garbage. In either case, KVM can't fix the broken guest. Note, using __kvm_set_xcr() also avoids recomputing dynamic CPUID bits if XCR0 isn't actually changing (relatively to KVM's previous snapshot). Cc: Tom Lendacky Fixes: 291bd20d5d88 ("KVM: SVM: Add initial support for a VMGEXIT VMEXIT") Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20250919223258.1604852-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a1c49bc681c4..1d7faf8bc785 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1237,7 +1237,7 @@ static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu) } #endif -static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) +int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) { u64 xcr0 = xcr; u64 old_xcr0 = vcpu->arch.xcr0; @@ -1281,6 +1281,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) vcpu->arch.cpuid_dynamic_bits_dirty = true; return 0; } +EXPORT_SYMBOL_GPL(__kvm_set_xcr); int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu) { -- cgit From 9bc366350734246301b090802fc71f9924daad39 Mon Sep 17 00:00:00 2001 From: Hou Wenlong Date: Tue, 23 Sep 2025 08:37:37 -0700 Subject: KVM: x86: Add helper to retrieve current value of user return MSR In the user return MSR support, the cached value is always the hardware value of the specific MSR. Therefore, add a helper to retrieve the cached value, which can replace the need for RDMSR, for example, to allow SEV-ES guests to restore the correct host hardware value without using RDMSR. Cc: stable@vger.kernel.org Signed-off-by: Hou Wenlong [sean: drop "cache" from the name, make it a one-liner, tag for stable] Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250923153738.1875174-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1d7faf8bc785..5ac2183b9993 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -677,6 +677,12 @@ void kvm_user_return_msr_update_cache(unsigned int slot, u64 value) } EXPORT_SYMBOL_GPL(kvm_user_return_msr_update_cache); +u64 kvm_get_user_return_msr(unsigned int slot) +{ + return this_cpu_ptr(user_return_msrs)->values[slot].curr; +} +EXPORT_SYMBOL_GPL(kvm_get_user_return_msr); + static void drop_user_return_notifiers(void) { struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); -- cgit From 06f2969c6a1237f05f8ba4324b6ddc2570a808d0 Mon Sep 17 00:00:00 2001 From: Yang Weijiang Date: Fri, 19 Sep 2025 15:32:11 -0700 Subject: KVM: x86: Introduce KVM_{G,S}ET_ONE_REG uAPIs support Enable KVM_{G,S}ET_ONE_REG uAPIs so that userspace can access MSRs and other non-MSR registers through them, along with support for KVM_GET_REG_LIST to enumerate support for KVM-defined registers. This is in preparation for allowing userspace to read/write the guest SSP register, which is needed for the upcoming CET virtualization support. Currently, two types of registers are supported: KVM_X86_REG_TYPE_MSR and KVM_X86_REG_TYPE_KVM. 
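From userspace, reading an MSR through the new uAPI looks roughly like the sketch below; the KVM_X86_REG_TYPE_MSR value and the id-building helper are assumptions made for illustration, the uapi header added by this series is authoritative:

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  #ifndef KVM_X86_REG_TYPE_MSR
  #define KVM_X86_REG_TYPE_MSR 2          /* assumed value, check the uapi header */
  #endif

  /* arch = KVM_REG_X86 (top byte), size = u64, type in bits 39:32,
   * MSR index in the low 32 bits, per the kvm_x86_reg_id layout above. */
  static uint64_t x86_msr_reg_id(uint32_t msr)
  {
          return KVM_REG_X86 | KVM_REG_SIZE_U64 |
                 ((uint64_t)KVM_X86_REG_TYPE_MSR << 32) | msr;
  }

  static int get_msr_one_reg(int vcpu_fd, uint32_t msr, uint64_t *val)
  {
          struct kvm_one_reg reg = {
                  .id   = x86_msr_reg_id(msr),
                  .addr = (uintptr_t)val,
          };

          return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
  }
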
All MSRs are in the former type; the latter type is added for registers that lack existing KVM uAPIs to access them. The "KVM" in the name is intended to be vague to give KVM flexibility to include other potential registers. More precise names like "SYNTHETIC" and "SYNTHETIC_MSR" were considered, but were deemed too confusing (e.g. can be conflated with synthetic guest-visible MSRs) and may put KVM into a corner (e.g. if KVM wants to change how a KVM-defined register is modeled internally). Enumerate only KVM-defined registers in KVM_GET_REG_LIST to avoid duplicating KVM_GET_MSR_INDEX_LIST, and so that KVM can return _only_ registers that are fully supported (KVM_GET_REG_LIST is vCPU-scoped, i.e. can be precise, whereas KVM_GET_MSR_INDEX_LIST is system-scoped). Suggested-by: Sean Christopherson Signed-off-by: Yang Weijiang Link: https://lore.kernel.org/all/20240219074733.122080-18-weijiang.yang@intel.com [1] Tested-by: Mathias Krause Tested-by: John Allen Tested-by: Rick Edgecombe Signed-off-by: Chao Gao Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Co-developed-by: Sean Christopherson Link: https://lore.kernel.org/r/20250919223258.1604852-5-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f718834bc00b..bc245e0b0443 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4735,6 +4735,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IRQFD_RESAMPLE: case KVM_CAP_MEMORY_FAULT_INFO: case KVM_CAP_X86_GUEST_MODE: + case KVM_CAP_ONE_REG: r = 1; break; case KVM_CAP_PRE_FAULT_MEMORY: @@ -5913,6 +5914,98 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, } } +struct kvm_x86_reg_id { + __u32 index; + __u8 type; + __u8 rsvd1; + __u8 rsvd2:4; + __u8 size:4; + __u8 x86; +}; + +static int kvm_translate_kvm_reg(struct kvm_x86_reg_id *reg) +{ + return -EINVAL; +} + +static int kvm_get_one_msr(struct kvm_vcpu *vcpu, u32 msr, u64 __user *user_val) +{ + u64 val; + + if (do_get_msr(vcpu, msr, &val)) + return -EINVAL; + + if (put_user(val, user_val)) + return -EFAULT; + + return 0; +} + +static int kvm_set_one_msr(struct kvm_vcpu *vcpu, u32 msr, u64 __user *user_val) +{ + u64 val; + + if (get_user(val, user_val)) + return -EFAULT; + + if (do_set_msr(vcpu, msr, &val)) + return -EINVAL; + + return 0; +} + +static int kvm_get_set_one_reg(struct kvm_vcpu *vcpu, unsigned int ioctl, + void __user *argp) +{ + struct kvm_one_reg one_reg; + struct kvm_x86_reg_id *reg; + u64 __user *user_val; + int r; + + if (copy_from_user(&one_reg, argp, sizeof(one_reg))) + return -EFAULT; + + if ((one_reg.id & KVM_REG_ARCH_MASK) != KVM_REG_X86) + return -EINVAL; + + reg = (struct kvm_x86_reg_id *)&one_reg.id; + if (reg->rsvd1 || reg->rsvd2) + return -EINVAL; + + if (reg->type == KVM_X86_REG_TYPE_KVM) { + r = kvm_translate_kvm_reg(reg); + if (r) + return r; + } + + if (reg->type != KVM_X86_REG_TYPE_MSR) + return -EINVAL; + + if ((one_reg.id & KVM_REG_SIZE_MASK) != KVM_REG_SIZE_U64) + return -EINVAL; + + guard(srcu)(&vcpu->kvm->srcu); + + user_val = u64_to_user_ptr(one_reg.addr); + if (ioctl == KVM_GET_ONE_REG) + r = kvm_get_one_msr(vcpu, reg->index, user_val); + else + r = kvm_set_one_msr(vcpu, reg->index, user_val); + + return r; +} + +static int kvm_get_reg_list(struct kvm_vcpu *vcpu, + struct kvm_reg_list __user *user_list) +{ + u64 nr_regs = 0; + + if (put_user(nr_regs, 
&user_list->n)) + return -EFAULT; + + return 0; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -6029,6 +6122,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp, srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } + case KVM_GET_ONE_REG: + case KVM_SET_ONE_REG: + r = kvm_get_set_one_reg(vcpu, ioctl, argp); + break; + case KVM_GET_REG_LIST: + r = kvm_get_reg_list(vcpu, argp); + break; case KVM_TPR_ACCESS_REPORTING: { struct kvm_tpr_access_ctl tac; -- cgit From c0a5f298912222f3bb4e8f5dc67c6a1f0e93d83f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Sep 2025 15:32:12 -0700 Subject: KVM: x86: Report XSS as to-be-saved if there are supported features Add MSR_IA32_XSS to list of MSRs reported to userspace if supported_xss is non-zero, i.e. KVM supports at least one XSS based feature. Before enabling CET virtualization series, guest IA32_MSR_XSS is guaranteed to be 0, i.e., XSAVES/XRSTORS is executed in non-root mode with XSS == 0, which equals to the effect of XSAVE/XRSTOR. Signed-off-by: Yang Weijiang Reviewed-by: Maxim Levitsky Reviewed-by: Chao Gao Tested-by: Mathias Krause Tested-by: John Allen Tested-by: Rick Edgecombe Reviewed-by: Xiaoyao Li Signed-off-by: Chao Gao Reviewed-by: Binbin Wu Link: https://lore.kernel.org/r/20250919223258.1604852-6-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bc245e0b0443..757878a222a7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -332,7 +332,7 @@ static const u32 msrs_to_save_base[] = { MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B, MSR_IA32_UMWAIT_CONTROL, - MSR_IA32_XFD, MSR_IA32_XFD_ERR, + MSR_IA32_XFD, MSR_IA32_XFD_ERR, MSR_IA32_XSS, }; static const u32 msrs_to_save_pmu[] = { @@ -7503,6 +7503,10 @@ static void kvm_probe_msr_to_save(u32 msr_index) if (!(kvm_get_arch_capabilities() & ARCH_CAP_TSX_CTRL_MSR)) return; break; + case MSR_IA32_XSS: + if (!kvm_caps.supported_xss) + return; + break; default: break; } -- cgit From 338543cbe033e56dcc8c13adcdf6c228953c0829 Mon Sep 17 00:00:00 2001 From: Chao Gao Date: Fri, 19 Sep 2025 15:32:13 -0700 Subject: KVM: x86: Check XSS validity against guest CPUIDs Maintain per-guest valid XSS bits and check XSS validity against them rather than against KVM capabilities. This is to prevent bits that are supported by KVM but not supported for a guest from being set. Opportunistically return KVM_MSR_RET_UNSUPPORTED on IA32_XSS MSR accesses if guest CPUID doesn't enumerate X86_FEATURE_XSAVES. Since KVM_MSR_RET_UNSUPPORTED takes care of host_initiated cases, drop the host_initiated check. 
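A rough sketch of the derivation (not part of this x86.c diff; the real update happens when guest CPUID is set, and guest_cpuid_xss_bits() is a purely hypothetical stand-in for reading the XSS bits enumerated by CPUID.(EAX=0xD,ECX=1) from the guest's CPUID entries):

	static u64 guest_supported_xss(struct kvm_vcpu *vcpu)
	{
		if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
			return 0;

		/* Intersect KVM's supported XSS bits with what guest CPUID enumerates. */
		return kvm_caps.supported_xss & guest_cpuid_xss_bits(vcpu);
	}

With such a mask cached in vcpu->arch.guest_supported_xss, the WRMSR path in the diff below reduces to the single "data & ~vcpu->arch.guest_supported_xss" check.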
Signed-off-by: Chao Gao Reviewed-by: Xiaoyao Li Reviewed-by: Binbin Wu Link: https://lore.kernel.org/r/20250919223258.1604852-7-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 757878a222a7..6ae12e8c9d05 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3984,15 +3984,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } break; case MSR_IA32_XSS: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) - return 1; + if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) + return KVM_MSR_RET_UNSUPPORTED; /* * KVM supports exposing PT to the guest, but does not support * IA32_XSS[bit 8]. Guests have to use RDMSR/WRMSR rather than * XSAVES/XRSTORS to save/restore PT MSRs. */ - if (data & ~kvm_caps.supported_xss) + if (data & ~vcpu->arch.guest_supported_xss) return 1; vcpu->arch.ia32_xss = data; vcpu->arch.cpuid_dynamic_bits_dirty = true; -- cgit From 9622e116d0d25f1ddc47bf0328612cc7d87d20f2 Mon Sep 17 00:00:00 2001 From: Yang Weijiang Date: Fri, 19 Sep 2025 15:32:14 -0700 Subject: KVM: x86: Refresh CPUID on write to guest MSR_IA32_XSS Update CPUID.(EAX=0DH,ECX=1).EBX to reflect current required xstate size due to XSS MSR modification. CPUID(EAX=0DH,ECX=1).EBX reports the required storage size of all enabled xstate features in (XCR0 | IA32_XSS). The CPUID value can be used by guest before allocate sufficient xsave buffer. Note, KVM does not yet support any XSS based features, i.e. supported_xss is guaranteed to be zero at this time. Opportunistically skip CPUID updates if XSS value doesn't change. Suggested-by: Sean Christopherson Co-developed-by: Zhang Yi Z Signed-off-by: Zhang Yi Z Signed-off-by: Yang Weijiang Reviewed-by: Maxim Levitsky Reviewed-by: Chao Gao Reviewed-by: Xiaoyao Li Tested-by: Mathias Krause Tested-by: John Allen Tested-by: Rick Edgecombe Signed-off-by: Chao Gao Reviewed-by: Binbin Wu Link: https://lore.kernel.org/r/20250919223258.1604852-8-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6ae12e8c9d05..d142cbc71aaa 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3993,6 +3993,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) */ if (data & ~vcpu->arch.guest_supported_xss) return 1; + if (vcpu->arch.ia32_xss == data) + break; vcpu->arch.ia32_xss = data; vcpu->arch.cpuid_dynamic_bits_dirty = true; break; -- cgit From 779ed05511f2931b89fcd0087aba2fcca8890715 Mon Sep 17 00:00:00 2001 From: Yang Weijiang Date: Fri, 19 Sep 2025 15:32:15 -0700 Subject: KVM: x86: Initialize kvm_caps.supported_xss Set original kvm_caps.supported_xss to (host_xss & KVM_SUPPORTED_XSS) if XSAVES is supported. host_xss contains the host supported xstate feature bits for thread FPU context switch, KVM_SUPPORTED_XSS includes all KVM enabled XSS feature bits, the resulting value represents the supervisor xstates that are available to guest and are backed by host FPU framework for swapping {guest,host} XSAVE-managed registers/MSRs. 
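For illustration only (bit positions follow the kernel's XFEATURE layout; the resulting mask only becomes non-zero once later patches in this series add the CET bits to KVM_SUPPORTED_XSS):

	/*
	 * Hypothetical host: IA32_XSS has CET_USER (bit 11) and CET_KERNEL
	 * (bit 12) set, i.e. host_xss == 0x1800.  If KVM_SUPPORTED_XSS also
	 * contains both bits, then
	 *
	 *	kvm_caps.supported_xss = 0x1800 & KVM_SUPPORTED_XSS, i.e. 0x1800
	 *
	 * i.e. only supervisor states that are both managed by the host FPU
	 * framework and virtualized by KVM are advertised as supported.
	 */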
[sean: relocate and enhance comment about PT / XSS[8] ] Signed-off-by: Yang Weijiang Reviewed-by: Maxim Levitsky Reviewed-by: Chao Gao Tested-by: Mathias Krause Tested-by: John Allen Tested-by: Rick Edgecombe Reviewed-by: Xiaoyao Li Signed-off-by: Chao Gao Reviewed-by: Binbin Wu Link: https://lore.kernel.org/r/20250919223258.1604852-9-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d142cbc71aaa..831c5e488de3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -217,6 +217,14 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs; | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \ | XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE) +/* + * Note, KVM supports exposing PT to the guest, but does not support context + * switching PT via XSTATE (KVM's PT virtualization relies on perf; swapping + * PT via guest XSTATE would clobber perf state), i.e. KVM doesn't support + * IA32_XSS[bit 8] (guests can/must use RDMSR/WRMSR to save/restore PT MSRs). + */ +#define KVM_SUPPORTED_XSS 0 + bool __read_mostly allow_smaller_maxphyaddr = 0; EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr); @@ -3986,11 +3994,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_XSS: if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) return KVM_MSR_RET_UNSUPPORTED; - /* - * KVM supports exposing PT to the guest, but does not support - * IA32_XSS[bit 8]. Guests have to use RDMSR/WRMSR rather than - * XSAVES/XRSTORS to save/restore PT MSRs. - */ + if (data & ~vcpu->arch.guest_supported_xss) return 1; if (vcpu->arch.ia32_xss == data) @@ -9822,14 +9826,17 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) kvm_host.xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); kvm_caps.supported_xcr0 = kvm_host.xcr0 & KVM_SUPPORTED_XCR0; } + + if (boot_cpu_has(X86_FEATURE_XSAVES)) { + rdmsrq(MSR_IA32_XSS, kvm_host.xss); + kvm_caps.supported_xss = kvm_host.xss & KVM_SUPPORTED_XSS; + } + kvm_caps.supported_quirks = KVM_X86_VALID_QUIRKS; kvm_caps.inapplicable_quirks = KVM_X86_CONDITIONAL_QUIRKS; rdmsrq_safe(MSR_EFER, &kvm_host.efer); - if (boot_cpu_has(X86_FEATURE_XSAVES)) - rdmsrq(MSR_IA32_XSS, kvm_host.xss); - kvm_init_pmu_capability(ops->pmu_ops); if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) -- cgit From e44eb58334bbc28d790ed2851385cc86ea76dc12 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Sep 2025 15:32:16 -0700 Subject: KVM: x86: Load guest FPU state when access XSAVE-managed MSRs Load the guest's FPU state if userspace is accessing MSRs whose values are managed by XSAVES. Introduce two helpers, kvm_{get,set}_xstate_msr(), to facilitate access to such kind of MSRs. If MSRs supported in kvm_caps.supported_xss are passed through to guest, the guest MSRs are swapped with host's before vCPU exits to userspace and after it reenters kernel before next VM-entry. Because the modified code is also used for the KVM_GET_MSRS device ioctl(), explicitly check @vcpu is non-null before attempting to load guest state. The XSAVE-managed MSRs cannot be retrieved via the device ioctl() without loading guest FPU state (which doesn't exist). Note that guest_cpuid_has() is not queried as host userspace is allowed to access MSRs that have not been exposed to the guest, e.g. it might do KVM_SET_MSRS prior to KVM_SET_CPUID2. 
The two helpers are put here in order to manifest accessing xsave-managed MSRs requires special check and handling to guarantee the correctness of read/write to the MSRs. Co-developed-by: Yang Weijiang Signed-off-by: Yang Weijiang Reviewed-by: Maxim Levitsky Tested-by: Mathias Krause Tested-by: John Allen Tested-by: Rick Edgecombe Signed-off-by: Chao Gao [sean: drop S_CET, add big comment, move accessors to x86.c] Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Reviewed-by: Xin Li (Intel) Link: https://lore.kernel.org/r/20250919223258.1604852-10-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 831c5e488de3..c2e11f3d50fb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -136,6 +136,9 @@ static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2); static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2); static DEFINE_MUTEX(vendor_module_lock); +static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); +static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); + struct kvm_x86_ops kvm_x86_ops __read_mostly; #define KVM_X86_OP(func) \ @@ -3801,6 +3804,67 @@ static void record_steal_time(struct kvm_vcpu *vcpu) mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); } +/* + * Returns true if the MSR in question is managed via XSTATE, i.e. is context + * switched with the rest of guest FPU state. Note! S_CET is _not_ context + * switched via XSTATE even though it _is_ saved/restored via XSAVES/XRSTORS. + * Because S_CET is loaded on VM-Enter and VM-Exit via dedicated VMCS fields, + * the value saved/restored via XSTATE is always the host's value. That detail + * is _extremely_ important, as the guest's S_CET must _never_ be resident in + * hardware while executing in the host. Loading guest values for U_CET and + * PL[0-3]_SSP while executing in the kernel is safe, as U_CET is specific to + * userspace, and PL[0-3]_SSP are only consumed when transitioning to lower + * privilege levels, i.e. are effectively only consumed by userspace as well. + */ +static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr) +{ + if (!vcpu) + return false; + + switch (msr) { + case MSR_IA32_U_CET: + return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) || + guest_cpu_cap_has(vcpu, X86_FEATURE_IBT); + case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP: + return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK); + default: + return false; + } +} + +/* + * Lock (and if necessary, re-load) the guest FPU, i.e. XSTATE, and access an + * MSR that is managed via XSTATE. Note, the caller is responsible for doing + * the initial FPU load, this helper only ensures that guest state is resident + * in hardware (the kernel can load its FPU state in IRQ context). 
+ */ +static __always_inline void kvm_access_xstate_msr(struct kvm_vcpu *vcpu, + struct msr_data *msr_info, + int access) +{ + BUILD_BUG_ON(access != MSR_TYPE_R && access != MSR_TYPE_W); + + KVM_BUG_ON(!is_xstate_managed_msr(vcpu, msr_info->index), vcpu->kvm); + KVM_BUG_ON(!vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm); + + kvm_fpu_get(); + if (access == MSR_TYPE_R) + rdmsrq(msr_info->index, msr_info->data); + else + wrmsrq(msr_info->index, msr_info->data); + kvm_fpu_put(); +} + +static __maybe_unused void kvm_set_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +{ + kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_W); +} + +static __maybe_unused void kvm_get_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +{ + kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_R); +} + int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { u32 msr = msr_info->index; @@ -4551,11 +4615,25 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, int (*do_msr)(struct kvm_vcpu *vcpu, unsigned index, u64 *data)) { + bool fpu_loaded = false; int i; - for (i = 0; i < msrs->nmsrs; ++i) + for (i = 0; i < msrs->nmsrs; ++i) { + /* + * If userspace is accessing one or more XSTATE-managed MSRs, + * temporarily load the guest's FPU state so that the guest's + * MSR value(s) is resident in hardware and thus can be accessed + * via RDMSR/WRMSR. + */ + if (!fpu_loaded && is_xstate_managed_msr(vcpu, entries[i].index)) { + kvm_load_guest_fpu(vcpu); + fpu_loaded = true; + } if (do_msr(vcpu, entries[i].index, &entries[i].data)) break; + } + if (fpu_loaded) + kvm_put_guest_fpu(vcpu); return i; } @@ -5965,6 +6043,7 @@ static int kvm_get_set_one_reg(struct kvm_vcpu *vcpu, unsigned int ioctl, struct kvm_one_reg one_reg; struct kvm_x86_reg_id *reg; u64 __user *user_val; + bool load_fpu; int r; if (copy_from_user(&one_reg, argp, sizeof(one_reg))) @@ -5991,12 +6070,18 @@ static int kvm_get_set_one_reg(struct kvm_vcpu *vcpu, unsigned int ioctl, guard(srcu)(&vcpu->kvm->srcu); + load_fpu = is_xstate_managed_msr(vcpu, reg->index); + if (load_fpu) + kvm_load_guest_fpu(vcpu); + user_val = u64_to_user_ptr(one_reg.addr); if (ioctl == KVM_GET_ONE_REG) r = kvm_get_one_msr(vcpu, reg->index, user_val); else r = kvm_set_one_msr(vcpu, reg->index, user_val); + if (load_fpu) + kvm_put_guest_fpu(vcpu); return r; } -- cgit From 586ef9dcbb28fc0bc6beb496e6dc8a54276e7a32 Mon Sep 17 00:00:00 2001 From: Yang Weijiang Date: Fri, 19 Sep 2025 15:32:17 -0700 Subject: KVM: x86: Add fault checks for guest CR4.CET setting Check potential faults for CR4.CET setting per Intel SDM requirements. CET can be enabled if and only if CR0.WP == 1, i.e. setting CR4.CET == 1 faults if CR0.WP == 0 and setting CR0.WP == 0 fails if CR4.CET == 1. 
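For clarity, the resulting ordering constraint as seen from inside the guest (illustrative pseudo-C; get_cr*()/set_cr*() are stand-ins, not a specific kernel API):

	set_cr0(get_cr0() | X86_CR0_WP);	/* CR0.WP must be set first... */
	set_cr4(get_cr4() | X86_CR4_CET);	/* ...so that CR4.CET can be set */

	set_cr0(get_cr0() & ~X86_CR0_WP);	/* #GP(0), CR4.CET is still set */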
Signed-off-by: Yang Weijiang Reviewed-by: Chao Gao Reviewed-by: Maxim Levitsky Reviewed-by: Xiaoyao Li Tested-by: Mathias Krause Tested-by: John Allen Tested-by: Rick Edgecombe Signed-off-by: Chao Gao Reviewed-by: Binbin Wu Co-developed-by: Sean Christopherson Link: https://lore.kernel.org/r/20250919223258.1604852-11-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c2e11f3d50fb..6d67c969e18a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1176,6 +1176,9 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) (is_64_bit_mode(vcpu) || kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE))) return 1; + if (!(cr0 & X86_CR0_WP) && kvm_is_cr4_bit_set(vcpu, X86_CR4_CET)) + return 1; + kvm_x86_call(set_cr0)(vcpu, cr0); kvm_post_set_cr0(vcpu, old_cr0, cr0); @@ -1376,6 +1379,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; } + if ((cr4 & X86_CR4_CET) && !kvm_is_cr0_bit_set(vcpu, X86_CR0_WP)) + return 1; + kvm_x86_call(set_cr4)(vcpu, cr4); kvm_post_set_cr4(vcpu, old_cr4, cr4); -- cgit From 6a11c860d8a4aa1c7351246bd1ee7b41f26399b6 Mon Sep 17 00:00:00 2001 From: Yang Weijiang Date: Fri, 19 Sep 2025 15:32:18 -0700 Subject: KVM: x86: Report KVM supported CET MSRs as to-be-saved Add CET MSRs to the list of MSRs reported to userspace if the feature, i.e. IBT or SHSTK, associated with the MSRs is supported by KVM. Suggested-by: Chao Gao Signed-off-by: Yang Weijiang Tested-by: Mathias Krause Tested-by: John Allen Tested-by: Rick Edgecombe Signed-off-by: Chao Gao Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250919223258.1604852-12-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6d67c969e18a..5f23d2d2731d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -344,6 +344,10 @@ static const u32 msrs_to_save_base[] = { MSR_IA32_UMWAIT_CONTROL, MSR_IA32_XFD, MSR_IA32_XFD_ERR, MSR_IA32_XSS, + + MSR_IA32_U_CET, MSR_IA32_S_CET, + MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP, + MSR_IA32_PL3_SSP, MSR_IA32_INT_SSP_TAB, }; static const u32 msrs_to_save_pmu[] = { @@ -7603,6 +7607,20 @@ static void kvm_probe_msr_to_save(u32 msr_index) if (!kvm_caps.supported_xss) return; break; + case MSR_IA32_U_CET: + case MSR_IA32_S_CET: + if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) && + !kvm_cpu_cap_has(X86_FEATURE_IBT)) + return; + break; + case MSR_IA32_INT_SSP_TAB: + if (!kvm_cpu_cap_has(X86_FEATURE_LM)) + return; + fallthrough; + case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP: + if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK)) + return; + break; default: break; } -- cgit From 9d6812d415358372aaaf1dfe95bc30d11e4e95db Mon Sep 17 00:00:00 2001 From: Yang Weijiang Date: Fri, 19 Sep 2025 15:32:20 -0700 Subject: KVM: x86: Enable guest SSP read/write interface with new uAPIs Add a KVM-defined ONE_REG register, KVM_REG_GUEST_SSP, to let userspace save and restore the guest's Shadow Stack Pointer (SSP). On both Intel and AMD, SSP is a hardware register that can only be accessed by software via dedicated ISA (e.g. RDSSP) or via VMCS/VMCB fields (used by hardware to context switch SSP at entry/exit). As a result, SSP doesn't fit in any of KVM's existing interfaces for saving/restoring state. 
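For context, a minimal userspace sketch of reading the new register (vcpu_fd is assumed to be an open vCPU file descriptor; error handling trimmed):

	__u64 ssp;
	struct kvm_one_reg reg = {
		.id   = KVM_X86_REG_KVM(KVM_REG_GUEST_SSP),
		.addr = (__u64)(unsigned long)&ssp,
	};

	/* Read the guest's current SSP; KVM_SET_ONE_REG writes it back. */
	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg))
		perror("KVM_GET_ONE_REG(KVM_REG_GUEST_SSP)");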
Internally, treat SSP as a fake/synthetic MSR, as the semantics of writes to SSP follow that of several other Shadow Stack MSRs, e.g. the PLx_SSP MSRs. Use a translation layer to hide the KVM-internal MSR index so that the arbitrary index doesn't become ABI, e.g. so that KVM can rework its implementation as needed, so long as the ONE_REG ABI is maintained. Explicitly reject accesses to SSP if the vCPU doesn't have Shadow Stack support to avoid running afoul of ignore_msrs, which unfortunately applies to host-initiated accesses (which is a discussion for another day). I.e. ensure consistent behavior for KVM-defined registers irrespective of ignore_msrs. Link: https://lore.kernel.org/all/aca9d389-f11e-4811-90cf-d98e345a5cc2@intel.com Suggested-by: Sean Christopherson Signed-off-by: Yang Weijiang Tested-by: Mathias Krause Tested-by: John Allen Tested-by: Rick Edgecombe Signed-off-by: Chao Gao Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250919223258.1604852-14-seanjc@google.com Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5f23d2d2731d..d85bb723f25a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6016,9 +6016,27 @@ struct kvm_x86_reg_id { __u8 x86; }; -static int kvm_translate_kvm_reg(struct kvm_x86_reg_id *reg) +static int kvm_translate_kvm_reg(struct kvm_vcpu *vcpu, + struct kvm_x86_reg_id *reg) { - return -EINVAL; + switch (reg->index) { + case KVM_REG_GUEST_SSP: + /* + * FIXME: If host-initiated accesses are ever exempted from + * ignore_msrs (in kvm_do_msr_access()), drop this manual check + * and rely on KVM's standard checks to reject accesses to regs + * that don't exist. + */ + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) + return -EINVAL; + + reg->type = KVM_X86_REG_TYPE_MSR; + reg->index = MSR_KVM_INTERNAL_GUEST_SSP; + break; + default: + return -EINVAL; + } + return 0; } static int kvm_get_one_msr(struct kvm_vcpu *vcpu, u32 msr, u64 __user *user_val) @@ -6067,7 +6085,7 @@ static int kvm_get_set_one_reg(struct kvm_vcpu *vcpu, unsigned int ioctl, return -EINVAL; if (reg->type == KVM_X86_REG_TYPE_KVM) { - r = kvm_translate_kvm_reg(reg); + r = kvm_translate_kvm_reg(vcpu, reg); if (r) return r; } @@ -6098,11 +6116,22 @@ static int kvm_get_set_one_reg(struct kvm_vcpu *vcpu, unsigned int ioctl, static int kvm_get_reg_list(struct kvm_vcpu *vcpu, struct kvm_reg_list __user *user_list) { - u64 nr_regs = 0; + u64 nr_regs = guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) ? 1 : 0; + u64 user_nr_regs; + + if (get_user(user_nr_regs, &user_list->n)) + return -EFAULT; if (put_user(nr_regs, &user_list->n)) return -EFAULT; + if (user_nr_regs < nr_regs) + return -E2BIG; + + if (nr_regs && + put_user(KVM_X86_REG_KVM(KVM_REG_GUEST_SSP), &user_list->reg[0])) + return -EFAULT; + return 0; } -- cgit From 8b59d0275c964bcbe573dde46bd3c1f20ed2d2bd Mon Sep 17 00:00:00 2001 From: Yang Weijiang Date: Fri, 19 Sep 2025 15:32:21 -0700 Subject: KVM: VMX: Emulate read and write to CET MSRs Add emulation interface for CET MSR access. The emulation code is split into common part and vendor specific part. The former does common checks for MSRs, e.g., accessibility, data validity etc., then passes operation to either XSAVE-managed MSRs via the helpers or CET VMCS fields. SSP can only be read via RDSSP. 
Writing even requires destructive and potentially faulting operations such as SAVEPREVSSP/RSTORSSP or SETSSBSY/CLRSSBSY. Let the host use a pseudo-MSR that is just a wrapper for the GUEST_SSP field of the VMCS. Suggested-by: Sean Christopherson Signed-off-by: Yang Weijiang Tested-by: Mathias Krause Tested-by: John Allen Tested-by: Rick Edgecombe Signed-off-by: Chao Gao [sean: drop call to kvm_set_xstate_msr() for S_CET, consolidate code] Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250919223258.1604852-15-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d85bb723f25a..54d280fe9a4b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1890,6 +1890,44 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, data = (u32)data; break; + case MSR_IA32_U_CET: + case MSR_IA32_S_CET: + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) && + !guest_cpu_cap_has(vcpu, X86_FEATURE_IBT)) + return KVM_MSR_RET_UNSUPPORTED; + if (!kvm_is_valid_u_s_cet(vcpu, data)) + return 1; + break; + case MSR_KVM_INTERNAL_GUEST_SSP: + if (!host_initiated) + return 1; + fallthrough; + /* + * Note that the MSR emulation here is flawed when a vCPU + * doesn't support the Intel 64 architecture. The expected + * architectural behavior in this case is that the upper 32 + * bits do not exist and should always read '0'. However, + * because the actual hardware on which the virtual CPU is + * running does support Intel 64, XRSTORS/XSAVES in the + * guest could observe behavior that violates the + * architecture. Intercepting XRSTORS/XSAVES for this + * special case isn't deemed worthwhile. + */ + case MSR_IA32_PL0_SSP ... MSR_IA32_INT_SSP_TAB: + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) + return KVM_MSR_RET_UNSUPPORTED; + /* + * MSR_IA32_INT_SSP_TAB is not present on processors that do + * not support Intel 64 architecture. + */ + if (index == MSR_IA32_INT_SSP_TAB && !guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) + return KVM_MSR_RET_UNSUPPORTED; + if (is_noncanonical_msr_address(data, vcpu)) + return 1; + /* All SSP MSRs except MSR_IA32_INT_SSP_TAB must be 4-byte aligned */ + if (index != MSR_IA32_INT_SSP_TAB && !IS_ALIGNED(data, 4)) + return 1; + break; } msr.data = data; @@ -1934,6 +1972,20 @@ static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, !guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID)) return 1; break; + case MSR_IA32_U_CET: + case MSR_IA32_S_CET: + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) && + !guest_cpu_cap_has(vcpu, X86_FEATURE_IBT)) + return KVM_MSR_RET_UNSUPPORTED; + break; + case MSR_KVM_INTERNAL_GUEST_SSP: + if (!host_initiated) + return 1; + fallthrough; + case MSR_IA32_PL0_SSP ... 
MSR_IA32_INT_SSP_TAB: + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) + return KVM_MSR_RET_UNSUPPORTED; + break; } msr.index = index; @@ -3865,12 +3917,12 @@ static __always_inline void kvm_access_xstate_msr(struct kvm_vcpu *vcpu, kvm_fpu_put(); } -static __maybe_unused void kvm_set_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +static void kvm_set_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_W); } -static __maybe_unused void kvm_get_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +static void kvm_get_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_R); } @@ -4256,6 +4308,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu->arch.guest_fpu.xfd_err = data; break; #endif + case MSR_IA32_U_CET: + case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP: + kvm_set_xstate_msr(vcpu, msr_info); + break; default: if (kvm_pmu_is_valid_msr(vcpu, msr)) return kvm_pmu_set_msr(vcpu, msr_info); @@ -4605,6 +4661,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.guest_fpu.xfd_err; break; #endif + case MSR_IA32_U_CET: + case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP: + kvm_get_xstate_msr(vcpu, msr_info); + break; default: if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) return kvm_pmu_get_msr(vcpu, msr_info); -- cgit From 584ba3ffb9843fd12d3b4a33cfe056e2264392a0 Mon Sep 17 00:00:00 2001 From: Yang Weijiang Date: Fri, 19 Sep 2025 15:32:24 -0700 Subject: KVM: VMX: Set host constant supervisor states to VMCS fields Save constant values to the HOST_{S_CET,SSP,INTR_SSP_TABLE} fields explicitly. Kernel IBT is supported and the setting in MSR_IA32_S_CET is static after boot (the exception is the BIOS call case, but a vCPU thread never crosses that path), so KVM doesn't need to refresh the HOST_S_CET field before every VM-Enter/VM-Exit sequence. Host supervisor shadow stack is not currently enabled and SSP is not accessible to kernel mode, thus it's safe to set the host IA32_INT_SSP_TAB/SSP VMCS fields to 0. When shadow stack is enabled for CPL3, SSP is reloaded from PL3_SSP before the kernel returns to userspace; see SDM Vol. 2A/2B, Chapters 3/4, for SYSCALL/SYSRET, SYSENTER/SYSEXIT, RDSSP, CALL, etc. Prevent KVM module loading if host supervisor shadow stack is enabled, i.e. if SHSTK_EN is set in MSR_IA32_S_CET, as KVM cannot correctly co-exist with it. Suggested-by: Sean Christopherson Suggested-by: Chao Gao Signed-off-by: Yang Weijiang Reviewed-by: Maxim Levitsky Reviewed-by: Chao Gao Tested-by: Mathias Krause Tested-by: John Allen Tested-by: Rick Edgecombe Signed-off-by: Chao Gao [sean: snapshot host S_CET if SHSTK *or* IBT is supported] Reviewed-by: Xiaoyao Li Reviewed-by: Binbin Wu Link: https://lore.kernel.org/r/20250919223258.1604852-18-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 54d280fe9a4b..0050509a7de2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9997,6 +9997,18 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) return -EIO; } + if (boot_cpu_has(X86_FEATURE_SHSTK) || boot_cpu_has(X86_FEATURE_IBT)) { + rdmsrq(MSR_IA32_S_CET, kvm_host.s_cet); + /* + * Linux doesn't yet support supervisor shadow stacks (SSS), so + * KVM doesn't save/restore the associated MSRs, i.e. KVM may + * clobber the host values.
Yell and refuse to load if SSS is + * unexpectedly enabled, e.g. to avoid crashing the host. + */ + if (WARN_ON_ONCE(kvm_host.s_cet & CET_SHSTK_EN)) + return -EIO; + } + memset(&kvm_caps, 0, sizeof(kvm_caps)); x86_emulator_cache = kvm_alloc_emulator_cache(); -- cgit From 82c0ec02825814e1ef332d635d3441b07c05b1c9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Sep 2025 15:32:26 -0700 Subject: KVM: x86: Don't emulate task switches when IBT or SHSTK is enabled Exit to userspace with KVM_INTERNAL_ERROR_EMULATION if the guest triggers task switch emulation with Indirect Branch Tracking or Shadow Stacks enabled, as attempting to do the right thing would require non-trivial effort and complexity, KVM doesn't support emulating CET generally, and it's extremely unlikely that any guest will do task switches while also utilizing CET. Defer taking on the complexity until someone cares enough to put in the time and effort to add support. Per the SDM: If shadow stack is enabled, then the SSP of the task is located at the 4 bytes at offset 104 in the 32-bit TSS and is used by the processor to establish the SSP when a task switch occurs from a task associated with this TSS. Note that the processor does not write the SSP of the task initiating the task switch to the TSS of that task, and instead the SSP of the previous task is pushed onto the shadow stack of the new task. Note, per the SDM's pseudocode on TASK SWITCHING, IBT state for the new privilege level is updated. To keep things simple, check both S_CET and U_CET (again, anyone that wants more precise checking can have the honor of implementing support). Reported-by: Binbin Wu Closes: https://lore.kernel.org/all/819bd98b-2a60-4107-8e13-41f1e4c706b1@linux.intel.com Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250919223258.1604852-20-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0050509a7de2..31aaff9db083 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12175,6 +12175,25 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; int ret; + if (kvm_is_cr4_bit_set(vcpu, X86_CR4_CET)) { + u64 u_cet, s_cet; + + /* + * Check both User and Supervisor on task switches as inter- + * privilege level task switches are impacted by CET at both + * the current privilege level and the new privilege level, and + * that information is not known at this time. The expectation + * is that the guest won't require emulation of task switches + * while using IBT or Shadow Stacks. + */ + if (__kvm_emulate_msr_read(vcpu, MSR_IA32_U_CET, &u_cet) || + __kvm_emulate_msr_read(vcpu, MSR_IA32_S_CET, &s_cet)) + goto unhandled_task_switch; + + if ((u_cet | s_cet) & (CET_ENDBR_EN | CET_SHSTK_EN)) + goto unhandled_task_switch; + } + init_emulate_ctxt(vcpu); ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason, @@ -12184,17 +12203,19 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, * Report an error userspace if MMIO is needed, as KVM doesn't support * MMIO during a task switch (or any other complex operation). 
*/ - if (ret || vcpu->mmio_needed) { - vcpu->mmio_needed = false; - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; - } + if (ret || vcpu->mmio_needed) + goto unhandled_task_switch; kvm_rip_write(vcpu, ctxt->eip); kvm_set_rflags(vcpu, ctxt->eflags); return 1; + +unhandled_task_switch: + vcpu->mmio_needed = false; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; } EXPORT_SYMBOL_GPL(kvm_task_switch); -- cgit From d4c03f63957c66bc95f5f33052f8b4be804631c3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Sep 2025 15:32:27 -0700 Subject: KVM: x86: Emulate SSP[63:32]!=0 #GP(0) for FAR JMP to 32-bit mode Emulate the Shadow Stack restriction that the current SSP must be a 32-bit value on a FAR JMP from 64-bit mode to compatibility mode. From the SDM's pseudocode for FAR JMP: IF ShadowStackEnabled(CPL) IF (IA32_EFER.LMA and DEST(segment selector).L) = 0 (* If target is legacy or compatibility mode then the SSP must be in low 4GB *) IF (SSP & 0xFFFFFFFF00000000 != 0); THEN #GP(0); FI; FI; FI; Note, only the current CPL needs to be considered, as FAR JMP can't be used for inter-privilege level transfers, and KVM rejects emulation of all other far branch instructions when Shadow Stacks are enabled. To give the emulator access to GUEST_SSP, special case handling MSR_KVM_INTERNAL_GUEST_SSP in emulator_get_msr() to treat the access as a host access (KVM doesn't allow guest accesses to internal "MSRs"). The ->get_msr() API is only used for implicit accesses from the emulator, i.e. is only used with hardcoded MSR indices, and so any access to MSR_KVM_INTERNAL_GUEST_SSP is guaranteed to be from KVM, i.e. not from the guest via RDMSR. Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250919223258.1604852-21-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 31aaff9db083..0a4e58dddf36 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8741,6 +8741,15 @@ static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt, static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata) { + /* + * Treat emulator accesses to the current shadow stack pointer as host- + * initiated, as they aren't true MSR accesses (SSP is a "just a reg"), + * and this API is used only for implicit accesses, i.e. not RDMSR, and + * so the index is fully KVM-controlled. + */ + if (unlikely(msr_index == MSR_KVM_INTERNAL_GUEST_SSP)) + return kvm_msr_read(emul_to_vcpu(ctxt), msr_index, pdata); + return __kvm_emulate_msr_read(emul_to_vcpu(ctxt), msr_index, pdata); } -- cgit From 69cc3e886582891f9c4d5830f18a2664a7f7cf7c Mon Sep 17 00:00:00 2001 From: Yang Weijiang Date: Fri, 19 Sep 2025 15:32:32 -0700 Subject: KVM: x86: Add XSS support for CET_KERNEL and CET_USER Add CET_KERNEL and CET_USER to KVM's set of supported XSS bits when IBT *or* SHSTK is supported. Like CR4.CET, XFEATURE support for IBT and SHSTK are bundle together under the CET umbrella, and thus prone to virtualization holes if KVM or the guest supports only one of IBT or SHSTK, but hardware supports both. 
However, again like CR4.CET, such virtualization holes are benign from the host's perspective so long as KVM takes care to always honor the "or" logic. Require CET_KERNEL and CET_USER to come as a pair, and refuse to support IBT or SHSTK if one (or both) features is missing, as the (host) kernel expects them to come as a pair, i.e. may get confused and corrupt state if only one of CET_KERNEL or CET_USER is supported. Signed-off-by: Yang Weijiang Signed-off-by: Mathias Krause Tested-by: Mathias Krause Tested-by: John Allen Tested-by: Rick Edgecombe Signed-off-by: Chao Gao [sean: split to separate patch, write changelog, add XFEATURE_MASK_CET_ALL] Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250919223258.1604852-26-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0a4e58dddf36..8b4c69330a87 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -220,13 +220,14 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs; | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \ | XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE) +#define XFEATURE_MASK_CET_ALL (XFEATURE_MASK_CET_USER | XFEATURE_MASK_CET_KERNEL) /* * Note, KVM supports exposing PT to the guest, but does not support context * switching PT via XSTATE (KVM's PT virtualization relies on perf; swapping * PT via guest XSTATE would clobber perf state), i.e. KVM doesn't support * IA32_XSS[bit 8] (guests can/must use RDMSR/WRMSR to save/restore PT MSRs). */ -#define KVM_SUPPORTED_XSS 0 +#define KVM_SUPPORTED_XSS (XFEATURE_MASK_CET_ALL) bool __read_mostly allow_smaller_maxphyaddr = 0; EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr); @@ -10104,6 +10105,16 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) kvm_caps.supported_xss = 0; + if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) && + !kvm_cpu_cap_has(X86_FEATURE_IBT)) + kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL; + + if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) { + kvm_cpu_cap_clear(X86_FEATURE_SHSTK); + kvm_cpu_cap_clear(X86_FEATURE_IBT); + kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL; + } + if (kvm_caps.has_tsc_control) { /* * Make sure the user can only configure tsc_khz values that @@ -12772,10 +12783,11 @@ static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event) /* * On INIT, only select XSTATE components are zeroed, most components * are unchanged. Currently, the only components that are zeroed and - * supported by KVM are MPX related. + * supported by KVM are MPX and CET related. */ xfeatures_mask = (kvm_caps.supported_xcr0 | kvm_caps.supported_xss) & - (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); + (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR | + XFEATURE_MASK_CET_ALL); if (!xfeatures_mask) return; -- cgit From 6560468305da263c35ff51cde1dd74d6a228b286 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 18 Sep 2025 17:33:02 -0700 Subject: KVM: x86: Drop pointless exports of kvm_arch_xxx() hooks Drop the exporting of several kvm_arch_xxx() hooks that are only called from arch-neutral code, i.e. that are only called from kvm.ko. 
Link: https://lore.kernel.org/r/20250919003303.1355064-5-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5542b8d83602..a618a30423a7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -13922,14 +13922,12 @@ void kvm_arch_register_noncoherent_dma(struct kvm *kvm) if (atomic_inc_return(&kvm->arch.noncoherent_dma_count) == 1) kvm_noncoherent_dma_assignment_start_or_stop(kvm); } -EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma); void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) { if (!atomic_dec_return(&kvm->arch.noncoherent_dma_count)) kvm_noncoherent_dma_assignment_start_or_stop(kvm); } -EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma); bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) { @@ -13941,7 +13939,6 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) { return (vcpu->arch.msr_kvm_poll_control & 1) == 0; } -EXPORT_SYMBOL_GPL(kvm_arch_no_poll); #ifdef CONFIG_KVM_GUEST_MEMFD /* -- cgit From 6b36119b94d0b2bb8cea9d512017efafd461d6ac Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 18 Sep 2025 17:33:03 -0700 Subject: KVM: x86: Export KVM-internal symbols for sub-modules only Rework almost all of KVM x86's exports to expose symbols only to KVM's vendor modules, i.e. to kvm-{amd,intel}.ko. Keep the generic exports that are guarded by CONFIG_KVM_EXTERNAL_WRITE_TRACKING=y, as they're explicitly designed/intended for external usage. Link: https://lore.kernel.org/r/20250919003303.1355064-6-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 220 ++++++++++++++++++++++++++--------------------------- 1 file changed, 110 insertions(+), 110 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a618a30423a7..4b8138bd4857 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -97,10 +97,10 @@ * vendor module being reloaded with different module parameters. 
*/ struct kvm_caps kvm_caps __read_mostly; -EXPORT_SYMBOL_GPL(kvm_caps); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_caps); struct kvm_host_values kvm_host __read_mostly; -EXPORT_SYMBOL_GPL(kvm_host); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_host); #define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e)) @@ -155,7 +155,7 @@ module_param(ignore_msrs, bool, 0644); bool __read_mostly report_ignored_msrs = true; module_param(report_ignored_msrs, bool, 0644); -EXPORT_SYMBOL_GPL(report_ignored_msrs); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(report_ignored_msrs); unsigned int min_timer_period_us = 200; module_param(min_timer_period_us, uint, 0644); @@ -169,7 +169,7 @@ module_param(tsc_tolerance_ppm, uint, 0644); bool __read_mostly enable_vmware_backdoor = false; module_param(enable_vmware_backdoor, bool, 0444); -EXPORT_SYMBOL_GPL(enable_vmware_backdoor); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_vmware_backdoor); /* * Flags to manipulate forced emulation behavior (any non-zero value will @@ -184,7 +184,7 @@ module_param(pi_inject_timer, bint, 0644); /* Enable/disable PMU virtualization */ bool __read_mostly enable_pmu = true; -EXPORT_SYMBOL_GPL(enable_pmu); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_pmu); module_param(enable_pmu, bool, 0444); bool __read_mostly eager_page_split = true; @@ -211,7 +211,7 @@ struct kvm_user_return_msrs { }; u32 __read_mostly kvm_nr_uret_msrs; -EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_nr_uret_msrs); static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS]; static struct kvm_user_return_msrs __percpu *user_return_msrs; @@ -230,16 +230,16 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs; #define KVM_SUPPORTED_XSS (XFEATURE_MASK_CET_ALL) bool __read_mostly allow_smaller_maxphyaddr = 0; -EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(allow_smaller_maxphyaddr); bool __read_mostly enable_apicv = true; -EXPORT_SYMBOL_GPL(enable_apicv); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_apicv); bool __read_mostly enable_ipiv = true; -EXPORT_SYMBOL_GPL(enable_ipiv); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_ipiv); bool __read_mostly enable_device_posted_irqs = true; -EXPORT_SYMBOL_GPL(enable_device_posted_irqs); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_device_posted_irqs); const struct _kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS(), @@ -628,7 +628,7 @@ int kvm_add_user_return_msr(u32 msr) kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr; return kvm_nr_uret_msrs++; } -EXPORT_SYMBOL_GPL(kvm_add_user_return_msr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_add_user_return_msr); int kvm_find_user_return_msr(u32 msr) { @@ -640,7 +640,7 @@ int kvm_find_user_return_msr(u32 msr) } return -1; } -EXPORT_SYMBOL_GPL(kvm_find_user_return_msr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_find_user_return_msr); static void kvm_user_return_msr_cpu_online(void) { @@ -680,7 +680,7 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask) kvm_user_return_register_notifier(msrs); return 0; } -EXPORT_SYMBOL_GPL(kvm_set_user_return_msr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_user_return_msr); void kvm_user_return_msr_update_cache(unsigned int slot, u64 value) { @@ -689,13 +689,13 @@ void kvm_user_return_msr_update_cache(unsigned int slot, u64 value) msrs->values[slot].curr = value; kvm_user_return_register_notifier(msrs); } -EXPORT_SYMBOL_GPL(kvm_user_return_msr_update_cache); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_user_return_msr_update_cache); u64 kvm_get_user_return_msr(unsigned int slot) { return this_cpu_ptr(user_return_msrs)->values[slot].curr; } 
-EXPORT_SYMBOL_GPL(kvm_get_user_return_msr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_user_return_msr); static void drop_user_return_notifiers(void) { @@ -717,7 +717,7 @@ noinstr void kvm_spurious_fault(void) /* Fault while not rebooting. We want the trace. */ BUG_ON(!kvm_rebooting); } -EXPORT_SYMBOL_GPL(kvm_spurious_fault); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_spurious_fault); #define EXCPT_BENIGN 0 #define EXCPT_CONTRIBUTORY 1 @@ -822,7 +822,7 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu, ex->has_payload = false; ex->payload = 0; } -EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_deliver_exception_payload); static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vector, bool has_error_code, u32 error_code, @@ -906,7 +906,7 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) { kvm_multiple_exception(vcpu, nr, false, 0, false, 0); } -EXPORT_SYMBOL_GPL(kvm_queue_exception); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_queue_exception); void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, @@ -914,7 +914,7 @@ void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, { kvm_multiple_exception(vcpu, nr, false, 0, true, payload); } -EXPORT_SYMBOL_GPL(kvm_queue_exception_p); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_queue_exception_p); static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code, unsigned long payload) @@ -949,7 +949,7 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr, vcpu->arch.exception.has_payload = false; vcpu->arch.exception.payload = 0; } -EXPORT_SYMBOL_GPL(kvm_requeue_exception); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_requeue_exception); int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err) { @@ -960,7 +960,7 @@ int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err) return 1; } -EXPORT_SYMBOL_GPL(kvm_complete_insn_gp); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_complete_insn_gp); static int complete_emulated_insn_gp(struct kvm_vcpu *vcpu, int err) { @@ -1010,7 +1010,7 @@ void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, fault_mmu->inject_page_fault(vcpu, fault); } -EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_inject_emulated_page_fault); void kvm_inject_nmi(struct kvm_vcpu *vcpu) { @@ -1022,7 +1022,7 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) { kvm_multiple_exception(vcpu, nr, true, error_code, false, 0); } -EXPORT_SYMBOL_GPL(kvm_queue_exception_e); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_queue_exception_e); /* * Checks if cpl <= required_cpl; if true, return true. 
Otherwise queue @@ -1044,7 +1044,7 @@ bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr) kvm_queue_exception(vcpu, UD_VECTOR); return false; } -EXPORT_SYMBOL_GPL(kvm_require_dr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_require_dr); static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu) { @@ -1099,7 +1099,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) return 1; } -EXPORT_SYMBOL_GPL(load_pdptrs); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(load_pdptrs); static bool kvm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { @@ -1152,7 +1152,7 @@ void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned lon if ((cr0 ^ old_cr0) & KVM_MMU_CR0_ROLE_BITS) kvm_mmu_reset_context(vcpu); } -EXPORT_SYMBOL_GPL(kvm_post_set_cr0); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_post_set_cr0); int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { @@ -1196,13 +1196,13 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) return 0; } -EXPORT_SYMBOL_GPL(kvm_set_cr0); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr0); void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) { (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); } -EXPORT_SYMBOL_GPL(kvm_lmsw); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lmsw); void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) { @@ -1225,7 +1225,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE))) wrpkru(vcpu->arch.pkru); } -EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_load_guest_xsave_state); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) { @@ -1251,7 +1251,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) } } -EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_load_host_xsave_state); #ifdef CONFIG_X86_64 static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu) @@ -1304,7 +1304,7 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) vcpu->arch.cpuid_dynamic_bits_dirty = true; return 0; } -EXPORT_SYMBOL_GPL(__kvm_set_xcr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_set_xcr); int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu) { @@ -1317,7 +1317,7 @@ int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } -EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_xsetbv); static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { @@ -1365,7 +1365,7 @@ void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned lon kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } -EXPORT_SYMBOL_GPL(kvm_post_set_cr4); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_post_set_cr4); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { @@ -1399,7 +1399,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 0; } -EXPORT_SYMBOL_GPL(kvm_set_cr4); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr4); static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid) { @@ -1491,7 +1491,7 @@ handle_tlb_flush: return 0; } -EXPORT_SYMBOL_GPL(kvm_set_cr3); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr3); int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) { @@ -1503,7 +1503,7 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) vcpu->arch.cr8 = cr8; return 0; } -EXPORT_SYMBOL_GPL(kvm_set_cr8); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cr8); unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) { @@ -1512,7 +1512,7 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) else return vcpu->arch.cr8; } -EXPORT_SYMBOL_GPL(kvm_get_cr8); 
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_cr8); static void kvm_update_dr0123(struct kvm_vcpu *vcpu) { @@ -1537,7 +1537,7 @@ void kvm_update_dr7(struct kvm_vcpu *vcpu) if (dr7 & DR7_BP_EN_MASK) vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED; } -EXPORT_SYMBOL_GPL(kvm_update_dr7); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_update_dr7); static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) { @@ -1578,7 +1578,7 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) return 0; } -EXPORT_SYMBOL_GPL(kvm_set_dr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_dr); unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr) { @@ -1595,7 +1595,7 @@ unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr) return vcpu->arch.dr7; } } -EXPORT_SYMBOL_GPL(kvm_get_dr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_dr); int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu) { @@ -1611,7 +1611,7 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu) kvm_rdx_write(vcpu, data >> 32); return kvm_skip_emulated_instruction(vcpu); } -EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_rdpmc); /* * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM @@ -1750,7 +1750,7 @@ bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) return __kvm_valid_efer(vcpu, efer); } -EXPORT_SYMBOL_GPL(kvm_valid_efer); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_valid_efer); static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { @@ -1793,7 +1793,7 @@ void kvm_enable_efer_bits(u64 mask) { efer_reserved_bits &= ~mask; } -EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_enable_efer_bits); bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type) { @@ -1836,7 +1836,7 @@ out: return allowed; } -EXPORT_SYMBOL_GPL(kvm_msr_allowed); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_msr_allowed); /* * Write @data into the MSR specified by @index. 
Select MSR specific fault @@ -2025,13 +2025,13 @@ int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) { return kvm_get_msr_ignored_check(vcpu, index, data, false); } -EXPORT_SYMBOL_GPL(__kvm_emulate_msr_read); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_emulate_msr_read); int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) { return kvm_set_msr_ignored_check(vcpu, index, data, false); } -EXPORT_SYMBOL_GPL(__kvm_emulate_msr_write); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_emulate_msr_write); int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) { @@ -2040,7 +2040,7 @@ int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) return __kvm_emulate_msr_read(vcpu, index, data); } -EXPORT_SYMBOL_GPL(kvm_emulate_msr_read); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_msr_read); int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) { @@ -2049,7 +2049,7 @@ int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) return __kvm_emulate_msr_write(vcpu, index, data); } -EXPORT_SYMBOL_GPL(kvm_emulate_msr_write); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_msr_write); static void complete_userspace_rdmsr(struct kvm_vcpu *vcpu) @@ -2158,7 +2158,7 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) return __kvm_emulate_rdmsr(vcpu, kvm_rcx_read(vcpu), -1, complete_fast_rdmsr); } -EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_rdmsr); int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) { @@ -2166,7 +2166,7 @@ int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) return __kvm_emulate_rdmsr(vcpu, msr, reg, complete_fast_rdmsr_imm); } -EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr_imm); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_rdmsr_imm); static int __kvm_emulate_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { @@ -2194,13 +2194,13 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) return __kvm_emulate_wrmsr(vcpu, kvm_rcx_read(vcpu), kvm_read_edx_eax(vcpu)); } -EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_wrmsr); int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) { return __kvm_emulate_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg)); } -EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr_imm); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_wrmsr_imm); int kvm_emulate_as_nop(struct kvm_vcpu *vcpu) { @@ -2212,7 +2212,7 @@ int kvm_emulate_invd(struct kvm_vcpu *vcpu) /* Treat an INVD instruction as a NOP and just skip it. 
*/ return kvm_emulate_as_nop(vcpu); } -EXPORT_SYMBOL_GPL(kvm_emulate_invd); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_invd); fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu) { @@ -2221,14 +2221,14 @@ fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu) return EXIT_FASTPATH_REENTER_GUEST; } -EXPORT_SYMBOL_GPL(handle_fastpath_invd); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_invd); int kvm_handle_invalid_op(struct kvm_vcpu *vcpu) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; } -EXPORT_SYMBOL_GPL(kvm_handle_invalid_op); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_invalid_op); static int kvm_emulate_monitor_mwait(struct kvm_vcpu *vcpu, const char *insn) @@ -2254,13 +2254,13 @@ int kvm_emulate_mwait(struct kvm_vcpu *vcpu) { return kvm_emulate_monitor_mwait(vcpu, "MWAIT"); } -EXPORT_SYMBOL_GPL(kvm_emulate_mwait); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_mwait); int kvm_emulate_monitor(struct kvm_vcpu *vcpu) { return kvm_emulate_monitor_mwait(vcpu, "MONITOR"); } -EXPORT_SYMBOL_GPL(kvm_emulate_monitor); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_monitor); static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu) { @@ -2298,13 +2298,13 @@ fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu) return __handle_fastpath_wrmsr(vcpu, kvm_rcx_read(vcpu), kvm_read_edx_eax(vcpu)); } -EXPORT_SYMBOL_GPL(handle_fastpath_wrmsr); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_wrmsr); fastpath_t handle_fastpath_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) { return __handle_fastpath_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg)); } -EXPORT_SYMBOL_GPL(handle_fastpath_wrmsr_imm); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_wrmsr_imm); /* * Adapt set_msr() to msr_io()'s calling convention @@ -2670,7 +2670,7 @@ u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) return vcpu->arch.l1_tsc_offset + kvm_scale_tsc(host_tsc, vcpu->arch.l1_tsc_scaling_ratio); } -EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_read_l1_tsc); u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier) { @@ -2685,7 +2685,7 @@ u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier) nested_offset += l2_offset; return nested_offset; } -EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_offset); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_calc_nested_tsc_offset); u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier) { @@ -2695,7 +2695,7 @@ u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier) return l1_multiplier; } -EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_multiplier); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_calc_nested_tsc_multiplier); static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset) { @@ -3773,7 +3773,7 @@ void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu)) kvm_vcpu_flush_tlb_guest(vcpu); } -EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_service_local_tlb_flush_requests); static void record_steal_time(struct kvm_vcpu *vcpu) { @@ -4327,7 +4327,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } return 0; } -EXPORT_SYMBOL_GPL(kvm_set_msr_common); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_msr_common); static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) { @@ -4680,7 +4680,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } return 0; } -EXPORT_SYMBOL_GPL(kvm_get_msr_common); 
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_msr_common); /* * Read or write a bunch of msrs. All parameters are kernel addresses. @@ -7836,7 +7836,7 @@ gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u64 access = (kvm_x86_call(get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception); } -EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_gva_to_gpa_read); gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception) @@ -7847,7 +7847,7 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, access |= PFERR_WRITE_MASK; return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception); } -EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_gva_to_gpa_write); /* uses this to access any guest's mapped memory without checking CPL */ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, @@ -7933,7 +7933,7 @@ int kvm_read_guest_virt(struct kvm_vcpu *vcpu, return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); } -EXPORT_SYMBOL_GPL(kvm_read_guest_virt); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_read_guest_virt); static int emulator_read_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val, unsigned int bytes, @@ -8005,7 +8005,7 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, PFERR_WRITE_MASK, exception); } -EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_write_guest_virt_system); static int kvm_check_emulate_insn(struct kvm_vcpu *vcpu, int emul_type, void *insn, int insn_len) @@ -8039,7 +8039,7 @@ int handle_ud(struct kvm_vcpu *vcpu) return kvm_emulate_instruction(vcpu, emul_type); } -EXPORT_SYMBOL_GPL(handle_ud); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_ud); static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva, gpa_t gpa, bool write) @@ -8518,7 +8518,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) kvm_emulate_wbinvd_noskip(vcpu); return kvm_skip_emulated_instruction(vcpu); } -EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_wbinvd); @@ -9016,7 +9016,7 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) kvm_set_rflags(vcpu, ctxt->eflags); } } -EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_inject_realmode_interrupt); static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, u8 ndata, u8 *insn_bytes, u8 insn_size) @@ -9081,13 +9081,13 @@ void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, { prepare_emulation_failure_exit(vcpu, data, ndata, NULL, 0); } -EXPORT_SYMBOL_GPL(__kvm_prepare_emulation_failure_exit); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_prepare_emulation_failure_exit); void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu) { __kvm_prepare_emulation_failure_exit(vcpu, NULL, 0); } -EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_prepare_emulation_failure_exit); void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa) { @@ -9109,7 +9109,7 @@ void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa) run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; run->internal.ndata = ndata; } -EXPORT_SYMBOL_GPL(kvm_prepare_event_vectoring_exit); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_prepare_event_vectoring_exit); static int 
handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) { @@ -9233,7 +9233,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) r = kvm_vcpu_do_singlestep(vcpu); return r; } -EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_skip_emulated_instruction); static bool kvm_is_code_breakpoint_inhibited(struct kvm_vcpu *vcpu) { @@ -9364,7 +9364,7 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type, return r; } -EXPORT_SYMBOL_GPL(x86_decode_emulated_instruction); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(x86_decode_emulated_instruction); int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type, void *insn, int insn_len) @@ -9588,14 +9588,14 @@ int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) { return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); } -EXPORT_SYMBOL_GPL(kvm_emulate_instruction); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_instruction); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len) { return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len); } -EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_instruction_from_buffer); static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu) { @@ -9690,7 +9690,7 @@ int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in) ret = kvm_fast_pio_out(vcpu, size, port); return ret && kvm_skip_emulated_instruction(vcpu); } -EXPORT_SYMBOL_GPL(kvm_fast_pio); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_fast_pio); static int kvmclock_cpu_down_prep(unsigned int cpu) { @@ -10147,7 +10147,7 @@ out_free_x86_emulator_cache: kmem_cache_destroy(x86_emulator_cache); return r; } -EXPORT_SYMBOL_GPL(kvm_x86_vendor_init); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_x86_vendor_init); void kvm_x86_vendor_exit(void) { @@ -10181,7 +10181,7 @@ void kvm_x86_vendor_exit(void) kvm_x86_ops.enable_virtualization_cpu = NULL; mutex_unlock(&vendor_module_lock); } -EXPORT_SYMBOL_GPL(kvm_x86_vendor_exit); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_x86_vendor_exit); #ifdef CONFIG_X86_64 static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, @@ -10245,7 +10245,7 @@ bool kvm_apicv_activated(struct kvm *kvm) { return (READ_ONCE(kvm->arch.apicv_inhibit_reasons) == 0); } -EXPORT_SYMBOL_GPL(kvm_apicv_activated); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apicv_activated); bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu) { @@ -10255,7 +10255,7 @@ bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu) return (vm_reasons | vcpu_reasons) == 0; } -EXPORT_SYMBOL_GPL(kvm_vcpu_apicv_activated); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_apicv_activated); static void set_or_clear_apicv_inhibit(unsigned long *inhibits, enum kvm_apicv_inhibit reason, bool set) @@ -10431,7 +10431,7 @@ out: vcpu->run->hypercall.ret = ret; return 1; } -EXPORT_SYMBOL_GPL(____kvm_emulate_hypercall); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(____kvm_emulate_hypercall); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { @@ -10444,7 +10444,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) return __kvm_emulate_hypercall(vcpu, kvm_x86_call(get_cpl)(vcpu), complete_hypercall_exit); } -EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_hypercall); static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) { @@ -10887,7 +10887,7 @@ out: preempt_enable(); up_read(&vcpu->kvm->arch.apicv_update_lock); } -EXPORT_SYMBOL_GPL(__kvm_vcpu_update_apicv); 
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_vcpu_update_apicv); static void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) { @@ -10963,7 +10963,7 @@ void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, __kvm_set_or_clear_apicv_inhibit(kvm, reason, set); up_write(&kvm->arch.apicv_update_lock); } -EXPORT_SYMBOL_GPL(kvm_set_or_clear_apicv_inhibit); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_or_clear_apicv_inhibit); static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { @@ -11517,7 +11517,7 @@ bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) return false; } -EXPORT_SYMBOL_GPL(kvm_vcpu_has_events); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_has_events); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { @@ -11670,7 +11670,7 @@ int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu) { return __kvm_emulate_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT); } -EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_halt_noskip); int kvm_emulate_halt(struct kvm_vcpu *vcpu) { @@ -11681,7 +11681,7 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) */ return kvm_emulate_halt_noskip(vcpu) && ret; } -EXPORT_SYMBOL_GPL(kvm_emulate_halt); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_halt); fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu) { @@ -11693,7 +11693,7 @@ fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu) return EXIT_FASTPATH_EXIT_HANDLED; } -EXPORT_SYMBOL_GPL(handle_fastpath_hlt); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(handle_fastpath_hlt); int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu) { @@ -11702,7 +11702,7 @@ int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu) return __kvm_emulate_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, KVM_EXIT_AP_RESET_HOLD) && ret; } -EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_ap_reset_hold); bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu) { @@ -12255,7 +12255,7 @@ unhandled_task_switch: vcpu->run->internal.ndata = 0; return 0; } -EXPORT_SYMBOL_GPL(kvm_task_switch); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_task_switch); static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { @@ -12956,7 +12956,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (init_event) kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); } -EXPORT_SYMBOL_GPL(kvm_vcpu_reset); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_reset); void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) { @@ -12968,7 +12968,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); kvm_rip_write(vcpu, 0); } -EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_deliver_sipi_vector); void kvm_arch_enable_virtualization(void) { @@ -13086,7 +13086,7 @@ bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) { return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id; } -EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_vcpu_is_reset_bsp); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) { @@ -13250,7 +13250,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, return (void __user *)hva; } -EXPORT_SYMBOL_GPL(__x86_set_memory_region); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__x86_set_memory_region); void kvm_arch_pre_destroy_vm(struct kvm *kvm) { @@ -13658,13 +13658,13 @@ unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu) return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) + kvm_rip_read(vcpu)); } -EXPORT_SYMBOL_GPL(kvm_get_linear_rip); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_linear_rip); bool 
kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) { return kvm_get_linear_rip(vcpu) == linear_rip; } -EXPORT_SYMBOL_GPL(kvm_is_linear_rip); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_is_linear_rip); unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) { @@ -13675,7 +13675,7 @@ unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) rflags &= ~X86_EFLAGS_TF; return rflags; } -EXPORT_SYMBOL_GPL(kvm_get_rflags); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_rflags); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { @@ -13690,7 +13690,7 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) __kvm_set_rflags(vcpu, rflags); kvm_make_request(KVM_REQ_EVENT, vcpu); } -EXPORT_SYMBOL_GPL(kvm_set_rflags); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_rflags); static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) { @@ -13933,7 +13933,7 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) { return atomic_read(&kvm->arch.noncoherent_dma_count); } -EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_arch_has_noncoherent_dma); bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) { @@ -13989,7 +13989,7 @@ int kvm_spec_ctrl_test_value(u64 value) return ret; } -EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_spec_ctrl_test_value); void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code) { @@ -14014,7 +14014,7 @@ void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_c } vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault); } -EXPORT_SYMBOL_GPL(kvm_fixup_and_inject_pf_error); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_fixup_and_inject_pf_error); /* * Handles kvm_read/write_guest_virt*() result and either injects #PF or returns @@ -14043,7 +14043,7 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, return 0; } -EXPORT_SYMBOL_GPL(kvm_handle_memory_failure); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_memory_failure); int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva) { @@ -14107,7 +14107,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva) return 1; } } -EXPORT_SYMBOL_GPL(kvm_handle_invpcid); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_handle_invpcid); static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu) { @@ -14192,7 +14192,7 @@ int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, return 0; } -EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_write); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_mmio_write); int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, void *data) @@ -14230,7 +14230,7 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, return 0; } -EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_mmio_read); static void advance_sev_es_emulated_pio(struct kvm_vcpu *vcpu, unsigned count, int size) { @@ -14318,7 +14318,7 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, return in ? kvm_sev_es_ins(vcpu, size, port) : kvm_sev_es_outs(vcpu, size, port); } -EXPORT_SYMBOL_GPL(kvm_sev_es_string_io); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_sev_es_string_io); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); -- cgit
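The hunks above mechanically switch KVM x86's exports from EXPORT_SYMBOL_GPL() to EXPORT_SYMBOL_FOR_KVM_INTERNAL(). As a rough sketch of the idea only: the KVM-internal variant is presumably a thin wrapper that keeps the symbol GPL-exported while restricting which modules may resolve it. One plausible definition, assuming it builds on the kernel's EXPORT_SYMBOL_GPL_FOR_MODULES() helper and that the intended consumers are the kvm, kvm-intel, and kvm-amd modules (both assumptions, not taken from the diff), would look like:

	/*
	 * Sketch only, not the definition from this series: assumes the
	 * EXPORT_SYMBOL_GPL_FOR_MODULES() helper and the "kvm,kvm-intel,kvm-amd"
	 * consumer list; the real macro may differ.
	 */
	#include <linux/export.h>

	#define EXPORT_SYMBOL_FOR_KVM_INTERNAL(symbol)	\
		EXPORT_SYMBOL_GPL_FOR_MODULES(symbol, "kvm,kvm-intel,kvm-amd")

With a definition along those lines, each converted site, e.g. EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_emulate_halt), would leave the function's behavior untouched but make the symbol visible only to KVM's own modules rather than to arbitrary out-of-tree code.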