diff options
author | James Houghton <jthoughton@google.com> | 2025-02-04 00:40:32 +0000 |
---|---|---|
committer | Sean Christopherson <seanjc@google.com> | 2025-02-14 07:16:45 -0800 |
commit | 61d65f2dc766c70673d45a4b787f49317384642c (patch) | |
tree | b664abf8e2b7ffdf15aae5d75ddb95ee74f50b76 | |
parent | e29b74920e6f0e5a8a26f373b821a3a3e79ceb9a (diff) |
KVM: x86/mmu: Don't force atomic update if only the Accessed bit is volatile
Don't force SPTE modifications to be done atomically if the only volatile
bit in the SPTE is the Accessed bit. KVM and the primary MMU tolerate
stale aging state, and the probability of an Accessed bit A/D assist being
clobbered *and* affecting again is likely far lower than the probability
of consuming stale information due to not flushing TLBs when aging.
Rename spte_has_volatile_bits() to spte_needs_atomic_update() to better
capture the nature of the helper.
Opportunstically do s/write/update on the TDP MMU wrapper, as it's not
simply the "write" that needs to be done atomically, it's the entire
update, i.e. the entire read-modify-write operation needs to be done
atomically so that KVM has an accurate view of the old SPTE.
Leave kvm_tdp_mmu_write_spte_atomic() as is. While the name is imperfect,
it pairs with kvm_tdp_mmu_write_spte(), which in turn pairs with
kvm_tdp_mmu_read_spte(). And renaming all of those isn't obviously a net
positive, and would require significant churn.
Signed-off-by: James Houghton <jthoughton@google.com>
Link: https://lore.kernel.org/r/20250204004038.1680123-6-jthoughton@google.com
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
-rw-r--r-- | Documentation/virt/kvm/locking.rst | 4 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 4 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/spte.c | 27 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/spte.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/tdp_iter.h | 21 |
5 files changed, 28 insertions, 30 deletions
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst index c56d5f26c750..ae8bce7fecbe 100644 --- a/Documentation/virt/kvm/locking.rst +++ b/Documentation/virt/kvm/locking.rst @@ -196,7 +196,7 @@ writable between reading spte and updating spte. Like below case: The Dirty bit is lost in this case. In order to avoid this kind of issue, we always treat the spte as "volatile" -if it can be updated out of mmu-lock [see spte_has_volatile_bits()]; it means +if it can be updated out of mmu-lock [see spte_needs_atomic_update()]; it means the spte is always atomically updated in this case. 3) flush tlbs due to spte updated @@ -212,7 +212,7 @@ function to update spte (present -> present). Since the spte is "volatile" if it can be updated out of mmu-lock, we always atomically update the spte and the race caused by fast page fault can be avoided. -See the comments in spte_has_volatile_bits() and mmu_spte_update(). +See the comments in spte_needs_atomic_update() and mmu_spte_update(). Lockless Access Tracking: diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 74c20dbb92da..5b9ef3535f50 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -501,7 +501,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) return false; } - if (!spte_has_volatile_bits(old_spte)) + if (!spte_needs_atomic_update(old_spte)) __update_clear_spte_fast(sptep, new_spte); else old_spte = __update_clear_spte_slow(sptep, new_spte); @@ -524,7 +524,7 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep) int level = sptep_to_sp(sptep)->role.level; if (!is_shadow_present_pte(old_spte) || - !spte_has_volatile_bits(old_spte)) + !spte_needs_atomic_update(old_spte)) __update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE); else old_spte = __update_clear_spte_slow(sptep, SHADOW_NONPRESENT_VALUE); diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 22551e2f1d00..9663ba867178 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -129,25 +129,30 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) } /* - * Returns true if the SPTE has bits that may be set without holding mmu_lock. - * The caller is responsible for checking if the SPTE is shadow-present, and - * for determining whether or not the caller cares about non-leaf SPTEs. + * Returns true if the SPTE needs to be updated atomically due to having bits + * that may be changed without holding mmu_lock, and for which KVM must not + * lose information. E.g. KVM must not drop Dirty bit information. The caller + * is responsible for checking if the SPTE is shadow-present, and for + * determining whether or not the caller cares about non-leaf SPTEs. */ -bool spte_has_volatile_bits(u64 spte) +bool spte_needs_atomic_update(u64 spte) { + /* SPTEs can be made Writable bit by KVM's fast page fault handler. */ if (!is_writable_pte(spte) && is_mmu_writable_spte(spte)) return true; + /* Access-tracked SPTEs can be restored by KVM's fast page fault handler. */ if (is_access_track_spte(spte)) return true; - if (spte_ad_enabled(spte)) { - if (!(spte & shadow_accessed_mask) || - (is_writable_pte(spte) && !(spte & shadow_dirty_mask))) - return true; - } - - return false; + /* + * Dirty and Accessed bits can be set by the CPU. Ignore the Accessed + * bit, as KVM tolerates false negatives/positives, e.g. KVM doesn't + * invalidate TLBs when aging SPTEs, and so it's safe to clobber the + * Accessed bit (and rare in practice). + */ + return spte_ad_enabled(spte) && is_writable_pte(spte) && + !(spte & shadow_dirty_mask); } bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 59746854c0af..79cdceba9857 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -519,7 +519,7 @@ static inline u64 get_mmio_spte_generation(u64 spte) return gen; } -bool spte_has_volatile_bits(u64 spte); +bool spte_needs_atomic_update(u64 spte); bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, const struct kvm_memory_slot *slot, diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h index 9135b035fa40..364c5da6c499 100644 --- a/arch/x86/kvm/mmu/tdp_iter.h +++ b/arch/x86/kvm/mmu/tdp_iter.h @@ -39,28 +39,21 @@ static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte) } /* - * SPTEs must be modified atomically if they are shadow-present, leaf - * SPTEs, and have volatile bits, i.e. has bits that can be set outside - * of mmu_lock. The Writable bit can be set by KVM's fast page fault - * handler, and Accessed and Dirty bits can be set by the CPU. - * - * Note, non-leaf SPTEs do have Accessed bits and those bits are - * technically volatile, but KVM doesn't consume the Accessed bit of - * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit. This - * logic needs to be reassessed if KVM were to use non-leaf Accessed - * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs. + * SPTEs must be modified atomically if they are shadow-present, leaf SPTEs, + * and have volatile bits (bits that can be set outside of mmu_lock) that + * must not be clobbered. */ -static inline bool kvm_tdp_mmu_spte_need_atomic_write(u64 old_spte, int level) +static inline bool kvm_tdp_mmu_spte_need_atomic_update(u64 old_spte, int level) { return is_shadow_present_pte(old_spte) && is_last_spte(old_spte, level) && - spte_has_volatile_bits(old_spte); + spte_needs_atomic_update(old_spte); } static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte, u64 new_spte, int level) { - if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level)) + if (kvm_tdp_mmu_spte_need_atomic_update(old_spte, level)) return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte); __kvm_tdp_mmu_write_spte(sptep, new_spte); @@ -70,7 +63,7 @@ static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte, static inline u64 tdp_mmu_clear_spte_bits(tdp_ptep_t sptep, u64 old_spte, u64 mask, int level) { - if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level)) + if (kvm_tdp_mmu_spte_need_atomic_update(old_spte, level)) return tdp_mmu_clear_spte_bits_atomic(sptep, mask); __kvm_tdp_mmu_write_spte(sptep, old_spte & ~mask); |