-rw-r--r--  Documentation/virt/kvm/locking.rst |  4
-rw-r--r--  arch/x86/kvm/mmu/mmu.c             |  4
-rw-r--r--  arch/x86/kvm/mmu/spte.c            | 27
-rw-r--r--  arch/x86/kvm/mmu/spte.h            |  2
-rw-r--r--  arch/x86/kvm/mmu/tdp_iter.h        | 21
5 files changed, 28 insertions, 30 deletions
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index c56d5f26c750..ae8bce7fecbe 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -196,7 +196,7 @@ writable between reading spte and updating spte. Like below case:
 The Dirty bit is lost in this case.
 
 In order to avoid this kind of issue, we always treat the spte as "volatile"
-if it can be updated out of mmu-lock [see spte_has_volatile_bits()]; it means
+if it can be updated out of mmu-lock [see spte_needs_atomic_update()]; it means
 the spte is always atomically updated in this case.
 
 3) flush tlbs due to spte updated
@@ -212,7 +212,7 @@ function to update spte (present -> present).
 Since the spte is "volatile" if it can be updated out of mmu-lock, we always
 atomically update the spte and the race caused by fast page fault can be
 avoided.
-See the comments in spte_has_volatile_bits() and mmu_spte_update().
+See the comments in spte_needs_atomic_update() and mmu_spte_update().
 
 Lockless Access Tracking:
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 74c20dbb92da..5b9ef3535f50 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -501,7 +501,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
                 return false;
         }
 
-        if (!spte_has_volatile_bits(old_spte))
+        if (!spte_needs_atomic_update(old_spte))
                 __update_clear_spte_fast(sptep, new_spte);
         else
                 old_spte = __update_clear_spte_slow(sptep, new_spte);
@@ -524,7 +524,7 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
         int level = sptep_to_sp(sptep)->role.level;
 
         if (!is_shadow_present_pte(old_spte) ||
-            !spte_has_volatile_bits(old_spte))
+            !spte_needs_atomic_update(old_spte))
                 __update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE);
         else
                 old_spte = __update_clear_spte_slow(sptep, SHADOW_NONPRESENT_VALUE);
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 22551e2f1d00..9663ba867178 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -129,25 +129,30 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
 }
 
 /*
- * Returns true if the SPTE has bits that may be set without holding mmu_lock.
- * The caller is responsible for checking if the SPTE is shadow-present, and
- * for determining whether or not the caller cares about non-leaf SPTEs.
+ * Returns true if the SPTE needs to be updated atomically due to having bits
+ * that may be changed without holding mmu_lock, and for which KVM must not
+ * lose information.  E.g. KVM must not drop Dirty bit information.  The caller
+ * is responsible for checking if the SPTE is shadow-present, and for
+ * determining whether or not the caller cares about non-leaf SPTEs.
  */
-bool spte_has_volatile_bits(u64 spte)
+bool spte_needs_atomic_update(u64 spte)
 {
+        /* SPTEs can be made Writable bit by KVM's fast page fault handler. */
         if (!is_writable_pte(spte) && is_mmu_writable_spte(spte))
                 return true;
 
+        /* Access-tracked SPTEs can be restored by KVM's fast page fault handler. */
         if (is_access_track_spte(spte))
                 return true;
 
-        if (spte_ad_enabled(spte)) {
-                if (!(spte & shadow_accessed_mask) ||
-                    (is_writable_pte(spte) && !(spte & shadow_dirty_mask)))
-                        return true;
-        }
-
-        return false;
+        /*
+         * Dirty and Accessed bits can be set by the CPU.  Ignore the Accessed
+         * bit, as KVM tolerates false negatives/positives, e.g. KVM doesn't
+         * invalidate TLBs when aging SPTEs, and so it's safe to clobber the
+         * Accessed bit (and rare in practice).
+         */
+        return spte_ad_enabled(spte) && is_writable_pte(spte) &&
+               !(spte & shadow_dirty_mask);
 }
 
 bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 59746854c0af..79cdceba9857 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -519,7 +519,7 @@ static inline u64 get_mmio_spte_generation(u64 spte)
         return gen;
 }
 
-bool spte_has_volatile_bits(u64 spte);
+bool spte_needs_atomic_update(u64 spte);
 
 bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                const struct kvm_memory_slot *slot,
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
index 9135b035fa40..364c5da6c499 100644
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -39,28 +39,21 @@ static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
 }
 
 /*
- * SPTEs must be modified atomically if they are shadow-present, leaf
- * SPTEs, and have volatile bits, i.e. has bits that can be set outside
- * of mmu_lock.  The Writable bit can be set by KVM's fast page fault
- * handler, and Accessed and Dirty bits can be set by the CPU.
- *
- * Note, non-leaf SPTEs do have Accessed bits and those bits are
- * technically volatile, but KVM doesn't consume the Accessed bit of
- * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit.  This
- * logic needs to be reassessed if KVM were to use non-leaf Accessed
- * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs.
+ * SPTEs must be modified atomically if they are shadow-present, leaf SPTEs,
+ * and have volatile bits (bits that can be set outside of mmu_lock) that
+ * must not be clobbered.
  */
-static inline bool kvm_tdp_mmu_spte_need_atomic_write(u64 old_spte, int level)
+static inline bool kvm_tdp_mmu_spte_need_atomic_update(u64 old_spte, int level)
 {
         return is_shadow_present_pte(old_spte) &&
                is_last_spte(old_spte, level) &&
-               spte_has_volatile_bits(old_spte);
+               spte_needs_atomic_update(old_spte);
 }
 
 static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
                                          u64 new_spte, int level)
 {
-        if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level))
+        if (kvm_tdp_mmu_spte_need_atomic_update(old_spte, level))
                 return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte);
 
         __kvm_tdp_mmu_write_spte(sptep, new_spte);
@@ -70,7 +63,7 @@ static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
 static inline u64 tdp_mmu_clear_spte_bits(tdp_ptep_t sptep, u64 old_spte,
                                           u64 mask, int level)
 {
-        if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level))
+        if (kvm_tdp_mmu_spte_need_atomic_update(old_spte, level))
                 return tdp_mmu_clear_spte_bits_atomic(sptep, mask);
 
         __kvm_tdp_mmu_write_spte(sptep, old_spte & ~mask);
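
For readers who want to see the new policy outside the context of the diff, the sketch below restates the decision implemented by spte_needs_atomic_update(): an SPTE only needs an atomic update when a bit that can change without mmu_lock carries information KVM must not lose, and the Accessed bit is deliberately excluded. This is a standalone, simplified model, not kernel code; struct spte_model, needs_atomic_update() and its boolean fields are hypothetical stand-ins for the real SPTE bit tests (is_writable_pte(), spte_ad_enabled(), and friends).

/* Illustrative model of the post-patch policy; not KVM code. */
#include <stdbool.h>
#include <stdio.h>

struct spte_model {
        bool writable;      /* hardware Writable bit is set */
        bool mmu_writable;  /* MMU-writable: fast page fault may set Writable */
        bool access_track;  /* access-tracked: fast page fault may restore R/X */
        bool ad_enabled;    /* Accessed/Dirty bits are enabled for this SPTE */
        bool dirty;         /* shadow Dirty bit is already set */
};

static bool needs_atomic_update(const struct spte_model *s)
{
        /* Fast page fault can make the SPTE Writable outside of mmu_lock. */
        if (!s->writable && s->mmu_writable)
                return true;

        /* Fast page fault can restore an access-tracked SPTE. */
        if (s->access_track)
                return true;

        /*
         * The CPU can set Dirty at any time; force an atomic update only when
         * that information could actually be lost.  The Accessed bit is
         * intentionally ignored, mirroring the patch's tolerance for
         * clobbering it.
         */
        return s->ad_enabled && s->writable && !s->dirty;
}

int main(void)
{
        /* Writable, A/D-enabled, not yet dirty: the CPU may race to set Dirty. */
        struct spte_model s = { .writable = true, .ad_enabled = true };

        printf("needs atomic update: %d\n", needs_atomic_update(&s));
        return 0;
}

The key behavioral change relative to spte_has_volatile_bits() is visible in the final return: a clean (non-Accessed) but otherwise stable SPTE no longer forces the slow atomic path just because the CPU might set the Accessed bit.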