summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/mmu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/mmu')
-rw-r--r--arch/x86/kvm/mmu/mmu.c39
-rw-r--r--arch/x86/kvm/mmu/mmu_internal.h5
-rw-r--r--arch/x86/kvm/mmu/page_track.c3
-rw-r--r--arch/x86/kvm/mmu/spte.c29
-rw-r--r--arch/x86/kvm/mmu/spte.h1
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c49
6 files changed, 73 insertions, 53 deletions
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 8d1b632e33d2..7b3f1783ab3c 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -110,6 +110,7 @@ static bool __ro_after_init tdp_mmu_allowed;
#ifdef CONFIG_X86_64
bool __read_mostly tdp_mmu_enabled = true;
module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0444);
+EXPORT_SYMBOL_GPL(tdp_mmu_enabled);
#endif
static int max_huge_page_level __read_mostly;
@@ -1456,15 +1457,15 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
* enabled but it chooses between clearing the Dirty bit and Writeable
* bit based on the context.
*/
- if (kvm_x86_ops.cpu_dirty_log_size)
+ if (kvm->arch.cpu_dirty_log_size)
kvm_mmu_clear_dirty_pt_masked(kvm, slot, gfn_offset, mask);
else
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}
-int kvm_cpu_dirty_log_size(void)
+int kvm_cpu_dirty_log_size(struct kvm *kvm)
{
- return kvm_x86_ops.cpu_dirty_log_size;
+ return kvm->arch.cpu_dirty_log_size;
}
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
@@ -4835,19 +4836,6 @@ out_unlock:
}
#endif
-bool kvm_mmu_may_ignore_guest_pat(void)
-{
- /*
- * When EPT is enabled (shadow_memtype_mask is non-zero), and the VM
- * has non-coherent DMA (DMA doesn't snoop CPU caches), KVM's ABI is to
- * honor the memtype from the guest's PAT so that guest accesses to
- * memory that is DMA'd aren't cached against the guest's wishes. As a
- * result, KVM _may_ ignore guest PAT, whereas without non-coherent DMA,
- * KVM _always_ ignores guest PAT (when EPT is enabled).
- */
- return shadow_memtype_mask;
-}
-
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
#ifdef CONFIG_X86_64
@@ -4858,8 +4846,7 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
return direct_page_fault(vcpu, fault);
}
-static int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code,
- u8 *level)
+int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code, u8 *level)
{
int r;
@@ -4873,6 +4860,10 @@ static int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code,
do {
if (signal_pending(current))
return -EINTR;
+
+ if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu))
+ return -EIO;
+
cond_resched();
r = kvm_mmu_do_page_fault(vcpu, gpa, error_code, true, NULL, level);
} while (r == RET_PF_RETRY);
@@ -4897,6 +4888,7 @@ static int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code,
return -EIO;
}
}
+EXPORT_SYMBOL_GPL(kvm_tdp_map_page);
long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
struct kvm_pre_fault_memory *range)
@@ -5589,12 +5581,19 @@ void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
{
+ int maxpa;
+
+ if (vcpu->kvm->arch.vm_type == KVM_X86_TDX_VM)
+ maxpa = cpuid_query_maxguestphyaddr(vcpu);
+ else
+ maxpa = cpuid_maxphyaddr(vcpu);
+
/* tdp_root_level is architecture forced level, use it if nonzero */
if (tdp_root_level)
return tdp_root_level;
/* Use 5-level TDP if and only if it's useful/necessary. */
- if (max_tdp_level == 5 && cpuid_maxphyaddr(vcpu) <= 48)
+ if (max_tdp_level == 5 && maxpa <= 48)
return 4;
return max_tdp_level;
@@ -5913,6 +5912,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
out:
return r;
}
+EXPORT_SYMBOL_GPL(kvm_mmu_load);
void kvm_mmu_unload(struct kvm_vcpu *vcpu)
{
@@ -7239,6 +7239,7 @@ static void kvm_mmu_zap_memslot(struct kvm *kvm,
.start = slot->base_gfn,
.end = slot->base_gfn + slot->npages,
.may_block = true,
+ .attr_filter = KVM_FILTER_PRIVATE | KVM_FILTER_SHARED,
};
bool flush;
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 75f00598289d..db8f33e4de62 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -187,7 +187,8 @@ static inline gfn_t kvm_gfn_root_bits(const struct kvm *kvm, const struct kvm_mm
return kvm_gfn_direct_bits(kvm);
}
-static inline bool kvm_mmu_page_ad_need_write_protect(struct kvm_mmu_page *sp)
+static inline bool kvm_mmu_page_ad_need_write_protect(struct kvm *kvm,
+ struct kvm_mmu_page *sp)
{
/*
* When using the EPT page-modification log, the GPAs in the CPU dirty
@@ -197,7 +198,7 @@ static inline bool kvm_mmu_page_ad_need_write_protect(struct kvm_mmu_page *sp)
* being enabled is mandatory as the bits used to denote WP-only SPTEs
* are reserved for PAE paging (32-bit KVM).
*/
- return kvm_x86_ops.cpu_dirty_log_size && sp->role.guest_mode;
+ return kvm->arch.cpu_dirty_log_size && sp->role.guest_mode;
}
static inline gfn_t gfn_round_for_level(gfn_t gfn, int level)
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index 561c331fd6ec..1b17b12393a8 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -172,6 +172,9 @@ static int kvm_enable_external_write_tracking(struct kvm *kvm)
struct kvm_memory_slot *slot;
int r = 0, i, bkt;
+ if (kvm->arch.vm_type == KVM_X86_TDX_VM)
+ return -EOPNOTSUPP;
+
mutex_lock(&kvm->slots_arch_lock);
/*
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 0f9f47b4ab0e..cfce03d8f123 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -37,7 +37,6 @@ u64 __read_mostly shadow_mmio_value;
u64 __read_mostly shadow_mmio_mask;
u64 __read_mostly shadow_mmio_access_mask;
u64 __read_mostly shadow_present_mask;
-u64 __read_mostly shadow_memtype_mask;
u64 __read_mostly shadow_me_value;
u64 __read_mostly shadow_me_mask;
u64 __read_mostly shadow_acc_track_mask;
@@ -96,8 +95,6 @@ u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access)
u64 spte = generation_mmio_spte_mask(gen);
u64 gpa = gfn << PAGE_SHIFT;
- WARN_ON_ONCE(!vcpu->kvm->arch.shadow_mmio_value);
-
access &= shadow_mmio_access_mask;
spte |= vcpu->kvm->arch.shadow_mmio_value | access;
spte |= gpa | shadow_nonpresent_or_rsvd_mask;
@@ -177,7 +174,7 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
if (sp->role.ad_disabled)
spte |= SPTE_TDP_AD_DISABLED;
- else if (kvm_mmu_page_ad_need_write_protect(sp))
+ else if (kvm_mmu_page_ad_need_write_protect(vcpu->kvm, sp))
spte |= SPTE_TDP_AD_WRPROT_ONLY;
spte |= shadow_present_mask;
@@ -212,9 +209,7 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
if (level > PG_LEVEL_4K)
spte |= PT_PAGE_SIZE_MASK;
- if (shadow_memtype_mask)
- spte |= kvm_x86_call(get_mt_mask)(vcpu, gfn,
- kvm_is_mmio_pfn(pfn));
+ spte |= kvm_x86_call(get_mt_mask)(vcpu, gfn, kvm_is_mmio_pfn(pfn));
if (host_writable)
spte |= shadow_host_writable_mask;
else
@@ -440,6 +435,12 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
+void kvm_mmu_set_mmio_spte_value(struct kvm *kvm, u64 mmio_value)
+{
+ kvm->arch.shadow_mmio_value = mmio_value;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_value);
+
void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask)
{
/* shadow_me_value must be a subset of shadow_me_mask */
@@ -463,13 +464,7 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
/* VMX_EPT_SUPPRESS_VE_BIT is needed for W or X violation. */
shadow_present_mask =
(has_exec_only ? 0ull : VMX_EPT_READABLE_MASK) | VMX_EPT_SUPPRESS_VE_BIT;
- /*
- * EPT overrides the host MTRRs, and so KVM must program the desired
- * memtype directly into the SPTEs. Note, this mask is just the mask
- * of all bits that factor into the memtype, the actual memtype must be
- * dynamically calculated, e.g. to ensure host MMIO is mapped UC.
- */
- shadow_memtype_mask = VMX_EPT_MT_MASK | VMX_EPT_IPAT_BIT;
+
shadow_acc_track_mask = VMX_EPT_RWX_MASK;
shadow_host_writable_mask = EPT_SPTE_HOST_WRITABLE;
shadow_mmu_writable_mask = EPT_SPTE_MMU_WRITABLE;
@@ -521,12 +516,6 @@ void kvm_mmu_reset_all_pte_masks(void)
shadow_x_mask = 0;
shadow_present_mask = PT_PRESENT_MASK;
- /*
- * For shadow paging and NPT, KVM uses PAT entry '0' to encode WB
- * memtype in the SPTEs, i.e. relies on host MTRRs to provide the
- * correct memtype (WB is the "weakest" memtype).
- */
- shadow_memtype_mask = 0;
shadow_acc_track_mask = 0;
shadow_me_mask = 0;
shadow_me_value = 0;
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 79cdceba9857..1e94f081bdaf 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -187,7 +187,6 @@ extern u64 __read_mostly shadow_mmio_value;
extern u64 __read_mostly shadow_mmio_mask;
extern u64 __read_mostly shadow_mmio_access_mask;
extern u64 __read_mostly shadow_present_mask;
-extern u64 __read_mostly shadow_memtype_mask;
extern u64 __read_mostly shadow_me_value;
extern u64 __read_mostly shadow_me_mask;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 21a3b8166242..405874f4d088 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1630,21 +1630,21 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
}
}
-static bool tdp_mmu_need_write_protect(struct kvm_mmu_page *sp)
+static bool tdp_mmu_need_write_protect(struct kvm *kvm, struct kvm_mmu_page *sp)
{
/*
* All TDP MMU shadow pages share the same role as their root, aside
* from level, so it is valid to key off any shadow page to determine if
* write protection is needed for an entire tree.
*/
- return kvm_mmu_page_ad_need_write_protect(sp) || !kvm_ad_enabled;
+ return kvm_mmu_page_ad_need_write_protect(kvm, sp) || !kvm_ad_enabled;
}
static void clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t start, gfn_t end)
{
- const u64 dbit = tdp_mmu_need_write_protect(root) ? PT_WRITABLE_MASK :
- shadow_dirty_mask;
+ const u64 dbit = tdp_mmu_need_write_protect(kvm, root) ?
+ PT_WRITABLE_MASK : shadow_dirty_mask;
struct tdp_iter iter;
rcu_read_lock();
@@ -1689,8 +1689,8 @@ void kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t gfn, unsigned long mask, bool wrprot)
{
- const u64 dbit = (wrprot || tdp_mmu_need_write_protect(root)) ? PT_WRITABLE_MASK :
- shadow_dirty_mask;
+ const u64 dbit = (wrprot || tdp_mmu_need_write_protect(kvm, root)) ?
+ PT_WRITABLE_MASK : shadow_dirty_mask;
struct tdp_iter iter;
lockdep_assert_held_write(&kvm->mmu_lock);
@@ -1911,16 +1911,13 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
*
* Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}.
*/
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
- int *root_level)
+static int __kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+ struct kvm_mmu_page *root)
{
- struct kvm_mmu_page *root = root_to_sp(vcpu->arch.mmu->root.hpa);
struct tdp_iter iter;
gfn_t gfn = addr >> PAGE_SHIFT;
int leaf = -1;
- *root_level = vcpu->arch.mmu->root_role.level;
-
for_each_tdp_pte(iter, vcpu->kvm, root, gfn, gfn + 1) {
leaf = iter.level;
sptes[leaf] = iter.old_spte;
@@ -1929,6 +1926,36 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
return leaf;
}
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+ int *root_level)
+{
+ struct kvm_mmu_page *root = root_to_sp(vcpu->arch.mmu->root.hpa);
+ *root_level = vcpu->arch.mmu->root_role.level;
+
+ return __kvm_tdp_mmu_get_walk(vcpu, addr, sptes, root);
+}
+
+bool kvm_tdp_mmu_gpa_is_mapped(struct kvm_vcpu *vcpu, u64 gpa)
+{
+ struct kvm *kvm = vcpu->kvm;
+ bool is_direct = kvm_is_addr_direct(kvm, gpa);
+ hpa_t root = is_direct ? vcpu->arch.mmu->root.hpa :
+ vcpu->arch.mmu->mirror_root_hpa;
+ u64 sptes[PT64_ROOT_MAX_LEVEL + 1], spte;
+ int leaf;
+
+ lockdep_assert_held(&kvm->mmu_lock);
+ rcu_read_lock();
+ leaf = __kvm_tdp_mmu_get_walk(vcpu, gpa, sptes, root_to_sp(root));
+ rcu_read_unlock();
+ if (leaf < 0)
+ return false;
+
+ spte = sptes[leaf];
+ return is_shadow_present_pte(spte) && is_last_spte(spte, leaf);
+}
+EXPORT_SYMBOL_GPL(kvm_tdp_mmu_gpa_is_mapped);
+
/*
* Returns the last level spte pointer of the shadow page walk for the given
* gpa, and sets *spte to the spte value. This spte may be non-preset. If no