summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/arm64/include/asm/kvm_host.h5
-rw-r--r--arch/arm64/include/asm/kvm_nested.h3
-rw-r--r--arch/arm64/kvm/arm.c4
-rw-r--r--arch/arm64/kvm/nested.c72
-rw-r--r--arch/arm64/kvm/reset.c1
5 files changed, 85 insertions, 0 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 12adab97e7f2..c762919a2072 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -731,6 +731,8 @@ struct vcpu_reset_state {
bool reset;
};
+struct vncr_tlb;
+
struct kvm_vcpu_arch {
struct kvm_cpu_context ctxt;
@@ -825,6 +827,9 @@ struct kvm_vcpu_arch {
/* Per-vcpu CCSIDR override or NULL */
u32 *ccsidr;
+
+ /* Per-vcpu TLB for VNCR_EL2 -- NULL when !NV */
+ struct vncr_tlb *vncr_tlb;
};
/*
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 9d56fd946e5e..98b3d6b58966 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -333,4 +333,7 @@ struct s1_walk_result {
int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
struct s1_walk_result *wr, u64 va);
+/* VNCR management */
+int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
+
#endif /* __ARM64_KVM_NESTED_H */
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 68fec8c95fee..528743587360 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -843,6 +843,10 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
return ret;
if (vcpu_has_nv(vcpu)) {
+ ret = kvm_vcpu_allocate_vncr_tlb(vcpu);
+ if (ret)
+ return ret;
+
ret = kvm_vgic_vcpu_nv_init(vcpu);
if (ret)
return ret;
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 0513f1367219..806e9cf6049a 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -16,6 +16,24 @@
#include "sys_regs.h"
+struct vncr_tlb {
+ /* The guest's VNCR_EL2 */
+ u64 gva;
+ struct s1_walk_info wi;
+ struct s1_walk_result wr;
+
+ u64 hpa;
+
+ /* -1 when not mapped on a CPU */
+ int cpu;
+
+ /*
+ * true if the TLB is valid. Can only be changed with the
+ * mmu_lock held.
+ */
+ bool valid;
+};
+
/*
* Ratio of live shadow S2 MMU per vcpu. This is a trade-off between
* memory usage and potential number of different sets of S2 PTs in
@@ -812,6 +830,60 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
}
/*
+ * Dealing with VNCR_EL2 exposed by the *guest* is a complicated matter:
+ *
+ * - We introduce an internal representation of a vcpu-private TLB,
+ * representing the mapping between the guest VA contained in VNCR_EL2,
+ * the IPA the guest's EL2 PTs point to, and the actual PA this lives at.
+ *
+ * - On translation fault from a nested VNCR access, we create such a TLB.
+ * If there is no mapping to describe, the guest inherits the fault.
+ * Crucially, no actual mapping is done at this stage.
+ *
+ * - On vcpu_load() in a non-HYP context with HCR_EL2.NV==1, if the above
+ * TLB exists, we map it in the fixmap for this CPU, and run with it. We
+ * have to respect the permissions dictated by the guest, but not the
+ * memory type (FWB is a must).
+ *
+ * - Note that we usually don't do a vcpu_load() on the back of a fault
+ * (unless we are preempted), so the resolution of a translation fault
+ * must go via a request that will map the VNCR page in the fixmap.
+ * vcpu_load() might as well use the same mechanism.
+ *
+ * - On vcpu_put() in a non-HYP context with HCR_EL2.NV==1, if the TLB was
+ * mapped, we unmap it. Yes it is that simple. The TLB still exists
+ * though, and may be reused at a later load.
+ *
+ * - On permission fault, we simply forward the fault to the guest's EL2.
+ * Get out of my way.
+ *
+ * - On any TLBI for the EL2&0 translation regime, we must find any TLB that
+ * intersects with the TLBI request, invalidate it, and unmap the page
+ * from the fixmap. Because we need to look at all the vcpu-private TLBs,
+ * this requires some wide-ranging locking to ensure that nothing races
+ * against it. This may require some refcounting to avoid the search when
+ * no such TLB is present.
+ *
+ * - On MMU notifiers, we must invalidate our TLB in a similar way, but
+ * looking at the IPA instead. The funny part is that there may not be a
+ * stage-2 mapping for this page if L1 hasn't accessed it using LD/ST
+ * instructions.
+ */
+
+int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY))
+ return 0;
+
+ vcpu->arch.vncr_tlb = kzalloc(sizeof(*vcpu->arch.vncr_tlb),
+ GFP_KERNEL_ACCOUNT);
+ if (!vcpu->arch.vncr_tlb)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/*
* Our emulated CPU doesn't support all the possible features. For the
* sake of simplicity (and probably mental sanity), wipe out a number
* of feature bits we don't intend to support for the time being.
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 965e1429b9f6..959532422d3a 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -159,6 +159,7 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
kfree(sve_state);
free_page((unsigned long)vcpu->arch.ctxt.vncr_array);
+ kfree(vcpu->arch.vncr_tlb);
kfree(vcpu->arch.ccsidr);
}