diff options
authorPaolo Bonzini <>2021-02-01 05:12:11 -0500
committerPaolo Bonzini <>2021-02-04 05:27:14 -0500
commitbd2fae8da794b55bf2ac02632da3a151b10e664c (patch)
parent87aa9ec939ec7277b730786e19c161c9194cc8ca (diff)
KVM: do not assume PTE is writable after follow_pfn
In order to convert an HVA to a PFN, KVM usually tries to use the get_user_pages family of functinso. This however is not possible for VM_IO vmas; in that case, KVM instead uses follow_pfn. In doing this however KVM loses the information on whether the PFN is writable. That is usually not a problem because the main use of VM_IO vmas with KVM is for BARs in PCI device assignment, however it is a bug. To fix it, use follow_pte and check pte_write while under the protection of the PTE lock. The information can be used to fail hva_to_pfn_remapped or passed back to the caller via *writable. Usage of follow_pfn was introduced in commit add6a0cd1c5b ("KVM: MMU: try to fix up page faults before giving up", 2016-07-05); however, even older version have the same issue, all the way back to commit 2e2e3738af33 ("KVM: Handle vma regions with no backing page", 2008-07-20), as they also did not check whether the PFN was writable. Fixes: 2e2e3738af33 ("KVM: Handle vma regions with no backing page") Reported-by: David Stevens <> Cc: Cc: Jann Horn <> Cc: Jason Gunthorpe <> Cc: Signed-off-by: Paolo Bonzini <>
1 files changed, 12 insertions, 3 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8367d88ce39b..335a1a2b8edc 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1904,9 +1904,11 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
kvm_pfn_t *p_pfn)
unsigned long pfn;
+ pte_t *ptep;
+ spinlock_t *ptl;
int r;
- r = follow_pfn(vma, addr, &pfn);
+ r = follow_pte(vma->vm_mm, addr, NULL, &ptep, NULL, &ptl);
if (r) {
* get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
@@ -1921,14 +1923,19 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
if (r)
return r;
- r = follow_pfn(vma, addr, &pfn);
+ r = follow_pte(vma->vm_mm, addr, NULL, &ptep, NULL, &ptl);
if (r)
return r;
+ }
+ if (write_fault && !pte_write(*ptep)) {
+ goto out;
if (writable)
- *writable = true;
+ *writable = pte_write(*ptep);
+ pfn = pte_pfn(*ptep);
* Get a reference here because callers of *hva_to_pfn* and
@@ -1943,6 +1950,8 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
+ pte_unmap_unlock(ptep, ptl);
*p_pfn = pfn;
return 0;