-rw-r--r--   drivers/vfio/pci/vfio_pci.c           |   4
-rw-r--r--   drivers/vfio/pci/vfio_pci_config.c    |   3
-rw-r--r--   drivers/vfio/pci/vfio_pci_core.c      |  10
-rw-r--r--   drivers/vfio/pci/vfio_pci_igd.c       |   6
-rw-r--r--   drivers/vfio/pci/vfio_pci_intrs.c     |   2
-rw-r--r--   drivers/vfio/pci/vfio_pci_priv.h      |   6
-rw-r--r--   drivers/vfio/pci/virtio/Kconfig       |   6
-rw-r--r--   drivers/vfio/pci/virtio/legacy_io.c   |   4
-rw-r--r--   drivers/vfio/pci/virtio/main.c        |   5
-rw-r--r--   drivers/vfio/vfio_iommu_type1.c       | 123
-rw-r--r--   include/linux/mm.h                    |   2
-rw-r--r--   mm/memory.c                           |   1
12 files changed, 106 insertions, 66 deletions
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 6f7ae7e5b7b0..5ba39f7623bb 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -111,9 +111,7 @@ static int vfio_pci_open_device(struct vfio_device *core_vdev)
         if (ret)
                 return ret;
 
-        if (vfio_pci_is_vga(pdev) &&
-            pdev->vendor == PCI_VENDOR_ID_INTEL &&
-            IS_ENABLED(CONFIG_VFIO_PCI_IGD)) {
+        if (vfio_pci_is_intel_display(pdev)) {
                 ret = vfio_pci_igd_init(vdev);
                 if (ret && ret != -ENODEV) {
                         pci_warn(pdev, "Failed to setup Intel IGD regions\n");
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 94142581c98c..14437396d721 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -1814,7 +1814,8 @@ int vfio_config_init(struct vfio_pci_core_device *vdev)
                         cpu_to_le16(PCI_COMMAND_MEMORY);
         }
 
-        if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx)
+        if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx ||
+            vdev->pdev->irq == IRQ_NOTCONNECTED)
                 vconfig[PCI_INTERRUPT_PIN] = 0;
 
         ret = vfio_cap_init(vdev);
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index c8586d47704c..35f9046af315 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -727,15 +727,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_finish_enable);
 static int vfio_pci_get_irq_count(struct vfio_pci_core_device *vdev, int irq_type)
 {
         if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
-                u8 pin;
-
-                if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) ||
-                    vdev->nointx || vdev->pdev->is_virtfn)
-                        return 0;
-
-                pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin);
-
-                return pin ? 1 : 0;
+                return vdev->vconfig[PCI_INTERRUPT_PIN] ? 1 : 0;
         } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
                 u8 pos;
                 u16 flags;
diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c
index dd70e2431bd7..ef490a4545f4 100644
--- a/drivers/vfio/pci/vfio_pci_igd.c
+++ b/drivers/vfio/pci/vfio_pci_igd.c
@@ -435,6 +435,12 @@ static int vfio_pci_igd_cfg_init(struct vfio_pci_core_device *vdev)
         return 0;
 }
 
+bool vfio_pci_is_intel_display(struct pci_dev *pdev)
+{
+        return (pdev->vendor == PCI_VENDOR_ID_INTEL) &&
+               ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY);
+}
+
 int vfio_pci_igd_init(struct vfio_pci_core_device *vdev)
 {
         int ret;
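Note: the vfio_pci_is_intel_display() helper added above is deliberately broader than the vfio_pci_is_vga() gate it replaces. pdev->class packs the 24-bit PCI class code as [23:16] base class, [15:8] subclass, [7:0] prog-if, so the two checks compare prefixes of different widths. A minimal sketch of the distinction, assuming only the standard constants from <linux/pci_ids.h>; these helpers are illustrative, not part of the patch:

#include <linux/pci.h>

/* Old gate: VGA controllers only, i.e. 24-bit class 0x0300xx. */
static bool example_is_vga(struct pci_dev *pdev)
{
        return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;     /* 0x0300 */
}

/* New gate: any display controller (0x03xxxx), which also matches
 * non-VGA devices such as class 0x0380 ("display controller, other")
 * that the VGA-only check missed. */
static bool example_is_display(struct pci_dev *pdev)
{
        return (pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY;   /* 0x03 */
}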
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 8382c5834335..565966351dfa 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -259,7 +259,7 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev,
         if (!is_irq_none(vdev))
                 return -EINVAL;
 
-        if (!pdev->irq)
+        if (!pdev->irq || pdev->irq == IRQ_NOTCONNECTED)
                 return -ENODEV;
 
         name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev));
diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
index 5e4fa69aee16..a9972eacb293 100644
--- a/drivers/vfio/pci/vfio_pci_priv.h
+++ b/drivers/vfio/pci/vfio_pci_priv.h
@@ -67,8 +67,14 @@ void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev,
                                         u16 cmd);
 
 #ifdef CONFIG_VFIO_PCI_IGD
+bool vfio_pci_is_intel_display(struct pci_dev *pdev);
 int vfio_pci_igd_init(struct vfio_pci_core_device *vdev);
 #else
+static inline bool vfio_pci_is_intel_display(struct pci_dev *pdev)
+{
+        return false;
+}
+
 static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev)
 {
         return -ENODEV;
diff --git a/drivers/vfio/pci/virtio/Kconfig b/drivers/vfio/pci/virtio/Kconfig
index 2770f7eb702c..33e04e65bec6 100644
--- a/drivers/vfio/pci/virtio/Kconfig
+++ b/drivers/vfio/pci/virtio/Kconfig
@@ -1,11 +1,11 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config VIRTIO_VFIO_PCI
-        tristate "VFIO support for VIRTIO NET PCI VF devices"
+        tristate "VFIO support for VIRTIO PCI VF devices"
         depends on VIRTIO_PCI
         select VFIO_PCI_CORE
         help
-          This provides migration support for VIRTIO NET PCI VF devices
-          using the VFIO framework. Migration support requires the
+          This provides migration support for VIRTIO NET and BLOCK PCI VF
+          devices using the VFIO framework. Migration support requires the
           SR-IOV PF device to support specific VIRTIO extensions,
           otherwise this driver provides no additional functionality
           beyond vfio-pci.
diff --git a/drivers/vfio/pci/virtio/legacy_io.c b/drivers/vfio/pci/virtio/legacy_io.c
index 20382ee15fac..832af5ba267c 100644
--- a/drivers/vfio/pci/virtio/legacy_io.c
+++ b/drivers/vfio/pci/virtio/legacy_io.c
@@ -382,7 +382,9 @@ static bool virtiovf_bar0_exists(struct pci_dev *pdev)
 
 bool virtiovf_support_legacy_io(struct pci_dev *pdev)
 {
-        return virtio_pci_admin_has_legacy_io(pdev) && !virtiovf_bar0_exists(pdev);
+        /* For now, the legacy IO functionality is supported only for virtio-net */
+        return pdev->device == 0x1041 && virtio_pci_admin_has_legacy_io(pdev) &&
+               !virtiovf_bar0_exists(pdev);
 }
 
 int virtiovf_init_legacy_io(struct virtiovf_pci_core_device *virtvdev)
diff --git a/drivers/vfio/pci/virtio/main.c b/drivers/vfio/pci/virtio/main.c
index d534d48c4163..515fe1b9f94d 100644
--- a/drivers/vfio/pci/virtio/main.c
+++ b/drivers/vfio/pci/virtio/main.c
@@ -187,8 +187,9 @@ static void virtiovf_pci_remove(struct pci_dev *pdev)
 }
 
 static const struct pci_device_id virtiovf_pci_table[] = {
-        /* Only virtio-net is supported/tested so far */
+        /* Only virtio-net and virtio-block are supported/tested so far */
         { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_REDHAT_QUMRANET, 0x1041) },
+        { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_REDHAT_QUMRANET, 0x1042) },
         {}
 };
 
@@ -221,4 +222,4 @@ module_pci_driver(virtiovf_pci_driver);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Yishai Hadas <yishaih@nvidia.com>");
 MODULE_DESCRIPTION(
-        "VIRTIO VFIO PCI - User Level meta-driver for VIRTIO NET devices");
+        "VIRTIO VFIO PCI - User Level meta-driver for VIRTIO NET and BLOCK devices");
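Note on the hard-coded IDs in the table above: modern (virtio 1.0+) PCI functions use device ID 0x1040 plus the virtio device number, so net (device number 1) is 0x1041 and block (device number 2) is 0x1042. A hedged sketch; the macro below is illustrative, not a kernel API:

#include <linux/virtio_ids.h>   /* VIRTIO_ID_NET = 1, VIRTIO_ID_BLOCK = 2 */

/* Illustrative only: compute a modern virtio PCI device ID. */
#define EXAMPLE_VIRTIO_PCI_MODERN_ID(virtio_id) (0x1040 + (virtio_id))

/* EXAMPLE_VIRTIO_PCI_MODERN_ID(VIRTIO_ID_NET)   == 0x1041  (virtio-net VF)
 * EXAMPLE_VIRTIO_PCI_MODERN_ID(VIRTIO_ID_BLOCK) == 0x1042  (virtio-blk VF)
 *
 * This is also why legacy_io.c above can gate its virtio-net-only
 * legacy IO path on pdev->device == 0x1041.
 */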
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 50ebc9593c9d..0ac56072af9f 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -103,9 +103,9 @@ struct vfio_dma {
 struct vfio_batch {
         struct page             **pages;        /* for pin_user_pages_remote */
         struct page             *fallback_page; /* if pages alloc fails */
-        int                     capacity;       /* length of pages array */
-        int                     size;           /* of batch currently */
-        int                     offset;         /* of next entry in pages */
+        unsigned int            capacity;       /* length of pages array */
+        unsigned int            size;           /* of batch currently */
+        unsigned int            offset;         /* of next entry in pages */
 };
 
 struct vfio_iommu_group {
@@ -471,12 +471,12 @@ static int put_pfn(unsigned long pfn, int prot)
 
 #define VFIO_BATCH_MAX_CAPACITY (PAGE_SIZE / sizeof(struct page *))
 
-static void vfio_batch_init(struct vfio_batch *batch)
+static void __vfio_batch_init(struct vfio_batch *batch, bool single)
 {
         batch->size = 0;
         batch->offset = 0;
 
-        if (unlikely(disable_hugepages))
+        if (single || unlikely(disable_hugepages))
                 goto fallback;
 
         batch->pages = (struct page **) __get_free_page(GFP_KERNEL);
@@ -491,6 +491,16 @@ fallback:
         batch->capacity = 1;
 }
 
+static void vfio_batch_init(struct vfio_batch *batch)
+{
+        __vfio_batch_init(batch, false);
+}
+
+static void vfio_batch_init_single(struct vfio_batch *batch)
+{
+        __vfio_batch_init(batch, true);
+}
+
 static void vfio_batch_unpin(struct vfio_batch *batch, struct vfio_dma *dma)
 {
         while (batch->size) {
@@ -510,7 +520,7 @@ static void vfio_batch_fini(struct vfio_batch *batch)
 
 static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
                             unsigned long vaddr, unsigned long *pfn,
-                            bool write_fault)
+                            unsigned long *addr_mask, bool write_fault)
 {
         struct follow_pfnmap_args args = { .vma = vma, .address = vaddr };
         int ret;
@@ -534,10 +544,12 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
                 return ret;
         }
 
-        if (write_fault && !args.writable)
+        if (write_fault && !args.writable) {
                 ret = -EFAULT;
-        else
+        } else {
                 *pfn = args.pfn;
+                *addr_mask = args.addr_mask;
+        }
 
         follow_pfnmap_end(&args);
         return ret;
@@ -545,25 +557,33 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
 
 /*
  * Returns the positive number of pfns successfully obtained or a negative
- * error code.
+ * error code.  The initial pfn is stored in the pfn arg.  For page-backed
+ * pfns, the provided batch is also updated to indicate the filled pages and
+ * initial offset.  For VM_PFNMAP pfns, only the returned number of pfns and
+ * returned initial pfn are provided; subsequent pfns are contiguous.
 */
-static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
-                          long npages, int prot, unsigned long *pfn,
-                          struct page **pages)
+static long vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
+                           unsigned long npages, int prot, unsigned long *pfn,
+                           struct vfio_batch *batch)
 {
+        unsigned long pin_pages = min_t(unsigned long, npages, batch->capacity);
         struct vm_area_struct *vma;
         unsigned int flags = 0;
-        int ret;
+        long ret;
 
         if (prot & IOMMU_WRITE)
                 flags |= FOLL_WRITE;
 
         mmap_read_lock(mm);
-        ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM,
-                                    pages, NULL);
+        ret = pin_user_pages_remote(mm, vaddr, pin_pages, flags | FOLL_LONGTERM,
+                                    batch->pages, NULL);
         if (ret > 0) {
-                *pfn = page_to_pfn(pages[0]);
+                *pfn = page_to_pfn(batch->pages[0]);
+                batch->size = ret;
+                batch->offset = 0;
                 goto done;
+        } else if (!ret) {
+                ret = -EFAULT;
         }
 
         vaddr = untagged_addr_remote(mm, vaddr);
@@ -572,15 +592,22 @@ retry:
         vma = vma_lookup(mm, vaddr);
 
         if (vma && vma->vm_flags & VM_PFNMAP) {
-                ret = follow_fault_pfn(vma, mm, vaddr, pfn, prot & IOMMU_WRITE);
+                unsigned long addr_mask;
+
+                ret = follow_fault_pfn(vma, mm, vaddr, pfn, &addr_mask,
+                                       prot & IOMMU_WRITE);
                 if (ret == -EAGAIN)
                         goto retry;
 
                 if (!ret) {
-                        if (is_invalid_reserved_pfn(*pfn))
-                                ret = 1;
-                        else
+                        if (is_invalid_reserved_pfn(*pfn)) {
+                                unsigned long epfn;
+
+                                epfn = (*pfn | (~addr_mask >> PAGE_SHIFT)) + 1;
+                                ret = min_t(long, npages, epfn - *pfn);
+                        } else {
                                 ret = -EFAULT;
+                        }
                 }
         }
 done:
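Note: a worked example of the epfn computation above, assuming a 2MB PMD pfnmap on x86-64 (PAGE_SHIFT = 12); the concrete numbers are illustrative:

/*
 * addr_mask         = PMD_MASK = ~0x1fffffUL    (2MB mapping)
 * ~addr_mask >> 12  = 0x1ff                     (pfn offset mask; 512 pfns per region)
 * *pfn              = 0x100080                  (somewhere inside the region)
 * epfn = (0x100080 | 0x1ff) + 1 = 0x100200      (first pfn past the region)
 * ret  = min(npages, 0x100200 - 0x100080)       (up to 0x180 = 384 contiguous pfns)
 *
 * So one fault can report the entire remainder of a huge pfnmap,
 * instead of returning a single pfn per call as before.
 */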
@@ -594,7 +621,7 @@ done:
  * first page and all consecutive pages with the same locking.
  */
 static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
-                                  long npage, unsigned long *pfn_base,
+                                  unsigned long npage, unsigned long *pfn_base,
                                   unsigned long limit, struct vfio_batch *batch)
 {
         unsigned long pfn;
@@ -616,32 +643,42 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
                 *pfn_base = 0;
         }
 
+        if (unlikely(disable_hugepages))
+                npage = 1;
+
         while (npage) {
                 if (!batch->size) {
                         /* Empty batch, so refill it. */
-                        long req_pages = min_t(long, npage, batch->capacity);
-
-                        ret = vaddr_get_pfns(mm, vaddr, req_pages, dma->prot,
-                                             &pfn, batch->pages);
+                        ret = vaddr_get_pfns(mm, vaddr, npage, dma->prot,
+                                             &pfn, batch);
                         if (ret < 0)
                                 goto unpin_out;
 
-                        batch->size = ret;
-                        batch->offset = 0;
-
                         if (!*pfn_base) {
                                 *pfn_base = pfn;
                                 rsvd = is_invalid_reserved_pfn(*pfn_base);
                         }
+
+                        /* Handle pfnmap */
+                        if (!batch->size) {
+                                if (pfn != *pfn_base + pinned || !rsvd)
+                                        goto out;
+
+                                pinned += ret;
+                                npage -= ret;
+                                vaddr += (PAGE_SIZE * ret);
+                                iova += (PAGE_SIZE * ret);
+                                continue;
+                        }
                 }
 
                 /*
-                 * pfn is preset for the first iteration of this inner loop and
-                 * updated at the end to handle a VM_PFNMAP pfn.  In that case,
-                 * batch->pages isn't valid (there's no struct page), so allow
-                 * batch->pages to be touched only when there's more than one
-                 * pfn to check, which guarantees the pfns are from a
-                 * !VM_PFNMAP vma.
+                 * pfn is preset for the first iteration of this inner loop
+                 * due to the fact that vaddr_get_pfns() needs to provide the
+                 * initial pfn for pfnmaps.  Therefore to reduce redundancy,
+                 * the next pfn is fetched at the end of the loop.
+                 * A PageReserved() page could still qualify as page backed
+                 * and rsvd here, and therefore continues to use the batch.
                  */
                 while (true) {
                         if (pfn != *pfn_base + pinned ||
@@ -676,21 +713,12 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 
                         pfn = page_to_pfn(batch->pages[batch->offset]);
                 }
-
-                if (unlikely(disable_hugepages))
-                        break;
         }
 
 out:
         ret = vfio_lock_acct(dma, lock_acct, false);
 
 unpin_out:
-        if (batch->size == 1 && !batch->offset) {
-                /* May be a VM_PFNMAP pfn, which the batch can't remember. */
-                put_pfn(pfn, dma->prot);
-                batch->size = 0;
-        }
-
         if (ret < 0) {
                 if (pinned && !rsvd) {
                         for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
@@ -705,7 +733,7 @@ unpin_out:
 }
 
 static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
-                                    unsigned long pfn, long npage,
+                                    unsigned long pfn, unsigned long npage,
                                     bool do_accounting)
 {
         long unlocked = 0, locked = 0;
@@ -728,7 +756,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
 static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
                                   unsigned long *pfn_base, bool do_accounting)
 {
-        struct page *pages[1];
+        struct vfio_batch batch;
         struct mm_struct *mm;
         int ret;
 
@@ -736,7 +764,9 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
         if (!mmget_not_zero(mm))
                 return -ENODEV;
 
-        ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages);
+        vfio_batch_init_single(&batch);
+
+        ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, &batch);
         if (ret != 1)
                 goto out;
 
@@ -755,6 +785,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
         }
 
 out:
+        vfio_batch_fini(&batch);
         mmput(mm);
         return ret;
 }
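Note: to make the reworked vaddr_get_pfns() contract concrete, here is a hedged caller-side sketch (example_get_pfns() is illustrative, not part of the patch; names mirror the code above):

static long example_get_pfns(struct mm_struct *mm, unsigned long vaddr,
                             unsigned long npage, int prot,
                             struct vfio_batch *batch)
{
        unsigned long pfn;
        long ret;

        ret = vaddr_get_pfns(mm, vaddr, npage, prot, &pfn, batch);
        if (ret < 0)
                return ret;             /* nothing was pinned */

        if (batch->size) {
                /* Page-backed: batch->pages[batch->offset] onward holds
                 * 'ret' pinned struct pages, the first mapping to 'pfn'. */
        } else {
                /* VM_PFNMAP: there are no struct pages and nothing was
                 * pinned; 'ret' physically contiguous pfns start at 'pfn'. */
        }
        return ret;
}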
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 32ba0e33422b..d66bc0e97632 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2495,11 +2495,13 @@ struct follow_pfnmap_args {
          * Outputs:
          *
          * @pfn: the PFN of the address
+         * @addr_mask: address mask covering pfn
          * @pgprot: the pgprot_t of the mapping
          * @writable: whether the mapping is writable
          * @special: whether the mapping is a special mapping (real PFN maps)
          */
         unsigned long pfn;
+        unsigned long addr_mask;
         pgprot_t pgprot;
         bool writable;
         bool special;
diff --git a/mm/memory.c b/mm/memory.c
index 6ea3551eb2df..2d8c265fc7d6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6670,6 +6670,7 @@ static inline void pfnmap_args_setup(struct follow_pfnmap_args *args,
         args->lock = lock;
         args->ptep = ptep;
         args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT);
+        args->addr_mask = addr_mask;
         args->pgprot = pgprot;
         args->writable = writable;
         args->special = special;
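Note: what the new addr_mask output reports, with x86-64 values for illustration; the mask covers the whole hardware mapping containing the faulted address, per the masks pfnmap_args_setup() receives from its callers:

/*
 * PTE-mapped (4KB):  addr_mask = PAGE_MASK = ~0xfffUL
 * PMD-mapped (2MB):  addr_mask = PMD_MASK  = ~0x1fffffUL
 * PUD-mapped (1GB):  addr_mask = PUD_MASK  = ~0x3fffffffUL
 *
 * Masking an address with addr_mask yields the start of its mapping,
 * which is how follow_fault_pfn() above infers how many contiguous
 * pfns follow the one returned.
 */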