-rw-r--r--  drivers/vfio/pci/vfio_pci.c            4
-rw-r--r--  drivers/vfio/pci/vfio_pci_config.c     3
-rw-r--r--  drivers/vfio/pci/vfio_pci_core.c      10
-rw-r--r--  drivers/vfio/pci/vfio_pci_igd.c        6
-rw-r--r--  drivers/vfio/pci/vfio_pci_intrs.c      2
-rw-r--r--  drivers/vfio/pci/vfio_pci_priv.h       6
-rw-r--r--  drivers/vfio/pci/virtio/Kconfig        6
-rw-r--r--  drivers/vfio/pci/virtio/legacy_io.c    4
-rw-r--r--  drivers/vfio/pci/virtio/main.c         5
-rw-r--r--  drivers/vfio/vfio_iommu_type1.c      123
-rw-r--r--  include/linux/mm.h                     2
-rw-r--r--  mm/memory.c                            1
12 files changed, 106 insertions(+), 66 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 6f7ae7e5b7b0..5ba39f7623bb 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -111,9 +111,7 @@ static int vfio_pci_open_device(struct vfio_device *core_vdev)
if (ret)
return ret;
- if (vfio_pci_is_vga(pdev) &&
- pdev->vendor == PCI_VENDOR_ID_INTEL &&
- IS_ENABLED(CONFIG_VFIO_PCI_IGD)) {
+ if (vfio_pci_is_intel_display(pdev)) {
ret = vfio_pci_igd_init(vdev);
if (ret && ret != -ENODEV) {
pci_warn(pdev, "Failed to setup Intel IGD regions\n");
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 94142581c98c..14437396d721 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -1814,7 +1814,8 @@ int vfio_config_init(struct vfio_pci_core_device *vdev)
cpu_to_le16(PCI_COMMAND_MEMORY);
}
- if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx)
+ if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx ||
+ vdev->pdev->irq == IRQ_NOTCONNECTED)
vconfig[PCI_INTERRUPT_PIN] = 0;
ret = vfio_cap_init(vdev);
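IRQ_NOTCONNECTED is the kernel's sentinel (defined in include/linux/interrupt.h) for a device whose legacy IRQ line is not actually routed; request_irq() on such an IRQ fails with -ENOTCONN. Zeroing the virtualized PCI_INTERRUPT_PIN byte here means userspace never sees an INTx capability it cannot use. A minimal sketch of the guest-visible effect, assuming a vfio-pci userspace that has already resolved the config-space region offset into cfg_off (hypothetical variable and helper names):

    uint8_t pin;

    /* PCI_INTERRUPT_PIN (offset 0x3d) now reads back zero when the
     * kernel knows the INTx line is not connected. */
    pread(device_fd, &pin, 1, cfg_off + PCI_INTERRUPT_PIN);
    if (pin)
        setup_intx_eventfd(device_fd);  /* hypothetical INTx setup */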
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index c8586d47704c..35f9046af315 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -727,15 +727,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_finish_enable);
static int vfio_pci_get_irq_count(struct vfio_pci_core_device *vdev, int irq_type)
{
if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
- u8 pin;
-
- if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) ||
- vdev->nointx || vdev->pdev->is_virtfn)
- return 0;
-
- pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin);
-
- return pin ? 1 : 0;
+ return vdev->vconfig[PCI_INTERRUPT_PIN] ? 1 : 0;
} else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
u8 pos;
u16 flags;
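With PCI_INTERRUPT_PIN fully virtualized above, the INTx count can come straight from vconfig instead of re-reading hardware. Dropping the is_virtfn test is safe because SR-IOV VFs hardwire the Interrupt Pin register to zero, so the virtualized byte is already zero for them, and the nointx and CONFIG_VFIO_PCI_INTX cases are folded in by vfio_config_init().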
diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c
index dd70e2431bd7..ef490a4545f4 100644
--- a/drivers/vfio/pci/vfio_pci_igd.c
+++ b/drivers/vfio/pci/vfio_pci_igd.c
@@ -435,6 +435,12 @@ static int vfio_pci_igd_cfg_init(struct vfio_pci_core_device *vdev)
return 0;
}
+bool vfio_pci_is_intel_display(struct pci_dev *pdev)
+{
+ return (pdev->vendor == PCI_VENDOR_ID_INTEL) &&
+ ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY);
+}
+
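pdev->class holds the 24-bit class code as 0xBBSSPP (base class, sub-class, prog-if), so the shift by 16 isolates the base class. Compared with the old vfio_pci_is_vga() test this widens the match from VGA-compatible controllers to every Intel display-class function. Illustrative class codes (values from the PCI spec):

    /* pdev->class    meaning                      matched?
     * 0x030000       VGA-compatible controller    yes (as before)
     * 0x038000       display controller, other    yes (new)
     * 0x020000       Ethernet controller          no
     *
     * PCI_BASE_CLASS_DISPLAY == 0x03
     */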
int vfio_pci_igd_init(struct vfio_pci_core_device *vdev)
{
int ret;
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 8382c5834335..565966351dfa 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -259,7 +259,7 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev,
if (!is_irq_none(vdev))
return -EINVAL;
- if (!pdev->irq)
+ if (!pdev->irq || pdev->irq == IRQ_NOTCONNECTED)
return -ENODEV;
name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev));
diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
index 5e4fa69aee16..a9972eacb293 100644
--- a/drivers/vfio/pci/vfio_pci_priv.h
+++ b/drivers/vfio/pci/vfio_pci_priv.h
@@ -67,8 +67,14 @@ void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev,
u16 cmd);
#ifdef CONFIG_VFIO_PCI_IGD
+bool vfio_pci_is_intel_display(struct pci_dev *pdev);
int vfio_pci_igd_init(struct vfio_pci_core_device *vdev);
#else
+static inline bool vfio_pci_is_intel_display(struct pci_dev *pdev)
+{
+ return false;
+}
+
static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev)
{
return -ENODEV;
diff --git a/drivers/vfio/pci/virtio/Kconfig b/drivers/vfio/pci/virtio/Kconfig
index 2770f7eb702c..33e04e65bec6 100644
--- a/drivers/vfio/pci/virtio/Kconfig
+++ b/drivers/vfio/pci/virtio/Kconfig
@@ -1,11 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
config VIRTIO_VFIO_PCI
- tristate "VFIO support for VIRTIO NET PCI VF devices"
+ tristate "VFIO support for VIRTIO PCI VF devices"
depends on VIRTIO_PCI
select VFIO_PCI_CORE
help
- This provides migration support for VIRTIO NET PCI VF devices
- using the VFIO framework. Migration support requires the
+ This provides migration support for VIRTIO NET and BLOCK PCI VF
+ devices using the VFIO framework. Migration support requires the
SR-IOV PF device to support specific VIRTIO extensions,
otherwise this driver provides no additional functionality
beyond vfio-pci.
diff --git a/drivers/vfio/pci/virtio/legacy_io.c b/drivers/vfio/pci/virtio/legacy_io.c
index 20382ee15fac..832af5ba267c 100644
--- a/drivers/vfio/pci/virtio/legacy_io.c
+++ b/drivers/vfio/pci/virtio/legacy_io.c
@@ -382,7 +382,9 @@ static bool virtiovf_bar0_exists(struct pci_dev *pdev)
bool virtiovf_support_legacy_io(struct pci_dev *pdev)
{
- return virtio_pci_admin_has_legacy_io(pdev) && !virtiovf_bar0_exists(pdev);
+ /* For now, the legacy IO functionality is supported only for virtio-net */
+ return pdev->device == 0x1041 && virtio_pci_admin_has_legacy_io(pdev) &&
+ !virtiovf_bar0_exists(pdev);
}
int virtiovf_init_legacy_io(struct virtiovf_pci_core_device *virtvdev)
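The magic 0x1041 follows from the virtio-over-PCI convention: modern device IDs are 0x1040 plus the virtio device type, so net (type 1) is 0x1041 and block (type 2) is 0x1042, matching the ID table extended in main.c below. A sketch with a named constant instead of the literal (hypothetical macro; VIRTIO_ID_NET is the real constant from include/uapi/linux/virtio_ids.h):

    /* Modern virtio PCI device ID = 0x1040 + virtio device type. */
    #define VIRTIO_PCI_MODERN_ID(type)  (0x1040 + (type))

    return pdev->device == VIRTIO_PCI_MODERN_ID(VIRTIO_ID_NET) &&
           virtio_pci_admin_has_legacy_io(pdev) &&
           !virtiovf_bar0_exists(pdev);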
diff --git a/drivers/vfio/pci/virtio/main.c b/drivers/vfio/pci/virtio/main.c
index d534d48c4163..515fe1b9f94d 100644
--- a/drivers/vfio/pci/virtio/main.c
+++ b/drivers/vfio/pci/virtio/main.c
@@ -187,8 +187,9 @@ static void virtiovf_pci_remove(struct pci_dev *pdev)
}
static const struct pci_device_id virtiovf_pci_table[] = {
- /* Only virtio-net is supported/tested so far */
+ /* Only virtio-net and virtio-block are supported/tested so far */
{ PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_REDHAT_QUMRANET, 0x1041) },
+ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_REDHAT_QUMRANET, 0x1042) },
{}
};
@@ -221,4 +222,4 @@ module_pci_driver(virtiovf_pci_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yishai Hadas <yishaih@nvidia.com>");
MODULE_DESCRIPTION(
- "VIRTIO VFIO PCI - User Level meta-driver for VIRTIO NET devices");
+ "VIRTIO VFIO PCI - User Level meta-driver for VIRTIO NET and BLOCK devices");
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 50ebc9593c9d..0ac56072af9f 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -103,9 +103,9 @@ struct vfio_dma {
struct vfio_batch {
struct page **pages; /* for pin_user_pages_remote */
struct page *fallback_page; /* if pages alloc fails */
- int capacity; /* length of pages array */
- int size; /* of batch currently */
- int offset; /* of next entry in pages */
+ unsigned int capacity; /* length of pages array */
+ unsigned int size; /* of batch currently */
+ unsigned int offset; /* of next entry in pages */
};
struct vfio_iommu_group {
@@ -471,12 +471,12 @@ static int put_pfn(unsigned long pfn, int prot)
#define VFIO_BATCH_MAX_CAPACITY (PAGE_SIZE / sizeof(struct page *))
-static void vfio_batch_init(struct vfio_batch *batch)
+static void __vfio_batch_init(struct vfio_batch *batch, bool single)
{
batch->size = 0;
batch->offset = 0;
- if (unlikely(disable_hugepages))
+ if (single || unlikely(disable_hugepages))
goto fallback;
batch->pages = (struct page **) __get_free_page(GFP_KERNEL);
@@ -491,6 +491,16 @@ fallback:
batch->capacity = 1;
}
+static void vfio_batch_init(struct vfio_batch *batch)
+{
+ __vfio_batch_init(batch, false);
+}
+
+static void vfio_batch_init_single(struct vfio_batch *batch)
+{
+ __vfio_batch_init(batch, true);
+}
+
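The single-shot variant exists for callers that only ever need one pfn at a time: it takes the fallback path, so batch->pages points at the embedded fallback_page pointer, capacity is 1, and no page allocation happens. vfio_pin_page_external() below switches to it, replacing its open-coded pages[1] array. A minimal usage sketch mirroring that caller:

    struct vfio_batch batch;

    vfio_batch_init_single(&batch);     /* capacity == 1, no allocation */
    ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, &pfn, &batch);
    ...
    vfio_batch_fini(&batch);            /* nothing to free on this path */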
static void vfio_batch_unpin(struct vfio_batch *batch, struct vfio_dma *dma)
{
while (batch->size) {
@@ -510,7 +520,7 @@ static void vfio_batch_fini(struct vfio_batch *batch)
static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
unsigned long vaddr, unsigned long *pfn,
- bool write_fault)
+ unsigned long *addr_mask, bool write_fault)
{
struct follow_pfnmap_args args = { .vma = vma, .address = vaddr };
int ret;
@@ -534,10 +544,12 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
return ret;
}
- if (write_fault && !args.writable)
+ if (write_fault && !args.writable) {
ret = -EFAULT;
- else
+ } else {
*pfn = args.pfn;
+ *addr_mask = args.addr_mask;
+ }
follow_pfnmap_end(&args);
return ret;
@@ -545,25 +557,33 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
/*
* Returns the positive number of pfns successfully obtained or a negative
- * error code.
+ * error code. The initial pfn is stored in the pfn arg. For page-backed
+ * pfns, the provided batch is also updated to indicate the filled pages and
+ * initial offset. For VM_PFNMAP pfns, only the returned number of pfns and
+ * returned initial pfn are provided; subsequent pfns are contiguous.
*/
-static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
- long npages, int prot, unsigned long *pfn,
- struct page **pages)
+static long vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
+ unsigned long npages, int prot, unsigned long *pfn,
+ struct vfio_batch *batch)
{
+ unsigned long pin_pages = min_t(unsigned long, npages, batch->capacity);
struct vm_area_struct *vma;
unsigned int flags = 0;
- int ret;
+ long ret;
if (prot & IOMMU_WRITE)
flags |= FOLL_WRITE;
mmap_read_lock(mm);
- ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM,
- pages, NULL);
+ ret = pin_user_pages_remote(mm, vaddr, pin_pages, flags | FOLL_LONGTERM,
+ batch->pages, NULL);
if (ret > 0) {
- *pfn = page_to_pfn(pages[0]);
+ *pfn = page_to_pfn(batch->pages[0]);
+ batch->size = ret;
+ batch->offset = 0;
goto done;
+ } else if (!ret) {
+ ret = -EFAULT;
}
vaddr = untagged_addr_remote(mm, vaddr);
@@ -572,15 +592,22 @@ retry:
vma = vma_lookup(mm, vaddr);
if (vma && vma->vm_flags & VM_PFNMAP) {
- ret = follow_fault_pfn(vma, mm, vaddr, pfn, prot & IOMMU_WRITE);
+ unsigned long addr_mask;
+
+ ret = follow_fault_pfn(vma, mm, vaddr, pfn, &addr_mask,
+ prot & IOMMU_WRITE);
if (ret == -EAGAIN)
goto retry;
if (!ret) {
- if (is_invalid_reserved_pfn(*pfn))
- ret = 1;
- else
+ if (is_invalid_reserved_pfn(*pfn)) {
+ unsigned long epfn;
+
+ epfn = (*pfn | (~addr_mask >> PAGE_SHIFT)) + 1;
+ ret = min_t(long, npages, epfn - *pfn);
+ } else {
ret = -EFAULT;
+ }
}
}
done:
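To make the new pfnmap arithmetic concrete, a worked example with illustrative values: for a PMD-level 2 MiB mapping with 4 KiB pages, addr_mask is ~(SZ_2M - 1), so ~addr_mask >> PAGE_SHIFT is 0x1ff (511), the granule's pfn span minus one.

    /* Suppose the fault resolved to *pfn = 0x10023, i.e. 0x23 pages
     * into a 2 MiB granule starting at pfn 0x10000:
     *
     *   epfn = (0x10023 | 0x1ff) + 1 = 0x10200   first pfn past the granule
     *   ret  = min(npages, 0x10200 - 0x10023)
     *        = min(npages, 477)
     *
     * so one call can report up to 477 contiguous pfns where the old
     * code returned exactly 1 per fault. */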
@@ -594,7 +621,7 @@ done:
* first page and all consecutive pages with the same locking.
*/
static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
- long npage, unsigned long *pfn_base,
+ unsigned long npage, unsigned long *pfn_base,
unsigned long limit, struct vfio_batch *batch)
{
unsigned long pfn;
@@ -616,32 +643,42 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
*pfn_base = 0;
}
+ if (unlikely(disable_hugepages))
+ npage = 1;
+
while (npage) {
if (!batch->size) {
/* Empty batch, so refill it. */
- long req_pages = min_t(long, npage, batch->capacity);
-
- ret = vaddr_get_pfns(mm, vaddr, req_pages, dma->prot,
- &pfn, batch->pages);
+ ret = vaddr_get_pfns(mm, vaddr, npage, dma->prot,
+ &pfn, batch);
if (ret < 0)
goto unpin_out;
- batch->size = ret;
- batch->offset = 0;
-
if (!*pfn_base) {
*pfn_base = pfn;
rsvd = is_invalid_reserved_pfn(*pfn_base);
}
+
+ /* Handle pfnmap */
+ if (!batch->size) {
+ if (pfn != *pfn_base + pinned || !rsvd)
+ goto out;
+
+ pinned += ret;
+ npage -= ret;
+ vaddr += (PAGE_SIZE * ret);
+ iova += (PAGE_SIZE * ret);
+ continue;
+ }
}
/*
- * pfn is preset for the first iteration of this inner loop and
- * updated at the end to handle a VM_PFNMAP pfn. In that case,
- * batch->pages isn't valid (there's no struct page), so allow
- * batch->pages to be touched only when there's more than one
- * pfn to check, which guarantees the pfns are from a
- * !VM_PFNMAP vma.
+ * pfn is preset for the first iteration of this inner loop
+ * due to the fact that vaddr_get_pfns() needs to provide the
+ * initial pfn for pfnmaps. Therefore to reduce redundancy,
+ * the next pfn is fetched at the end of the loop.
+ * A PageReserved() page could still qualify as page backed
+ * and rsvd here, and therefore continues to use the batch.
*/
while (true) {
if (pfn != *pfn_base + pinned ||
@@ -676,21 +713,12 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
pfn = page_to_pfn(batch->pages[batch->offset]);
}
-
- if (unlikely(disable_hugepages))
- break;
}
out:
ret = vfio_lock_acct(dma, lock_acct, false);
unpin_out:
- if (batch->size == 1 && !batch->offset) {
- /* May be a VM_PFNMAP pfn, which the batch can't remember. */
- put_pfn(pfn, dma->prot);
- batch->size = 0;
- }
-
if (ret < 0) {
if (pinned && !rsvd) {
for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
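The refill contract changes shape here: vaddr_get_pfns() now fills the batch itself for page-backed memory, so a successful return that leaves batch->size at zero can only mean a VM_PFNMAP range. That case is consumed wholesale, advancing pinned, vaddr, and iova by the full run instead of one pfn per iteration, and the old unpin_out special case for a single remembered pfnmap pfn disappears because pfnmap pfns never enter the batch. A sketch of the new refill flow, condensed from the hunk above:

    ret = vaddr_get_pfns(mm, vaddr, npage, dma->prot, &pfn, batch);
    if (ret < 0)
            goto unpin_out;
    if (!batch->size) {
            /* VM_PFNMAP: 'ret' contiguous pfns starting at 'pfn', no
             * struct pages to walk; consume them in one step. */
    } else {
            /* page-backed: walk batch->pages as before. */
    }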
@@ -705,7 +733,7 @@ unpin_out:
}
static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
- unsigned long pfn, long npage,
+ unsigned long pfn, unsigned long npage,
bool do_accounting)
{
long unlocked = 0, locked = 0;
@@ -728,7 +756,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
unsigned long *pfn_base, bool do_accounting)
{
- struct page *pages[1];
+ struct vfio_batch batch;
struct mm_struct *mm;
int ret;
@@ -736,7 +764,9 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
if (!mmget_not_zero(mm))
return -ENODEV;
- ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages);
+ vfio_batch_init_single(&batch);
+
+ ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, &batch);
if (ret != 1)
goto out;
@@ -755,6 +785,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
}
out:
+ vfio_batch_fini(&batch);
mmput(mm);
return ret;
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 32ba0e33422b..d66bc0e97632 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2495,11 +2495,13 @@ struct follow_pfnmap_args {
* Outputs:
*
* @pfn: the PFN of the address
+ * @addr_mask: address mask covering pfn
* @pgprot: the pgprot_t of the mapping
* @writable: whether the mapping is writable
* @special: whether the mapping is a special mapping (real PFN maps)
*/
unsigned long pfn;
+ unsigned long addr_mask;
pgprot_t pgprot;
bool writable;
bool special;
diff --git a/mm/memory.c b/mm/memory.c
index 6ea3551eb2df..2d8c265fc7d6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6670,6 +6670,7 @@ static inline void pfnmap_args_setup(struct follow_pfnmap_args *args,
args->lock = lock;
args->ptep = ptep;
args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT);
+ args->addr_mask = addr_mask;
args->pgprot = pgprot;
args->writable = writable;
args->special = special;
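pfnmap_args_setup() is called for whichever leaf level follow_pfnmap_start() lands on, so addr_mask reflects the real mapping granule (PTE, PMD, or PUD). The two outputs are consistent by construction, which is what the vfio epfn computation above depends on:

    /* addr_mask = ~(level_size - 1), e.g. ~(SZ_2M - 1) for a PMD leaf.
     *
     * pfn = pfn_base + ((address & ~addr_mask) >> PAGE_SHIFT)
     *
     * hence (pfn & (~addr_mask >> PAGE_SHIFT)) is the page offset of
     * 'address' within its granule, and
     * (pfn | (~addr_mask >> PAGE_SHIFT)) + 1 is the first pfn past it. */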