// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2025 Ant Group * Author: Tiwei Bie */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vfio_user.h" int uml_vfio_user_open_container(void) { int r, fd; fd = open("/dev/vfio/vfio", O_RDWR); if (fd < 0) return -errno; r = ioctl(fd, VFIO_GET_API_VERSION); if (r != VFIO_API_VERSION) { r = r < 0 ? -errno : -EINVAL; goto error; } r = ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU); if (r <= 0) { r = r < 0 ? -errno : -EINVAL; goto error; } return fd; error: close(fd); return r; } int uml_vfio_user_setup_iommu(int container) { /* * This is a bit tricky. See the big comment in * vhost_user_set_mem_table() in virtio_uml.c. */ unsigned long reserved = uml_reserved - uml_physmem; struct vfio_iommu_type1_dma_map dma_map = { .argsz = sizeof(dma_map), .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, .vaddr = uml_reserved, .iova = reserved, .size = physmem_size - reserved, }; if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) < 0) return -errno; if (ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map) < 0) return -errno; return 0; } int uml_vfio_user_get_group_id(const char *device) { char *path, *buf, *end; const char *name; int r; path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL); if (!path) return -ENOMEM; sprintf(path, "/sys/bus/pci/devices/%s/iommu_group", device); buf = uml_kmalloc(PATH_MAX + 1, UM_GFP_KERNEL); if (!buf) { r = -ENOMEM; goto free_path; } r = readlink(path, buf, PATH_MAX); if (r < 0) { r = -errno; goto free_buf; } buf[r] = '\0'; name = basename(buf); r = strtoul(name, &end, 10); if (*end != '\0' || end == name) { r = -EINVAL; goto free_buf; } free_buf: kfree(buf); free_path: kfree(path); return r; } int uml_vfio_user_open_group(int group_id) { char *path; int fd; path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL); if (!path) return -ENOMEM; sprintf(path, "/dev/vfio/%d", group_id); fd = open(path, O_RDWR); if (fd < 0) { fd = -errno; goto out; } out: kfree(path); return fd; } int uml_vfio_user_set_container(int container, int group) { if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container) < 0) return -errno; return 0; } int uml_vfio_user_unset_container(int container, int group) { if (ioctl(group, VFIO_GROUP_UNSET_CONTAINER, &container) < 0) return -errno; return 0; } static int vfio_set_irqs(int device, int start, int count, int *irqfd) { struct vfio_irq_set *irq_set; int argsz = sizeof(*irq_set) + sizeof(*irqfd) * count; int err = 0; irq_set = uml_kmalloc(argsz, UM_GFP_KERNEL); if (!irq_set) return -ENOMEM; irq_set->argsz = argsz; irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; irq_set->start = start; irq_set->count = count; memcpy(irq_set->data, irqfd, sizeof(*irqfd) * count); if (ioctl(device, VFIO_DEVICE_SET_IRQS, irq_set) < 0) { err = -errno; goto out; } out: kfree(irq_set); return err; } int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev, int group, const char *device) { struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; int err, i; dev->device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, device); if (dev->device < 0) return -errno; if (ioctl(dev->device, VFIO_DEVICE_GET_INFO, &device_info) < 0) { err = -errno; goto close_device; } dev->num_regions = device_info.num_regions; if (dev->num_regions > VFIO_PCI_CONFIG_REGION_INDEX + 1) dev->num_regions = VFIO_PCI_CONFIG_REGION_INDEX + 1; dev->region = uml_kmalloc(sizeof(*dev->region) * dev->num_regions, UM_GFP_KERNEL); if (!dev->region) { err = -ENOMEM; goto close_device; } for (i = 0; i < dev->num_regions; i++) { struct vfio_region_info region = { .argsz = sizeof(region), .index = i, }; if (ioctl(dev->device, VFIO_DEVICE_GET_REGION_INFO, ®ion) < 0) { err = -errno; goto free_region; } dev->region[i].size = region.size; dev->region[i].offset = region.offset; } /* Only MSI-X is supported currently. */ irq_info.index = VFIO_PCI_MSIX_IRQ_INDEX; if (ioctl(dev->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) { err = -errno; goto free_region; } dev->irq_count = irq_info.count; dev->irqfd = uml_kmalloc(sizeof(int) * dev->irq_count, UM_GFP_KERNEL); if (!dev->irqfd) { err = -ENOMEM; goto free_region; } memset(dev->irqfd, -1, sizeof(int) * dev->irq_count); err = vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd); if (err) goto free_irqfd; return 0; free_irqfd: kfree(dev->irqfd); free_region: kfree(dev->region); close_device: close(dev->device); return err; } void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev) { kfree(dev->irqfd); kfree(dev->region); close(dev->device); } int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index) { int irqfd; irqfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); if (irqfd < 0) return -errno; dev->irqfd[index] = irqfd; return irqfd; } void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index) { close(dev->irqfd[index]); dev->irqfd[index] = -1; } int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev) { return vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd); } static int vfio_region_read(struct uml_vfio_user_device *dev, unsigned int index, uint64_t offset, void *buf, uint64_t size) { if (index >= dev->num_regions || offset + size > dev->region[index].size) return -EINVAL; if (pread(dev->device, buf, size, dev->region[index].offset + offset) < 0) return -errno; return 0; } static int vfio_region_write(struct uml_vfio_user_device *dev, unsigned int index, uint64_t offset, const void *buf, uint64_t size) { if (index >= dev->num_regions || offset + size > dev->region[index].size) return -EINVAL; if (pwrite(dev->device, buf, size, dev->region[index].offset + offset) < 0) return -errno; return 0; } int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev, unsigned int offset, void *buf, int size) { return vfio_region_read(dev, VFIO_PCI_CONFIG_REGION_INDEX, offset, buf, size); } int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev, unsigned int offset, const void *buf, int size) { return vfio_region_write(dev, VFIO_PCI_CONFIG_REGION_INDEX, offset, buf, size); } int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar, unsigned int offset, void *buf, int size) { return vfio_region_read(dev, bar, offset, buf, size); } int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar, unsigned int offset, const void *buf, int size) { return vfio_region_write(dev, bar, offset, buf, size); }