summary | refs | log | tree | commit | diff
path: root/drivers/iommu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu')
-rw-r--r-- drivers/iommu/Kconfig 158
-rw-r--r-- drivers/iommu/Makefile 6
-rw-r--r-- drivers/iommu/amd/Makefile 2
-rw-r--r-- drivers/iommu/amd/amd_iommu.h 2
-rw-r--r-- drivers/iommu/amd/amd_iommu_types.h 10
-rw-r--r-- drivers/iommu/amd/init.c 94
-rw-r--r-- drivers/iommu/amd/io_pgtable.c 38
-rw-r--r-- drivers/iommu/amd/io_pgtable_v2.c 12
-rw-r--r-- drivers/iommu/amd/iommu.c 94
-rw-r--r-- drivers/iommu/amd/ppr.c 2
-rw-r--r-- drivers/iommu/apple-dart.c 3
-rw-r--r-- drivers/iommu/arm/Kconfig 144
-rw-r--r-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c 86
-rw-r--r-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 138
-rw-r--r-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h 39
-rw-r--r-- drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c 9
-rw-r--r-- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c 44
-rw-r--r-- drivers/iommu/arm/arm-smmu/arm-smmu.c 6
-rw-r--r-- drivers/iommu/dma-iommu.c 493
-rw-r--r-- drivers/iommu/exynos-iommu.c 12
-rw-r--r-- drivers/iommu/fsl_pamu_domain.c 2
-rw-r--r-- drivers/iommu/intel/Makefile 7
-rw-r--r-- drivers/iommu/intel/dmar.c 14
-rw-r--r-- drivers/iommu/intel/iommu.c 244
-rw-r--r-- drivers/iommu/intel/iommu.h 62
-rw-r--r-- drivers/iommu/intel/irq_remapping.c 12
-rw-r--r-- drivers/iommu/intel/nested.c 20
-rw-r--r-- drivers/iommu/intel/pasid.c 13
-rw-r--r-- drivers/iommu/intel/pasid.h 1
-rw-r--r-- drivers/iommu/intel/prq.c 7
-rw-r--r-- drivers/iommu/intel/svm.c 9
-rw-r--r-- drivers/iommu/io-pgtable-arm.c 58
-rw-r--r-- drivers/iommu/io-pgtable-dart.c 23
-rw-r--r-- drivers/iommu/iommu-pages.c 119
-rw-r--r-- drivers/iommu/iommu-pages.h 195
-rw-r--r-- drivers/iommu/iommu-sva.c 18
-rw-r--r-- drivers/iommu/iommu.c 191
-rw-r--r-- drivers/iommu/iommufd/device.c 59
-rw-r--r-- drivers/iommu/iommufd/eventq.c 48
-rw-r--r-- drivers/iommu/iommufd/iommufd_private.h 6
-rw-r--r-- drivers/iommu/iommufd/selftest.c 57
-rw-r--r-- drivers/iommu/ipmmu-vmsa.c 3
-rw-r--r-- drivers/iommu/mtk_iommu.c 37
-rw-r--r-- drivers/iommu/riscv/Makefile 2
-rw-r--r-- drivers/iommu/riscv/iommu.c 43
-rw-r--r-- drivers/iommu/rockchip-iommu.c 14
-rw-r--r-- drivers/iommu/s390-iommu.c 345
-rw-r--r-- drivers/iommu/sun50i-iommu.c 6
-rw-r--r-- drivers/iommu/tegra-smmu.c 111
-rw-r--r-- drivers/iommu/virtio-iommu.c 187
50 files changed, 1990 insertions, 1315 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index cd750f512dee..0a33d995d15d 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -192,6 +192,7 @@ config MSM_IOMMU
If unsure, say N here.
source "drivers/iommu/amd/Kconfig"
+source "drivers/iommu/arm/Kconfig"
source "drivers/iommu/intel/Kconfig"
source "drivers/iommu/iommufd/Kconfig"
source "drivers/iommu/riscv/Kconfig"
@@ -199,7 +200,6 @@ source "drivers/iommu/riscv/Kconfig"
config IRQ_REMAP
bool "Support for Interrupt Remapping"
depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI
- select DMAR_TABLE if INTEL_IOMMU
help
Supports Interrupt remapping for IO-APIC and MSI devices.
To use x2apic mode in the CPU's which support x2APIC enhancements or
@@ -314,150 +314,6 @@ config APPLE_DART
Say Y here if you are using an Apple SoC.
-# ARM IOMMU support
-config ARM_SMMU
- tristate "ARM Ltd. System MMU (SMMU) Support"
- depends on ARM64 || ARM || COMPILE_TEST
- depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE
- select IOMMU_API
- select IOMMU_IO_PGTABLE_LPAE
- select ARM_DMA_USE_IOMMU if ARM
- help
- Support for implementations of the ARM System MMU architecture
- versions 1 and 2.
-
- Say Y here if your SoC includes an IOMMU device implementing
- the ARM SMMU architecture.
-
-config ARM_SMMU_LEGACY_DT_BINDINGS
- bool "Support the legacy \"mmu-masters\" devicetree bindings"
- depends on ARM_SMMU=y && OF
- help
- Support for the badly designed and deprecated "mmu-masters"
- devicetree bindings. This allows some DMA masters to attach
- to the SMMU but does not provide any support via the DMA API.
- If you're lucky, you might be able to get VFIO up and running.
-
- If you say Y here then you'll make me very sad. Instead, say N
- and move your firmware to the utopian future that was 2016.
-
-config ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT
- bool "Default to disabling bypass on ARM SMMU v1 and v2"
- depends on ARM_SMMU
- default y
- help
- Say Y here to (by default) disable bypass streams such that
- incoming transactions from devices that are not attached to
- an iommu domain will report an abort back to the device and
- will not be allowed to pass through the SMMU.
-
- Any old kernels that existed before this KConfig was
- introduced would default to _allowing_ bypass (AKA the
- equivalent of NO for this config). However the default for
- this option is YES because the old behavior is insecure.
-
- There are few reasons to allow unmatched stream bypass, and
- even fewer good ones. If saying YES here breaks your board
- you should work on fixing your board. This KConfig option
- is expected to be removed in the future and we'll simply
- hardcode the bypass disable in the code.
-
- NOTE: the kernel command line parameter
- 'arm-smmu.disable_bypass' will continue to override this
- config.
-
-config ARM_SMMU_MMU_500_CPRE_ERRATA
- bool "Enable errata workaround for CPRE in SMMU reset path"
- depends on ARM_SMMU
- default y
- help
- Say Y here (by default) to apply workaround to disable
- MMU-500's next-page prefetcher for sake of 4 known errata.
-
- Say N here only when it is sure that any errata related to
- prefetch enablement are not applicable on the platform.
- Refer silicon-errata.rst for info on errata IDs.
-
-config ARM_SMMU_QCOM
- def_tristate y
- depends on ARM_SMMU && ARCH_QCOM
- select QCOM_SCM
- help
- When running on a Qualcomm platform that has the custom variant
- of the ARM SMMU, this needs to be built into the SMMU driver.
-
-config ARM_SMMU_QCOM_DEBUG
- bool "ARM SMMU QCOM implementation defined debug support"
- depends on ARM_SMMU_QCOM=y
- help
- Support for implementation specific debug features in ARM SMMU
- hardware found in QTI platforms. This include support for
- the Translation Buffer Units (TBU) that can be used to obtain
- additional information when debugging memory management issues
- like context faults.
-
- Say Y here to enable debug for issues such as context faults
- or TLB sync timeouts which requires implementation defined
- register dumps.
-
-config ARM_SMMU_V3
- tristate "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
- depends on ARM64
- select IOMMU_API
- select IOMMU_IO_PGTABLE_LPAE
- select GENERIC_MSI_IRQ
- select IOMMUFD_DRIVER if IOMMUFD
- help
- Support for implementations of the ARM System MMU architecture
- version 3 providing translation support to a PCIe root complex.
-
- Say Y here if your system includes an IOMMU device implementing
- the ARM SMMUv3 architecture.
-
-if ARM_SMMU_V3
-config ARM_SMMU_V3_SVA
- bool "Shared Virtual Addressing support for the ARM SMMUv3"
- select IOMMU_SVA
- select IOMMU_IOPF
- select MMU_NOTIFIER
- help
- Support for sharing process address spaces with devices using the
- SMMUv3.
-
- Say Y here if your system supports SVA extensions such as PCIe PASID
- and PRI.
-
-config ARM_SMMU_V3_IOMMUFD
- bool "Enable IOMMUFD features for ARM SMMUv3 (EXPERIMENTAL)"
- depends on IOMMUFD
- help
- Support for IOMMUFD features intended to support virtual machines
- with accelerated virtual IOMMUs.
-
- Say Y here if you are doing development and testing on this feature.
-
-config ARM_SMMU_V3_KUNIT_TEST
- tristate "KUnit tests for arm-smmu-v3 driver" if !KUNIT_ALL_TESTS
- depends on KUNIT
- depends on ARM_SMMU_V3_SVA
- default KUNIT_ALL_TESTS
- help
- Enable this option to unit-test arm-smmu-v3 driver functions.
-
- If unsure, say N.
-
-config TEGRA241_CMDQV
- bool "NVIDIA Tegra241 CMDQ-V extension support for ARM SMMUv3"
- depends on ACPI
- help
- Support for NVIDIA CMDQ-Virtualization extension for ARM SMMUv3. The
- CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues
- support, except with virtualization capabilities.
-
- Say Y here if your system is NVIDIA Tegra241 (Grace) or it has the same
- CMDQ-V extension.
-endif
-
config S390_IOMMU
def_bool y if S390 && PCI
depends on S390 && PCI
@@ -494,18 +350,6 @@ config MTK_IOMMU_V1
if unsure, say N here.
-config QCOM_IOMMU
- # Note: iommu drivers cannot (yet?) be built as modules
- bool "Qualcomm IOMMU Support"
- depends on ARCH_QCOM || COMPILE_TEST
- depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE
- select QCOM_SCM
- select IOMMU_API
- select IOMMU_IO_PGTABLE_LPAE
- select ARM_DMA_USE_IOMMU
- help
- Support for IOMMU on certain Qualcomm SoCs.
-
config HYPERV_IOMMU
bool "Hyper-V IRQ Handling"
depends on HYPERV && X86
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 5e5a83c6c2aa..355294fa9033 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -1,6 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y += amd/ intel/ arm/ iommufd/ riscv/
+obj-y += arm/ iommufd/
+obj-$(CONFIG_AMD_IOMMU) += amd/
+obj-$(CONFIG_INTEL_IOMMU) += intel/
+obj-$(CONFIG_RISCV_IOMMU) += riscv/
obj-$(CONFIG_IOMMU_API) += iommu.o
+obj-$(CONFIG_IOMMU_SUPPORT) += iommu-pages.o
obj-$(CONFIG_IOMMU_API) += iommu-traces.o
obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
obj-$(CONFIG_IOMMU_DEBUGFS) += iommu-debugfs.o
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index 9de33b2d42f5..59c04a67f398 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o pasid.o
+obj-y += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o pasid.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 220c598b7e14..29a8864381c3 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -147,6 +147,8 @@ static inline int get_pci_sbdf_id(struct pci_dev *pdev)
return PCI_SEG_DEVID_TO_SBDF(seg, devid);
}
+bool amd_iommu_ht_range_ignore(void);
+
/*
* This must be called after device probe completes. During probe
* use rlookup_amd_iommu() get the iommu.
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 5089b58e528a..ccbab3a4811a 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -29,8 +29,6 @@
* some size calculation constants
*/
#define DEV_TABLE_ENTRY_SIZE 32
-#define ALIAS_TABLE_ENTRY_SIZE 2
-#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *))
/* Capability offsets used by the driver */
#define MMIO_CAP_HDR_OFFSET 0x00
@@ -111,6 +109,7 @@
#define FEATURE_SNPAVICSUP GENMASK_ULL(7, 5)
#define FEATURE_SNPAVICSUP_GAM(x) \
(FIELD_GET(FEATURE_SNPAVICSUP, x) == 0x1)
+#define FEATURE_HT_RANGE_IGNORE BIT_ULL(11)
#define FEATURE_NUM_INT_REMAP_SUP GENMASK_ULL(9, 8)
#define FEATURE_NUM_INT_REMAP_SUP_2K(x) \
@@ -316,6 +315,7 @@
#define DTE_IRQ_REMAP_INTCTL (2ULL << 60)
#define DTE_IRQ_REMAP_ENABLE 1ULL
+#define DTE_INTTAB_ALIGNMENT 128
#define DTE_INTTABLEN_MASK (0xfULL << 1)
#define DTE_INTTABLEN_VALUE_512 9ULL
#define DTE_INTTABLEN_512 (DTE_INTTABLEN_VALUE_512 << 1)
@@ -616,12 +616,6 @@ struct amd_iommu_pci_seg {
/* Size of the device table */
u32 dev_table_size;
- /* Size of the alias table */
- u32 alias_table_size;
-
- /* Size of the rlookup table */
- u32 rlookup_table_size;
-
/*
* device table virtual address
*
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 14aa0d77df26..c06b62f87b9b 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -243,17 +243,14 @@ static void init_translation_status(struct amd_iommu *iommu)
iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
}
-static inline unsigned long tbl_size(int entry_size, int last_bdf)
+int amd_iommu_get_num_iommus(void)
{
- unsigned shift = PAGE_SHIFT +
- get_order((last_bdf + 1) * entry_size);
-
- return 1UL << shift;
+ return amd_iommus_present;
}
-int amd_iommu_get_num_iommus(void)
+bool amd_iommu_ht_range_ignore(void)
{
- return amd_iommus_present;
+ return check_feature2(FEATURE_HT_RANGE_IGNORE);
}
/*
@@ -634,8 +631,8 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_
/* Allocate per PCI segment device table */
static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
{
- pci_seg->dev_table = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32,
- get_order(pci_seg->dev_table_size));
+ pci_seg->dev_table = iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32,
+ pci_seg->dev_table_size);
if (!pci_seg->dev_table)
return -ENOMEM;
@@ -644,16 +641,16 @@ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg)
{
- iommu_free_pages(pci_seg->dev_table,
- get_order(pci_seg->dev_table_size));
+ iommu_free_pages(pci_seg->dev_table);
pci_seg->dev_table = NULL;
}
/* Allocate per PCI segment IOMMU rlookup table. */
static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
{
- pci_seg->rlookup_table = iommu_alloc_pages(GFP_KERNEL,
- get_order(pci_seg->rlookup_table_size));
+ pci_seg->rlookup_table = kvcalloc(pci_seg->last_bdf + 1,
+ sizeof(*pci_seg->rlookup_table),
+ GFP_KERNEL);
if (pci_seg->rlookup_table == NULL)
return -ENOMEM;
@@ -662,17 +659,15 @@ static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
{
- iommu_free_pages(pci_seg->rlookup_table,
- get_order(pci_seg->rlookup_table_size));
+ kvfree(pci_seg->rlookup_table);
pci_seg->rlookup_table = NULL;
}
static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
{
- pci_seg->irq_lookup_table = iommu_alloc_pages(GFP_KERNEL,
- get_order(pci_seg->rlookup_table_size));
- kmemleak_alloc(pci_seg->irq_lookup_table,
- pci_seg->rlookup_table_size, 1, GFP_KERNEL);
+ pci_seg->irq_lookup_table = kvcalloc(pci_seg->last_bdf + 1,
+ sizeof(*pci_seg->irq_lookup_table),
+ GFP_KERNEL);
if (pci_seg->irq_lookup_table == NULL)
return -ENOMEM;
@@ -681,9 +676,7 @@ static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_se
static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
{
- kmemleak_free(pci_seg->irq_lookup_table);
- iommu_free_pages(pci_seg->irq_lookup_table,
- get_order(pci_seg->rlookup_table_size));
+ kvfree(pci_seg->irq_lookup_table);
pci_seg->irq_lookup_table = NULL;
}
@@ -691,8 +684,9 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg)
{
int i;
- pci_seg->alias_table = iommu_alloc_pages(GFP_KERNEL,
- get_order(pci_seg->alias_table_size));
+ pci_seg->alias_table = kvmalloc_array(pci_seg->last_bdf + 1,
+ sizeof(*pci_seg->alias_table),
+ GFP_KERNEL);
if (!pci_seg->alias_table)
return -ENOMEM;
@@ -707,8 +701,7 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg)
static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
{
- iommu_free_pages(pci_seg->alias_table,
- get_order(pci_seg->alias_table_size));
+ kvfree(pci_seg->alias_table);
pci_seg->alias_table = NULL;
}
@@ -719,8 +712,7 @@ static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
*/
static int __init alloc_command_buffer(struct amd_iommu *iommu)
{
- iommu->cmd_buf = iommu_alloc_pages(GFP_KERNEL,
- get_order(CMD_BUFFER_SIZE));
+ iommu->cmd_buf = iommu_alloc_pages_sz(GFP_KERNEL, CMD_BUFFER_SIZE);
return iommu->cmd_buf ? 0 : -ENOMEM;
}
@@ -817,20 +809,22 @@ static void iommu_disable_command_buffer(struct amd_iommu *iommu)
static void __init free_command_buffer(struct amd_iommu *iommu)
{
- iommu_free_pages(iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
+ iommu_free_pages(iommu->cmd_buf);
}
void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp,
size_t size)
{
- int order = get_order(size);
- void *buf = iommu_alloc_pages(gfp, order);
+ void *buf;
- if (buf &&
- check_feature(FEATURE_SNP) &&
- set_memory_4k((unsigned long)buf, (1 << order))) {
- iommu_free_pages(buf, order);
- buf = NULL;
+ size = PAGE_ALIGN(size);
+ buf = iommu_alloc_pages_sz(gfp, size);
+ if (!buf)
+ return NULL;
+ if (check_feature(FEATURE_SNP) &&
+ set_memory_4k((unsigned long)buf, size / PAGE_SIZE)) {
+ iommu_free_pages(buf);
+ return NULL;
}
return buf;
@@ -873,14 +867,14 @@ static void iommu_disable_event_buffer(struct amd_iommu *iommu)
static void __init free_event_buffer(struct amd_iommu *iommu)
{
- iommu_free_pages(iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
+ iommu_free_pages(iommu->evt_buf);
}
static void free_ga_log(struct amd_iommu *iommu)
{
#ifdef CONFIG_IRQ_REMAP
- iommu_free_pages(iommu->ga_log, get_order(GA_LOG_SIZE));
- iommu_free_pages(iommu->ga_log_tail, get_order(8));
+ iommu_free_pages(iommu->ga_log);
+ iommu_free_pages(iommu->ga_log_tail);
#endif
}
@@ -925,11 +919,11 @@ static int iommu_init_ga_log(struct amd_iommu *iommu)
if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
return 0;
- iommu->ga_log = iommu_alloc_pages(GFP_KERNEL, get_order(GA_LOG_SIZE));
+ iommu->ga_log = iommu_alloc_pages_sz(GFP_KERNEL, GA_LOG_SIZE);
if (!iommu->ga_log)
goto err_out;
- iommu->ga_log_tail = iommu_alloc_pages(GFP_KERNEL, get_order(8));
+ iommu->ga_log_tail = iommu_alloc_pages_sz(GFP_KERNEL, 8);
if (!iommu->ga_log_tail)
goto err_out;
@@ -950,7 +944,7 @@ static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
static void __init free_cwwb_sem(struct amd_iommu *iommu)
{
if (iommu->cmd_sem)
- iommu_free_page((void *)iommu->cmd_sem);
+ iommu_free_pages((void *)iommu->cmd_sem);
}
static void iommu_enable_xt(struct amd_iommu *iommu)
@@ -1024,8 +1018,8 @@ static bool __copy_device_table(struct amd_iommu *iommu)
if (!old_devtb)
return false;
- pci_seg->old_dev_tbl_cpy = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32,
- get_order(pci_seg->dev_table_size));
+ pci_seg->old_dev_tbl_cpy = iommu_alloc_pages_sz(
+ GFP_KERNEL | GFP_DMA32, pci_seg->dev_table_size);
if (pci_seg->old_dev_tbl_cpy == NULL) {
pr_err("Failed to allocate memory for copying old device table!\n");
memunmap(old_devtb);
@@ -1599,9 +1593,9 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id,
pci_seg->last_bdf = last_bdf;
DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf);
- pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf);
- pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf);
- pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf);
+ pci_seg->dev_table_size =
+ max(roundup_pow_of_two((last_bdf + 1) * DEV_TABLE_ENTRY_SIZE),
+ SZ_4K);
pci_seg->id = id;
init_llist_head(&pci_seg->dev_data_list);
@@ -2789,8 +2783,7 @@ static void early_enable_iommus(void)
for_each_pci_segment(pci_seg) {
if (pci_seg->old_dev_tbl_cpy != NULL) {
- iommu_free_pages(pci_seg->old_dev_tbl_cpy,
- get_order(pci_seg->dev_table_size));
+ iommu_free_pages(pci_seg->old_dev_tbl_cpy);
pci_seg->old_dev_tbl_cpy = NULL;
}
}
@@ -2803,8 +2796,7 @@ static void early_enable_iommus(void)
pr_info("Copied DEV table from previous kernel.\n");
for_each_pci_segment(pci_seg) {
- iommu_free_pages(pci_seg->dev_table,
- get_order(pci_seg->dev_table_size));
+ iommu_free_pages(pci_seg->dev_table);
pci_seg->dev_table = pci_seg->old_dev_tbl_cpy;
}
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 26cf562dde11..4d308c071134 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -47,14 +47,7 @@ static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
return fpte;
}
-static void free_pt_page(u64 *pt, struct list_head *freelist)
-{
- struct page *p = virt_to_page(pt);
-
- list_add_tail(&p->lru, freelist);
-}
-
-static void free_pt_lvl(u64 *pt, struct list_head *freelist, int lvl)
+static void free_pt_lvl(u64 *pt, struct iommu_pages_list *freelist, int lvl)
{
u64 *p;
int i;
@@ -77,20 +70,20 @@ static void free_pt_lvl(u64 *pt, struct list_head *freelist, int lvl)
if (lvl > 2)
free_pt_lvl(p, freelist, lvl - 1);
else
- free_pt_page(p, freelist);
+ iommu_pages_list_add(freelist, p);
}
- free_pt_page(pt, freelist);
+ iommu_pages_list_add(freelist, pt);
}
-static void free_sub_pt(u64 *root, int mode, struct list_head *freelist)
+static void free_sub_pt(u64 *root, int mode, struct iommu_pages_list *freelist)
{
switch (mode) {
case PAGE_MODE_NONE:
case PAGE_MODE_7_LEVEL:
break;
case PAGE_MODE_1_LEVEL:
- free_pt_page(root, freelist);
+ iommu_pages_list_add(freelist, root);
break;
case PAGE_MODE_2_LEVEL:
case PAGE_MODE_3_LEVEL:
@@ -121,7 +114,7 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable,
bool ret = true;
u64 *pte;
- pte = iommu_alloc_page_node(cfg->amd.nid, gfp);
+ pte = iommu_alloc_pages_node_sz(cfg->amd.nid, gfp, SZ_4K);
if (!pte)
return false;
@@ -146,7 +139,7 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable,
out:
spin_unlock_irqrestore(&domain->lock, flags);
- iommu_free_page(pte);
+ iommu_free_pages(pte);
return ret;
}
@@ -213,7 +206,8 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
if (!IOMMU_PTE_PRESENT(__pte) ||
pte_level == PAGE_MODE_NONE) {
- page = iommu_alloc_page_node(cfg->amd.nid, gfp);
+ page = iommu_alloc_pages_node_sz(cfg->amd.nid, gfp,
+ SZ_4K);
if (!page)
return NULL;
@@ -222,7 +216,7 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
/* pte could have been changed somewhere. */
if (!try_cmpxchg64(pte, &__pte, __npte))
- iommu_free_page(page);
+ iommu_free_pages(page);
else if (IOMMU_PTE_PRESENT(__pte))
*updated = true;
@@ -299,7 +293,8 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
return pte;
}
-static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
+static void free_clear_pte(u64 *pte, u64 pteval,
+ struct iommu_pages_list *freelist)
{
u64 *pt;
int mode;
@@ -328,7 +323,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
int prot, gfp_t gfp, size_t *mapped)
{
struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
- LIST_HEAD(freelist);
+ struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);
bool updated = false;
u64 __pte, *pte;
int ret, i, count;
@@ -353,7 +348,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
for (i = 0; i < count; ++i)
free_clear_pte(&pte[i], pte[i], &freelist);
- if (!list_empty(&freelist))
+ if (!iommu_pages_list_empty(&freelist))
updated = true;
if (count > 1) {
@@ -524,7 +519,7 @@ static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops,
static void v1_free_pgtable(struct io_pgtable *iop)
{
struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl);
- LIST_HEAD(freelist);
+ struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);
if (pgtable->mode == PAGE_MODE_NONE)
return;
@@ -541,7 +536,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
{
struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
- pgtable->root = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL);
+ pgtable->root =
+ iommu_alloc_pages_node_sz(cfg->amd.nid, GFP_KERNEL, SZ_4K);
if (!pgtable->root)
return NULL;
pgtable->mode = PAGE_MODE_3_LEVEL;
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
index a56a27396305..b47941353ccb 100644
--- a/drivers/iommu/amd/io_pgtable_v2.c
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -121,10 +121,10 @@ static void free_pgtable(u64 *pt, int level)
if (level > 2)
free_pgtable(p, level - 1);
else
- iommu_free_page(p);
+ iommu_free_pages(p);
}
- iommu_free_page(pt);
+ iommu_free_pages(pt);
}
/* Allocate page table */
@@ -152,14 +152,14 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova,
}
if (!IOMMU_PTE_PRESENT(__pte)) {
- page = iommu_alloc_page_node(nid, gfp);
+ page = iommu_alloc_pages_node_sz(nid, gfp, SZ_4K);
if (!page)
return NULL;
__npte = set_pgtable_attr(page);
/* pte could have been changed somewhere. */
if (!try_cmpxchg64(pte, &__pte, __npte))
- iommu_free_page(page);
+ iommu_free_pages(page);
else if (IOMMU_PTE_PRESENT(__pte))
*updated = true;
@@ -181,7 +181,7 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova,
if (pg_size == IOMMU_PAGE_SIZE_1G)
free_pgtable(__pte, end_level - 1);
else if (pg_size == IOMMU_PAGE_SIZE_2M)
- iommu_free_page(__pte);
+ iommu_free_pages(__pte);
}
return pte;
@@ -346,7 +346,7 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
int ias = IOMMU_IN_ADDR_BIT_SIZE;
- pgtable->pgd = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL);
+ pgtable->pgd = iommu_alloc_pages_node_sz(cfg->amd.nid, GFP_KERNEL, SZ_4K);
if (!pgtable->pgd)
return NULL;
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index f34209b08b4c..3117d99cf83d 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -241,7 +241,9 @@ static inline int get_acpihid_device_id(struct device *dev,
struct acpihid_map_entry **entry)
{
struct acpi_device *adev = ACPI_COMPANION(dev);
- struct acpihid_map_entry *p;
+ struct acpihid_map_entry *p, *p1 = NULL;
+ int hid_count = 0;
+ bool fw_bug;
if (!adev)
return -ENODEV;
@@ -249,12 +251,33 @@ static inline int get_acpihid_device_id(struct device *dev,
list_for_each_entry(p, &acpihid_map, list) {
if (acpi_dev_hid_uid_match(adev, p->hid,
p->uid[0] ? p->uid : NULL)) {
- if (entry)
- *entry = p;
- return p->devid;
+ p1 = p;
+ fw_bug = false;
+ hid_count = 1;
+ break;
+ }
+
+ /*
+ * Count HID matches w/o UID, raise FW_BUG but allow exactly one match
+ */
+ if (acpi_dev_hid_match(adev, p->hid)) {
+ p1 = p;
+ hid_count++;
+ fw_bug = true;
}
}
- return -EINVAL;
+
+ if (!p1)
+ return -EINVAL;
+ if (fw_bug)
+ dev_err_once(dev, FW_BUG "No ACPI device matched UID, but %d device%s matched HID.\n",
+ hid_count, hid_count > 1 ? "s" : "");
+ if (hid_count > 1)
+ return -EINVAL;
+ if (entry)
+ *entry = p1;
+
+ return p1->devid;
}
static inline int get_device_sbdf_id(struct device *dev)
@@ -982,6 +1005,14 @@ int amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
{
iommu_ga_log_notifier = notifier;
+ /*
+ * Ensure all in-flight IRQ handlers run to completion before returning
+ * to the caller, e.g. to ensure module code isn't unloaded while it's
+ * being executed in the IRQ handler.
+ */
+ if (!notifier)
+ synchronize_rcu();
+
return 0;
}
EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier);
@@ -1812,7 +1843,7 @@ static void free_gcr3_tbl_level1(u64 *tbl)
ptr = iommu_phys_to_virt(tbl[i] & PAGE_MASK);
- iommu_free_page(ptr);
+ iommu_free_pages(ptr);
}
}
@@ -1845,7 +1876,7 @@ static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info)
/* Free per device domain ID */
pdom_id_free(gcr3_info->domid);
- iommu_free_page(gcr3_info->gcr3_tbl);
+ iommu_free_pages(gcr3_info->gcr3_tbl);
gcr3_info->gcr3_tbl = NULL;
}
@@ -1884,7 +1915,7 @@ static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info,
return -ENOSPC;
gcr3_info->domid = domid;
- gcr3_info->gcr3_tbl = iommu_alloc_page_node(nid, GFP_ATOMIC);
+ gcr3_info->gcr3_tbl = iommu_alloc_pages_node_sz(nid, GFP_ATOMIC, SZ_4K);
if (gcr3_info->gcr3_tbl == NULL) {
pdom_id_free(domid);
return -ENOMEM;
@@ -2908,6 +2939,9 @@ static void amd_iommu_get_resv_regions(struct device *dev,
return;
list_add_tail(&region->list, head);
+ if (amd_iommu_ht_range_ignore())
+ return;
+
region = iommu_alloc_resv_region(HT_RANGE_START,
HT_RANGE_END - HT_RANGE_START + 1,
0, IOMMU_RESV_RESERVED, GFP_KERNEL);
@@ -2984,38 +3018,6 @@ static const struct iommu_dirty_ops amd_dirty_ops = {
.read_and_clear_dirty = amd_iommu_read_and_clear_dirty,
};
-static int amd_iommu_dev_enable_feature(struct device *dev,
- enum iommu_dev_features feat)
-{
- int ret = 0;
-
- switch (feat) {
- case IOMMU_DEV_FEAT_IOPF:
- case IOMMU_DEV_FEAT_SVA:
- break;
- default:
- ret = -EINVAL;
- break;
- }
- return ret;
-}
-
-static int amd_iommu_dev_disable_feature(struct device *dev,
- enum iommu_dev_features feat)
-{
- int ret = 0;
-
- switch (feat) {
- case IOMMU_DEV_FEAT_IOPF:
- case IOMMU_DEV_FEAT_SVA:
- break;
- default:
- ret = -EINVAL;
- break;
- }
- return ret;
-}
-
const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.blocked_domain = &blocked_domain,
@@ -3029,8 +3031,6 @@ const struct iommu_ops amd_iommu_ops = {
.get_resv_regions = amd_iommu_get_resv_regions,
.is_attach_deferred = amd_iommu_is_attach_deferred,
.def_domain_type = amd_iommu_def_domain_type,
- .dev_enable_feat = amd_iommu_dev_enable_feature,
- .dev_disable_feat = amd_iommu_dev_disable_feature,
.page_response = amd_iommu_page_response,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = amd_iommu_attach_device,
@@ -3129,7 +3129,7 @@ static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid)
return table;
}
-static struct irq_remap_table *__alloc_irq_table(int nid, int order)
+static struct irq_remap_table *__alloc_irq_table(int nid, size_t size)
{
struct irq_remap_table *table;
@@ -3137,7 +3137,8 @@ static struct irq_remap_table *__alloc_irq_table(int nid, int order)
if (!table)
return NULL;
- table->table = iommu_alloc_pages_node(nid, GFP_KERNEL, order);
+ table->table = iommu_alloc_pages_node_sz(
+ nid, GFP_KERNEL, max(DTE_INTTAB_ALIGNMENT, size));
if (!table->table) {
kfree(table);
return NULL;
@@ -3191,7 +3192,6 @@ static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu,
struct irq_remap_table *new_table = NULL;
struct amd_iommu_pci_seg *pci_seg;
unsigned long flags;
- int order = get_order(get_irq_table_size(max_irqs));
int nid = iommu && iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
u16 alias;
@@ -3211,7 +3211,7 @@ static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu,
spin_unlock_irqrestore(&iommu_table_lock, flags);
/* Nothing there yet, allocate new irq remapping table */
- new_table = __alloc_irq_table(nid, order);
+ new_table = __alloc_irq_table(nid, get_irq_table_size(max_irqs));
if (!new_table)
return NULL;
@@ -3246,7 +3246,7 @@ out_unlock:
spin_unlock_irqrestore(&iommu_table_lock, flags);
if (new_table) {
- iommu_free_pages(new_table->table, order);
+ iommu_free_pages(new_table->table);
kfree(new_table);
}
return table;
diff --git a/drivers/iommu/amd/ppr.c b/drivers/iommu/amd/ppr.c
index 7c67d69f0b8c..e6767c057d01 100644
--- a/drivers/iommu/amd/ppr.c
+++ b/drivers/iommu/amd/ppr.c
@@ -48,7 +48,7 @@ void amd_iommu_enable_ppr_log(struct amd_iommu *iommu)
void __init amd_iommu_free_ppr_log(struct amd_iommu *iommu)
{
- iommu_free_pages(iommu->ppr_log, get_order(PPR_LOG_SIZE));
+ iommu_free_pages(iommu->ppr_log);
}
/*
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index e13501541fdd..757d24f67ad4 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -776,8 +776,7 @@ static void apple_dart_domain_free(struct iommu_domain *domain)
{
struct apple_dart_domain *dart_domain = to_dart_domain(domain);
- if (dart_domain->pgtbl_ops)
- free_io_pgtable_ops(dart_domain->pgtbl_ops);
+ free_io_pgtable_ops(dart_domain->pgtbl_ops);
kfree(dart_domain);
}
diff --git a/drivers/iommu/arm/Kconfig b/drivers/iommu/arm/Kconfig
new file mode 100644
index 000000000000..ef42bbe07dbe
--- /dev/null
+++ b/drivers/iommu/arm/Kconfig
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# ARM IOMMU support
+config ARM_SMMU
+ tristate "ARM Ltd. System MMU (SMMU) Support"
+ depends on ARM64 || ARM || COMPILE_TEST
+ depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE
+ select IOMMU_API
+ select IOMMU_IO_PGTABLE_LPAE
+ select ARM_DMA_USE_IOMMU if ARM
+ help
+ Support for implementations of the ARM System MMU architecture
+ versions 1 and 2.
+
+ Say Y here if your SoC includes an IOMMU device implementing
+ the ARM SMMU architecture.
+
+if ARM_SMMU
+config ARM_SMMU_LEGACY_DT_BINDINGS
+ bool "Support the legacy \"mmu-masters\" devicetree bindings"
+ depends on ARM_SMMU=y && OF
+ help
+ Support for the badly designed and deprecated "mmu-masters"
+ devicetree bindings. This allows some DMA masters to attach
+ to the SMMU but does not provide any support via the DMA API.
+ If you're lucky, you might be able to get VFIO up and running.
+
+ If you say Y here then you'll make me very sad. Instead, say N
+ and move your firmware to the utopian future that was 2016.
+
+config ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT
+ bool "Disable unmatched stream bypass by default" if EXPERT
+ default y
+ help
+ If your firmware is broken and fails to describe StreamIDs which
+ Linux should know about in order to manage the SMMU correctly and
+ securely, and you don't want to boot with the 'arm-smmu.disable_bypass=0'
+ command line parameter, then as a last resort you can turn it off
+ by default here. But don't. This option may be removed at any time.
+
+ Note that 'arm-smmu.disable_bypass=1' will still take precedence.
+
+config ARM_SMMU_MMU_500_CPRE_ERRATA
+ bool "Enable errata workaround for CPRE in SMMU reset path"
+ default y
+ help
+ Say Y here (by default) to apply the workaround that disables
+ MMU-500's next-page prefetcher for the sake of 4 known errata.
+
+ Say N here only when it is certain that none of the errata related
+ to prefetch enablement are applicable on the platform.
+ Refer to silicon-errata.rst for info on errata IDs.
+
+config ARM_SMMU_QCOM
+ def_tristate y
+ depends on ARCH_QCOM
+ select QCOM_SCM
+ help
+ When running on a Qualcomm platform that has the custom variant
+ of the ARM SMMU, this needs to be built into the SMMU driver.
+
+config ARM_SMMU_QCOM_DEBUG
+ bool "ARM SMMU QCOM implementation defined debug support"
+ depends on ARM_SMMU_QCOM=y
+ help
+ Support for implementation specific debug features in ARM SMMU
+ hardware found in QTI platforms. This includes support for
+ the Translation Buffer Units (TBU) that can be used to obtain
+ additional information when debugging memory management issues
+ like context faults.
+
+ Say Y here to enable debug for issues such as context faults
+ or TLB sync timeouts which require implementation defined
+ register dumps.
+endif
+
+config ARM_SMMU_V3
+ tristate "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
+ depends on ARM64
+ select IOMMU_API
+ select IOMMU_IO_PGTABLE_LPAE
+ select GENERIC_MSI_IRQ
+ select IOMMUFD_DRIVER if IOMMUFD
+ help
+ Support for implementations of the ARM System MMU architecture
+ version 3 providing translation support to a PCIe root complex.
+
+ Say Y here if your system includes an IOMMU device implementing
+ the ARM SMMUv3 architecture.
+
+if ARM_SMMU_V3
+config ARM_SMMU_V3_SVA
+ bool "Shared Virtual Addressing support for the ARM SMMUv3"
+ select IOMMU_SVA
+ select IOMMU_IOPF
+ select MMU_NOTIFIER
+ help
+ Support for sharing process address spaces with devices using the
+ SMMUv3.
+
+ Say Y here if your system supports SVA extensions such as PCIe PASID
+ and PRI.
+
+config ARM_SMMU_V3_IOMMUFD
+ bool "Enable IOMMUFD features for ARM SMMUv3 (EXPERIMENTAL)"
+ depends on IOMMUFD
+ help
+ Support for IOMMUFD features intended to support virtual machines
+ with accelerated virtual IOMMUs.
+
+ Say Y here if you are doing development and testing on this feature.
+
+config ARM_SMMU_V3_KUNIT_TEST
+ tristate "KUnit tests for arm-smmu-v3 driver" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ depends on ARM_SMMU_V3_SVA
+ default KUNIT_ALL_TESTS
+ help
+ Enable this option to unit-test arm-smmu-v3 driver functions.
+
+ If unsure, say N.
+
+config TEGRA241_CMDQV
+ bool "NVIDIA Tegra241 CMDQ-V extension support for ARM SMMUv3"
+ depends on ACPI
+ help
+ Support for NVIDIA CMDQ-Virtualization extension for ARM SMMUv3. The
+ CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues
+ support, except with virtualization capabilities.
+
+ Say Y here if your system is NVIDIA Tegra241 (Grace) or it has the same
+ CMDQ-V extension.
+endif
+
+config QCOM_IOMMU
+ # Note: iommu drivers cannot (yet?) be built as modules
+ bool "Qualcomm IOMMU Support"
+ depends on ARCH_QCOM || COMPILE_TEST
+ depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE
+ select QCOM_SCM
+ select IOMMU_API
+ select IOMMU_IO_PGTABLE_LPAE
+ select ARM_DMA_USE_IOMMU
+ help
+ Support for IOMMU on certain Qualcomm SoCs.
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 980cc6b33c43..0601dece0a0d 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -13,8 +13,6 @@
#include "arm-smmu-v3.h"
#include "../../io-pgtable-arm.h"
-static DEFINE_MUTEX(sva_lock);
-
static void __maybe_unused
arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain)
{
@@ -257,84 +255,6 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
return true;
}
-bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master)
-{
- /* We're not keeping track of SIDs in fault events */
- if (master->num_streams != 1)
- return false;
-
- return master->stall_enabled;
-}
-
-bool arm_smmu_master_sva_supported(struct arm_smmu_master *master)
-{
- if (!(master->smmu->features & ARM_SMMU_FEAT_SVA))
- return false;
-
- /* SSID support is mandatory for the moment */
- return master->ssid_bits;
-}
-
-bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master)
-{
- bool enabled;
-
- mutex_lock(&sva_lock);
- enabled = master->sva_enabled;
- mutex_unlock(&sva_lock);
- return enabled;
-}
-
-static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master)
-{
- struct device *dev = master->dev;
-
- /*
- * Drivers for devices supporting PRI or stall should enable IOPF first.
- * Others have device-specific fault handlers and don't need IOPF.
- */
- if (!arm_smmu_master_iopf_supported(master))
- return 0;
-
- if (!master->iopf_enabled)
- return -EINVAL;
-
- return iopf_queue_add_device(master->smmu->evtq.iopf, dev);
-}
-
-static void arm_smmu_master_sva_disable_iopf(struct arm_smmu_master *master)
-{
- struct device *dev = master->dev;
-
- if (!master->iopf_enabled)
- return;
-
- iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
-}
-
-int arm_smmu_master_enable_sva(struct arm_smmu_master *master)
-{
- int ret;
-
- mutex_lock(&sva_lock);
- ret = arm_smmu_master_sva_enable_iopf(master);
- if (!ret)
- master->sva_enabled = true;
- mutex_unlock(&sva_lock);
-
- return ret;
-}
-
-int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
-{
- mutex_lock(&sva_lock);
- arm_smmu_master_sva_disable_iopf(master);
- master->sva_enabled = false;
- mutex_unlock(&sva_lock);
-
- return 0;
-}
-
void arm_smmu_sva_notifier_synchronize(void)
{
/*
@@ -353,6 +273,9 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain,
struct arm_smmu_cd target;
int ret;
+ if (!(master->smmu->features & ARM_SMMU_FEAT_SVA))
+ return -EOPNOTSUPP;
+
/* Prevent arm_smmu_mm_release from being called while we are attaching */
if (!mmget_not_zero(domain->mm))
return -EINVAL;
@@ -406,6 +329,9 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev,
u32 asid;
int ret;
+ if (!(master->smmu->features & ARM_SMMU_FEAT_SVA))
+ return ERR_PTR(-EOPNOTSUPP);
+
smmu_domain = arm_smmu_domain_alloc();
if (IS_ERR(smmu_domain))
return ERR_CAST(smmu_domain);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 48d910399a1b..10cc6dc26b7b 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2720,6 +2720,7 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
static struct arm_smmu_master_domain *
arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain,
+ struct iommu_domain *domain,
struct arm_smmu_master *master,
ioasid_t ssid, bool nested_ats_flush)
{
@@ -2730,6 +2731,7 @@ arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain,
list_for_each_entry(master_domain, &smmu_domain->devices,
devices_elm) {
if (master_domain->master == master &&
+ master_domain->domain == domain &&
master_domain->ssid == ssid &&
master_domain->nested_ats_flush == nested_ats_flush)
return master_domain;
@@ -2756,6 +2758,58 @@ to_smmu_domain_devices(struct iommu_domain *domain)
return NULL;
}
+static int arm_smmu_enable_iopf(struct arm_smmu_master *master,
+ struct arm_smmu_master_domain *master_domain)
+{
+ int ret;
+
+ iommu_group_mutex_assert(master->dev);
+
+ if (!IS_ENABLED(CONFIG_ARM_SMMU_V3_SVA))
+ return -EOPNOTSUPP;
+
+ /*
+ * Drivers for devices supporting PRI or stall require IOPF. Others have
+ * device-specific fault handlers and don't need IOPF, so this is not a
+ * failure.
+ */
+ if (!master->stall_enabled)
+ return 0;
+
+ /* We're not keeping track of SIDs in fault events */
+ if (master->num_streams != 1)
+ return -EOPNOTSUPP;
+
+ if (master->iopf_refcount) { /* already added, just take a reference */
+ master->iopf_refcount++;
+ master_domain->using_iopf = true;
+ return 0;
+ }
+
+ ret = iopf_queue_add_device(master->smmu->evtq.iopf, master->dev);
+ if (ret)
+ return ret;
+ master->iopf_refcount = 1; /* first IOPF user on this master */
+ master_domain->using_iopf = true;
+ return 0;
+}
+
+static void arm_smmu_disable_iopf(struct arm_smmu_master *master,
+ struct arm_smmu_master_domain *master_domain)
+{
+ iommu_group_mutex_assert(master->dev);
+
+ if (!IS_ENABLED(CONFIG_ARM_SMMU_V3_SVA))
+ return;
+
+ if (!master_domain || !master_domain->using_iopf) /* no reference held */
+ return;
+
+ master->iopf_refcount--;
+ if (master->iopf_refcount == 0) /* last user gone, leave the queue */
+ iopf_queue_remove_device(master->smmu->evtq.iopf, master->dev);
+}
+
static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
struct iommu_domain *domain,
ioasid_t ssid)
@@ -2772,15 +2826,17 @@ static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
nested_ats_flush = to_smmu_nested_domain(domain)->enable_ats;
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
- master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid,
- nested_ats_flush);
+ master_domain = arm_smmu_find_master_domain(smmu_domain, domain, master,
+ ssid, nested_ats_flush);
if (master_domain) {
list_del(&master_domain->devices_elm);
- kfree(master_domain);
if (master->ats_enabled)
atomic_dec(&smmu_domain->nr_ats_masters);
}
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+
+ arm_smmu_disable_iopf(master, master_domain);
+ kfree(master_domain);
}
/*
@@ -2853,12 +2909,19 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
kfree(state->vmaster);
return -ENOMEM;
}
+ master_domain->domain = new_domain;
master_domain->master = master;
master_domain->ssid = state->ssid;
if (new_domain->type == IOMMU_DOMAIN_NESTED)
master_domain->nested_ats_flush =
to_smmu_nested_domain(new_domain)->enable_ats;
+ if (new_domain->iopf_handler) {
+ ret = arm_smmu_enable_iopf(master, master_domain);
+ if (ret)
+ goto err_free_master_domain;
+ }
+
/*
* During prepare we want the current smmu_domain and new
* smmu_domain to be in the devices list before we change any
@@ -2878,9 +2941,9 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
!arm_smmu_master_canwbs(master)) {
spin_unlock_irqrestore(&smmu_domain->devices_lock,
flags);
- kfree(master_domain);
kfree(state->vmaster);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_iopf;
}
if (state->ats_enabled)
@@ -2899,6 +2962,12 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
wmb();
}
return 0;
+
+err_iopf:
+ arm_smmu_disable_iopf(master, master_domain);
+err_free_master_domain:
+ kfree(master_domain);
+ return ret;
}
/*
@@ -2953,7 +3022,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
smmu = master->smmu;
if (smmu_domain->smmu != smmu)
- return ret;
+ return -EINVAL;
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID);
@@ -3510,8 +3579,7 @@ static void arm_smmu_release_device(struct device *dev)
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- if (WARN_ON(arm_smmu_master_sva_enabled(master)))
- iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
+ WARN_ON(master->iopf_refcount);
/* Put the STE back to what arm_smmu_init_strtab() sets */
if (dev->iommu->require_direct)
@@ -3586,58 +3654,6 @@ static void arm_smmu_get_resv_regions(struct device *dev,
iommu_dma_get_resv_regions(dev, head);
}
-static int arm_smmu_dev_enable_feature(struct device *dev,
- enum iommu_dev_features feat)
-{
- struct arm_smmu_master *master = dev_iommu_priv_get(dev);
-
- if (!master)
- return -ENODEV;
-
- switch (feat) {
- case IOMMU_DEV_FEAT_IOPF:
- if (!arm_smmu_master_iopf_supported(master))
- return -EINVAL;
- if (master->iopf_enabled)
- return -EBUSY;
- master->iopf_enabled = true;
- return 0;
- case IOMMU_DEV_FEAT_SVA:
- if (!arm_smmu_master_sva_supported(master))
- return -EINVAL;
- if (arm_smmu_master_sva_enabled(master))
- return -EBUSY;
- return arm_smmu_master_enable_sva(master);
- default:
- return -EINVAL;
- }
-}
-
-static int arm_smmu_dev_disable_feature(struct device *dev,
- enum iommu_dev_features feat)
-{
- struct arm_smmu_master *master = dev_iommu_priv_get(dev);
-
- if (!master)
- return -EINVAL;
-
- switch (feat) {
- case IOMMU_DEV_FEAT_IOPF:
- if (!master->iopf_enabled)
- return -EINVAL;
- if (master->sva_enabled)
- return -EBUSY;
- master->iopf_enabled = false;
- return 0;
- case IOMMU_DEV_FEAT_SVA:
- if (!arm_smmu_master_sva_enabled(master))
- return -EINVAL;
- return arm_smmu_master_disable_sva(master);
- default:
- return -EINVAL;
- }
-}
-
/*
* HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
* PCIe link and save the data to memory by DMA. The hardware is restricted to
@@ -3670,8 +3686,6 @@ static struct iommu_ops arm_smmu_ops = {
.device_group = arm_smmu_device_group,
.of_xlate = arm_smmu_of_xlate,
.get_resv_regions = arm_smmu_get_resv_regions,
- .dev_enable_feat = arm_smmu_dev_enable_feature,
- .dev_disable_feat = arm_smmu_dev_disable_feature,
.page_response = arm_smmu_page_response,
.def_domain_type = arm_smmu_def_domain_type,
.viommu_alloc = arm_vsmmu_alloc,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index dd1ad56ce863..ea41d790463e 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -837,9 +837,8 @@ struct arm_smmu_master {
bool ats_enabled : 1;
bool ste_ats_enabled : 1;
bool stall_enabled;
- bool sva_enabled;
- bool iopf_enabled;
unsigned int ssid_bits;
+ unsigned int iopf_refcount;
};
/* SMMU private data for an IOMMU domain */
@@ -915,8 +914,14 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
struct arm_smmu_master_domain {
struct list_head devices_elm;
struct arm_smmu_master *master;
+ /*
+ * For nested domains the master_domain is threaded onto the S2 parent,
+ * this points to the IOMMU_DOMAIN_NESTED to disambiguate the masters.
+ */
+ struct iommu_domain *domain;
ioasid_t ssid;
bool nested_ats_flush : 1;
+ bool using_iopf : 1; /* counted in master->iopf_refcount */
};
static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
@@ -995,11 +1000,6 @@ int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
#ifdef CONFIG_ARM_SMMU_V3_SVA
bool arm_smmu_sva_supported(struct arm_smmu_device *smmu);
-bool arm_smmu_master_sva_supported(struct arm_smmu_master *master);
-bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master);
-int arm_smmu_master_enable_sva(struct arm_smmu_master *master);
-int arm_smmu_master_disable_sva(struct arm_smmu_master *master);
-bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master);
void arm_smmu_sva_notifier_synchronize(void);
struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev,
struct mm_struct *mm);
@@ -1009,31 +1009,6 @@ static inline bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
return false;
}
-static inline bool arm_smmu_master_sva_supported(struct arm_smmu_master *master)
-{
- return false;
-}
-
-static inline bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master)
-{
- return false;
-}
-
-static inline int arm_smmu_master_enable_sva(struct arm_smmu_master *master)
-{
- return -ENODEV;
-}
-
-static inline int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
-{
- return -ENODEV;
-}
-
-static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master)
-{
- return false;
-}
-
static inline void arm_smmu_sva_notifier_synchronize(void) {}
#define arm_smmu_sva_domain_alloc NULL
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
index d03b2239baad..65e0ef6539fe 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
@@ -406,6 +406,12 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
arm_smmu_print_context_fault_info(smmu, idx, &cfi);
arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr);
+
+ if (cfi.fsr & ARM_SMMU_CB_FSR_SS) {
+ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
+ ret == -EAGAIN ? 0 : ARM_SMMU_RESUME_TERMINATE);
+ }
+
return IRQ_HANDLED;
}
@@ -416,6 +422,9 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
if (!tmp || tmp == -EBUSY) {
ret = IRQ_HANDLED;
resume = ARM_SMMU_RESUME_TERMINATE;
+ } else if (tmp == -EAGAIN) {
+ ret = IRQ_HANDLED;
+ resume = 0;
} else {
phys_addr_t phys_atos = qcom_smmu_verify_fault(smmu_domain, cfi.iova, cfi.fsr);
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 59d02687280e..62874b18f645 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -112,25 +112,39 @@ static void qcom_adreno_smmu_set_stall(const void *cookie, bool enabled)
{
struct arm_smmu_domain *smmu_domain = (void *)cookie;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- struct qcom_smmu *qsmmu = to_qcom_smmu(smmu_domain->smmu);
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
+ u32 mask = BIT(cfg->cbndx);
+ bool stall_changed = !!(qsmmu->stall_enabled & mask) != enabled;
+ unsigned long flags;
if (enabled)
- qsmmu->stall_enabled |= BIT(cfg->cbndx);
+ qsmmu->stall_enabled |= mask;
else
- qsmmu->stall_enabled &= ~BIT(cfg->cbndx);
-}
+ qsmmu->stall_enabled &= ~mask;
-static void qcom_adreno_smmu_resume_translation(const void *cookie, bool terminate)
-{
- struct arm_smmu_domain *smmu_domain = (void *)cookie;
- struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
- u32 reg = 0;
+ /*
+ * If the device is on and we changed the setting, update the register.
+ * The spec pseudocode says that CFCFG is resampled after a fault, and
+ * we believe that no implementations cache it in the TLB, so it should
+ * be safe to change it without a TLB invalidation.
+ */
+ if (stall_changed && pm_runtime_get_if_active(smmu->dev) > 0) {
+ u32 reg;
+
+ spin_lock_irqsave(&smmu_domain->cb_lock, flags);
+ reg = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR);
+
+ if (enabled)
+ reg |= ARM_SMMU_SCTLR_CFCFG;
+ else
+ reg &= ~ARM_SMMU_SCTLR_CFCFG;
- if (terminate)
- reg |= ARM_SMMU_RESUME_TERMINATE;
+ arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR, reg);
+ spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
- arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
+ pm_runtime_put_autosuspend(smmu->dev);
+ }
}
static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
@@ -337,7 +351,6 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
priv->set_ttbr0_cfg = qcom_adreno_smmu_set_ttbr0_cfg;
priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
priv->set_stall = qcom_adreno_smmu_set_stall;
- priv->resume_translation = qcom_adreno_smmu_resume_translation;
priv->set_prr_bit = NULL;
priv->set_prr_addr = NULL;
@@ -356,6 +369,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
{ .compatible = "qcom,mdp4" },
{ .compatible = "qcom,mdss" },
{ .compatible = "qcom,qcm2290-mdss" },
+ { .compatible = "qcom,sar2130p-mdss" },
{ .compatible = "qcom,sc7180-mdss" },
{ .compatible = "qcom,sc7180-mss-pil" },
{ .compatible = "qcom,sc7280-mdss" },
@@ -585,6 +599,7 @@ static const struct arm_smmu_impl qcom_adreno_smmu_v2_impl = {
.alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
.write_sctlr = qcom_adreno_smmu_write_sctlr,
.tlb_sync = qcom_smmu_tlb_sync,
+ .context_fault_needs_threaded_irq = true,
};
static const struct arm_smmu_impl qcom_adreno_smmu_500_impl = {
@@ -594,6 +609,7 @@ static const struct arm_smmu_impl qcom_adreno_smmu_500_impl = {
.alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
.write_sctlr = qcom_adreno_smmu_write_sctlr,
.tlb_sync = qcom_smmu_tlb_sync,
+ .context_fault_needs_threaded_irq = true,
};
static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 8f439c265a23..8d95b14c7d5a 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -474,6 +474,12 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
arm_smmu_print_context_fault_info(smmu, idx, &cfi);
arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr);
+
+ if (cfi.fsr & ARM_SMMU_CB_FSR_SS) {
+ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
+ ret == -EAGAIN ? 0 : ARM_SMMU_RESUME_TERMINATE);
+ }
+
return IRQ_HANDLED;
}
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index a775e4dbe06f..6c708fec48d1 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -27,6 +27,7 @@
#include <linux/msi.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
+#include <linux/pci-p2pdma.h>
#include <linux/scatterlist.h>
#include <linux/spinlock.h>
#include <linux/swiotlb.h>
@@ -105,7 +106,7 @@ early_param("iommu.forcedac", iommu_dma_forcedac_setup);
struct iova_fq_entry {
unsigned long iova_pfn;
unsigned long pages;
- struct list_head freelist;
+ struct iommu_pages_list freelist;
u64 counter; /* Flush counter when this entry was added */
};
@@ -154,6 +155,8 @@ static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq
fq->entries[idx].iova_pfn,
fq->entries[idx].pages);
+ fq->entries[idx].freelist =
+ IOMMU_PAGES_LIST_INIT(fq->entries[idx].freelist);
fq->head = (fq->head + 1) & fq->mod_mask;
}
}
@@ -192,7 +195,7 @@ static void fq_flush_timeout(struct timer_list *t)
static void queue_iova(struct iommu_dma_cookie *cookie,
unsigned long pfn, unsigned long pages,
- struct list_head *freelist)
+ struct iommu_pages_list *freelist)
{
struct iova_fq *fq;
unsigned long flags;
@@ -231,7 +234,7 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
fq->entries[idx].iova_pfn = pfn;
fq->entries[idx].pages = pages;
fq->entries[idx].counter = atomic64_read(&cookie->fq_flush_start_cnt);
- list_splice(freelist, &fq->entries[idx].freelist);
+ iommu_pages_list_splice(freelist, &fq->entries[idx].freelist);
spin_unlock_irqrestore(&fq->lock, flags);
@@ -289,7 +292,8 @@ static void iommu_dma_init_one_fq(struct iova_fq *fq, size_t fq_size)
spin_lock_init(&fq->lock);
for (i = 0; i < fq_size; i++)
- INIT_LIST_HEAD(&fq->entries[i].freelist);
+ fq->entries[i].freelist =
+ IOMMU_PAGES_LIST_INIT(fq->entries[i].freelist);
}
static int iommu_dma_init_fq_single(struct iommu_dma_cookie *cookie)
@@ -1137,6 +1141,54 @@ void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
}
+static phys_addr_t iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iova_domain *iovad = &domain->iova_cookie->iovad;
+
+ if (!is_swiotlb_active(dev)) {
+ dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n");
+ return (phys_addr_t)DMA_MAPPING_ERROR;
+ }
+
+ trace_swiotlb_bounced(dev, phys, size);
+
+ phys = swiotlb_tbl_map_single(dev, phys, size, iova_mask(iovad), dir,
+ attrs);
+
+ /*
+ * Untrusted devices should not see padding areas with random leftover
+ * kernel data, so zero the pre- and post-padding.
+ * swiotlb_tbl_map_single() has initialized the bounce buffer proper to
+ * the contents of the original memory buffer.
+ * Nothing is zeroed when the bounce mapping failed; the error value
+ * (DMA_MAPPING_ERROR cast to phys_addr_t) is returned unchanged.
+ */
+ if (phys != (phys_addr_t)DMA_MAPPING_ERROR && dev_is_untrusted(dev)) {
+ size_t start, virt = (size_t)phys_to_virt(phys);
+
+ /* Pre-padding */
+ start = iova_align_down(iovad, virt);
+ memset((void *)start, 0, virt - start);
+
+ /* Post-padding */
+ start = virt + size;
+ memset((void *)start, 0, iova_align(iovad, start) - start);
+ }
+
+ return phys;
+}
+
+/*
+ * Checks if a physical buffer has unaligned boundaries with respect to
+ * the IOMMU granule. Start and size are OR-ed so one iova_offset() call
+ * covers both; returns non-zero if either start or end is misaligned.
+ */
+static inline size_t iova_unaligned(struct iova_domain *iovad, phys_addr_t phys,
+ size_t size)
+{
+ return iova_offset(iovad, phys | size);
+}
+
dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
unsigned long attrs)
@@ -1150,42 +1202,14 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
dma_addr_t iova, dma_mask = dma_get_mask(dev);
/*
- * If both the physical buffer start address and size are
- * page aligned, we don't need to use a bounce page.
+ * If both the physical buffer start address and size are page aligned,
+ * we don't need to use a bounce page.
*/
if (dev_use_swiotlb(dev, size, dir) &&
- iova_offset(iovad, phys | size)) {
- if (!is_swiotlb_active(dev)) {
- dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n");
+ iova_unaligned(iovad, phys, size)) {
+ phys = iommu_dma_map_swiotlb(dev, phys, size, dir, attrs);
+ if (phys == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
- }
-
- trace_swiotlb_bounced(dev, phys, size);
-
- phys = swiotlb_tbl_map_single(dev, phys, size,
- iova_mask(iovad), dir, attrs);
-
- if (phys == DMA_MAPPING_ERROR)
- return DMA_MAPPING_ERROR;
-
- /*
- * Untrusted devices should not see padding areas with random
- * leftover kernel data, so zero the pre- and post-padding.
- * swiotlb_tbl_map_single() has initialized the bounce buffer
- * proper to the contents of the original memory buffer.
- */
- if (dev_is_untrusted(dev)) {
- size_t start, virt = (size_t)phys_to_virt(phys);
-
- /* Pre-padding */
- start = iova_align_down(iovad, virt);
- memset((void *)start, 0, virt - start);
-
- /* Post-padding */
- start = virt + size;
- memset((void *)start, 0,
- iova_align(iovad, start) - start);
- }
}
if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
@@ -1359,7 +1383,6 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
struct scatterlist *s, *prev = NULL;
int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs);
struct pci_p2pdma_map_state p2pdma_state = {};
- enum pci_p2pdma_map_type map;
dma_addr_t iova;
size_t iova_len = 0;
unsigned long mask = dma_get_seg_boundary(dev);
@@ -1389,28 +1412,30 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
size_t s_length = s->length;
size_t pad_len = (mask - iova_len + 1) & mask;
- if (is_pci_p2pdma_page(sg_page(s))) {
- map = pci_p2pdma_map_segment(&p2pdma_state, dev, s);
- switch (map) {
- case PCI_P2PDMA_MAP_BUS_ADDR:
- /*
- * iommu_map_sg() will skip this segment as
- * it is marked as a bus address,
- * __finalise_sg() will copy the dma address
- * into the output segment.
- */
- continue;
- case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
- /*
- * Mapping through host bridge should be
- * mapped with regular IOVAs, thus we
- * do nothing here and continue below.
- */
- break;
- default:
- ret = -EREMOTEIO;
- goto out_restore_sg;
- }
+ switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(s))) {
+ case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+ /*
+ * Mapping through host bridge should be mapped with
+ * regular IOVAs, thus we do nothing here and continue
+ * below.
+ */
+ break;
+ case PCI_P2PDMA_MAP_NONE:
+ break;
+ case PCI_P2PDMA_MAP_BUS_ADDR:
+ /*
+ * iommu_map_sg() will skip this segment as it is marked
+ * as a bus address, __finalise_sg() will copy the dma
+ * address into the output segment.
+ */
+ s->dma_address = pci_p2pdma_bus_addr_map(&p2pdma_state,
+ sg_phys(s));
+ /* Use the current segment's length, not the list head's */
+ sg_dma_len(s) = s->length;
+ sg_dma_mark_bus_address(s);
+ continue;
+ default:
+ ret = -EREMOTEIO;
+ goto out_restore_sg;
}
sg_dma_address(s) = s_iova_off;
@@ -1721,6 +1746,354 @@ size_t iommu_dma_max_mapping_size(struct device *dev)
return SIZE_MAX;
}
+/**
+ * dma_iova_try_alloc - Try to allocate an IOVA space
+ * @dev: Device to allocate the IOVA space for
+ * @state: IOVA state
+ * @phys: physical address
+ * @size: IOVA size
+ *
+ * Check if @dev supports the IOVA-based DMA API, and if yes allocate IOVA space
+ * for the given base address and size.
+ *
+ * Note: @phys is only used to calculate the IOVA alignment. Callers that always
+ * do PAGE_SIZE aligned transfers can safely pass 0 here.
+ *
+ * Returns %true if the IOVA-based DMA API can be used and IOVA space has been
+ * allocated, or %false if the regular DMA API should be used.
+ */
+bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state,
+ phys_addr_t phys, size_t size)
+{
+ struct iommu_dma_cookie *cookie;
+ struct iommu_domain *domain;
+ struct iova_domain *iovad;
+ size_t iova_off;
+ dma_addr_t addr;
+
+ memset(state, 0, sizeof(*state));
+ if (!use_dma_iommu(dev))
+ return false;
+
+ domain = iommu_get_dma_domain(dev);
+ cookie = domain->iova_cookie;
+ iovad = &cookie->iovad;
+ iova_off = iova_offset(iovad, phys);
+
+ if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
+ iommu_deferred_attach(dev, iommu_get_domain_for_dev(dev)))
+ return false;
+
+ if (WARN_ON_ONCE(!size))
+ return false;
+
+ /*
+ * DMA_IOVA_USE_SWIOTLB is a flag which is set by dma-iommu
+ * internals, make sure that caller didn't set it and/or
+ * didn't use this interface to map SIZE_MAX.
+ */
+ if (WARN_ON_ONCE((u64)size & DMA_IOVA_USE_SWIOTLB))
+ return false;
+
+ addr = iommu_dma_alloc_iova(domain,
+ iova_align(iovad, size + iova_off),
+ dma_get_mask(dev), dev);
+ if (!addr)
+ return false;
+
+ state->addr = addr + iova_off;
+ state->__size = size;
+ return true;
+}
+EXPORT_SYMBOL_GPL(dma_iova_try_alloc);
+
+/**
+ * dma_iova_free - Free an IOVA space
+ * @dev: Device to free the IOVA space for
+ * @state: IOVA state
+ *
+ * Undoes a successful dma_iova_try_alloc().
+ *
+ * Note that all dma_iova_link() calls need to be undone first. For callers
+ * that never call dma_iova_unlink(), dma_iova_destroy() can be used instead
+ * which unlinks all ranges and frees the IOVA space in a single efficient
+ * operation.
+ */
+void dma_iova_free(struct device *dev, struct dma_iova_state *state)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ size_t iova_start_pad = iova_offset(iovad, state->addr);
+ size_t size = dma_iova_size(state);
+
+ iommu_dma_free_iova(domain, state->addr - iova_start_pad,
+ iova_align(iovad, size + iova_start_pad), NULL);
+}
+EXPORT_SYMBOL_GPL(dma_iova_free);
+
+static int __dma_iova_link(struct device *dev, dma_addr_t addr,
+ phys_addr_t phys, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ bool coherent = dev_is_dma_coherent(dev);
+
+ if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arch_sync_dma_for_device(phys, size, dir);
+
+ return iommu_map_nosync(iommu_get_dma_domain(dev), addr, phys, size,
+ dma_info_to_prot(dir, coherent, attrs), GFP_ATOMIC);
+}
+
+static int iommu_dma_iova_bounce_and_link(struct device *dev, dma_addr_t addr,
+ phys_addr_t phys, size_t bounce_len,
+ enum dma_data_direction dir, unsigned long attrs,
+ size_t iova_start_pad)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iova_domain *iovad = &domain->iova_cookie->iovad;
+ phys_addr_t bounce_phys;
+ int error;
+
+ bounce_phys = iommu_dma_map_swiotlb(dev, phys, bounce_len, dir, attrs);
+ if (bounce_phys == DMA_MAPPING_ERROR)
+ return -ENOMEM;
+
+ error = __dma_iova_link(dev, addr - iova_start_pad,
+ bounce_phys - iova_start_pad,
+ iova_align(iovad, bounce_len), dir, attrs);
+ if (error)
+ swiotlb_tbl_unmap_single(dev, bounce_phys, bounce_len, dir,
+ attrs);
+ return error;
+}
+
+static int iommu_dma_iova_link_swiotlb(struct device *dev,
+ struct dma_iova_state *state, phys_addr_t phys, size_t offset,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ size_t iova_start_pad = iova_offset(iovad, phys);
+ size_t iova_end_pad = iova_offset(iovad, phys + size);
+ dma_addr_t addr = state->addr + offset;
+ size_t mapped = 0;
+ int error;
+
+ if (iova_start_pad) {
+ size_t bounce_len = min(size, iovad->granule - iova_start_pad);
+
+ error = iommu_dma_iova_bounce_and_link(dev, addr, phys,
+ bounce_len, dir, attrs, iova_start_pad);
+ if (error)
+ return error;
+ state->__size |= DMA_IOVA_USE_SWIOTLB;
+
+ mapped += bounce_len;
+ size -= bounce_len;
+ if (!size)
+ return 0;
+ }
+
+ size -= iova_end_pad;
+ error = __dma_iova_link(dev, addr + mapped, phys + mapped, size, dir,
+ attrs);
+ if (error)
+ goto out_unmap;
+ mapped += size;
+
+ if (iova_end_pad) {
+ error = iommu_dma_iova_bounce_and_link(dev, addr + mapped,
+ phys + mapped, iova_end_pad, dir, attrs, 0);
+ if (error)
+ goto out_unmap;
+ state->__size |= DMA_IOVA_USE_SWIOTLB;
+ }
+
+ return 0;
+
+out_unmap:
+ dma_iova_unlink(dev, state, 0, mapped, dir, attrs);
+ return error;
+}
+
+/**
+ * dma_iova_link - Link a range of IOVA space
+ * @dev: DMA device
+ * @state: IOVA state
+ * @phys: physical address to link
+ * @offset: offset into the IOVA state to map into
+ * @size: size of the buffer
+ * @dir: DMA direction
+ * @attrs: attributes of mapping properties
+ *
+ * Link a range of IOVA space for the given IOVA state without IOTLB sync.
+ * This function is used to link multiple physical addresses in contiguous
+ * IOVA space without performing costly IOTLB sync.
+ *
+ * The caller is responsible for calling dma_iova_sync() to sync the IOTLB at
+ * the end of linkage.
+ */
+int dma_iova_link(struct device *dev, struct dma_iova_state *state,
+ phys_addr_t phys, size_t offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ size_t iova_start_pad = iova_offset(iovad, phys);
+
+ if (WARN_ON_ONCE(iova_start_pad && offset > 0))
+ return -EIO;
+
+ if (dev_use_swiotlb(dev, size, dir) &&
+ iova_unaligned(iovad, phys, size))
+ return iommu_dma_iova_link_swiotlb(dev, state, phys, offset,
+ size, dir, attrs);
+
+ return __dma_iova_link(dev, state->addr + offset - iova_start_pad,
+ phys - iova_start_pad,
+ iova_align(iovad, size + iova_start_pad), dir, attrs);
+}
+EXPORT_SYMBOL_GPL(dma_iova_link);
+
+/**
+ * dma_iova_sync - Sync IOTLB
+ * @dev: DMA device
+ * @state: IOVA state
+ * @offset: offset into the IOVA state to sync
+ * @size: size of the buffer
+ *
+ * Sync IOTLB for the given IOVA state. This function should be called on
+ * the IOVA-contiguous range created by one or more dma_iova_link() calls
+ * to sync the IOTLB.
+ */
+int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ dma_addr_t addr = state->addr + offset;
+ size_t iova_start_pad = iova_offset(iovad, addr);
+
+ return iommu_sync_map(domain, addr - iova_start_pad,
+ iova_align(iovad, size + iova_start_pad));
+}
+EXPORT_SYMBOL_GPL(dma_iova_sync);
+
+static void iommu_dma_iova_unlink_range_slow(struct device *dev,
+ dma_addr_t addr, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ size_t iova_start_pad = iova_offset(iovad, addr);
+ dma_addr_t end = addr + size;
+
+ do {
+ phys_addr_t phys;
+ size_t len;
+
+ phys = iommu_iova_to_phys(domain, addr);
+ if (WARN_ON(!phys))
+ /* Something very horrible happened here */
+ return;
+
+ len = min_t(size_t,
+ end - addr, iovad->granule - iova_start_pad);
+
+ if (!dev_is_dma_coherent(dev) &&
+ !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arch_sync_dma_for_cpu(phys, len, dir);
+
+ swiotlb_tbl_unmap_single(dev, phys, len, dir, attrs);
+
+ addr += len;
+ iova_start_pad = 0;
+ } while (addr < end);
+}
+
+static void __iommu_dma_iova_unlink(struct device *dev,
+ struct dma_iova_state *state, size_t offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs,
+ bool free_iova)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ dma_addr_t addr = state->addr + offset;
+ size_t iova_start_pad = iova_offset(iovad, addr);
+ struct iommu_iotlb_gather iotlb_gather;
+ size_t unmapped;
+
+ if ((state->__size & DMA_IOVA_USE_SWIOTLB) ||
+ (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)))
+ iommu_dma_iova_unlink_range_slow(dev, addr, size, dir, attrs);
+
+ iommu_iotlb_gather_init(&iotlb_gather);
+ iotlb_gather.queued = free_iova && READ_ONCE(cookie->fq_domain);
+
+ size = iova_align(iovad, size + iova_start_pad);
+ addr -= iova_start_pad;
+ unmapped = iommu_unmap_fast(domain, addr, size, &iotlb_gather);
+ WARN_ON(unmapped != size);
+
+ if (!iotlb_gather.queued)
+ iommu_iotlb_sync(domain, &iotlb_gather);
+ if (free_iova)
+ iommu_dma_free_iova(domain, addr, size, &iotlb_gather);
+}
+
+/**
+ * dma_iova_unlink - Unlink a range of IOVA space
+ * @dev: DMA device
+ * @state: IOVA state
+ * @offset: offset into the IOVA state to unlink
+ * @size: size of the buffer
+ * @dir: DMA direction
+ * @attrs: attributes of mapping properties
+ *
+ * Unlink a range of IOVA space for the given IOVA state.
+ */
+void dma_iova_unlink(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ __iommu_dma_iova_unlink(dev, state, offset, size, dir, attrs, false);
+}
+EXPORT_SYMBOL_GPL(dma_iova_unlink);
+
+/**
+ * dma_iova_destroy - Finish a DMA mapping transaction
+ * @dev: DMA device
+ * @state: IOVA state
+ * @mapped_len: number of bytes to unmap
+ * @dir: DMA direction
+ * @attrs: attributes of mapping properties
+ *
+ * Unlink the IOVA range up to @mapped_len and free the entire IOVA space. The
+ * range of IOVA from offset 0 to @mapped_len must all be linked, and be the
+ * only linked IOVA in state.
+ */
+void dma_iova_destroy(struct device *dev, struct dma_iova_state *state,
+ size_t mapped_len, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ if (mapped_len)
+ __iommu_dma_iova_unlink(dev, state, 0, mapped_len, dir, attrs,
+ true);
+ else
+ /*
+ * We can get here if the first call to dma_iova_link() failed and
+ * there is nothing to unlink, so only the IOVA space is freed.
+ */
+ dma_iova_free(dev, state);
+}
+EXPORT_SYMBOL_GPL(dma_iova_destroy);
+
void iommu_setup_dma_ops(struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 317266aca6e2..fcb6a0f7c082 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -902,11 +902,11 @@ static struct iommu_domain *exynos_iommu_domain_alloc_paging(struct device *dev)
if (!domain)
return NULL;
- domain->pgtable = iommu_alloc_pages(GFP_KERNEL, 2);
+ domain->pgtable = iommu_alloc_pages_sz(GFP_KERNEL, SZ_16K);
if (!domain->pgtable)
goto err_pgtable;
- domain->lv2entcnt = iommu_alloc_pages(GFP_KERNEL, 1);
+ domain->lv2entcnt = iommu_alloc_pages_sz(GFP_KERNEL, SZ_8K);
if (!domain->lv2entcnt)
goto err_counter;
@@ -932,9 +932,9 @@ static struct iommu_domain *exynos_iommu_domain_alloc_paging(struct device *dev)
return &domain->domain;
err_lv2ent:
- iommu_free_pages(domain->lv2entcnt, 1);
+ iommu_free_pages(domain->lv2entcnt);
err_counter:
- iommu_free_pages(domain->pgtable, 2);
+ iommu_free_pages(domain->pgtable);
err_pgtable:
kfree(domain);
return NULL;
@@ -975,8 +975,8 @@ static void exynos_iommu_domain_free(struct iommu_domain *iommu_domain)
phys_to_virt(base));
}
- iommu_free_pages(domain->pgtable, 2);
- iommu_free_pages(domain->lv2entcnt, 1);
+ iommu_free_pages(domain->pgtable);
+ iommu_free_pages(domain->lv2entcnt);
kfree(domain);
}
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index 30be786bff11..5f08523f97cb 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -64,7 +64,7 @@ static int update_liodn_stash(int liodn, struct fsl_dma_domain *dma_domain,
spin_lock_irqsave(&iommu_lock, flags);
ret = pamu_update_paace_stash(liodn, val);
if (ret) {
- pr_debug("Failed to update SPAACE for liodn %d\n ", liodn);
+ pr_debug("Failed to update SPAACE for liodn %d\n", liodn);
spin_unlock_irqrestore(&iommu_lock, flags);
return ret;
}
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index 6c7528130cf9..ada651c4a01b 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -1,11 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_DMAR_TABLE) += dmar.o
-obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o cache.o prq.o
-obj-$(CONFIG_DMAR_TABLE) += trace.o
+obj-y += iommu.o pasid.o nested.o cache.o prq.o
+obj-$(CONFIG_DMAR_TABLE) += dmar.o trace.o
obj-$(CONFIG_DMAR_PERF) += perf.o
obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
-ifdef CONFIG_INTEL_IOMMU
obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o
-endif
obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index e540092d664d..b61d9ea27aa9 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1099,6 +1099,9 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
spin_lock_init(&iommu->device_rbtree_lock);
mutex_init(&iommu->iopf_lock);
iommu->node = NUMA_NO_NODE;
+ spin_lock_init(&iommu->lock);
+ ida_init(&iommu->domain_ida);
+ mutex_init(&iommu->did_lock);
ver = readl(iommu->reg + DMAR_VER_REG);
pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
@@ -1187,7 +1190,7 @@ static void free_iommu(struct intel_iommu *iommu)
}
if (iommu->qi) {
- iommu_free_page(iommu->qi->desc);
+ iommu_free_pages(iommu->qi->desc);
kfree(iommu->qi->desc_status);
kfree(iommu->qi);
}
@@ -1195,6 +1198,7 @@ static void free_iommu(struct intel_iommu *iommu)
if (iommu->reg)
unmap_iommu(iommu);
+ ida_destroy(&iommu->domain_ida);
ida_free(&dmar_seq_ids, iommu->seq_id);
kfree(iommu);
}
@@ -1681,7 +1685,6 @@ int dmar_enable_qi(struct intel_iommu *iommu)
{
struct q_inval *qi;
void *desc;
- int order;
if (!ecap_qis(iommu->ecap))
return -ENOENT;
@@ -1702,8 +1705,9 @@ int dmar_enable_qi(struct intel_iommu *iommu)
* Need two pages to accommodate 256 descriptors of 256 bits each
* if the remapping hardware supports scalable mode translation.
*/
- order = ecap_smts(iommu->ecap) ? 1 : 0;
- desc = iommu_alloc_pages_node(iommu->node, GFP_ATOMIC, order);
+ desc = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC,
+ ecap_smts(iommu->ecap) ? SZ_8K :
+ SZ_4K);
if (!desc) {
kfree(qi);
iommu->qi = NULL;
@@ -1714,7 +1718,7 @@ int dmar_enable_qi(struct intel_iommu *iommu)
qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
if (!qi->desc_status) {
- iommu_free_page(qi->desc);
+ iommu_free_pages(qi->desc);
kfree(qi);
iommu->qi = NULL;
return -ENOMEM;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index cb0b993bebb4..7aa3932251b2 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -397,7 +397,8 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
if (!alloc)
return NULL;
- context = iommu_alloc_page_node(iommu->node, GFP_ATOMIC);
+ context = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC,
+ SZ_4K);
if (!context)
return NULL;
@@ -571,17 +572,17 @@ static void free_context_table(struct intel_iommu *iommu)
for (i = 0; i < ROOT_ENTRY_NR; i++) {
context = iommu_context_addr(iommu, i, 0, 0);
if (context)
- iommu_free_page(context);
+ iommu_free_pages(context);
if (!sm_supported(iommu))
continue;
context = iommu_context_addr(iommu, i, 0x80, 0);
if (context)
- iommu_free_page(context);
+ iommu_free_pages(context);
}
- iommu_free_page(iommu->root_entry);
+ iommu_free_pages(iommu->root_entry);
iommu->root_entry = NULL;
}
@@ -731,7 +732,8 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
if (!dma_pte_present(pte)) {
uint64_t pteval, tmp;
- tmp_page = iommu_alloc_page_node(domain->nid, gfp);
+ tmp_page = iommu_alloc_pages_node_sz(domain->nid, gfp,
+ SZ_4K);
if (!tmp_page)
return NULL;
@@ -745,7 +747,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
tmp = 0ULL;
if (!try_cmpxchg64(&pte->val, &tmp, pteval))
/* Someone else set it while we were thinking; use theirs. */
- iommu_free_page(tmp_page);
+ iommu_free_pages(tmp_page);
else
domain_flush_cache(domain, pte, sizeof(*pte));
}
@@ -858,7 +860,7 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level,
last_pfn < level_pfn + level_size(level) - 1)) {
dma_clear_pte(pte);
domain_flush_cache(domain, pte, sizeof(*pte));
- iommu_free_page(level_pte);
+ iommu_free_pages(level_pte);
}
next:
pfn += level_size(level);
@@ -882,7 +884,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
/* free pgd */
if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
- iommu_free_page(domain->pgd);
+ iommu_free_pages(domain->pgd);
domain->pgd = NULL;
}
}
@@ -894,18 +896,16 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
The 'pte' argument is the *parent* PTE, pointing to the page that is to
be freed. */
static void dma_pte_list_pagetables(struct dmar_domain *domain,
- int level, struct dma_pte *pte,
- struct list_head *freelist)
+ int level, struct dma_pte *parent_pte,
+ struct iommu_pages_list *freelist)
{
- struct page *pg;
+ struct dma_pte *pte = phys_to_virt(dma_pte_addr(parent_pte));
- pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
- list_add_tail(&pg->lru, freelist);
+ iommu_pages_list_add(freelist, pte);
if (level == 1)
return;
- pte = page_address(pg);
do {
if (dma_pte_present(pte) && !dma_pte_superpage(pte))
dma_pte_list_pagetables(domain, level - 1, pte, freelist);
@@ -916,7 +916,7 @@ static void dma_pte_list_pagetables(struct dmar_domain *domain,
static void dma_pte_clear_level(struct dmar_domain *domain, int level,
struct dma_pte *pte, unsigned long pfn,
unsigned long start_pfn, unsigned long last_pfn,
- struct list_head *freelist)
+ struct iommu_pages_list *freelist)
{
struct dma_pte *first_pte = NULL, *last_pte = NULL;
@@ -961,7 +961,8 @@ next:
the page tables, and may have cached the intermediate levels. The
pages can only be freed after the IOTLB flush has been done. */
static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
- unsigned long last_pfn, struct list_head *freelist)
+ unsigned long last_pfn,
+ struct iommu_pages_list *freelist)
{
if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) ||
WARN_ON(start_pfn > last_pfn))
@@ -973,8 +974,7 @@ static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
/* free pgd */
if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
- struct page *pgd_page = virt_to_page(domain->pgd);
- list_add_tail(&pgd_page->lru, freelist);
+ iommu_pages_list_add(freelist, domain->pgd);
domain->pgd = NULL;
}
}
@@ -984,7 +984,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
struct root_entry *root;
- root = iommu_alloc_page_node(iommu->node, GFP_ATOMIC);
+ root = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, SZ_4K);
if (!root) {
pr_err("Allocating root entry for %s failed\n",
iommu->name);
@@ -1289,52 +1289,13 @@ static void iommu_disable_translation(struct intel_iommu *iommu)
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
-static int iommu_init_domains(struct intel_iommu *iommu)
-{
- u32 ndomains;
-
- ndomains = cap_ndoms(iommu->cap);
- pr_debug("%s: Number of Domains supported <%d>\n",
- iommu->name, ndomains);
-
- spin_lock_init(&iommu->lock);
-
- iommu->domain_ids = bitmap_zalloc(ndomains, GFP_KERNEL);
- if (!iommu->domain_ids)
- return -ENOMEM;
-
- /*
- * If Caching mode is set, then invalid translations are tagged
- * with domain-id 0, hence we need to pre-allocate it. We also
- * use domain-id 0 as a marker for non-allocated domain-id, so
- * make sure it is not used for a real domain.
- */
- set_bit(0, iommu->domain_ids);
-
- /*
- * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
- * entry for first-level or pass-through translation modes should
- * be programmed with a domain id different from those used for
- * second-level or nested translation. We reserve a domain id for
- * this purpose. This domain id is also used for identity domain
- * in legacy mode.
- */
- set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
-
- return 0;
-}
-
static void disable_dmar_iommu(struct intel_iommu *iommu)
{
- if (!iommu->domain_ids)
- return;
-
/*
* All iommu domains must have been detached from the devices,
* hence there should be no domain IDs in use.
*/
- if (WARN_ON(bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap))
- > NUM_RESERVED_DID))
+ if (WARN_ON(!ida_is_empty(&iommu->domain_ida)))
return;
if (iommu->gcmd & DMA_GCMD_TE)
@@ -1343,11 +1304,6 @@ static void disable_dmar_iommu(struct intel_iommu *iommu)
static void free_dmar_iommu(struct intel_iommu *iommu)
{
- if (iommu->domain_ids) {
- bitmap_free(iommu->domain_ids);
- iommu->domain_ids = NULL;
- }
-
if (iommu->copied_tables) {
bitmap_free(iommu->copied_tables);
iommu->copied_tables = NULL;
@@ -1380,7 +1336,6 @@ static bool first_level_by_default(struct intel_iommu *iommu)
int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
{
struct iommu_domain_info *info, *curr;
- unsigned long ndomains;
int num, ret = -ENOSPC;
if (domain->domain.type == IOMMU_DOMAIN_SVA)
@@ -1390,40 +1345,36 @@ int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
if (!info)
return -ENOMEM;
- spin_lock(&iommu->lock);
+ guard(mutex)(&iommu->did_lock);
curr = xa_load(&domain->iommu_array, iommu->seq_id);
if (curr) {
curr->refcnt++;
- spin_unlock(&iommu->lock);
kfree(info);
return 0;
}
- ndomains = cap_ndoms(iommu->cap);
- num = find_first_zero_bit(iommu->domain_ids, ndomains);
- if (num >= ndomains) {
+ num = ida_alloc_range(&iommu->domain_ida, IDA_START_DID,
+ cap_ndoms(iommu->cap) - 1, GFP_KERNEL);
+ if (num < 0) {
pr_err("%s: No free domain ids\n", iommu->name);
goto err_unlock;
}
- set_bit(num, iommu->domain_ids);
info->refcnt = 1;
info->did = num;
info->iommu = iommu;
curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id,
- NULL, info, GFP_ATOMIC);
+ NULL, info, GFP_KERNEL);
if (curr) {
ret = xa_err(curr) ? : -EBUSY;
goto err_clear;
}
- spin_unlock(&iommu->lock);
return 0;
err_clear:
- clear_bit(info->did, iommu->domain_ids);
+ ida_free(&iommu->domain_ida, info->did);
err_unlock:
- spin_unlock(&iommu->lock);
kfree(info);
return ret;
}
@@ -1435,21 +1386,21 @@ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
if (domain->domain.type == IOMMU_DOMAIN_SVA)
return;
- spin_lock(&iommu->lock);
+ guard(mutex)(&iommu->did_lock);
info = xa_load(&domain->iommu_array, iommu->seq_id);
if (--info->refcnt == 0) {
- clear_bit(info->did, iommu->domain_ids);
+ ida_free(&iommu->domain_ida, info->did);
xa_erase(&domain->iommu_array, iommu->seq_id);
domain->nid = NUMA_NO_NODE;
kfree(info);
}
- spin_unlock(&iommu->lock);
}
static void domain_exit(struct dmar_domain *domain)
{
if (domain->pgd) {
- LIST_HEAD(freelist);
+ struct iommu_pages_list freelist =
+ IOMMU_PAGES_LIST_INIT(freelist);
domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist);
iommu_put_pages_list(&freelist);
@@ -1681,9 +1632,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
}
attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
- attr |= DMA_FL_PTE_PRESENT;
if (domain->use_first_level) {
- attr |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
+ attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
if (prot & DMA_PTE_WRITE)
attr |= DMA_FL_PTE_DIRTY;
}
@@ -1859,6 +1809,7 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
return ret;
info->domain = domain;
+ info->domain_attached = true;
spin_lock_irqsave(&domain->lock, flags);
list_add(&info->link, &domain->devices);
spin_unlock_irqrestore(&domain->lock, flags);
@@ -2027,7 +1978,8 @@ static int copy_context_table(struct intel_iommu *iommu,
if (!old_ce)
goto out;
- new_ce = iommu_alloc_page_node(iommu->node, GFP_KERNEL);
+ new_ce = iommu_alloc_pages_node_sz(iommu->node,
+ GFP_KERNEL, SZ_4K);
if (!new_ce)
goto out_unmap;
@@ -2042,7 +1994,7 @@ static int copy_context_table(struct intel_iommu *iommu,
did = context_domain_id(&ce);
if (did >= 0 && did < cap_ndoms(iommu->cap))
- set_bit(did, iommu->domain_ids);
+ ida_alloc_range(&iommu->domain_ida, did, did, GFP_KERNEL);
set_context_copied(iommu, bus, devfn);
new_ce[idx] = ce;
@@ -2169,11 +2121,6 @@ static int __init init_dmars(void)
}
intel_iommu_init_qi(iommu);
-
- ret = iommu_init_domains(iommu);
- if (ret)
- goto free_iommu;
-
init_translation_status(iommu);
if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
@@ -2651,9 +2598,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
if (iommu->gcmd & DMA_GCMD_TE)
iommu_disable_translation(iommu);
- ret = iommu_init_domains(iommu);
- if (ret == 0)
- ret = iommu_alloc_root_entry(iommu);
+ ret = iommu_alloc_root_entry(iommu);
if (ret)
goto out;
@@ -2744,7 +2689,6 @@ static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev)
struct device *tmp;
int i;
- dev = pci_physfn(dev);
rcu_read_lock();
list_for_each_entry_rcu(satcu, &dmar_satc_units, list) {
@@ -2761,15 +2705,16 @@ out:
return satcu;
}
-static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
+static bool dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
{
- int i, ret = 1;
- struct pci_bus *bus;
struct pci_dev *bridge = NULL;
- struct device *tmp;
- struct acpi_dmar_atsr *atsr;
struct dmar_atsr_unit *atsru;
struct dmar_satc_unit *satcu;
+ struct acpi_dmar_atsr *atsr;
+ bool supported = true;
+ struct pci_bus *bus;
+ struct device *tmp;
+ int i;
dev = pci_physfn(dev);
satcu = dmar_find_matched_satc_unit(dev);
@@ -2787,11 +2732,11 @@ static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
bridge = bus->self;
/* If it's an integrated device, allow ATS */
if (!bridge)
- return 1;
+ return true;
/* Connected via non-PCIe: no ATS */
if (!pci_is_pcie(bridge) ||
pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
- return 0;
+ return false;
/* If we found the root port, look it up in the ATSR */
if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
break;
@@ -2810,11 +2755,11 @@ static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
if (atsru->include_all)
goto out;
}
- ret = 0;
+ supported = false;
out:
rcu_read_unlock();
- return ret;
+ return supported;
}
int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
@@ -2972,9 +2917,14 @@ static ssize_t domains_used_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
- return sysfs_emit(buf, "%d\n",
- bitmap_weight(iommu->domain_ids,
- cap_ndoms(iommu->cap)));
+ unsigned int count = 0;
+ int id;
+
+ for (id = 0; id < cap_ndoms(iommu->cap); id++)
+ if (ida_exists(&iommu->domain_ida, id))
+ count++;
+
+ return sysfs_emit(buf, "%d\n", count);
}
static DEVICE_ATTR_RO(domains_used);
@@ -3257,6 +3207,10 @@ void device_block_translation(struct device *dev)
struct intel_iommu *iommu = info->iommu;
unsigned long flags;
+ /* Device in DMA blocking state. Nothing to do. */
+ if (!info->domain_attached)
+ return;
+
if (info->domain)
cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID);
@@ -3268,6 +3222,9 @@ void device_block_translation(struct device *dev)
domain_context_clear(info);
}
+ /* Device now in DMA blocking state. */
+ info->domain_attached = false;
+
if (!info->domain)
return;
@@ -3282,6 +3239,9 @@ void device_block_translation(struct device *dev)
static int blocking_domain_attach_dev(struct iommu_domain *domain,
struct device *dev)
{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+
+ iopf_for_domain_remove(info->domain ? &info->domain->domain : NULL, dev);
device_block_translation(dev);
return 0;
}
@@ -3360,7 +3320,7 @@ static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_st
domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
/* always allocate the top pgd */
- domain->pgd = iommu_alloc_page_node(domain->nid, GFP_KERNEL);
+ domain->pgd = iommu_alloc_pages_node_sz(domain->nid, GFP_KERNEL, SZ_4K);
if (!domain->pgd) {
kfree(domain);
return ERR_PTR(-ENOMEM);
@@ -3492,7 +3452,15 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
if (ret)
return ret;
- return dmar_domain_attach_device(to_dmar_domain(domain), dev);
+ ret = iopf_for_domain_set(domain, dev);
+ if (ret)
+ return ret;
+
+ ret = dmar_domain_attach_device(to_dmar_domain(domain), dev);
+ if (ret)
+ iopf_for_domain_remove(domain, dev);
+
+ return ret;
}
static int intel_iommu_map(struct iommu_domain *domain,
@@ -3603,7 +3571,8 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
cache_tag_flush_range(to_dmar_domain(domain), gather->start,
- gather->end, list_empty(&gather->freelist));
+ gather->end,
+ iommu_pages_list_empty(&gather->freelist));
iommu_put_pages_list(&gather->freelist);
}
@@ -3918,6 +3887,8 @@ int intel_iommu_enable_iopf(struct device *dev)
if (!info->pri_enabled)
return -ENODEV;
+ /* pri_enabled is protected by the group mutex. */
+ iommu_group_mutex_assert(dev);
if (info->iopf_refcount) {
info->iopf_refcount++;
return 0;
@@ -3940,43 +3911,13 @@ void intel_iommu_disable_iopf(struct device *dev)
if (WARN_ON(!info->pri_enabled || !info->iopf_refcount))
return;
+ iommu_group_mutex_assert(dev);
if (--info->iopf_refcount)
return;
iopf_queue_remove_device(iommu->iopf_queue, dev);
}
-static int
-intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
-{
- switch (feat) {
- case IOMMU_DEV_FEAT_IOPF:
- return intel_iommu_enable_iopf(dev);
-
- case IOMMU_DEV_FEAT_SVA:
- return 0;
-
- default:
- return -ENODEV;
- }
-}
-
-static int
-intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
-{
- switch (feat) {
- case IOMMU_DEV_FEAT_IOPF:
- intel_iommu_disable_iopf(dev);
- return 0;
-
- case IOMMU_DEV_FEAT_SVA:
- return 0;
-
- default:
- return -ENODEV;
- }
-}
-
static bool intel_iommu_is_attach_deferred(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
@@ -4050,6 +3991,7 @@ static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
+ iopf_for_domain_remove(old, dev);
intel_pasid_tear_down_entry(info->iommu, dev, pasid, false);
domain_remove_dev_pasid(old, dev, pasid);
@@ -4123,6 +4065,10 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
if (IS_ERR(dev_pasid))
return PTR_ERR(dev_pasid);
+ ret = iopf_for_domain_replace(domain, old, dev);
+ if (ret)
+ goto out_remove_dev_pasid;
+
if (dmar_domain->use_first_level)
ret = domain_setup_first_level(iommu, dmar_domain,
dev, pasid, old);
@@ -4130,7 +4076,7 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
ret = domain_setup_second_level(iommu, dmar_domain,
dev, pasid, old);
if (ret)
- goto out_remove_dev_pasid;
+ goto out_unwind_iopf;
domain_remove_dev_pasid(old, dev, pasid);
@@ -4138,6 +4084,8 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
return 0;
+out_unwind_iopf:
+ iopf_for_domain_replace(old, domain, dev);
out_remove_dev_pasid:
domain_remove_dev_pasid(domain, dev, pasid);
return ret;
@@ -4352,11 +4300,19 @@ static int identity_domain_attach_dev(struct iommu_domain *domain, struct device
if (dev_is_real_dma_subdevice(dev))
return 0;
+ /*
+ * No PRI support with the global identity domain. No need to enable or
+ * disable PRI in this path as the iommu has been put in the blocking
+ * state.
+ */
if (sm_supported(iommu))
ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID);
else
ret = device_setup_pass_through(dev);
+ if (!ret)
+ info->domain_attached = true;
+
return ret;
}
@@ -4371,10 +4327,16 @@ static int identity_domain_set_dev_pasid(struct iommu_domain *domain,
if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
return -EOPNOTSUPP;
- ret = domain_setup_passthrough(iommu, dev, pasid, old);
+ ret = iopf_for_domain_replace(domain, old, dev);
if (ret)
return ret;
+ ret = domain_setup_passthrough(iommu, dev, pasid, old);
+ if (ret) {
+ iopf_for_domain_replace(old, domain, dev);
+ return ret;
+ }
+
domain_remove_dev_pasid(old, dev, pasid);
return 0;
}
@@ -4401,8 +4363,6 @@ const struct iommu_ops intel_iommu_ops = {
.release_device = intel_iommu_release_device,
.get_resv_regions = intel_iommu_get_resv_regions,
.device_group = intel_iommu_device_group,
- .dev_enable_feat = intel_iommu_dev_enable_feat,
- .dev_disable_feat = intel_iommu_dev_disable_feat,
.is_attach_deferred = intel_iommu_is_attach_deferred,
.def_domain_type = device_def_domain_type,
.pgsize_bitmap = SZ_4K,
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index c4916886da5a..3ddbcc603de2 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -493,14 +493,13 @@ struct q_inval {
/* Page Request Queue depth */
#define PRQ_ORDER 4
-#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20)
-#define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5)
+#define PRQ_SIZE (SZ_4K << PRQ_ORDER)
+#define PRQ_RING_MASK (PRQ_SIZE - 0x20)
+#define PRQ_DEPTH (PRQ_SIZE >> 5)
struct dmar_pci_notify_info;
#ifdef CONFIG_IRQ_REMAP
-/* 1MB - maximum possible interrupt remapping table size */
-#define INTR_REMAP_PAGE_ORDER 8
#define INTR_REMAP_TABLE_REG_SIZE 0xf
#define INTR_REMAP_TABLE_REG_SIZE_MASK 0xf
@@ -722,7 +721,9 @@ struct intel_iommu {
unsigned char name[16]; /* Device Name */
#ifdef CONFIG_INTEL_IOMMU
- unsigned long *domain_ids; /* bitmap of domains */
+ /* mutex to protect domain_ida */
+ struct mutex did_lock;
+ struct ida domain_ida; /* domain id allocator */
unsigned long *copied_tables; /* bitmap of copied tables */
spinlock_t lock; /* protect context, domain ids */
struct root_entry *root_entry; /* virtual address */
@@ -773,6 +774,7 @@ struct device_domain_info {
u8 ats_supported:1;
u8 ats_enabled:1;
u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */
+ u8 domain_attached:1; /* Device has domain attached */
u8 ats_qdep;
unsigned int iopf_refcount;
struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
@@ -809,11 +811,22 @@ static inline struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
}
/*
- * Domain ID reserved for pasid entries programmed for first-level
- * only and pass-through transfer modes.
+ * Domain ID 0 and 1 are reserved:
+ *
+ * If Caching mode is set, then invalid translations are tagged
+ * with domain-id 0, hence we need to pre-allocate it. We also
+ * use domain-id 0 as a marker for non-allocated domain-id, so
+ * make sure it is not used for a real domain.
+ *
+ * VT-d spec rev3.0 (section 6.2.3.1) requires that each pasid
+ * entry for first-level or pass-through translation modes should
+ * be programmed with a domain id different from those used for
+ * second-level or nested translation. We reserve a domain id for
+ * this purpose. This domain id is also used for identity domain
+ * in legacy mode.
*/
#define FLPT_DEFAULT_DID 1
-#define NUM_RESERVED_DID 2
+#define IDA_START_DID 2
/* Retrieve the domain ID which has allocated to the domain */
static inline u16
@@ -1298,6 +1311,39 @@ void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid);
int intel_iommu_enable_iopf(struct device *dev);
void intel_iommu_disable_iopf(struct device *dev);
+static inline int iopf_for_domain_set(struct iommu_domain *domain,
+ struct device *dev)
+{
+ if (!domain || !domain->iopf_handler)
+ return 0;
+
+ return intel_iommu_enable_iopf(dev);
+}
+
+static inline void iopf_for_domain_remove(struct iommu_domain *domain,
+ struct device *dev)
+{
+ if (!domain || !domain->iopf_handler)
+ return;
+
+ intel_iommu_disable_iopf(dev);
+}
+
+static inline int iopf_for_domain_replace(struct iommu_domain *new,
+ struct iommu_domain *old,
+ struct device *dev)
+{
+ int ret;
+
+ ret = iopf_for_domain_set(new, dev);
+ if (ret)
+ return ret;
+
+ iopf_for_domain_remove(old, dev);
+
+ return 0;
+}
+
#ifdef CONFIG_INTEL_IOMMU_SVM
void intel_svm_check(struct intel_iommu *iommu);
struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 3bc2a03cceca..cf7b6882ec75 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -530,11 +530,11 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
if (!ir_table)
return -ENOMEM;
- ir_table_base = iommu_alloc_pages_node(iommu->node, GFP_KERNEL,
- INTR_REMAP_PAGE_ORDER);
+ /* 1MB - maximum possible interrupt remapping table size */
+ ir_table_base =
+ iommu_alloc_pages_node_sz(iommu->node, GFP_KERNEL, SZ_1M);
if (!ir_table_base) {
- pr_err("IR%d: failed to allocate pages of order %d\n",
- iommu->seq_id, INTR_REMAP_PAGE_ORDER);
+ pr_err("IR%d: failed to allocate 1M of pages\n", iommu->seq_id);
goto out_free_table;
}
@@ -612,7 +612,7 @@ out_free_fwnode:
out_free_bitmap:
bitmap_free(bitmap);
out_free_pages:
- iommu_free_pages(ir_table_base, INTR_REMAP_PAGE_ORDER);
+ iommu_free_pages(ir_table_base);
out_free_table:
kfree(ir_table);
@@ -633,7 +633,7 @@ static void intel_teardown_irq_remapping(struct intel_iommu *iommu)
irq_domain_free_fwnode(fn);
iommu->ir_domain = NULL;
}
- iommu_free_pages(iommu->ir_table->base, INTR_REMAP_PAGE_ORDER);
+ iommu_free_pages(iommu->ir_table->base);
bitmap_free(iommu->ir_table->bitmap);
kfree(iommu->ir_table);
iommu->ir_table = NULL;
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index 6ac5c534bef4..fc312f649f9e 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -27,8 +27,7 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
unsigned long flags;
int ret = 0;
- if (info->domain)
- device_block_translation(dev);
+ device_block_translation(dev);
if (iommu->agaw < dmar_domain->s2_domain->agaw) {
dev_err_ratelimited(dev, "Adjusted guest address width not compatible\n");
@@ -56,17 +55,24 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
if (ret)
goto detach_iommu;
+ ret = iopf_for_domain_set(domain, dev);
+ if (ret)
+ goto unassign_tag;
+
ret = intel_pasid_setup_nested(iommu, dev,
IOMMU_NO_PASID, dmar_domain);
if (ret)
- goto unassign_tag;
+ goto disable_iopf;
info->domain = dmar_domain;
+ info->domain_attached = true;
spin_lock_irqsave(&dmar_domain->lock, flags);
list_add(&info->link, &dmar_domain->devices);
spin_unlock_irqrestore(&dmar_domain->lock, flags);
return 0;
+disable_iopf:
+ iopf_for_domain_remove(domain, dev);
unassign_tag:
cache_tag_unassign_domain(dmar_domain, dev, IOMMU_NO_PASID);
detach_iommu:
@@ -166,14 +172,20 @@ static int intel_nested_set_dev_pasid(struct iommu_domain *domain,
if (IS_ERR(dev_pasid))
return PTR_ERR(dev_pasid);
- ret = domain_setup_nested(iommu, dmar_domain, dev, pasid, old);
+ ret = iopf_for_domain_replace(domain, old, dev);
if (ret)
goto out_remove_dev_pasid;
+ ret = domain_setup_nested(iommu, dmar_domain, dev, pasid, old);
+ if (ret)
+ goto out_unwind_iopf;
+
domain_remove_dev_pasid(old, dev, pasid);
return 0;
+out_unwind_iopf:
+ iopf_for_domain_replace(old, domain, dev);
out_remove_dev_pasid:
domain_remove_dev_pasid(domain, dev, pasid);
return ret;
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 7ee18bb48bd4..ac67a056b6c8 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -60,14 +60,14 @@ int intel_pasid_alloc_table(struct device *dev)
size = max_pasid >> (PASID_PDE_SHIFT - 3);
order = size ? get_order(size) : 0;
- dir = iommu_alloc_pages_node(info->iommu->node, GFP_KERNEL, order);
+ dir = iommu_alloc_pages_node_sz(info->iommu->node, GFP_KERNEL,
+ 1 << (order + PAGE_SHIFT));
if (!dir) {
kfree(pasid_table);
return -ENOMEM;
}
pasid_table->table = dir;
- pasid_table->order = order;
pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
info->pasid_table = pasid_table;
@@ -97,10 +97,10 @@ void intel_pasid_free_table(struct device *dev)
max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
for (i = 0; i < max_pde; i++) {
table = get_pasid_table_from_pde(&dir[i]);
- iommu_free_page(table);
+ iommu_free_pages(table);
}
- iommu_free_pages(pasid_table->table, pasid_table->order);
+ iommu_free_pages(pasid_table->table);
kfree(pasid_table);
}
@@ -148,7 +148,8 @@ retry:
if (!entries) {
u64 tmp;
- entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC);
+ entries = iommu_alloc_pages_node_sz(info->iommu->node,
+ GFP_ATOMIC, SZ_4K);
if (!entries)
return NULL;
@@ -161,7 +162,7 @@ retry:
tmp = 0ULL;
if (!try_cmpxchg64(&dir[dir_index].val, &tmp,
(u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
- iommu_free_page(entries);
+ iommu_free_pages(entries);
goto retry;
}
if (!ecap_coherent(info->iommu->ecap)) {
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index 668d8ece6b14..fd0fd1a0df84 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -47,7 +47,6 @@ struct pasid_entry {
/* The representative of a PASID table */
struct pasid_table {
void *table; /* pasid table pointer */
- int order; /* page order of pasid table */
u32 max_pasid; /* max pasid */
};
diff --git a/drivers/iommu/intel/prq.c b/drivers/iommu/intel/prq.c
index 5b6a64d96850..52570e42a14c 100644
--- a/drivers/iommu/intel/prq.c
+++ b/drivers/iommu/intel/prq.c
@@ -290,7 +290,8 @@ int intel_iommu_enable_prq(struct intel_iommu *iommu)
struct iopf_queue *iopfq;
int irq, ret;
- iommu->prq = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER);
+ iommu->prq =
+ iommu_alloc_pages_node_sz(iommu->node, GFP_KERNEL, PRQ_SIZE);
if (!iommu->prq) {
pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
iommu->name);
@@ -340,7 +341,7 @@ free_hwirq:
dmar_free_hwirq(irq);
iommu->pr_irq = 0;
free_prq:
- iommu_free_pages(iommu->prq, PRQ_ORDER);
+ iommu_free_pages(iommu->prq);
iommu->prq = NULL;
return ret;
@@ -363,7 +364,7 @@ int intel_iommu_finish_prq(struct intel_iommu *iommu)
iommu->iopf_queue = NULL;
}
- iommu_free_pages(iommu->prq, PRQ_ORDER);
+ iommu_free_pages(iommu->prq);
iommu->prq = NULL;
return 0;
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index ba93123cb4eb..f3da596410b5 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -164,18 +164,23 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
if (IS_ERR(dev_pasid))
return PTR_ERR(dev_pasid);
+ ret = iopf_for_domain_replace(domain, old, dev);
+ if (ret)
+ goto out_remove_dev_pasid;
+
/* Setup the pasid table: */
sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
ret = __domain_setup_first_level(iommu, dev, pasid,
FLPT_DEFAULT_DID, mm->pgd,
sflags, old);
if (ret)
- goto out_remove_dev_pasid;
+ goto out_unwind_iopf;
domain_remove_dev_pasid(old, dev, pasid);
return 0;
-
+out_unwind_iopf:
+ iopf_for_domain_replace(old, domain, dev);
out_remove_dev_pasid:
domain_remove_dev_pasid(domain, dev, pasid);
return ret;
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 7632c80edea6..96425e92f313 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -13,6 +13,7 @@
#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/kernel.h>
+#include <linux/device/faux.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>
@@ -251,8 +252,6 @@ static inline bool arm_lpae_concat_mandatory(struct io_pgtable_cfg *cfg,
(data->start_level == 1) && (oas == 40);
}
-static bool selftest_running = false;
-
static dma_addr_t __arm_lpae_dma_addr(void *pages)
{
return (dma_addr_t)virt_to_phys(pages);
@@ -263,16 +262,20 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
void *cookie)
{
struct device *dev = cfg->iommu_dev;
- int order = get_order(size);
+ size_t alloc_size;
dma_addr_t dma;
void *pages;
- VM_BUG_ON((gfp & __GFP_HIGHMEM));
-
+ /*
+ * For very small starting-level translation tables the HW requires a
+ * minimum alignment of at least 64 to cover all cases.
+ */
+ alloc_size = max(size, 64);
if (cfg->alloc)
- pages = cfg->alloc(cookie, size, gfp);
+ pages = cfg->alloc(cookie, alloc_size, gfp);
else
- pages = iommu_alloc_pages_node(dev_to_node(dev), gfp, order);
+ pages = iommu_alloc_pages_node_sz(dev_to_node(dev), gfp,
+ alloc_size);
if (!pages)
return NULL;
@@ -300,7 +303,7 @@ out_free:
if (cfg->free)
cfg->free(cookie, pages, size);
else
- iommu_free_pages(pages, order);
+ iommu_free_pages(pages);
return NULL;
}
@@ -316,7 +319,7 @@ static void __arm_lpae_free_pages(void *pages, size_t size,
if (cfg->free)
cfg->free(cookie, pages, size);
else
- iommu_free_pages(pages, get_order(size));
+ iommu_free_pages(pages);
}
static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
@@ -371,7 +374,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
for (i = 0; i < num_entries; i++)
if (iopte_leaf(ptep[i], lvl, data->iop.fmt)) {
/* We require an unmap first */
- WARN_ON(!selftest_running);
+ WARN_ON(!(data->iop.cfg.quirks & IO_PGTABLE_QUIRK_NO_WARN));
return -EEXIST;
} else if (iopte_type(ptep[i]) == ARM_LPAE_PTE_TYPE_TABLE) {
/*
@@ -473,7 +476,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
cptep = iopte_deref(pte, data);
} else if (pte) {
/* We require an unmap first */
- WARN_ON(!selftest_running);
+ WARN_ON(!(cfg->quirks & IO_PGTABLE_QUIRK_NO_WARN));
return -EEXIST;
}
@@ -641,8 +644,10 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
ptep += unmap_idx_start;
pte = READ_ONCE(*ptep);
- if (WARN_ON(!pte))
- return 0;
+ if (!pte) {
+ WARN_ON(!(data->iop.cfg.quirks & IO_PGTABLE_QUIRK_NO_WARN));
+ return -ENOENT;
+ }
/* If the size matches this level, we're in the right place */
if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
@@ -652,8 +657,10 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
/* Find and handle non-leaf entries */
for (i = 0; i < num_entries; i++) {
pte = READ_ONCE(ptep[i]);
- if (WARN_ON(!pte))
+ if (!pte) {
+ WARN_ON(!(data->iop.cfg.quirks & IO_PGTABLE_QUIRK_NO_WARN));
break;
+ }
if (!iopte_leaf(pte, lvl, iop->fmt)) {
__arm_lpae_clear_pte(&ptep[i], &iop->cfg, 1);
@@ -968,7 +975,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
IO_PGTABLE_QUIRK_ARM_TTBR1 |
IO_PGTABLE_QUIRK_ARM_OUTER_WBWA |
- IO_PGTABLE_QUIRK_ARM_HD))
+ IO_PGTABLE_QUIRK_ARM_HD |
+ IO_PGTABLE_QUIRK_NO_WARN))
return NULL;
data = arm_lpae_alloc_pgtable(cfg);
@@ -1069,7 +1077,8 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
struct arm_lpae_io_pgtable *data;
typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr;
- if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_S2FWB))
+ if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_S2FWB |
+ IO_PGTABLE_QUIRK_NO_WARN))
return NULL;
data = arm_lpae_alloc_pgtable(cfg);
@@ -1310,7 +1319,6 @@ static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
#define __FAIL(ops, i) ({ \
WARN(1, "selftest: test failed for fmt idx %d\n", (i)); \
arm_lpae_dump_ops(ops); \
- selftest_running = false; \
-EFAULT; \
})
@@ -1326,8 +1334,6 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
size_t size, mapped;
struct io_pgtable_ops *ops;
- selftest_running = true;
-
for (i = 0; i < ARRAY_SIZE(fmts); ++i) {
cfg_cookie = cfg;
ops = alloc_io_pgtable_ops(fmts[i], cfg, cfg);
@@ -1416,7 +1422,6 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
free_io_pgtable_ops(ops);
}
- selftest_running = false;
return 0;
}
@@ -1433,15 +1438,18 @@ static int __init arm_lpae_do_selftests(void)
};
int i, j, k, pass = 0, fail = 0;
- struct device dev;
+ struct faux_device *dev;
struct io_pgtable_cfg cfg = {
.tlb = &dummy_tlb_ops,
.coherent_walk = true,
- .iommu_dev = &dev,
+ .quirks = IO_PGTABLE_QUIRK_NO_WARN,
};
- /* __arm_lpae_alloc_pages() merely needs dev_to_node() to work */
- set_dev_node(&dev, NUMA_NO_NODE);
+ dev = faux_device_create("io-pgtable-test", NULL, 0);
+ if (!dev)
+ return -ENOMEM;
+
+ cfg.iommu_dev = &dev->dev;
for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
for (j = 0; j < ARRAY_SIZE(address_size); ++j) {
@@ -1461,6 +1469,8 @@ static int __init arm_lpae_do_selftests(void)
}
pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail);
+ faux_device_destroy(dev);
+
return fail ? -EFAULT : 0;
}
subsys_initcall(arm_lpae_do_selftests);
diff --git a/drivers/iommu/io-pgtable-dart.c b/drivers/iommu/io-pgtable-dart.c
index 06aca9ab52f9..679bda104797 100644
--- a/drivers/iommu/io-pgtable-dart.c
+++ b/drivers/iommu/io-pgtable-dart.c
@@ -107,14 +107,6 @@ static phys_addr_t iopte_to_paddr(dart_iopte pte,
return paddr;
}
-static void *__dart_alloc_pages(size_t size, gfp_t gfp)
-{
- int order = get_order(size);
-
- VM_BUG_ON((gfp & __GFP_HIGHMEM));
- return iommu_alloc_pages(gfp, order);
-}
-
static int dart_init_pte(struct dart_io_pgtable *data,
unsigned long iova, phys_addr_t paddr,
dart_iopte prot, int num_entries,
@@ -256,13 +248,13 @@ static int dart_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
/* no L2 table present */
if (!pte) {
- cptep = __dart_alloc_pages(tblsz, gfp);
+ cptep = iommu_alloc_pages_sz(gfp, tblsz);
if (!cptep)
return -ENOMEM;
pte = dart_install_table(cptep, ptep, 0, data);
if (pte)
- iommu_free_pages(cptep, get_order(tblsz));
+ iommu_free_pages(cptep);
/* L2 table is present (now) */
pte = READ_ONCE(*ptep);
@@ -413,7 +405,8 @@ apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
cfg->apple_dart_cfg.n_ttbrs = 1 << data->tbl_bits;
for (i = 0; i < cfg->apple_dart_cfg.n_ttbrs; ++i) {
- data->pgd[i] = __dart_alloc_pages(DART_GRANULE(data), GFP_KERNEL);
+ data->pgd[i] =
+ iommu_alloc_pages_sz(GFP_KERNEL, DART_GRANULE(data));
if (!data->pgd[i])
goto out_free_data;
cfg->apple_dart_cfg.ttbr[i] = virt_to_phys(data->pgd[i]);
@@ -423,8 +416,7 @@ apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
out_free_data:
while (--i >= 0) {
- iommu_free_pages(data->pgd[i],
- get_order(DART_GRANULE(data)));
+ iommu_free_pages(data->pgd[i]);
}
kfree(data);
return NULL;
@@ -433,7 +425,6 @@ out_free_data:
static void apple_dart_free_pgtable(struct io_pgtable *iop)
{
struct dart_io_pgtable *data = io_pgtable_to_data(iop);
- int order = get_order(DART_GRANULE(data));
dart_iopte *ptep, *end;
int i;
@@ -445,9 +436,9 @@ static void apple_dart_free_pgtable(struct io_pgtable *iop)
dart_iopte pte = *ptep++;
if (pte)
- iommu_free_pages(iopte_deref(pte, data), order);
+ iommu_free_pages(iopte_deref(pte, data));
}
- iommu_free_pages(data->pgd[i], order);
+ iommu_free_pages(data->pgd[i]);
}
kfree(data);
diff --git a/drivers/iommu/iommu-pages.c b/drivers/iommu/iommu-pages.c
new file mode 100644
index 000000000000..238c09e5166b
--- /dev/null
+++ b/drivers/iommu/iommu-pages.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Google LLC.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+#include "iommu-pages.h"
+#include <linux/gfp.h>
+#include <linux/mm.h>
+
+#define IOPTDESC_MATCH(pg_elm, elm) \
+ static_assert(offsetof(struct page, pg_elm) == \
+ offsetof(struct ioptdesc, elm))
+IOPTDESC_MATCH(flags, __page_flags);
+IOPTDESC_MATCH(lru, iopt_freelist_elm); /* Ensure bit 0 is clear */
+IOPTDESC_MATCH(mapping, __page_mapping);
+IOPTDESC_MATCH(private, _private);
+IOPTDESC_MATCH(page_type, __page_type);
+IOPTDESC_MATCH(_refcount, __page_refcount);
+#ifdef CONFIG_MEMCG
+IOPTDESC_MATCH(memcg_data, memcg_data);
+#endif
+#undef IOPTDESC_MATCH
+static_assert(sizeof(struct ioptdesc) <= sizeof(struct page));
+
+/**
+ * iommu_alloc_pages_node_sz - Allocate a zeroed page of a given size from
+ * specific NUMA node
+ * @nid: memory NUMA node id
+ * @gfp: buddy allocator flags
+ * @size: Memory size to allocate, rounded up to a power of 2
+ *
+ * Returns the virtual address of the allocated page. The page must be freed
+ * either by calling iommu_free_pages() or via iommu_put_pages_list(). The
+ * returned allocation is round_up_pow_two(size) big, and is physically aligned
+ * to its size.
+ */
+void *iommu_alloc_pages_node_sz(int nid, gfp_t gfp, size_t size)
+{
+ unsigned long pgcnt;
+ struct folio *folio;
+ unsigned int order;
+
+ /* This uses page_address() on the memory. */
+ if (WARN_ON(gfp & __GFP_HIGHMEM))
+ return NULL;
+
+ /*
+ * Currently sub page allocations result in a full page being returned.
+ */
+ order = get_order(size);
+
+ /*
+ * __folio_alloc_node() does not handle NUMA_NO_NODE like
+ * alloc_pages_node() did.
+ */
+ if (nid == NUMA_NO_NODE)
+ nid = numa_mem_id();
+
+ folio = __folio_alloc_node(gfp | __GFP_ZERO, order, nid);
+ if (unlikely(!folio))
+ return NULL;
+
+ /*
+ * All page allocations that should be reported as "iommu-pagetables"
+ * to userspace must be made through this allocator. This includes
+ * allocations of page-tables and other per-iommu_domain configuration
+ * structures.
+ *
+ * This is necessary for the proper accounting as IOMMU state can be
+ * rather large, i.e. multiple gigabytes in size.
+ */
+ pgcnt = 1UL << order;
+ mod_node_page_state(folio_pgdat(folio), NR_IOMMU_PAGES, pgcnt);
+ lruvec_stat_mod_folio(folio, NR_SECONDARY_PAGETABLE, pgcnt);
+
+ return folio_address(folio);
+}
+EXPORT_SYMBOL_GPL(iommu_alloc_pages_node_sz);
+
+static void __iommu_free_desc(struct ioptdesc *iopt)
+{
+ struct folio *folio = ioptdesc_folio(iopt);
+ const unsigned long pgcnt = 1UL << folio_order(folio);
+
+ mod_node_page_state(folio_pgdat(folio), NR_IOMMU_PAGES, -pgcnt);
+ lruvec_stat_mod_folio(folio, NR_SECONDARY_PAGETABLE, -pgcnt);
+ folio_put(folio);
+}
+
+/**
+ * iommu_free_pages - free pages
+ * @virt: virtual address of the page to be freed.
+ *
+ * The page must have been allocated by iommu_alloc_pages_node_sz()
+ */
+void iommu_free_pages(void *virt)
+{
+ if (!virt)
+ return;
+ __iommu_free_desc(virt_to_ioptdesc(virt));
+}
+EXPORT_SYMBOL_GPL(iommu_free_pages);
+
+/**
+ * iommu_put_pages_list - free a list of pages.
+ * @list: The list of pages to be freed
+ *
+ * Frees a list of pages allocated by iommu_alloc_pages_node_sz(). On return the
+ * passed list is invalid, the caller must use IOMMU_PAGES_LIST_INIT to reinit
+ * the list if it expects to use it again.
+ */
+void iommu_put_pages_list(struct iommu_pages_list *list)
+{
+ struct ioptdesc *iopt, *tmp;
+
+ list_for_each_entry_safe(iopt, tmp, &list->pages, iopt_freelist_elm)
+ __iommu_free_desc(iopt);
+}
+EXPORT_SYMBOL_GPL(iommu_put_pages_list);
diff --git a/drivers/iommu/iommu-pages.h b/drivers/iommu/iommu-pages.h
index 82ebf0033081..b3af2813ed0c 100644
--- a/drivers/iommu/iommu-pages.h
+++ b/drivers/iommu/iommu-pages.h
@@ -7,180 +7,95 @@
#ifndef __IOMMU_PAGES_H
#define __IOMMU_PAGES_H
-#include <linux/vmstat.h>
-#include <linux/gfp.h>
-#include <linux/mm.h>
-
-/*
- * All page allocations that should be reported to as "iommu-pagetables" to
- * userspace must use one of the functions below. This includes allocations of
- * page-tables and other per-iommu_domain configuration structures.
- *
- * This is necessary for the proper accounting as IOMMU state can be rather
- * large, i.e. multiple gigabytes in size.
- */
-
-/**
- * __iommu_alloc_account - account for newly allocated page.
- * @page: head struct page of the page.
- * @order: order of the page
- */
-static inline void __iommu_alloc_account(struct page *page, int order)
-{
- const long pgcnt = 1l << order;
-
- mod_node_page_state(page_pgdat(page), NR_IOMMU_PAGES, pgcnt);
- mod_lruvec_page_state(page, NR_SECONDARY_PAGETABLE, pgcnt);
-}
-
-/**
- * __iommu_free_account - account a page that is about to be freed.
- * @page: head struct page of the page.
- * @order: order of the page
- */
-static inline void __iommu_free_account(struct page *page, int order)
-{
- const long pgcnt = 1l << order;
-
- mod_node_page_state(page_pgdat(page), NR_IOMMU_PAGES, -pgcnt);
- mod_lruvec_page_state(page, NR_SECONDARY_PAGETABLE, -pgcnt);
-}
+#include <linux/iommu.h>
/**
- * __iommu_alloc_pages - allocate a zeroed page of a given order.
- * @gfp: buddy allocator flags
- * @order: page order
+ * struct ioptdesc - Memory descriptor for IOMMU page tables
+ * @iopt_freelist_elm: List element for a struct iommu_pages_list
*
- * returns the head struct page of the allocated page.
+ * This struct overlays struct page for now. Do not modify without a good
+ * understanding of the issues.
*/
-static inline struct page *__iommu_alloc_pages(gfp_t gfp, int order)
+struct ioptdesc {
+ unsigned long __page_flags;
+
+ struct list_head iopt_freelist_elm;
+ unsigned long __page_mapping;
+ pgoff_t __index;
+ void *_private;
+
+ unsigned int __page_type;
+ atomic_t __page_refcount;
+#ifdef CONFIG_MEMCG
+ unsigned long memcg_data;
+#endif
+};
+
+static inline struct ioptdesc *folio_ioptdesc(struct folio *folio)
{
- struct page *page;
-
- page = alloc_pages(gfp | __GFP_ZERO, order);
- if (unlikely(!page))
- return NULL;
-
- __iommu_alloc_account(page, order);
-
- return page;
+ return (struct ioptdesc *)folio;
}
-/**
- * __iommu_free_pages - free page of a given order
- * @page: head struct page of the page
- * @order: page order
- */
-static inline void __iommu_free_pages(struct page *page, int order)
+static inline struct folio *ioptdesc_folio(struct ioptdesc *iopt)
{
- if (!page)
- return;
-
- __iommu_free_account(page, order);
- __free_pages(page, order);
+ return (struct folio *)iopt;
}
-/**
- * iommu_alloc_pages_node - allocate a zeroed page of a given order from
- * specific NUMA node.
- * @nid: memory NUMA node id
- * @gfp: buddy allocator flags
- * @order: page order
- *
- * returns the virtual address of the allocated page
- */
-static inline void *iommu_alloc_pages_node(int nid, gfp_t gfp, int order)
+static inline struct ioptdesc *virt_to_ioptdesc(void *virt)
{
- struct page *page = alloc_pages_node(nid, gfp | __GFP_ZERO, order);
-
- if (unlikely(!page))
- return NULL;
-
- __iommu_alloc_account(page, order);
-
- return page_address(page);
+ return folio_ioptdesc(virt_to_folio(virt));
}
-/**
- * iommu_alloc_pages - allocate a zeroed page of a given order
- * @gfp: buddy allocator flags
- * @order: page order
- *
- * returns the virtual address of the allocated page
- */
-static inline void *iommu_alloc_pages(gfp_t gfp, int order)
-{
- struct page *page = __iommu_alloc_pages(gfp, order);
-
- if (unlikely(!page))
- return NULL;
-
- return page_address(page);
-}
+void *iommu_alloc_pages_node_sz(int nid, gfp_t gfp, size_t size);
+void iommu_free_pages(void *virt);
+void iommu_put_pages_list(struct iommu_pages_list *list);
/**
- * iommu_alloc_page_node - allocate a zeroed page at specific NUMA node.
- * @nid: memory NUMA node id
- * @gfp: buddy allocator flags
- *
- * returns the virtual address of the allocated page
+ * iommu_pages_list_add - add the page to an iommu_pages_list
+ * @list: List to add the page to
+ * @virt: Address returned from iommu_alloc_pages_node_sz()
*/
-static inline void *iommu_alloc_page_node(int nid, gfp_t gfp)
+static inline void iommu_pages_list_add(struct iommu_pages_list *list,
+ void *virt)
{
- return iommu_alloc_pages_node(nid, gfp, 0);
+ list_add_tail(&virt_to_ioptdesc(virt)->iopt_freelist_elm, &list->pages);
}
/**
- * iommu_alloc_page - allocate a zeroed page
- * @gfp: buddy allocator flags
+ * iommu_pages_list_splice - Move all the pages of list @from into list @to
+ * @from: Source list of pages
+ * @to: Destination list of pages
*
- * returns the virtual address of the allocated page
+ * from must be re-initialized after calling this function if it is to be
+ * used again.
*/
-static inline void *iommu_alloc_page(gfp_t gfp)
+static inline void iommu_pages_list_splice(struct iommu_pages_list *from,
+ struct iommu_pages_list *to)
{
- return iommu_alloc_pages(gfp, 0);
+ list_splice(&from->pages, &to->pages);
}
/**
- * iommu_free_pages - free page of a given order
- * @virt: virtual address of the page to be freed.
- * @order: page order
+ * iommu_pages_list_empty - True if the list is empty
+ * @list: List to check
*/
-static inline void iommu_free_pages(void *virt, int order)
+static inline bool iommu_pages_list_empty(struct iommu_pages_list *list)
{
- if (!virt)
- return;
-
- __iommu_free_pages(virt_to_page(virt), order);
+ return list_empty(&list->pages);
}
/**
- * iommu_free_page - free page
- * @virt: virtual address of the page to be freed.
- */
-static inline void iommu_free_page(void *virt)
-{
- iommu_free_pages(virt, 0);
-}
-
-/**
- * iommu_put_pages_list - free a list of pages.
- * @page: the head of the lru list to be freed.
+ * iommu_alloc_pages_sz - Allocate a zeroed page of a given size
+ * @gfp: buddy allocator flags
+ * @size: Memory size to allocate, this is rounded up to a power of 2
+ *
+ * Allocates via iommu_alloc_pages_node_sz() with NUMA_NO_NODE.
*
- * There are no locking requirement for these pages, as they are going to be
- * put on a free list as soon as refcount reaches 0. Pages are put on this LRU
- * list once they are removed from the IOMMU page tables. However, they can
- * still be access through debugfs.
+ * Returns the virtual address of the allocated page.
*/
-static inline void iommu_put_pages_list(struct list_head *page)
+static inline void *iommu_alloc_pages_sz(gfp_t gfp, size_t size)
{
- while (!list_empty(page)) {
- struct page *p = list_entry(page->prev, struct page, lru);
-
- list_del(&p->lru);
- __iommu_free_account(p, 0);
- put_page(p);
- }
+ return iommu_alloc_pages_node_sz(NUMA_NO_NODE, gfp, size);
}
#endif /* __IOMMU_PAGES_H */
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index ab18bc494eef..1a51cfd82808 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -63,9 +63,6 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de
* reference is taken. Caller must call iommu_sva_unbind_device()
* to release each reference.
*
- * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to
- * initialize the required SVA features.
- *
* On error, returns an ERR_PTR value.
*/
struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
@@ -299,15 +296,12 @@ static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
const struct iommu_ops *ops = dev_iommu_ops(dev);
struct iommu_domain *domain;
- if (ops->domain_alloc_sva) {
- domain = ops->domain_alloc_sva(dev, mm);
- if (IS_ERR(domain))
- return domain;
- } else {
- domain = ops->domain_alloc(IOMMU_DOMAIN_SVA);
- if (!domain)
- return ERR_PTR(-ENOMEM);
- }
+ if (!ops->domain_alloc_sva)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ domain = ops->domain_alloc_sva(dev, mm);
+ if (IS_ERR(domain))
+ return domain;
domain->type = IOMMU_DOMAIN_SVA;
domain->cookie_type = IOMMU_COOKIE_SVA;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 9d728800a862..a4b606c591da 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -277,6 +277,8 @@ int iommu_device_register(struct iommu_device *iommu,
err = bus_iommu_probe(iommu_buses[i]);
if (err)
iommu_device_unregister(iommu);
+ else
+ WRITE_ONCE(iommu->ready, true);
return err;
}
EXPORT_SYMBOL_GPL(iommu_device_register);
@@ -422,13 +424,15 @@ static int iommu_init_device(struct device *dev)
* is buried in the bus dma_configure path. Properly unpicking that is
* still a big job, so for now just invoke the whole thing. The device
* already having a driver bound means dma_configure has already run and
- * either found no IOMMU to wait for, or we're in its replay call right
- * now, so either way there's no point calling it again.
+ * found no IOMMU to wait for, so there's no point calling it again.
*/
- if (!dev->driver && dev->bus->dma_configure) {
+ if (!dev->iommu->fwspec && !dev->driver && dev->bus->dma_configure) {
mutex_unlock(&iommu_probe_device_lock);
dev->bus->dma_configure(dev);
mutex_lock(&iommu_probe_device_lock);
+ /* If another instance finished the job for us, skip it */
+ if (!dev->iommu || dev->iommu_group)
+ return -ENODEV;
}
/*
* At this point, relevant devices either now have a fwspec which will
@@ -1629,15 +1633,13 @@ static struct iommu_domain *__iommu_alloc_identity_domain(struct device *dev)
if (ops->identity_domain)
return ops->identity_domain;
- /* Older drivers create the identity domain via ops->domain_alloc() */
- if (!ops->domain_alloc)
+ if (ops->domain_alloc_identity) {
+ domain = ops->domain_alloc_identity(dev);
+ if (IS_ERR(domain))
+ return domain;
+ } else {
return ERR_PTR(-EOPNOTSUPP);
-
- domain = ops->domain_alloc(IOMMU_DOMAIN_IDENTITY);
- if (IS_ERR(domain))
- return domain;
- if (!domain)
- return ERR_PTR(-ENOMEM);
+ }
iommu_domain_init(domain, IOMMU_DOMAIN_IDENTITY, ops);
return domain;
@@ -2025,8 +2027,10 @@ __iommu_paging_domain_alloc_flags(struct device *dev, unsigned int type,
domain = ops->domain_alloc_paging(dev);
else if (ops->domain_alloc_paging_flags)
domain = ops->domain_alloc_paging_flags(dev, flags, NULL);
+#if IS_ENABLED(CONFIG_FSL_PAMU)
else if (ops->domain_alloc && !flags)
domain = ops->domain_alloc(IOMMU_DOMAIN_UNMANAGED);
+#endif
else
return ERR_PTR(-EOPNOTSUPP);
@@ -2204,6 +2208,19 @@ static void *iommu_make_pasid_array_entry(struct iommu_domain *domain,
return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN);
}
+static bool domain_iommu_ops_compatible(const struct iommu_ops *ops,
+ struct iommu_domain *domain)
+{
+ if (domain->owner == ops)
+ return true;
+
+ /* For static domains, owner isn't set. */
+ if (domain == ops->blocked_domain || domain == ops->identity_domain)
+ return true;
+
+ return false;
+}
+
static int __iommu_attach_group(struct iommu_domain *domain,
struct iommu_group *group)
{
@@ -2214,7 +2231,8 @@ static int __iommu_attach_group(struct iommu_domain *domain,
return -EBUSY;
dev = iommu_group_first_dev(group);
- if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner)
+ if (!dev_has_iommu(dev) ||
+ !domain_iommu_ops_compatible(dev_iommu_ops(dev), domain))
return -EINVAL;
return __iommu_group_set_domain(group, domain);
@@ -2395,6 +2413,7 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
unsigned int pgsize_idx, pgsize_idx_next;
unsigned long pgsizes;
size_t offset, pgsize, pgsize_next;
+ size_t offset_end;
unsigned long addr_merge = paddr | iova;
/* Page sizes supported by the hardware and small enough for @size */
@@ -2435,7 +2454,8 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
* If size is big enough to accommodate the larger page, reduce
* the number of smaller pages.
*/
- if (offset + pgsize_next <= size)
+ if (!check_add_overflow(offset, pgsize_next, &offset_end) &&
+ offset_end <= size)
size = offset;
out_set_count:
@@ -2443,8 +2463,8 @@ out_set_count:
return pgsize;
}
-static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
const struct iommu_domain_ops *ops = domain->ops;
unsigned long orig_iova = iova;
@@ -2453,12 +2473,19 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t orig_paddr = paddr;
int ret = 0;
+ might_sleep_if(gfpflags_allow_blocking(gfp));
+
if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
return -EINVAL;
if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL))
return -ENODEV;
+ /* Discourage passing strange GFP flags */
+ if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
+ __GFP_HIGHMEM)))
+ return -EINVAL;
+
/* find out the minimum page size supported */
min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
@@ -2506,31 +2533,27 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
return ret;
}
-int iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size)
{
const struct iommu_domain_ops *ops = domain->ops;
- int ret;
-
- might_sleep_if(gfpflags_allow_blocking(gfp));
- /* Discourage passing strange GFP flags */
- if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
- __GFP_HIGHMEM)))
- return -EINVAL;
+ if (!ops->iotlb_sync_map)
+ return 0;
+ return ops->iotlb_sync_map(domain, iova, size);
+}
- ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
- if (ret == 0 && ops->iotlb_sync_map) {
- ret = ops->iotlb_sync_map(domain, iova, size);
- if (ret)
- goto out_err;
- }
+int iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+ int ret;
- return ret;
+ ret = iommu_map_nosync(domain, iova, paddr, size, prot, gfp);
+ if (ret)
+ return ret;
-out_err:
- /* undo mappings already done */
- iommu_unmap(domain, iova, size);
+ ret = iommu_sync_map(domain, iova, size);
+ if (ret)
+ iommu_unmap(domain, iova, size);
return ret;
}
@@ -2618,6 +2641,25 @@ size_t iommu_unmap(struct iommu_domain *domain,
}
EXPORT_SYMBOL_GPL(iommu_unmap);
+/**
+ * iommu_unmap_fast() - Remove mappings from a range of IOVA without IOTLB sync
+ * @domain: Domain to manipulate
+ * @iova: IO virtual address to start
+ * @size: Length of the range starting from @iova
+ * @iotlb_gather: range information for a pending IOTLB flush
+ *
+ * iommu_unmap_fast() will remove a translation created by iommu_map().
+ * It can't subdivide a mapping created by iommu_map(), so it should be
+ * called with IOVA ranges that match what was passed to iommu_map(). The
+ * range can aggregate contiguous iommu_map() calls so long as no individual
+ * range is split.
+ *
+ * Basically iommu_unmap_fast() is the same as iommu_unmap() but for callers
+ * which manage the IOTLB flushing externally to perform a batched sync.
+ *
+ * Returns: Number of bytes of IOVA unmapped. iova + res will be the point
+ * unmapping stopped.
+ */
size_t iommu_unmap_fast(struct iommu_domain *domain,
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather)
@@ -2630,26 +2672,17 @@ ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
struct scatterlist *sg, unsigned int nents, int prot,
gfp_t gfp)
{
- const struct iommu_domain_ops *ops = domain->ops;
size_t len = 0, mapped = 0;
phys_addr_t start;
unsigned int i = 0;
int ret;
- might_sleep_if(gfpflags_allow_blocking(gfp));
-
- /* Discourage passing strange GFP flags */
- if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
- __GFP_HIGHMEM)))
- return -EINVAL;
-
while (i <= nents) {
phys_addr_t s_phys = sg_phys(sg);
if (len && s_phys != start + len) {
- ret = __iommu_map(domain, iova + mapped, start,
+ ret = iommu_map_nosync(domain, iova + mapped, start,
len, prot, gfp);
-
if (ret)
goto out_err;
@@ -2672,11 +2705,10 @@ next:
sg = sg_next(sg);
}
- if (ops->iotlb_sync_map) {
- ret = ops->iotlb_sync_map(domain, iova, mapped);
- if (ret)
- goto out_err;
- }
+ ret = iommu_sync_map(domain, iova, mapped);
+ if (ret)
+ goto out_err;
+
return mapped;
out_err:
@@ -2830,31 +2862,39 @@ bool iommu_default_passthrough(void)
}
EXPORT_SYMBOL_GPL(iommu_default_passthrough);
-const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode)
+static const struct iommu_device *iommu_from_fwnode(const struct fwnode_handle *fwnode)
{
- const struct iommu_ops *ops = NULL;
- struct iommu_device *iommu;
+ const struct iommu_device *iommu, *ret = NULL;
spin_lock(&iommu_device_lock);
list_for_each_entry(iommu, &iommu_device_list, list)
if (iommu->fwnode == fwnode) {
- ops = iommu->ops;
+ ret = iommu;
break;
}
spin_unlock(&iommu_device_lock);
- return ops;
+ return ret;
+}
+
+const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode)
+{
+ const struct iommu_device *iommu = iommu_from_fwnode(fwnode);
+
+ return iommu ? iommu->ops : NULL;
}
int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode)
{
- const struct iommu_ops *ops = iommu_ops_from_fwnode(iommu_fwnode);
+ const struct iommu_device *iommu = iommu_from_fwnode(iommu_fwnode);
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- if (!ops)
+ if (!iommu)
return driver_deferred_probe_check_state(dev);
+ if (!dev->iommu && !READ_ONCE(iommu->ready))
+ return -EPROBE_DEFER;
if (fwspec)
- return ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL;
+ return iommu->ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL;
if (!dev_iommu_get(dev))
return -ENOMEM;
@@ -2908,38 +2948,6 @@ int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids)
}
EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
-/*
- * Per device IOMMU features.
- */
-int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
-{
- if (dev_has_iommu(dev)) {
- const struct iommu_ops *ops = dev_iommu_ops(dev);
-
- if (ops->dev_enable_feat)
- return ops->dev_enable_feat(dev, feat);
- }
-
- return -ENODEV;
-}
-EXPORT_SYMBOL_GPL(iommu_dev_enable_feature);
-
-/*
- * The device drivers should do the necessary cleanups before calling this.
- */
-int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
-{
- if (dev_has_iommu(dev)) {
- const struct iommu_ops *ops = dev_iommu_ops(dev);
-
- if (ops->dev_disable_feat)
- return ops->dev_disable_feat(dev, feat);
- }
-
- return -EBUSY;
-}
-EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
-
/**
* iommu_setup_default_domain - Set the default_domain for the group
* @group: Group to change
@@ -3442,7 +3450,8 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
!ops->blocked_domain->ops->set_dev_pasid)
return -EOPNOTSUPP;
- if (ops != domain->owner || pasid == IOMMU_NO_PASID)
+ if (!domain_iommu_ops_compatible(ops, domain) ||
+ pasid == IOMMU_NO_PASID)
return -EINVAL;
mutex_lock(&group->mutex);
@@ -3524,7 +3533,7 @@ int iommu_replace_device_pasid(struct iommu_domain *domain,
if (!domain->ops->set_dev_pasid)
return -EOPNOTSUPP;
- if (dev_iommu_ops(dev) != domain->owner ||
+ if (!domain_iommu_ops_compatible(dev_iommu_ops(dev), domain) ||
pasid == IOMMU_NO_PASID || !handle)
return -EINVAL;
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 2111bad72c72..86244403b532 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -221,7 +221,6 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
refcount_inc(&idev->obj.users);
/* igroup refcount moves into iommufd_device */
idev->igroup = igroup;
- mutex_init(&idev->iopf_lock);
/*
* If the caller fails after this success it must call
@@ -425,6 +424,25 @@ static int iommufd_hwpt_pasid_compat(struct iommufd_hw_pagetable *hwpt,
return 0;
}
+static bool iommufd_hwpt_compatible_device(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev)
+{
+ struct pci_dev *pdev;
+
+ if (!hwpt->fault || !dev_is_pci(idev->dev))
+ return true;
+
+ /*
+ * Once we turn on PCI/PRI support for VF, the response failure code
+ * should not be forwarded to the hardware due to PRI being a shared
+ * resource between PF and VFs. There is no coordination for this
+ * shared capability. This waits for a vPRI reset to recover.
+ */
+ pdev = to_pci_dev(idev->dev);
+
+ return (!pdev->is_virtfn || !pci_pri_supported(pdev));
+}
+
static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
struct iommufd_device *idev,
ioasid_t pasid)
@@ -432,6 +450,9 @@ static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
struct iommufd_attach_handle *handle;
int rc;
+ if (!iommufd_hwpt_compatible_device(hwpt, idev))
+ return -EINVAL;
+
rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
if (rc)
return rc;
@@ -440,12 +461,6 @@ static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
if (!handle)
return -ENOMEM;
- if (hwpt->fault) {
- rc = iommufd_fault_iopf_enable(idev);
- if (rc)
- goto out_free_handle;
- }
-
handle->idev = idev;
if (pasid == IOMMU_NO_PASID)
rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group,
@@ -454,13 +469,10 @@ static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
rc = iommu_attach_device_pasid(hwpt->domain, idev->dev, pasid,
&handle->handle);
if (rc)
- goto out_disable_iopf;
+ goto out_free_handle;
return 0;
-out_disable_iopf:
- if (hwpt->fault)
- iommufd_fault_iopf_disable(idev);
out_free_handle:
kfree(handle);
return rc;
@@ -492,10 +504,7 @@ static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
else
iommu_detach_device_pasid(hwpt->domain, idev->dev, pasid);
- if (hwpt->fault) {
- iommufd_auto_response_faults(hwpt, handle);
- iommufd_fault_iopf_disable(idev);
- }
+ iommufd_auto_response_faults(hwpt, handle);
kfree(handle);
}
@@ -507,6 +516,9 @@ static int iommufd_hwpt_replace_device(struct iommufd_device *idev,
struct iommufd_attach_handle *handle, *old_handle;
int rc;
+ if (!iommufd_hwpt_compatible_device(hwpt, idev))
+ return -EINVAL;
+
rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
if (rc)
return rc;
@@ -517,12 +529,6 @@ static int iommufd_hwpt_replace_device(struct iommufd_device *idev,
if (!handle)
return -ENOMEM;
- if (hwpt->fault && !old->fault) {
- rc = iommufd_fault_iopf_enable(idev);
- if (rc)
- goto out_free_handle;
- }
-
handle->idev = idev;
if (pasid == IOMMU_NO_PASID)
rc = iommu_replace_group_handle(idev->igroup->group,
@@ -531,20 +537,13 @@ static int iommufd_hwpt_replace_device(struct iommufd_device *idev,
rc = iommu_replace_device_pasid(hwpt->domain, idev->dev,
pasid, &handle->handle);
if (rc)
- goto out_disable_iopf;
+ goto out_free_handle;
- if (old->fault) {
- iommufd_auto_response_faults(hwpt, old_handle);
- if (!hwpt->fault)
- iommufd_fault_iopf_disable(idev);
- }
+ iommufd_auto_response_faults(hwpt, old_handle);
kfree(old_handle);
return 0;
-out_disable_iopf:
- if (hwpt->fault && !old->fault)
- iommufd_fault_iopf_disable(idev);
out_free_handle:
kfree(handle);
return rc;
diff --git a/drivers/iommu/iommufd/eventq.c b/drivers/iommu/iommufd/eventq.c
index f39cf0797347..e373b9eec7f5 100644
--- a/drivers/iommu/iommufd/eventq.c
+++ b/drivers/iommu/iommufd/eventq.c
@@ -9,8 +9,6 @@
#include <linux/iommufd.h>
#include <linux/module.h>
#include <linux/mutex.h>
-#include <linux/pci.h>
-#include <linux/pci-ats.h>
#include <linux/poll.h>
#include <uapi/linux/iommufd.h>
@@ -18,50 +16,6 @@
#include "iommufd_private.h"
/* IOMMUFD_OBJ_FAULT Functions */
-
-int iommufd_fault_iopf_enable(struct iommufd_device *idev)
-{
- struct device *dev = idev->dev;
- int ret;
-
- /*
- * Once we turn on PCI/PRI support for VF, the response failure code
- * should not be forwarded to the hardware due to PRI being a shared
- * resource between PF and VFs. There is no coordination for this
- * shared capability. This waits for a vPRI reset to recover.
- */
- if (dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
-
- if (pdev->is_virtfn && pci_pri_supported(pdev))
- return -EINVAL;
- }
-
- mutex_lock(&idev->iopf_lock);
- /* Device iopf has already been on. */
- if (++idev->iopf_enabled > 1) {
- mutex_unlock(&idev->iopf_lock);
- return 0;
- }
-
- ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
- if (ret)
- --idev->iopf_enabled;
- mutex_unlock(&idev->iopf_lock);
-
- return ret;
-}
-
-void iommufd_fault_iopf_disable(struct iommufd_device *idev)
-{
- mutex_lock(&idev->iopf_lock);
- if (!WARN_ON(idev->iopf_enabled == 0)) {
- if (--idev->iopf_enabled == 0)
- iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);
- }
- mutex_unlock(&idev->iopf_lock);
-}
-
void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
struct iommufd_attach_handle *handle)
{
@@ -70,7 +24,7 @@ void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
struct list_head free_list;
unsigned long index;
- if (!fault)
+ if (!fault || !handle)
return;
INIT_LIST_HEAD(&free_list);
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 80e8c76d25f2..9ccc83341f32 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -425,9 +425,6 @@ struct iommufd_device {
/* always the physical device */
struct device *dev;
bool enforce_cache_coherency;
- /* protect iopf_enabled counter */
- struct mutex iopf_lock;
- unsigned int iopf_enabled;
};
static inline struct iommufd_device *
@@ -506,9 +503,6 @@ iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id)
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
void iommufd_fault_destroy(struct iommufd_object *obj);
int iommufd_fault_iopf_handler(struct iopf_group *group);
-
-int iommufd_fault_iopf_enable(struct iommufd_device *idev);
-void iommufd_fault_iopf_disable(struct iommufd_device *idev);
void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
struct iommufd_attach_handle *handle);
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index 18d9a216eb30..6bd0abf9a641 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -58,6 +58,9 @@ enum {
MOCK_PFN_HUGE_IOVA = _MOCK_PFN_START << 2,
};
+static int mock_dev_enable_iopf(struct device *dev, struct iommu_domain *domain);
+static void mock_dev_disable_iopf(struct device *dev, struct iommu_domain *domain);
+
/*
* Syzkaller has trouble randomizing the correct iova to use since it is linked
* to the map ioctl's output, and it has no ide about that. So, simplify things.
@@ -168,6 +171,8 @@ struct mock_dev {
int id;
u32 cache[MOCK_DEV_CACHE_NUM];
atomic_t pasid_1024_fake_error;
+ unsigned int iopf_refcount;
+ struct iommu_domain *domain;
};
static inline struct mock_dev *to_mock_dev(struct device *dev)
@@ -221,6 +226,13 @@ static int mock_domain_nop_attach(struct iommu_domain *domain,
up_write(&mdev->viommu_rwsem);
}
+ rc = mock_dev_enable_iopf(dev, domain);
+ if (rc)
+ return rc;
+
+ mock_dev_disable_iopf(dev, mdev->domain);
+ mdev->domain = domain;
+
return 0;
}
@@ -229,6 +241,7 @@ static int mock_domain_set_dev_pasid_nop(struct iommu_domain *domain,
struct iommu_domain *old)
{
struct mock_dev *mdev = to_mock_dev(dev);
+ int rc;
/*
* Per the first attach with pasid 1024, set the
@@ -256,6 +269,12 @@ static int mock_domain_set_dev_pasid_nop(struct iommu_domain *domain,
}
}
+ rc = mock_dev_enable_iopf(dev, domain);
+ if (rc)
+ return rc;
+
+ mock_dev_disable_iopf(dev, old);
+
return 0;
}
@@ -610,22 +629,42 @@ static void mock_domain_page_response(struct device *dev, struct iopf_fault *evt
{
}
-static int mock_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
+static int mock_dev_enable_iopf(struct device *dev, struct iommu_domain *domain)
{
- if (feat != IOMMU_DEV_FEAT_IOPF || !mock_iommu_iopf_queue)
+ struct mock_dev *mdev = to_mock_dev(dev);
+ int ret;
+
+ if (!domain || !domain->iopf_handler)
+ return 0;
+
+ if (!mock_iommu_iopf_queue)
return -ENODEV;
- return iopf_queue_add_device(mock_iommu_iopf_queue, dev);
+ if (mdev->iopf_refcount) {
+ mdev->iopf_refcount++;
+ return 0;
+ }
+
+ ret = iopf_queue_add_device(mock_iommu_iopf_queue, dev);
+ if (ret)
+ return ret;
+
+ mdev->iopf_refcount = 1;
+
+ return 0;
}
-static int mock_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
+static void mock_dev_disable_iopf(struct device *dev, struct iommu_domain *domain)
{
- if (feat != IOMMU_DEV_FEAT_IOPF || !mock_iommu_iopf_queue)
- return -ENODEV;
+ struct mock_dev *mdev = to_mock_dev(dev);
- iopf_queue_remove_device(mock_iommu_iopf_queue, dev);
+ if (!domain || !domain->iopf_handler)
+ return;
- return 0;
+ if (--mdev->iopf_refcount)
+ return;
+
+ iopf_queue_remove_device(mock_iommu_iopf_queue, dev);
}
static void mock_viommu_destroy(struct iommufd_viommu *viommu)
@@ -770,8 +809,6 @@ static const struct iommu_ops mock_ops = {
.device_group = generic_device_group,
.probe_device = mock_probe_device,
.page_response = mock_domain_page_response,
- .dev_enable_feat = mock_dev_enable_feat,
- .dev_disable_feat = mock_dev_disable_feat,
.user_pasid_table = true,
.viommu_alloc = mock_viommu_alloc,
.default_domain_ops =
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index e424b279a8cd..90341b24a811 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1090,7 +1090,8 @@ static int ipmmu_probe(struct platform_device *pdev)
if (mmu->features->has_cache_leaf_nodes && ipmmu_is_root(mmu))
return 0;
- ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, dev_name(&pdev->dev));
+ ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, "%s",
+ dev_name(&pdev->dev));
if (ret)
return ret;
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index df98d0c65f54..cb95fecf6016 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1550,6 +1550,31 @@ static const struct mtk_iommu_plat_data mt6795_data = {
.larbid_remap = {{0}, {1}, {2}, {3}, {4}}, /* Linear mapping. */
};
+static const unsigned int mt8192_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = {
+ [0] = {~0, ~0}, /* Region0: larb0/1 */
+ [1] = {0, 0, 0, 0, ~0, ~0, 0, ~0}, /* Region1: larb4/5/7 */
+ [2] = {0, 0, ~0, 0, 0, 0, 0, 0, /* Region2: larb2/9/11/13/14/16/17/18/19/20 */
+ 0, ~0, 0, ~0, 0, ~(u32)(BIT(9) | BIT(10)), ~(u32)(BIT(4) | BIT(5)), 0,
+ ~0, ~0, ~0, ~0, ~0},
+ [3] = {0},
+ [4] = {[13] = BIT(9) | BIT(10)}, /* larb13 port9/10 */
+ [5] = {[14] = BIT(4) | BIT(5)}, /* larb14 port4/5 */
+};
+
+static const struct mtk_iommu_plat_data mt6893_data = {
+ .m4u_plat = M4U_MT8192,
+ .flags = HAS_BCLK | OUT_ORDER_WR_EN | HAS_SUB_COMM_2BITS |
+ WR_THROT_EN | IOVA_34_EN | SHARE_PGTABLE | MTK_IOMMU_TYPE_MM,
+ .inv_sel_reg = REG_MMU_INV_SEL_GEN2,
+ .banks_num = 1,
+ .banks_enable = {true},
+ .iova_region = mt8192_multi_dom,
+ .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom),
+ .iova_region_larb_msk = mt8192_larb_region_msk,
+ .larbid_remap = {{0}, {1}, {4, 5}, {7}, {2}, {9, 11, 19, 20},
+ {0, 14, 16}, {0, 13, 18, 17}},
+};
+
static const struct mtk_iommu_plat_data mt8167_data = {
.m4u_plat = M4U_MT8167,
.flags = RESET_AXI | HAS_LEGACY_IVRP_PADDR | MTK_IOMMU_TYPE_MM,
@@ -1673,17 +1698,6 @@ static const struct mtk_iommu_plat_data mt8188_data_vpp = {
27, 28 /* ccu0 */, MTK_INVALID_LARBID}, {4, 6}},
};
-static const unsigned int mt8192_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = {
- [0] = {~0, ~0}, /* Region0: larb0/1 */
- [1] = {0, 0, 0, 0, ~0, ~0, 0, ~0}, /* Region1: larb4/5/7 */
- [2] = {0, 0, ~0, 0, 0, 0, 0, 0, /* Region2: larb2/9/11/13/14/16/17/18/19/20 */
- 0, ~0, 0, ~0, 0, ~(u32)(BIT(9) | BIT(10)), ~(u32)(BIT(4) | BIT(5)), 0,
- ~0, ~0, ~0, ~0, ~0},
- [3] = {0},
- [4] = {[13] = BIT(9) | BIT(10)}, /* larb13 port9/10 */
- [5] = {[14] = BIT(4) | BIT(5)}, /* larb14 port4/5 */
-};
-
static const struct mtk_iommu_plat_data mt8192_data = {
.m4u_plat = M4U_MT8192,
.flags = HAS_BCLK | HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN |
@@ -1777,6 +1791,7 @@ static const struct of_device_id mtk_iommu_of_ids[] = {
{ .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data},
{ .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data},
{ .compatible = "mediatek,mt6795-m4u", .data = &mt6795_data},
+ { .compatible = "mediatek,mt6893-iommu-mm", .data = &mt6893_data},
{ .compatible = "mediatek,mt8167-m4u", .data = &mt8167_data},
{ .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data},
{ .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data},
diff --git a/drivers/iommu/riscv/Makefile b/drivers/iommu/riscv/Makefile
index f54c9ed17d41..b5929f9f23e6 100644
--- a/drivers/iommu/riscv/Makefile
+++ b/drivers/iommu/riscv/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_RISCV_IOMMU) += iommu.o iommu-platform.o
+obj-y += iommu.o iommu-platform.o
obj-$(CONFIG_RISCV_IOMMU_PCI) += iommu-pci.o
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index 8f049d4a0e2c..bb57092ca901 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -48,14 +48,13 @@ static DEFINE_IDA(riscv_iommu_pscids);
/* Device resource-managed allocations */
struct riscv_iommu_devres {
void *addr;
- int order;
};
static void riscv_iommu_devres_pages_release(struct device *dev, void *res)
{
struct riscv_iommu_devres *devres = res;
- iommu_free_pages(devres->addr, devres->order);
+ iommu_free_pages(devres->addr);
}
static int riscv_iommu_devres_pages_match(struct device *dev, void *res, void *p)
@@ -66,13 +65,14 @@ static int riscv_iommu_devres_pages_match(struct device *dev, void *res, void *p
return devres->addr == target->addr;
}
-static void *riscv_iommu_get_pages(struct riscv_iommu_device *iommu, int order)
+static void *riscv_iommu_get_pages(struct riscv_iommu_device *iommu,
+ unsigned int size)
{
struct riscv_iommu_devres *devres;
void *addr;
- addr = iommu_alloc_pages_node(dev_to_node(iommu->dev),
- GFP_KERNEL_ACCOUNT, order);
+ addr = iommu_alloc_pages_node_sz(dev_to_node(iommu->dev),
+ GFP_KERNEL_ACCOUNT, size);
if (unlikely(!addr))
return NULL;
@@ -80,12 +80,11 @@ static void *riscv_iommu_get_pages(struct riscv_iommu_device *iommu, int order)
sizeof(struct riscv_iommu_devres), GFP_KERNEL);
if (unlikely(!devres)) {
- iommu_free_pages(addr, order);
+ iommu_free_pages(addr);
return NULL;
}
devres->addr = addr;
- devres->order = order;
devres_add(iommu->dev, devres);
@@ -163,9 +162,9 @@ static int riscv_iommu_queue_alloc(struct riscv_iommu_device *iommu,
} else {
do {
const size_t queue_size = entry_size << (logsz + 1);
- const int order = get_order(queue_size);
- queue->base = riscv_iommu_get_pages(iommu, order);
+ queue->base = riscv_iommu_get_pages(
+ iommu, max(queue_size, SZ_4K));
queue->phys = __pa(queue->base);
} while (!queue->base && logsz-- > 0);
}
@@ -620,7 +619,7 @@ static struct riscv_iommu_dc *riscv_iommu_get_dc(struct riscv_iommu_device *iomm
break;
}
- ptr = riscv_iommu_get_pages(iommu, 0);
+ ptr = riscv_iommu_get_pages(iommu, SZ_4K);
if (!ptr)
return NULL;
@@ -700,7 +699,7 @@ static int riscv_iommu_iodir_alloc(struct riscv_iommu_device *iommu)
}
if (!iommu->ddt_root) {
- iommu->ddt_root = riscv_iommu_get_pages(iommu, 0);
+ iommu->ddt_root = riscv_iommu_get_pages(iommu, SZ_4K);
iommu->ddt_phys = __pa(iommu->ddt_root);
}
@@ -1087,7 +1086,8 @@ static void riscv_iommu_iotlb_sync(struct iommu_domain *iommu_domain,
#define _io_pte_entry(pn, prot) ((_PAGE_PFN_MASK & ((pn) << _PAGE_PFN_SHIFT)) | (prot))
static void riscv_iommu_pte_free(struct riscv_iommu_domain *domain,
- unsigned long pte, struct list_head *freelist)
+ unsigned long pte,
+ struct iommu_pages_list *freelist)
{
unsigned long *ptr;
int i;
@@ -1105,9 +1105,9 @@ static void riscv_iommu_pte_free(struct riscv_iommu_domain *domain,
}
if (freelist)
- list_add_tail(&virt_to_page(ptr)->lru, freelist);
+ iommu_pages_list_add(freelist, ptr);
else
- iommu_free_page(ptr);
+ iommu_free_pages(ptr);
}
static unsigned long *riscv_iommu_pte_alloc(struct riscv_iommu_domain *domain,
@@ -1144,13 +1144,14 @@ pte_retry:
* page table. This might race with other mappings, retry.
*/
if (_io_pte_none(pte)) {
- addr = iommu_alloc_page_node(domain->numa_node, gfp);
+ addr = iommu_alloc_pages_node_sz(domain->numa_node, gfp,
+ SZ_4K);
if (!addr)
return NULL;
old = pte;
pte = _io_pte_entry(virt_to_pfn(addr), _PAGE_TABLE);
if (cmpxchg_relaxed(ptr, old, pte) != old) {
- iommu_free_page(addr);
+ iommu_free_pages(addr);
goto pte_retry;
}
}
@@ -1194,7 +1195,7 @@ static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,
unsigned long *ptr;
unsigned long pte, old, pte_prot;
int rc = 0;
- LIST_HEAD(freelist);
+ struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);
if (!(prot & IOMMU_WRITE))
pte_prot = _PAGE_BASE | _PAGE_READ;
@@ -1225,7 +1226,7 @@ static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,
*mapped = size;
- if (!list_empty(&freelist)) {
+ if (!iommu_pages_list_empty(&freelist)) {
/*
* In 1.0 spec version, the smallest scope we can use to
* invalidate all levels of page table (i.e. leaf and non-leaf)
@@ -1385,8 +1386,8 @@ static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)
domain->numa_node = dev_to_node(iommu->dev);
domain->amo_enabled = !!(iommu->caps & RISCV_IOMMU_CAPABILITIES_AMO_HWAD);
domain->pgd_mode = pgd_mode;
- domain->pgd_root = iommu_alloc_page_node(domain->numa_node,
- GFP_KERNEL_ACCOUNT);
+ domain->pgd_root = iommu_alloc_pages_node_sz(domain->numa_node,
+ GFP_KERNEL_ACCOUNT, SZ_4K);
if (!domain->pgd_root) {
kfree(domain);
return ERR_PTR(-ENOMEM);
@@ -1395,7 +1396,7 @@ static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)
domain->pscid = ida_alloc_range(&riscv_iommu_pscids, 1,
RISCV_IOMMU_MAX_PSCID, GFP_KERNEL);
if (domain->pscid < 0) {
- iommu_free_page(domain->pgd_root);
+ iommu_free_pages(domain->pgd_root);
kfree(domain);
return ERR_PTR(-ENOMEM);
}
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index af4cc91b2bbf..22f74ba33a0e 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -730,14 +730,15 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
if (rk_dte_is_pt_valid(dte))
goto done;
- page_table = iommu_alloc_page(GFP_ATOMIC | rk_ops->gfp_flags);
+ page_table = iommu_alloc_pages_sz(GFP_ATOMIC | rk_ops->gfp_flags,
+ SPAGE_SIZE);
if (!page_table)
return ERR_PTR(-ENOMEM);
pt_dma = dma_map_single(rk_domain->dma_dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE);
if (dma_mapping_error(rk_domain->dma_dev, pt_dma)) {
dev_err(rk_domain->dma_dev, "DMA mapping error while allocating page table\n");
- iommu_free_page(page_table);
+ iommu_free_pages(page_table);
return ERR_PTR(-ENOMEM);
}
@@ -1062,7 +1063,8 @@ static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev)
* Each level1 (dt) and level2 (pt) table has 1024 4-byte entries.
* Allocate one 4 KiB page for each table.
*/
- rk_domain->dt = iommu_alloc_page(GFP_KERNEL | rk_ops->gfp_flags);
+ rk_domain->dt = iommu_alloc_pages_sz(GFP_KERNEL | rk_ops->gfp_flags,
+ SPAGE_SIZE);
if (!rk_domain->dt)
goto err_free_domain;
@@ -1086,7 +1088,7 @@ static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev)
return &rk_domain->domain;
err_free_dt:
- iommu_free_page(rk_domain->dt);
+ iommu_free_pages(rk_domain->dt);
err_free_domain:
kfree(rk_domain);
@@ -1107,13 +1109,13 @@ static void rk_iommu_domain_free(struct iommu_domain *domain)
u32 *page_table = phys_to_virt(pt_phys);
dma_unmap_single(rk_domain->dma_dev, pt_phys,
SPAGE_SIZE, DMA_TO_DEVICE);
- iommu_free_page(page_table);
+ iommu_free_pages(page_table);
}
}
dma_unmap_single(rk_domain->dma_dev, rk_domain->dt_dma,
SPAGE_SIZE, DMA_TO_DEVICE);
- iommu_free_page(rk_domain->dt);
+ iommu_free_pages(rk_domain->dt);
kfree(rk_domain);
}
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index e1c76e0f9c2b..433b59f43530 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -31,10 +31,21 @@ struct s390_domain {
unsigned long *dma_table;
spinlock_t list_lock;
struct rcu_head rcu;
+ u8 origin_type;
};
static struct iommu_domain blocking_domain;
+static inline unsigned int calc_rfx(dma_addr_t ptr)
+{
+ return ((unsigned long)ptr >> ZPCI_RF_SHIFT) & ZPCI_INDEX_MASK;
+}
+
+static inline unsigned int calc_rsx(dma_addr_t ptr)
+{
+ return ((unsigned long)ptr >> ZPCI_RS_SHIFT) & ZPCI_INDEX_MASK;
+}
+
static inline unsigned int calc_rtx(dma_addr_t ptr)
{
return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
@@ -56,6 +67,20 @@ static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
*entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
}
+/*
+ * Make @entry reference the table at physical address @rso. Only the bits of
+ * @entry inside ZPCI_RTE_FLAG_MASK are kept; the ZPCI_RTE_ADDR_MASK bits of
+ * @rso and the ZPCI_TABLE_TYPE_RFX type are then OR-ed in.
+ */
+static inline void set_rf_rso(unsigned long *entry, phys_addr_t rso)
+{
+ *entry &= ZPCI_RTE_FLAG_MASK;
+ *entry |= (rso & ZPCI_RTE_ADDR_MASK);
+ *entry |= ZPCI_TABLE_TYPE_RFX;
+}
+
+/*
+ * Make @entry reference the table at physical address @rto. Only the bits of
+ * @entry inside ZPCI_RTE_FLAG_MASK are kept; the ZPCI_RTE_ADDR_MASK bits of
+ * @rto and the ZPCI_TABLE_TYPE_RSX type are then OR-ed in.
+ */
+static inline void set_rs_rto(unsigned long *entry, phys_addr_t rto)
+{
+ *entry &= ZPCI_RTE_FLAG_MASK;
+ *entry |= (rto & ZPCI_RTE_ADDR_MASK);
+ *entry |= ZPCI_TABLE_TYPE_RSX;
+}
+
static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
{
*entry &= ZPCI_RTE_FLAG_MASK;
@@ -70,6 +95,22 @@ static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
*entry |= ZPCI_TABLE_TYPE_SX;
}
+/*
+ * Clear the ZPCI_TABLE_VALID_MASK and ZPCI_TABLE_OFFSET_MASK bits of @entry,
+ * then set ZPCI_TABLE_VALID and ZPCI_TABLE_LEN_RFX.
+ */
+static inline void validate_rf_entry(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_VALID_MASK;
+ *entry &= ~ZPCI_TABLE_OFFSET_MASK;
+ *entry |= ZPCI_TABLE_VALID;
+ *entry |= ZPCI_TABLE_LEN_RFX;
+}
+
+/*
+ * Clear the ZPCI_TABLE_VALID_MASK and ZPCI_TABLE_OFFSET_MASK bits of @entry,
+ * then set ZPCI_TABLE_VALID and ZPCI_TABLE_LEN_RSX.
+ */
+static inline void validate_rs_entry(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_VALID_MASK;
+ *entry &= ~ZPCI_TABLE_OFFSET_MASK;
+ *entry |= ZPCI_TABLE_VALID;
+ *entry |= ZPCI_TABLE_LEN_RSX;
+}
+
static inline void validate_rt_entry(unsigned long *entry)
{
*entry &= ~ZPCI_TABLE_VALID_MASK;
@@ -120,6 +161,22 @@ static inline int pt_entry_isvalid(unsigned long entry)
return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
}
+/*
+ * Return the virtual address (phys_to_virt()) of the table address stored in
+ * @entry, or NULL when the entry's type bits are not ZPCI_TABLE_TYPE_RFX.
+ * The valid bit is not looked at here.
+ */
+static inline unsigned long *get_rf_rso(unsigned long entry)
+{
+ if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RFX)
+ return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
+ else
+ return NULL;
+}
+
+/*
+ * Return the virtual address (phys_to_virt()) of the table address stored in
+ * @entry, or NULL when the entry's type bits are not ZPCI_TABLE_TYPE_RSX.
+ * The valid bit is not looked at here.
+ */
+static inline unsigned long *get_rs_rto(unsigned long entry)
+{
+ if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RSX)
+ return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
+ else
+ return NULL;
+}
+
static inline unsigned long *get_rt_sto(unsigned long entry)
{
if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
@@ -191,18 +248,59 @@ static void dma_free_seg_table(unsigned long entry)
dma_free_cpu_table(sto);
}
-static void dma_cleanup_tables(unsigned long *table)
+static void dma_free_rt_table(unsigned long entry)
{
+ unsigned long *rto = get_rs_rto(entry);
int rtx;
- if (!table)
+ for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
+ if (reg_entry_isvalid(rto[rtx]))
+ dma_free_seg_table(rto[rtx]);
+
+ dma_free_cpu_table(rto);
+}
+
+/*
+ * Free the table referenced by the RFX-type entry @entry: every valid entry
+ * in it is handed to dma_free_rt_table() first, then the table itself is
+ * released with dma_free_cpu_table().
+ *
+ * NOTE(review): get_rf_rso() returns NULL on a type mismatch and the result
+ * is indexed unchecked below; callers only test reg_entry_isvalid() - confirm
+ * a valid entry can never carry a different type.
+ */
+static void dma_free_rs_table(unsigned long entry)
+{
+ unsigned long *rso = get_rf_rso(entry);
+ int rsx;
+
+ for (rsx = 0; rsx < ZPCI_TABLE_ENTRIES; rsx++)
+ if (reg_entry_isvalid(rso[rsx]))
+ dma_free_rt_table(rso[rsx]);
+
+ dma_free_cpu_table(rso);
+}
+
+static void dma_cleanup_tables(struct s390_domain *domain)
+{
+ int rtx, rsx, rfx;
+
+ if (!domain->dma_table)
return;
- for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
- if (reg_entry_isvalid(table[rtx]))
- dma_free_seg_table(table[rtx]);
+ switch (domain->origin_type) {
+ case ZPCI_TABLE_TYPE_RFX:
+ for (rfx = 0; rfx < ZPCI_TABLE_ENTRIES; rfx++)
+ if (reg_entry_isvalid(domain->dma_table[rfx]))
+ dma_free_rs_table(domain->dma_table[rfx]);
+ break;
+ case ZPCI_TABLE_TYPE_RSX:
+ for (rsx = 0; rsx < ZPCI_TABLE_ENTRIES; rsx++)
+ if (reg_entry_isvalid(domain->dma_table[rsx]))
+ dma_free_rt_table(domain->dma_table[rsx]);
+ break;
+ case ZPCI_TABLE_TYPE_RTX:
+ for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
+ if (reg_entry_isvalid(domain->dma_table[rtx]))
+ dma_free_seg_table(domain->dma_table[rtx]);
+ break;
+ default:
+ WARN_ONCE(1, "Invalid IOMMU table (%x)\n", domain->origin_type);
+ return;
+ }
- dma_free_cpu_table(table);
+ dma_free_cpu_table(domain->dma_table);
}
static unsigned long *dma_alloc_page_table(gfp_t gfp)
@@ -218,6 +316,70 @@ static unsigned long *dma_alloc_page_table(gfp_t gfp)
return table;
}
+/*
+ * Return the table referenced by the entry for @dma_addr in the table @rso.
+ * If that entry is not valid, a new table is allocated with @gfp and
+ * installed with cmpxchg(). Returns NULL when the allocation fails; the
+ * result is also NULL if get_rs_rto() rejects the entry's type.
+ */
+static unsigned long *dma_walk_rs_table(unsigned long *rso,
+ dma_addr_t dma_addr, gfp_t gfp)
+{
+ unsigned int rsx = calc_rsx(dma_addr);
+ unsigned long old_rse, rse;
+ unsigned long *rsep, *rto;
+
+ rsep = &rso[rsx];
+ rse = READ_ONCE(*rsep);
+ if (reg_entry_isvalid(rse)) {
+ rto = get_rs_rto(rse);
+ } else {
+ rto = dma_alloc_cpu_table(gfp);
+ if (!rto)
+ return NULL;
+
+ set_rs_rto(&rse, virt_to_phys(rto));
+ validate_rs_entry(&rse);
+ entry_clr_protected(&rse);
+
+ /* Install only if the slot still holds ZPCI_TABLE_INVALID */
+ old_rse = cmpxchg(rsep, ZPCI_TABLE_INVALID, rse);
+ if (old_rse != ZPCI_TABLE_INVALID) {
+ /* Someone else was faster, use theirs */
+ dma_free_cpu_table(rto);
+ rto = get_rs_rto(old_rse);
+ }
+ }
+ return rto;
+}
+
+/*
+ * Resolve the entry for @dma_addr in the table @rfo to the table it
+ * references, allocating (with @gfp) and installing a new one via cmpxchg()
+ * if the entry is not valid, then continue the walk one level down with
+ * dma_walk_rs_table(). Returns NULL when an allocation fails or when
+ * get_rf_rso() yields no table.
+ */
+static unsigned long *dma_walk_rf_table(unsigned long *rfo,
+ dma_addr_t dma_addr, gfp_t gfp)
+{
+ unsigned int rfx = calc_rfx(dma_addr);
+ unsigned long old_rfe, rfe;
+ unsigned long *rfep, *rso;
+
+ rfep = &rfo[rfx];
+ rfe = READ_ONCE(*rfep);
+ if (reg_entry_isvalid(rfe)) {
+ rso = get_rf_rso(rfe);
+ } else {
+ rso = dma_alloc_cpu_table(gfp);
+ if (!rso)
+ return NULL;
+
+ set_rf_rso(&rfe, virt_to_phys(rso));
+ validate_rf_entry(&rfe);
+ entry_clr_protected(&rfe);
+
+ /* Install only if the slot still holds ZPCI_TABLE_INVALID */
+ old_rfe = cmpxchg(rfep, ZPCI_TABLE_INVALID, rfe);
+ if (old_rfe != ZPCI_TABLE_INVALID) {
+ /* Someone else was faster, use theirs */
+ dma_free_cpu_table(rso);
+ rso = get_rf_rso(old_rfe);
+ }
+ }
+
+ /* get_rf_rso() returns NULL when the entry type is not RFX */
+ if (!rso)
+ return NULL;
+
+ return dma_walk_rs_table(rso, dma_addr, gfp);
+}
+
static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
{
unsigned long old_rte, rte;
@@ -271,11 +433,31 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
return pto;
}
-static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, gfp_t gfp)
+/*
+ * Walk (allocating with @gfp where needed) from the domain's root table down
+ * to the table that dma_walk_cpu_trans() indexes with calc_rtx(). How many
+ * levels are walked depends on the domain's origin_type; an unknown type
+ * yields NULL.
+ */
+static unsigned long *dma_walk_region_tables(struct s390_domain *domain,
+					     dma_addr_t dma_addr, gfp_t gfp)
+{
+	unsigned long *origin = domain->dma_table;
+
+	if (domain->origin_type == ZPCI_TABLE_TYPE_RFX)
+		return dma_walk_rf_table(origin, dma_addr, gfp);
+
+	if (domain->origin_type == ZPCI_TABLE_TYPE_RSX)
+		return dma_walk_rs_table(origin, dma_addr, gfp);
+
+	/* The root already is the table indexed by calc_rtx() */
+	if (domain->origin_type == ZPCI_TABLE_TYPE_RTX)
+		return origin;
+
+	return NULL;
+}
+
+static unsigned long *dma_walk_cpu_trans(struct s390_domain *domain,
+ dma_addr_t dma_addr, gfp_t gfp)
{
- unsigned long *sto, *pto;
+ unsigned long *rto, *sto, *pto;
unsigned int rtx, sx, px;
+ rto = dma_walk_region_tables(domain, dma_addr, gfp);
+ if (!rto)
+ return NULL;
+
rtx = calc_rtx(dma_addr);
sto = dma_get_seg_table_origin(&rto[rtx], gfp);
if (!sto)
@@ -329,9 +511,25 @@ static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
}
}
+/*
+ * Upper bound that goes with the domain's origin_type: ZPCI_TABLE_SIZE_RT - 1
+ * for RTX, ZPCI_TABLE_SIZE_RS - 1 for RSX, U64_MAX for RFX, and 0 for any
+ * other type.
+ */
+static inline u64 max_tbl_size(struct s390_domain *domain)
+{
+	if (domain->origin_type == ZPCI_TABLE_TYPE_RTX)
+		return ZPCI_TABLE_SIZE_RT - 1;
+
+	if (domain->origin_type == ZPCI_TABLE_TYPE_RSX)
+		return ZPCI_TABLE_SIZE_RS - 1;
+
+	if (domain->origin_type == ZPCI_TABLE_TYPE_RFX)
+		return U64_MAX;
+
+	return 0;
+}
+
static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
{
+ struct zpci_dev *zdev = to_zpci_dev(dev);
struct s390_domain *s390_domain;
+ u64 aperture_size;
s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
if (!s390_domain)
@@ -342,9 +540,26 @@ static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
kfree(s390_domain);
return NULL;
}
+
+ aperture_size = min(s390_iommu_aperture,
+ zdev->end_dma - zdev->start_dma + 1);
+ if (aperture_size <= (ZPCI_TABLE_SIZE_RT - zdev->start_dma)) {
+ s390_domain->origin_type = ZPCI_TABLE_TYPE_RTX;
+ } else if (aperture_size <= (ZPCI_TABLE_SIZE_RS - zdev->start_dma) &&
+ (zdev->dtsm & ZPCI_IOTA_DT_RS)) {
+ s390_domain->origin_type = ZPCI_TABLE_TYPE_RSX;
+ } else if (zdev->dtsm & ZPCI_IOTA_DT_RF) {
+ s390_domain->origin_type = ZPCI_TABLE_TYPE_RFX;
+ } else {
+ /* Assume RTX available */
+ s390_domain->origin_type = ZPCI_TABLE_TYPE_RTX;
+ aperture_size = ZPCI_TABLE_SIZE_RT - zdev->start_dma;
+ }
+ zdev->end_dma = zdev->start_dma + aperture_size - 1;
+
s390_domain->domain.geometry.force_aperture = true;
s390_domain->domain.geometry.aperture_start = 0;
- s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;
+ s390_domain->domain.geometry.aperture_end = max_tbl_size(s390_domain);
spin_lock_init(&s390_domain->list_lock);
INIT_LIST_HEAD_RCU(&s390_domain->devices);
@@ -356,7 +571,7 @@ static void s390_iommu_rcu_free_domain(struct rcu_head *head)
{
struct s390_domain *s390_domain = container_of(head, struct s390_domain, rcu);
- dma_cleanup_tables(s390_domain->dma_table);
+ dma_cleanup_tables(s390_domain);
kfree(s390_domain);
}
@@ -381,6 +596,21 @@ static void zdev_s390_domain_update(struct zpci_dev *zdev,
spin_unlock_irqrestore(&zdev->dom_lock, flags);
}
+/*
+ * Map the domain's origin_type to the ZPCI_IOTA_*_FLAG that is OR-ed into
+ * the IOTA when the table is registered. Warns once and returns 0 for an
+ * unknown type.
+ */
+static u64 get_iota_region_flag(struct s390_domain *domain)
+{
+	if (domain->origin_type == ZPCI_TABLE_TYPE_RTX)
+		return ZPCI_IOTA_RTTO_FLAG;
+
+	if (domain->origin_type == ZPCI_TABLE_TYPE_RSX)
+		return ZPCI_IOTA_RSTO_FLAG;
+
+	if (domain->origin_type == ZPCI_TABLE_TYPE_RFX)
+		return ZPCI_IOTA_RFTO_FLAG;
+
+	WARN_ONCE(1, "Invalid IOMMU table (%x)\n", domain->origin_type);
+	return 0;
+}
+
static int s390_iommu_domain_reg_ioat(struct zpci_dev *zdev,
struct iommu_domain *domain, u8 *status)
{
@@ -399,7 +629,7 @@ static int s390_iommu_domain_reg_ioat(struct zpci_dev *zdev,
default:
s390_domain = to_s390_domain(domain);
iota = virt_to_phys(s390_domain->dma_table) |
- ZPCI_IOTA_RTTO_FLAG;
+ get_iota_region_flag(s390_domain);
rc = zpci_register_ioat(zdev, 0, zdev->start_dma,
zdev->end_dma, iota, status);
}
@@ -482,6 +712,8 @@ static void s390_iommu_get_resv_regions(struct device *dev,
{
struct zpci_dev *zdev = to_zpci_dev(dev);
struct iommu_resv_region *region;
+ u64 max_size, end_resv;
+ unsigned long flags;
if (zdev->start_dma) {
region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
@@ -491,10 +723,21 @@ static void s390_iommu_get_resv_regions(struct device *dev,
list_add_tail(&region->list, list);
}
- if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
- region = iommu_alloc_resv_region(zdev->end_dma + 1,
- ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
- 0, IOMMU_RESV_RESERVED, GFP_KERNEL);
+ spin_lock_irqsave(&zdev->dom_lock, flags);
+ if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED ||
+ zdev->s390_domain->type == IOMMU_DOMAIN_IDENTITY) {
+ spin_unlock_irqrestore(&zdev->dom_lock, flags);
+ return;
+ }
+
+ max_size = max_tbl_size(to_s390_domain(zdev->s390_domain));
+ spin_unlock_irqrestore(&zdev->dom_lock, flags);
+
+ if (zdev->end_dma < max_size) {
+ end_resv = max_size - zdev->end_dma;
+ region = iommu_alloc_resv_region(zdev->end_dma + 1, end_resv,
+ 0, IOMMU_RESV_RESERVED,
+ GFP_KERNEL);
if (!region)
return;
list_add_tail(&region->list, list);
@@ -510,13 +753,9 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)
zdev = to_zpci_dev(dev);
- if (zdev->start_dma > zdev->end_dma ||
- zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1)
+ if (zdev->start_dma > zdev->end_dma)
return ERR_PTR(-EINVAL);
- if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
- zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
-
if (zdev->tlb_refresh)
dev->iommu->shadow_on_flush = 1;
@@ -606,8 +845,7 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
int rc;
for (i = 0; i < nr_pages; i++) {
- entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
- gfp);
+ entry = dma_walk_cpu_trans(s390_domain, dma_addr, gfp);
if (unlikely(!entry)) {
rc = -ENOMEM;
goto undo_cpu_trans;
@@ -622,8 +860,7 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
undo_cpu_trans:
while (i-- > 0) {
dma_addr -= PAGE_SIZE;
- entry = dma_walk_cpu_trans(s390_domain->dma_table,
- dma_addr, gfp);
+ entry = dma_walk_cpu_trans(s390_domain, dma_addr, gfp);
if (!entry)
break;
dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
@@ -640,8 +877,7 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
int rc = 0;
for (i = 0; i < nr_pages; i++) {
- entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
- GFP_ATOMIC);
+ entry = dma_walk_cpu_trans(s390_domain, dma_addr, GFP_ATOMIC);
if (unlikely(!entry)) {
rc = -EINVAL;
break;
@@ -685,6 +921,51 @@ static int s390_iommu_map_pages(struct iommu_domain *domain,
return rc;
}
+/*
+ * Look up, without allocating, the table that calc_rsx() indexes for @iova.
+ * For an RSX-rooted domain that is the root table; for an RFX-rooted domain
+ * it is the table referenced by the root entry for @iova. Returns NULL when
+ * that entry is not valid or the origin type is neither RFX nor RSX.
+ */
+static unsigned long *get_rso_from_iova(struct s390_domain *domain,
+					dma_addr_t iova)
+{
+	unsigned long root_entry;
+
+	if (domain->origin_type == ZPCI_TABLE_TYPE_RSX)
+		return domain->dma_table;
+
+	if (domain->origin_type != ZPCI_TABLE_TYPE_RFX)
+		return NULL;
+
+	root_entry = READ_ONCE(domain->dma_table[calc_rfx(iova)]);
+	if (!reg_entry_isvalid(root_entry))
+		return NULL;
+
+	return get_rf_rso(root_entry);
+}
+
+/*
+ * Look up, without allocating, the table that calc_rtx() indexes for @iova.
+ *
+ * For an RTX-rooted domain this is the root table itself. For RFX- and
+ * RSX-rooted domains the upper level(s) are resolved first. Returns NULL if
+ * an entry on the way is not valid or the domain's origin type is unknown, so
+ * the caller can report the IOVA as unmapped.
+ */
+static unsigned long *get_rto_from_iova(struct s390_domain *domain,
+					dma_addr_t iova)
+{
+	unsigned long *rso;
+	unsigned long rse;
+	unsigned int rsx;
+
+	switch (domain->origin_type) {
+	case ZPCI_TABLE_TYPE_RFX:
+	case ZPCI_TABLE_TYPE_RSX:
+		rso = get_rso_from_iova(domain, iova);
+		/* NULL when the RFX entry covering @iova is not valid */
+		if (!rso)
+			return NULL;
+		rsx = calc_rsx(iova);
+		rse = READ_ONCE(rso[rsx]);
+		if (!reg_entry_isvalid(rse))
+			return NULL;
+		return get_rs_rto(rse);
+	case ZPCI_TABLE_TYPE_RTX:
+		return domain->dma_table;
+	default:
+		return NULL;
+	}
+}
+
static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t iova)
{
@@ -698,10 +979,13 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
iova > domain->geometry.aperture_end)
return 0;
+ rto = get_rto_from_iova(s390_domain, iova);
+ if (!rto)
+ return 0;
+
rtx = calc_rtx(iova);
sx = calc_sx(iova);
px = calc_px(iova);
- rto = s390_domain->dma_table;
rte = READ_ONCE(rto[rtx]);
if (reg_entry_isvalid(rte)) {
@@ -756,7 +1040,6 @@ struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
int zpci_init_iommu(struct zpci_dev *zdev)
{
- u64 aperture_size;
int rc = 0;
rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL,
@@ -774,12 +1057,6 @@ int zpci_init_iommu(struct zpci_dev *zdev)
if (rc)
goto out_sysfs;
- zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
- aperture_size = min3(s390_iommu_aperture,
- ZPCI_TABLE_SIZE_RT - zdev->start_dma,
- zdev->end_dma - zdev->start_dma + 1);
- zdev->end_dma = zdev->start_dma + aperture_size - 1;
-
return 0;
out_sysfs:
diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index 8d8f11854676..76c9620af4bb 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -690,8 +690,8 @@ sun50i_iommu_domain_alloc_paging(struct device *dev)
if (!sun50i_domain)
return NULL;
- sun50i_domain->dt = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32,
- get_order(DT_SIZE));
+ sun50i_domain->dt =
+ iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32, DT_SIZE);
if (!sun50i_domain->dt)
goto err_free_domain;
@@ -713,7 +713,7 @@ static void sun50i_iommu_domain_free(struct iommu_domain *domain)
{
struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
- iommu_free_pages(sun50i_domain->dt, get_order(DT_SIZE));
+ iommu_free_pages(sun50i_domain->dt);
sun50i_domain->dt = NULL;
kfree(sun50i_domain);
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 69d353e1df84..61897d50162d 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -51,14 +51,17 @@ struct tegra_smmu {
struct iommu_device iommu; /* IOMMU Core code handle */
};
+struct tegra_pd;
+struct tegra_pt;
+
struct tegra_smmu_as {
struct iommu_domain domain;
struct tegra_smmu *smmu;
unsigned int use_count;
spinlock_t lock;
u32 *count;
- struct page **pts;
- struct page *pd;
+ struct tegra_pt **pts;
+ struct tegra_pd *pd;
dma_addr_t pd_dma;
unsigned id;
u32 attr;
@@ -155,6 +158,14 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset)
#define SMMU_PDE_ATTR (SMMU_PDE_READABLE | SMMU_PDE_WRITABLE | \
SMMU_PDE_NONSECURE)
+/* Page directory: SMMU_NUM_PDE 32-bit entries, indexed by iova_pd_index() */
+struct tegra_pd {
+ u32 val[SMMU_NUM_PDE];
+};
+
+/* Page table: SMMU_NUM_PTE 32-bit entries, indexed by iova_pt_index() */
+struct tegra_pt {
+ u32 val[SMMU_NUM_PTE];
+};
+
static unsigned int iova_pd_index(unsigned long iova)
{
return (iova >> SMMU_PDE_SHIFT) & (SMMU_NUM_PDE - 1);
@@ -284,7 +295,7 @@ static struct iommu_domain *tegra_smmu_domain_alloc_paging(struct device *dev)
as->attr = SMMU_PD_READABLE | SMMU_PD_WRITABLE | SMMU_PD_NONSECURE;
- as->pd = __iommu_alloc_pages(GFP_KERNEL | __GFP_DMA, 0);
+ as->pd = iommu_alloc_pages_sz(GFP_KERNEL | __GFP_DMA, SMMU_SIZE_PD);
if (!as->pd) {
kfree(as);
return NULL;
@@ -292,7 +303,7 @@ static struct iommu_domain *tegra_smmu_domain_alloc_paging(struct device *dev)
as->count = kcalloc(SMMU_NUM_PDE, sizeof(u32), GFP_KERNEL);
if (!as->count) {
- __iommu_free_pages(as->pd, 0);
+ iommu_free_pages(as->pd);
kfree(as);
return NULL;
}
@@ -300,7 +311,7 @@ static struct iommu_domain *tegra_smmu_domain_alloc_paging(struct device *dev)
as->pts = kcalloc(SMMU_NUM_PDE, sizeof(*as->pts), GFP_KERNEL);
if (!as->pts) {
kfree(as->count);
- __iommu_free_pages(as->pd, 0);
+ iommu_free_pages(as->pd);
kfree(as);
return NULL;
}
@@ -417,8 +428,8 @@ static int tegra_smmu_as_prepare(struct tegra_smmu *smmu,
goto unlock;
}
- as->pd_dma = dma_map_page(smmu->dev, as->pd, 0, SMMU_SIZE_PD,
- DMA_TO_DEVICE);
+ as->pd_dma =
+ dma_map_single(smmu->dev, as->pd, SMMU_SIZE_PD, DMA_TO_DEVICE);
if (dma_mapping_error(smmu->dev, as->pd_dma)) {
err = -ENOMEM;
goto unlock;
@@ -450,7 +461,7 @@ static int tegra_smmu_as_prepare(struct tegra_smmu *smmu,
return 0;
err_unmap:
- dma_unmap_page(smmu->dev, as->pd_dma, SMMU_SIZE_PD, DMA_TO_DEVICE);
+ dma_unmap_single(smmu->dev, as->pd_dma, SMMU_SIZE_PD, DMA_TO_DEVICE);
unlock:
mutex_unlock(&smmu->lock);
@@ -469,7 +480,7 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu,
tegra_smmu_free_asid(smmu, as->id);
- dma_unmap_page(smmu->dev, as->pd_dma, SMMU_SIZE_PD, DMA_TO_DEVICE);
+ dma_unmap_single(smmu->dev, as->pd_dma, SMMU_SIZE_PD, DMA_TO_DEVICE);
as->smmu = NULL;
@@ -548,11 +559,11 @@ static void tegra_smmu_set_pde(struct tegra_smmu_as *as, unsigned long iova,
{
unsigned int pd_index = iova_pd_index(iova);
struct tegra_smmu *smmu = as->smmu;
- u32 *pd = page_address(as->pd);
+ /* Point at the entry itself so sizeof(*pd) stays the size of one PDE */
+ u32 *pd = &as->pd->val[pd_index];
unsigned long offset = pd_index * sizeof(*pd);
/* Set the page directory entry first */
- pd[pd_index] = value;
+ *pd = value;
/* The flush the page directory entry from caches */
dma_sync_single_range_for_device(smmu->dev, as->pd_dma, offset,
@@ -564,11 +575,9 @@ static void tegra_smmu_set_pde(struct tegra_smmu_as *as, unsigned long iova,
smmu_flush(smmu);
}
-static u32 *tegra_smmu_pte_offset(struct page *pt_page, unsigned long iova)
+static u32 *tegra_smmu_pte_offset(struct tegra_pt *pt, unsigned long iova)
{
- u32 *pt = page_address(pt_page);
-
- return pt + iova_pt_index(iova);
+ return &pt->val[iova_pt_index(iova)];
}
static u32 *tegra_smmu_pte_lookup(struct tegra_smmu_as *as, unsigned long iova,
@@ -576,21 +585,19 @@ static u32 *tegra_smmu_pte_lookup(struct tegra_smmu_as *as, unsigned long iova,
{
unsigned int pd_index = iova_pd_index(iova);
struct tegra_smmu *smmu = as->smmu;
- struct page *pt_page;
- u32 *pd;
+ struct tegra_pt *pt;
- pt_page = as->pts[pd_index];
- if (!pt_page)
+ pt = as->pts[pd_index];
+ if (!pt)
return NULL;
- pd = page_address(as->pd);
- *dmap = smmu_pde_to_dma(smmu, pd[pd_index]);
+ *dmap = smmu_pde_to_dma(smmu, as->pd->val[pd_index]);
- return tegra_smmu_pte_offset(pt_page, iova);
+ return tegra_smmu_pte_offset(pt, iova);
}
static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova,
- dma_addr_t *dmap, struct page *page)
+ dma_addr_t *dmap, struct tegra_pt *pt)
{
unsigned int pde = iova_pd_index(iova);
struct tegra_smmu *smmu = as->smmu;
@@ -598,30 +605,28 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova,
if (!as->pts[pde]) {
dma_addr_t dma;
- dma = dma_map_page(smmu->dev, page, 0, SMMU_SIZE_PT,
- DMA_TO_DEVICE);
+ dma = dma_map_single(smmu->dev, pt, SMMU_SIZE_PT,
+ DMA_TO_DEVICE);
if (dma_mapping_error(smmu->dev, dma)) {
- __iommu_free_pages(page, 0);
+ iommu_free_pages(pt);
return NULL;
}
if (!smmu_dma_addr_valid(smmu, dma)) {
- dma_unmap_page(smmu->dev, dma, SMMU_SIZE_PT,
- DMA_TO_DEVICE);
- __iommu_free_pages(page, 0);
+ dma_unmap_single(smmu->dev, dma, SMMU_SIZE_PT,
+ DMA_TO_DEVICE);
+ iommu_free_pages(pt);
return NULL;
}
- as->pts[pde] = page;
+ as->pts[pde] = pt;
tegra_smmu_set_pde(as, iova, SMMU_MK_PDE(dma, SMMU_PDE_ATTR |
SMMU_PDE_NEXT));
*dmap = dma;
} else {
- u32 *pd = page_address(as->pd);
-
- *dmap = smmu_pde_to_dma(smmu, pd[pde]);
+ *dmap = smmu_pde_to_dma(smmu, as->pd->val[pde]);
}
return tegra_smmu_pte_offset(as->pts[pde], iova);
@@ -637,7 +642,7 @@ static void tegra_smmu_pte_get_use(struct tegra_smmu_as *as, unsigned long iova)
static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova)
{
unsigned int pde = iova_pd_index(iova);
- struct page *page = as->pts[pde];
+ struct tegra_pt *pt = as->pts[pde];
/*
* When no entries in this page table are used anymore, return the
@@ -645,13 +650,13 @@ static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova)
*/
if (--as->count[pde] == 0) {
struct tegra_smmu *smmu = as->smmu;
- u32 *pd = page_address(as->pd);
- dma_addr_t pte_dma = smmu_pde_to_dma(smmu, pd[pde]);
+ dma_addr_t pte_dma = smmu_pde_to_dma(smmu, as->pd->val[pde]);
tegra_smmu_set_pde(as, iova, 0);
- dma_unmap_page(smmu->dev, pte_dma, SMMU_SIZE_PT, DMA_TO_DEVICE);
- __iommu_free_pages(page, 0);
+ dma_unmap_single(smmu->dev, pte_dma, SMMU_SIZE_PT,
+ DMA_TO_DEVICE);
+ iommu_free_pages(pt);
as->pts[pde] = NULL;
}
}
@@ -671,16 +676,16 @@ static void tegra_smmu_set_pte(struct tegra_smmu_as *as, unsigned long iova,
smmu_flush(smmu);
}
-static struct page *as_get_pde_page(struct tegra_smmu_as *as,
- unsigned long iova, gfp_t gfp,
- unsigned long *flags)
+static struct tegra_pt *as_get_pde_page(struct tegra_smmu_as *as,
+ unsigned long iova, gfp_t gfp,
+ unsigned long *flags)
{
unsigned int pde = iova_pd_index(iova);
- struct page *page = as->pts[pde];
+ struct tegra_pt *pt = as->pts[pde];
/* at first check whether allocation needs to be done at all */
- if (page)
- return page;
+ if (pt)
+ return pt;
/*
* In order to prevent exhaustion of the atomic memory pool, we
@@ -690,7 +695,7 @@ static struct page *as_get_pde_page(struct tegra_smmu_as *as,
if (gfpflags_allow_blocking(gfp))
spin_unlock_irqrestore(&as->lock, *flags);
- page = __iommu_alloc_pages(gfp | __GFP_DMA, 0);
+ pt = iommu_alloc_pages_sz(gfp | __GFP_DMA, SMMU_SIZE_PT);
if (gfpflags_allow_blocking(gfp))
spin_lock_irqsave(&as->lock, *flags);
@@ -701,13 +706,13 @@ static struct page *as_get_pde_page(struct tegra_smmu_as *as,
* if allocation succeeded and the allocation failure isn't fatal.
*/
if (as->pts[pde]) {
- if (page)
- __iommu_free_pages(page, 0);
+ if (pt)
+ iommu_free_pages(pt);
- page = as->pts[pde];
+ pt = as->pts[pde];
}
- return page;
+ return pt;
}
static int
@@ -717,15 +722,15 @@ __tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
{
struct tegra_smmu_as *as = to_smmu_as(domain);
dma_addr_t pte_dma;
- struct page *page;
+ struct tegra_pt *pt;
u32 pte_attrs;
u32 *pte;
- page = as_get_pde_page(as, iova, gfp, flags);
- if (!page)
+ pt = as_get_pde_page(as, iova, gfp, flags);
+ if (!pt)
return -ENOMEM;
- pte = as_get_pte(as, iova, &pte_dma, page);
+ pte = as_get_pte(as, iova, &pte_dma, pt);
if (!pte)
return -ENOMEM;
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index b85ce6310ddb..ecd41fb03e5a 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -48,6 +48,7 @@ struct viommu_dev {
u64 pgsize_bitmap;
u32 first_domain;
u32 last_domain;
+ u32 identity_domain_id;
/* Supported MAP flags */
u32 map_flags;
u32 probe_size;
@@ -62,7 +63,6 @@ struct viommu_mapping {
struct viommu_domain {
struct iommu_domain domain;
struct viommu_dev *viommu;
- struct mutex mutex; /* protects viommu pointer */
unsigned int id;
u32 map_flags;
@@ -70,7 +70,6 @@ struct viommu_domain {
struct rb_root_cached mappings;
unsigned long nr_endpoints;
- bool bypass;
};
struct viommu_endpoint {
@@ -97,6 +96,8 @@ struct viommu_event {
};
};
+static struct viommu_domain viommu_identity_domain;
+
#define to_viommu_domain(domain) \
container_of(domain, struct viommu_domain, domain)
@@ -305,6 +306,22 @@ out_unlock:
return ret;
}
+/*
+ * Send the attach request @req once for every ID in @dev's fwspec, filling
+ * in req->endpoint for each. Stops at, and returns, the first error from
+ * viommu_send_req_sync(); returns 0 when every request went through.
+ */
+static int viommu_send_attach_req(struct viommu_dev *viommu, struct device *dev,
+				  struct virtio_iommu_req_attach *req)
+{
+	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+	unsigned int idx = 0;
+
+	while (idx < fwspec->num_ids) {
+		int err;
+
+		req->endpoint = cpu_to_le32(fwspec->ids[idx]);
+		err = viommu_send_req_sync(viommu, req, sizeof(*req));
+		if (err)
+			return err;
+		idx++;
+	}
+
+	return 0;
+}
+
/*
* viommu_add_mapping - add a mapping to the internal tree
*
@@ -637,71 +654,45 @@ static void viommu_event_handler(struct virtqueue *vq)
/* IOMMU API */
-static struct iommu_domain *viommu_domain_alloc(unsigned type)
+static struct iommu_domain *viommu_domain_alloc_paging(struct device *dev)
{
+ struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
+ struct viommu_dev *viommu = vdev->viommu;
+ unsigned long viommu_page_size;
struct viommu_domain *vdomain;
-
- if (type != IOMMU_DOMAIN_UNMANAGED &&
- type != IOMMU_DOMAIN_DMA &&
- type != IOMMU_DOMAIN_IDENTITY)
- return NULL;
-
- vdomain = kzalloc(sizeof(*vdomain), GFP_KERNEL);
- if (!vdomain)
- return NULL;
-
- mutex_init(&vdomain->mutex);
- spin_lock_init(&vdomain->mappings_lock);
- vdomain->mappings = RB_ROOT_CACHED;
-
- return &vdomain->domain;
-}
-
-static int viommu_domain_finalise(struct viommu_endpoint *vdev,
- struct iommu_domain *domain)
-{
int ret;
- unsigned long viommu_page_size;
- struct viommu_dev *viommu = vdev->viommu;
- struct viommu_domain *vdomain = to_viommu_domain(domain);
viommu_page_size = 1UL << __ffs(viommu->pgsize_bitmap);
if (viommu_page_size > PAGE_SIZE) {
dev_err(vdev->dev,
"granule 0x%lx larger than system page size 0x%lx\n",
viommu_page_size, PAGE_SIZE);
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
}
- ret = ida_alloc_range(&viommu->domain_ids, viommu->first_domain,
- viommu->last_domain, GFP_KERNEL);
- if (ret < 0)
- return ret;
+ vdomain = kzalloc(sizeof(*vdomain), GFP_KERNEL);
+ if (!vdomain)
+ return ERR_PTR(-ENOMEM);
- vdomain->id = (unsigned int)ret;
+ spin_lock_init(&vdomain->mappings_lock);
+ vdomain->mappings = RB_ROOT_CACHED;
- domain->pgsize_bitmap = viommu->pgsize_bitmap;
- domain->geometry = viommu->geometry;
+ ret = ida_alloc_range(&viommu->domain_ids, viommu->first_domain,
+ viommu->last_domain, GFP_KERNEL);
+ if (ret < 0) {
+ kfree(vdomain);
+ return ERR_PTR(ret);
+ }
- vdomain->map_flags = viommu->map_flags;
- vdomain->viommu = viommu;
+ vdomain->id = (unsigned int)ret;
- if (domain->type == IOMMU_DOMAIN_IDENTITY) {
- if (virtio_has_feature(viommu->vdev,
- VIRTIO_IOMMU_F_BYPASS_CONFIG)) {
- vdomain->bypass = true;
- return 0;
- }
+ vdomain->domain.pgsize_bitmap = viommu->pgsize_bitmap;
+ vdomain->domain.geometry = viommu->geometry;
- ret = viommu_domain_map_identity(vdev, vdomain);
- if (ret) {
- ida_free(&viommu->domain_ids, vdomain->id);
- vdomain->viommu = NULL;
- return ret;
- }
- }
+ vdomain->map_flags = viommu->map_flags;
+ vdomain->viommu = viommu;
- return 0;
+ return &vdomain->domain;
}
static void viommu_domain_free(struct iommu_domain *domain)
@@ -717,29 +708,37 @@ static void viommu_domain_free(struct iommu_domain *domain)
kfree(vdomain);
}
+/*
+ * Provide an identity domain for @dev. With VIRTIO_IOMMU_F_BYPASS_CONFIG the
+ * static viommu_identity_domain is returned. Otherwise a paging domain is
+ * allocated and populated by viommu_domain_map_identity(); if that fails the
+ * domain is freed again and the error is returned as an ERR_PTR().
+ */
+static struct iommu_domain *viommu_domain_alloc_identity(struct device *dev)
+{
+	struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
+	struct iommu_domain *paging;
+	int err;
+
+	if (virtio_has_feature(vdev->viommu->vdev,
+			       VIRTIO_IOMMU_F_BYPASS_CONFIG))
+		return &viommu_identity_domain.domain;
+
+	paging = viommu_domain_alloc_paging(dev);
+	if (IS_ERR(paging))
+		return paging;
+
+	err = viommu_domain_map_identity(vdev, to_viommu_domain(paging));
+	if (!err)
+		return paging;
+
+	viommu_domain_free(paging);
+	return ERR_PTR(err);
+}
+
static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
- int i;
int ret = 0;
struct virtio_iommu_req_attach req;
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
struct viommu_domain *vdomain = to_viommu_domain(domain);
- mutex_lock(&vdomain->mutex);
- if (!vdomain->viommu) {
- /*
- * Properly initialize the domain now that we know which viommu
- * owns it.
- */
- ret = viommu_domain_finalise(vdev, domain);
- } else if (vdomain->viommu != vdev->viommu) {
- ret = -EINVAL;
- }
- mutex_unlock(&vdomain->mutex);
-
- if (ret)
- return ret;
+ if (vdomain->viommu != vdev->viommu)
+ return -EINVAL;
/*
* In the virtio-iommu device, when attaching the endpoint to a new
@@ -761,16 +760,9 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
.domain = cpu_to_le32(vdomain->id),
};
- if (vdomain->bypass)
- req.flags |= cpu_to_le32(VIRTIO_IOMMU_ATTACH_F_BYPASS);
-
- for (i = 0; i < fwspec->num_ids; i++) {
- req.endpoint = cpu_to_le32(fwspec->ids[i]);
-
- ret = viommu_send_req_sync(vdomain->viommu, &req, sizeof(req));
- if (ret)
- return ret;
- }
+ ret = viommu_send_attach_req(vdomain->viommu, dev, &req);
+ if (ret)
+ return ret;
if (!vdomain->nr_endpoints) {
/*
@@ -788,6 +780,40 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
return 0;
}
+/*
+ * attach_dev handler of the static identity domain: send an ATTACH request
+ * carrying VIRTIO_IOMMU_ATTACH_F_BYPASS and the viommu's identity_domain_id
+ * for every endpoint ID of @dev, then move the endpoint's accounting from
+ * its previous domain (if any) to @domain.
+ */
+static int viommu_attach_identity_domain(struct iommu_domain *domain,
+					 struct device *dev)
+{
+	struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
+	struct viommu_domain *vdomain = to_viommu_domain(domain);
+	struct virtio_iommu_req_attach req;
+	int err;
+
+	req = (struct virtio_iommu_req_attach) {
+		.head.type = VIRTIO_IOMMU_T_ATTACH,
+		.domain = cpu_to_le32(vdev->viommu->identity_domain_id),
+		.flags = cpu_to_le32(VIRTIO_IOMMU_ATTACH_F_BYPASS),
+	};
+
+	err = viommu_send_attach_req(vdev->viommu, dev, &req);
+	if (err)
+		return err;
+
+	/* Drop the old domain's endpoint count before taking the new one */
+	if (vdev->vdomain)
+		vdev->vdomain->nr_endpoints--;
+	vdomain->nr_endpoints++;
+	vdev->vdomain = vdomain;
+
+	return 0;
+}
+
+/*
+ * The identity domain handed out by viommu_domain_alloc_identity() when the
+ * device offers VIRTIO_IOMMU_F_BYPASS_CONFIG. attach_dev is the only domain
+ * op it sets.
+ */
+static struct viommu_domain viommu_identity_domain = {
+ .domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &(const struct iommu_domain_ops) {
+ .attach_dev = viommu_attach_identity_domain,
+ },
+ },
+};
+
static void viommu_detach_dev(struct viommu_endpoint *vdev)
{
int i;
@@ -1062,7 +1088,8 @@ static bool viommu_capable(struct device *dev, enum iommu_cap cap)
static struct iommu_ops viommu_ops = {
.capable = viommu_capable,
- .domain_alloc = viommu_domain_alloc,
+ .domain_alloc_identity = viommu_domain_alloc_identity,
+ .domain_alloc_paging = viommu_domain_alloc_paging,
.probe_device = viommu_probe_device,
.release_device = viommu_release_device,
.device_group = viommu_device_group,
@@ -1184,6 +1211,12 @@ static int viommu_probe(struct virtio_device *vdev)
if (virtio_has_feature(vdev, VIRTIO_IOMMU_F_MMIO))
viommu->map_flags |= VIRTIO_IOMMU_MAP_F_MMIO;
+ /* Reserve an ID to use as the bypass domain */
+ if (virtio_has_feature(viommu->vdev, VIRTIO_IOMMU_F_BYPASS_CONFIG)) {
+ viommu->identity_domain_id = viommu->first_domain;
+ viommu->first_domain++;
+ }
+
viommu_ops.pgsize_bitmap = viommu->pgsize_bitmap;
virtio_device_ready(vdev);