summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/arch/arm64/silicon-errata.rst3
-rw-r--r--Documentation/devicetree/bindings/iommu/arm,smmu.yaml23
-rw-r--r--Documentation/devicetree/bindings/iommu/qcom,iommu.yaml1
-rw-r--r--Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml1
-rw-r--r--drivers/iommu/Kconfig12
-rw-r--r--drivers/iommu/amd/amd_iommu.h9
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h41
-rw-r--r--drivers/iommu/amd/init.c253
-rw-r--r--drivers/iommu/amd/iommu.c532
-rw-r--r--drivers/iommu/amd/pasid.c3
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c15
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c298
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h31
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c8
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-impl.c5
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c2
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c121
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h3
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu.c43
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu.h2
-rw-r--r--drivers/iommu/intel/Makefile2
-rw-r--r--drivers/iommu/intel/cache.c11
-rw-r--r--drivers/iommu/intel/cap_audit.c217
-rw-r--r--drivers/iommu/intel/cap_audit.h131
-rw-r--r--drivers/iommu/intel/iommu.c47
-rw-r--r--drivers/iommu/intel/irq_remapping.c8
-rw-r--r--drivers/iommu/intel/pasid.c22
-rw-r--r--drivers/iommu/intel/pasid.h6
-rw-r--r--drivers/iommu/io-pgtable-arm.c227
-rw-r--r--drivers/iommu/iommu.c37
-rw-r--r--drivers/iommu/msm_iommu.c51
-rw-r--r--drivers/iommu/mtk_iommu.c9
-rw-r--r--drivers/iommu/mtk_iommu_v1.c3
-rw-r--r--drivers/iommu/of_iommu.c2
-rw-r--r--drivers/iommu/riscv/iommu-pci.c8
-rw-r--r--drivers/iommu/riscv/iommu-platform.c108
-rw-r--r--drivers/iommu/riscv/iommu.c14
-rw-r--r--drivers/iommu/riscv/iommu.h1
-rw-r--r--drivers/iommu/rockchip-iommu.c3
-rw-r--r--include/linux/adreno-smmu-priv.h7
-rw-r--r--include/linux/amd-iommu.h4
-rw-r--r--include/linux/io-pgtable.h11
-rw-r--r--include/linux/iommu.h5
43 files changed, 1304 insertions, 1036 deletions
diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
index b42fea07c5ce..f074f6219f5c 100644
--- a/Documentation/arch/arm64/silicon-errata.rst
+++ b/Documentation/arch/arm64/silicon-errata.rst
@@ -198,7 +198,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3194386 |
+----------------+-----------------+-----------------+-----------------------------+
-| ARM | MMU-500 | #841119,826419 | N/A |
+| ARM | MMU-500 | #841119,826419 | ARM_SMMU_MMU_500_CPRE_ERRATA|
+| | | #562869,1047329 | |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | MMU-600 | #1076982,1209401| N/A |
+----------------+-----------------+-----------------+-----------------------------+
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index c1e11bc6b7a0..032fdc27127b 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -61,6 +61,7 @@ properties:
- qcom,sm8450-smmu-500
- qcom,sm8550-smmu-500
- qcom,sm8650-smmu-500
+ - qcom,sm8750-smmu-500
- qcom,x1e80100-smmu-500
- const: qcom,smmu-500
- const: arm,mmu-500
@@ -88,6 +89,7 @@ properties:
items:
- enum:
- qcom,qcm2290-smmu-500
+ - qcom,qcs615-smmu-500
- qcom,sa8255p-smmu-500
- qcom,sa8775p-smmu-500
- qcom,sar2130p-smmu-500
@@ -102,6 +104,7 @@ properties:
- qcom,sm8450-smmu-500
- qcom,sm8550-smmu-500
- qcom,sm8650-smmu-500
+ - qcom,sm8750-smmu-500
- qcom,x1e80100-smmu-500
- const: qcom,adreno-smmu
- const: qcom,smmu-500
@@ -122,6 +125,7 @@ properties:
- qcom,msm8996-smmu-v2
- qcom,sc7180-smmu-v2
- qcom,sdm630-smmu-v2
+ - qcom,sdm670-smmu-v2
- qcom,sdm845-smmu-v2
- qcom,sm6350-smmu-v2
- qcom,sm7150-smmu-v2
@@ -474,6 +478,7 @@ allOf:
items:
- enum:
- qcom,qcm2290-smmu-500
+ - qcom,qcs615-smmu-500
- qcom,sm6115-smmu-500
- qcom,sm6125-smmu-500
- const: qcom,adreno-smmu
@@ -550,6 +555,23 @@ allOf:
- description: GPU SNoC bus clock
- description: GPU AHB clock
+ - if:
+ properties:
+ compatible:
+ items:
+ - const: qcom,sm8750-smmu-500
+ - const: qcom,adreno-smmu
+ - const: qcom,smmu-500
+ - const: arm,mmu-500
+ then:
+ properties:
+ clock-names:
+ items:
+ - const: hlos
+ clocks:
+ items:
+ - description: HLOS vote clock
+
# Disallow clocks for all other platforms with specific compatibles
- if:
properties:
@@ -559,7 +581,6 @@ allOf:
- cavium,smmu-v2
- marvell,ap806-smmu-500
- nvidia,smmu-500
- - qcom,qcs615-smmu-500
- qcom,qcs8300-smmu-500
- qcom,qdu1000-smmu-500
- qcom,sa8255p-smmu-500
diff --git a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml
index f8cebc9e8cd9..5ae9a628261f 100644
--- a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml
+++ b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml
@@ -21,6 +21,7 @@ properties:
- items:
- enum:
- qcom,msm8916-iommu
+ - qcom,msm8917-iommu
- qcom,msm8953-iommu
- const: qcom,msm-iommu-v1
- items:
diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
index 621dde0e45d8..6ce41d11ff5e 100644
--- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
+++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
@@ -25,6 +25,7 @@ properties:
- rockchip,rk3568-iommu
- items:
- enum:
+ - rockchip,rk3576-iommu
- rockchip,rk3588-iommu
- const: rockchip,rk3568-iommu
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 47c46e4b739e..ec1b5e32b972 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -367,6 +367,18 @@ config ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT
'arm-smmu.disable_bypass' will continue to override this
config.
+config ARM_SMMU_MMU_500_CPRE_ERRATA
+ bool "Enable errata workaround for CPRE in SMMU reset path"
+ depends on ARM_SMMU
+ default y
+ help
+ Say Y here (by default) to apply the workaround that disables
+ MMU-500's next-page prefetcher for the sake of 4 known errata.
+
+ Say N here only when you are sure that none of the errata related
+ to prefetch enablement are applicable on the platform.
+ Refer to silicon-errata.rst for info on errata IDs.
+
config ARM_SMMU_QCOM
def_tristate y
depends on ARM_SMMU && ARCH_QCOM
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 1bef5d55b2f9..68debf5ee2d7 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -16,7 +16,6 @@ irqreturn_t amd_iommu_int_thread_evtlog(int irq, void *data);
irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data);
irqreturn_t amd_iommu_int_thread_galog(int irq, void *data);
irqreturn_t amd_iommu_int_handler(int irq, void *data);
-void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid);
void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type,
u8 cntrl_intr, u8 cntrl_log,
u32 status_run_mask, u32 status_overflow_mask);
@@ -41,13 +40,13 @@ void amd_iommu_disable(void);
int amd_iommu_reenable(int mode);
int amd_iommu_enable_faulting(unsigned int cpu);
extern int amd_iommu_guest_ir;
-extern enum io_pgtable_fmt amd_iommu_pgtable;
+extern enum protection_domain_mode amd_iommu_pgtable;
extern int amd_iommu_gpt_level;
extern unsigned long amd_iommu_pgsize_bitmap;
/* Protection domain ops */
void amd_iommu_init_identity_domain(void);
-struct protection_domain *protection_domain_alloc(unsigned int type, int nid);
+struct protection_domain *protection_domain_alloc(void);
void protection_domain_free(struct protection_domain *domain);
struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
struct mm_struct *mm);
@@ -89,7 +88,6 @@ int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag);
*/
void amd_iommu_flush_all_caches(struct amd_iommu *iommu);
void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
-void amd_iommu_domain_update(struct protection_domain *domain);
void amd_iommu_domain_flush_pages(struct protection_domain *domain,
u64 address, size_t size);
void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data,
@@ -184,3 +182,6 @@ void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
#endif
+
+struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid);
+struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid);
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index fdb0357e0bb9..0bbda60d3cdc 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -220,6 +220,8 @@
#define DEV_ENTRY_EX 0x67
#define DEV_ENTRY_SYSMGT1 0x68
#define DEV_ENTRY_SYSMGT2 0x69
+#define DTE_DATA1_SYSMGT_MASK GENMASK_ULL(41, 40)
+
#define DEV_ENTRY_IRQ_TBL_EN 0x80
#define DEV_ENTRY_INIT_PASS 0xb8
#define DEV_ENTRY_EINT_PASS 0xb9
@@ -407,8 +409,7 @@
#define DTE_FLAG_HAD (3ULL << 7)
#define DTE_FLAG_GIOV BIT_ULL(54)
#define DTE_FLAG_GV BIT_ULL(55)
-#define DTE_GLX_SHIFT (56)
-#define DTE_GLX_MASK (3)
+#define DTE_GLX GENMASK_ULL(57, 56)
#define DTE_FLAG_IR BIT_ULL(61)
#define DTE_FLAG_IW BIT_ULL(62)
@@ -416,18 +417,18 @@
#define DTE_FLAG_MASK (0x3ffULL << 32)
#define DEV_DOMID_MASK 0xffffULL
-#define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL)
-#define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL)
-#define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0x1fffffULL)
-
-#define DTE_GCR3_SHIFT_A 58
-#define DTE_GCR3_SHIFT_B 16
-#define DTE_GCR3_SHIFT_C 43
+#define DTE_GCR3_14_12 GENMASK_ULL(60, 58)
+#define DTE_GCR3_30_15 GENMASK_ULL(31, 16)
+#define DTE_GCR3_51_31 GENMASK_ULL(63, 43)
#define DTE_GPT_LEVEL_SHIFT 54
+#define DTE_GPT_LEVEL_MASK GENMASK_ULL(55, 54)
#define GCR3_VALID 0x01ULL
+/* DTE[128:179] | DTE[184:191] */
+#define DTE_DATA2_INTR_MASK ~GENMASK_ULL(55, 52)
+
#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR)
#define IOMMU_PTE_DIRTY(pte) ((pte) & IOMMU_PTE_HD)
@@ -468,7 +469,7 @@ extern bool amd_iommu_dump;
#define DUMP_printk(format, arg...) \
do { \
if (amd_iommu_dump) \
- pr_info("AMD-Vi: " format, ## arg); \
+ pr_info(format, ## arg); \
} while(0);
/* global flag if IOMMUs cache non-present entries */
@@ -516,6 +517,9 @@ extern struct kmem_cache *amd_iommu_irq_cache;
#define for_each_pdom_dev_data_safe(pdom_dev_data, next, pdom) \
list_for_each_entry_safe((pdom_dev_data), (next), &pdom->dev_data_list, list)
+#define for_each_ivhd_dte_flags(entry) \
+ list_for_each_entry((entry), &amd_ivhd_dev_flags_list, list)
+
struct amd_iommu;
struct iommu_domain;
struct irq_domain;
@@ -837,6 +841,7 @@ struct devid_map {
struct iommu_dev_data {
/*Protect against attach/detach races */
struct mutex mutex;
+ spinlock_t dte_lock; /* DTE lock for 256-bit access */
struct list_head list; /* For domain->dev_list */
struct llist_node dev_data_list; /* For global dev_data_list */
@@ -881,7 +886,21 @@ extern struct list_head amd_iommu_list;
* Structure defining one entry in the device table
*/
struct dev_table_entry {
- u64 data[4];
+ union {
+ u64 data[4];
+ u128 data128[2];
+ };
+};
+
+/*
+ * Structure to store persistent DTE flags from IVHD
+ */
+struct ivhd_dte_flags {
+ struct list_head list;
+ u16 segid;
+ u16 devid_first;
+ u16 devid_last;
+ struct dev_table_entry dte;
};
/*
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 614f216215ea..c5cd92edada0 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -152,7 +152,7 @@ struct ivmd_header {
bool amd_iommu_dump;
bool amd_iommu_irq_remap __read_mostly;
-enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1;
+enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1;
/* Guest page table level */
int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;
@@ -174,8 +174,8 @@ bool amd_iommu_snp_en;
EXPORT_SYMBOL(amd_iommu_snp_en);
LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */
-LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
- system */
+LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */
+LIST_HEAD(amd_ivhd_dev_flags_list); /* list of all IVHD device entry settings */
/* Number of IOMMUs present in the system */
static int amd_iommus_present;
@@ -984,36 +984,12 @@ static void iommu_enable_gt(struct amd_iommu *iommu)
}
/* sets a specific bit in the device table entry. */
-static void __set_dev_entry_bit(struct dev_table_entry *dev_table,
- u16 devid, u8 bit)
+static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
{
int i = (bit >> 6) & 0x03;
int _bit = bit & 0x3f;
- dev_table[devid].data[i] |= (1UL << _bit);
-}
-
-static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
-{
- struct dev_table_entry *dev_table = get_dev_table(iommu);
-
- return __set_dev_entry_bit(dev_table, devid, bit);
-}
-
-static int __get_dev_entry_bit(struct dev_table_entry *dev_table,
- u16 devid, u8 bit)
-{
- int i = (bit >> 6) & 0x03;
- int _bit = bit & 0x3f;
-
- return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
-}
-
-static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
-{
- struct dev_table_entry *dev_table = get_dev_table(iommu);
-
- return __get_dev_entry_bit(dev_table, devid, bit);
+ dte->data[i] |= (1UL << _bit);
}
static bool __copy_device_table(struct amd_iommu *iommu)
@@ -1081,11 +1057,9 @@ static bool __copy_device_table(struct amd_iommu *iommu)
}
/* If gcr3 table existed, mask it out */
if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
- tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
- tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
+ tmp = (DTE_GCR3_30_15 | DTE_GCR3_51_31);
pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp;
- tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
- tmp |= DTE_FLAG_GV;
+ tmp = (DTE_GCR3_14_12 | DTE_FLAG_GV);
pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp;
}
}
@@ -1136,42 +1110,107 @@ static bool copy_device_table(void)
return true;
}
-void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid)
+struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid)
{
- int sysmgt;
+ struct ivhd_dte_flags *e;
+ unsigned int best_len = UINT_MAX;
+ struct dev_table_entry *dte = NULL;
- sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) |
- (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1);
+ for_each_ivhd_dte_flags(e) {
+ /*
+ * Need to go through the whole list to find the smallest range,
+ * which contains the devid.
+ */
+ if ((e->segid == segid) &&
+ (e->devid_first <= devid) && (devid <= e->devid_last)) {
+ unsigned int len = e->devid_last - e->devid_first;
+
+ if (len < best_len) {
+ dte = &(e->dte);
+ best_len = len;
+ }
+ }
+ }
+ return dte;
+}
+
+static bool search_ivhd_dte_flags(u16 segid, u16 first, u16 last)
+{
+ struct ivhd_dte_flags *e;
- if (sysmgt == 0x01)
- set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW);
+ for_each_ivhd_dte_flags(e) {
+ if ((e->segid == segid) &&
+ (e->devid_first == first) &&
+ (e->devid_last == last))
+ return true;
+ }
+ return false;
}
/*
* This function takes the device specific flags read from the ACPI
* table and sets up the device table entry with that information
*/
-static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
- u16 devid, u32 flags, u32 ext_flags)
+static void __init
+set_dev_entry_from_acpi_range(struct amd_iommu *iommu, u16 first, u16 last,
+ u32 flags, u32 ext_flags)
{
- if (flags & ACPI_DEVFLAG_INITPASS)
- set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS);
- if (flags & ACPI_DEVFLAG_EXTINT)
- set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS);
- if (flags & ACPI_DEVFLAG_NMI)
- set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS);
- if (flags & ACPI_DEVFLAG_SYSMGT1)
- set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1);
- if (flags & ACPI_DEVFLAG_SYSMGT2)
- set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2);
- if (flags & ACPI_DEVFLAG_LINT0)
- set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS);
- if (flags & ACPI_DEVFLAG_LINT1)
- set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS);
+ int i;
+ struct dev_table_entry dte = {};
- amd_iommu_apply_erratum_63(iommu, devid);
+ /* Parse IVHD DTE setting flags and store information */
+ if (flags) {
+ struct ivhd_dte_flags *d;
- amd_iommu_set_rlookup_table(iommu, devid);
+ if (search_ivhd_dte_flags(iommu->pci_seg->id, first, last))
+ return;
+
+ d = kzalloc(sizeof(struct ivhd_dte_flags), GFP_KERNEL);
+ if (!d)
+ return;
+
+ pr_debug("%s: devid range %#x:%#x\n", __func__, first, last);
+
+ if (flags & ACPI_DEVFLAG_INITPASS)
+ set_dte_bit(&dte, DEV_ENTRY_INIT_PASS);
+ if (flags & ACPI_DEVFLAG_EXTINT)
+ set_dte_bit(&dte, DEV_ENTRY_EINT_PASS);
+ if (flags & ACPI_DEVFLAG_NMI)
+ set_dte_bit(&dte, DEV_ENTRY_NMI_PASS);
+ if (flags & ACPI_DEVFLAG_SYSMGT1)
+ set_dte_bit(&dte, DEV_ENTRY_SYSMGT1);
+ if (flags & ACPI_DEVFLAG_SYSMGT2)
+ set_dte_bit(&dte, DEV_ENTRY_SYSMGT2);
+ if (flags & ACPI_DEVFLAG_LINT0)
+ set_dte_bit(&dte, DEV_ENTRY_LINT0_PASS);
+ if (flags & ACPI_DEVFLAG_LINT1)
+ set_dte_bit(&dte, DEV_ENTRY_LINT1_PASS);
+
+ /* Apply erratum 63, which needs info in initial_dte */
+ if (FIELD_GET(DTE_DATA1_SYSMGT_MASK, dte.data[1]) == 0x1)
+ dte.data[0] |= DTE_FLAG_IW;
+
+ memcpy(&d->dte, &dte, sizeof(dte));
+ d->segid = iommu->pci_seg->id;
+ d->devid_first = first;
+ d->devid_last = last;
+ list_add_tail(&d->list, &amd_ivhd_dev_flags_list);
+ }
+
+ for (i = first; i <= last; i++) {
+ if (flags) {
+ struct dev_table_entry *dev_table = get_dev_table(iommu);
+
+ memcpy(&dev_table[i], &dte, sizeof(dte));
+ }
+ amd_iommu_set_rlookup_table(iommu, i);
+ }
+}
+
+static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
+ u16 devid, u32 flags, u32 ext_flags)
+{
+ set_dev_entry_from_acpi_range(iommu, devid, devid, flags, ext_flags);
}
int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line)
@@ -1239,7 +1278,7 @@ static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid,
entry->cmd_line = cmd_line;
entry->root_devid = (entry->devid & (~0x7));
- pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
+ pr_info("%s, add hid:%s, uid:%s, rdevid:%#x\n",
entry->cmd_line ? "cmd" : "ivrs",
entry->hid, entry->uid, entry->root_devid);
@@ -1331,15 +1370,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
switch (e->type) {
case IVHD_DEV_ALL:
- DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags);
-
- for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i)
- set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
+ DUMP_printk(" DEV_ALL\t\t\tsetting: %#02x\n", e->flags);
+ set_dev_entry_from_acpi_range(iommu, 0, pci_seg->last_bdf, e->flags, 0);
break;
case IVHD_DEV_SELECT:
- DUMP_printk(" DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x "
- "flags: %02x\n",
+ DUMP_printk(" DEV_SELECT\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@@ -1350,8 +1386,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_SELECT_RANGE_START:
- DUMP_printk(" DEV_SELECT_RANGE_START\t "
- "devid: %04x:%02x:%02x.%x flags: %02x\n",
+ DUMP_printk(" DEV_SELECT_RANGE_START\tdevid: %04x:%02x:%02x.%x flags: %#02x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@@ -1364,8 +1399,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_ALIAS:
- DUMP_printk(" DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x "
- "flags: %02x devid_to: %02x:%02x.%x\n",
+ DUMP_printk(" DEV_ALIAS\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %02x:%02x.%x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@@ -1382,9 +1416,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_ALIAS_RANGE:
- DUMP_printk(" DEV_ALIAS_RANGE\t\t "
- "devid: %04x:%02x:%02x.%x flags: %02x "
- "devid_to: %04x:%02x:%02x.%x\n",
+ DUMP_printk(" DEV_ALIAS_RANGE\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %04x:%02x:%02x.%x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@@ -1401,8 +1433,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_EXT_SELECT:
- DUMP_printk(" DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x "
- "flags: %02x ext: %08x\n",
+ DUMP_printk(" DEV_EXT_SELECT\t\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@@ -1414,8 +1445,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_EXT_SELECT_RANGE:
- DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: "
- "%04x:%02x:%02x.%x flags: %02x ext: %08x\n",
+ DUMP_printk(" DEV_EXT_SELECT_RANGE\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@@ -1428,21 +1458,18 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_RANGE_END:
- DUMP_printk(" DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n",
+ DUMP_printk(" DEV_RANGE_END\t\tdevid: %04x:%02x:%02x.%x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid));
devid = e->devid;
for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
- if (alias) {
+ if (alias)
pci_seg->alias_table[dev_i] = devid_to;
- set_dev_entry_from_acpi(iommu,
- devid_to, flags, ext_flags);
- }
- set_dev_entry_from_acpi(iommu, dev_i,
- flags, ext_flags);
}
+ set_dev_entry_from_acpi_range(iommu, devid_start, devid, flags, ext_flags);
+ set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags);
break;
case IVHD_DEV_SPECIAL: {
u8 handle, type;
@@ -1461,11 +1488,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
else
var = "UNKNOWN";
- DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n",
+ DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n",
var, (int)handle,
seg_id, PCI_BUS_NUM(devid),
PCI_SLOT(devid),
- PCI_FUNC(devid));
+ PCI_FUNC(devid),
+ e->flags);
ret = add_special_device(type, handle, &devid, false);
if (ret)
@@ -1525,11 +1553,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
}
devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid);
- DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n",
+ DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n",
hid, uid, seg_id,
PCI_BUS_NUM(devid),
PCI_SLOT(devid),
- PCI_FUNC(devid));
+ PCI_FUNC(devid),
+ e->flags);
flags = e->flags;
@@ -1757,13 +1786,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
else
iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
- /*
- * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
- * GAM also requires GA mode. Therefore, we need to
- * check cmpxchg16b support before enabling it.
- */
- if (!boot_cpu_has(X86_FEATURE_CX16) ||
- ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
+ /* GAM requires GA mode. */
+ if ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
break;
case 0x11:
@@ -1773,13 +1797,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
else
iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
- /*
- * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
- * XT, GAM also requires GA mode. Therefore, we need to
- * check cmpxchg16b support before enabling them.
- */
- if (!boot_cpu_has(X86_FEATURE_CX16) ||
- ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) {
+ /* XT and GAM require GA mode. */
+ if ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0) {
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
break;
}
@@ -2145,7 +2164,7 @@ static void print_iommu_info(void)
if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
pr_info("X2APIC enabled\n");
}
- if (amd_iommu_pgtable == AMD_IOMMU_V2) {
+ if (amd_iommu_pgtable == PD_MODE_V2) {
pr_info("V2 page table enabled (Paging mode : %d level)\n",
amd_iommu_gpt_level);
}
@@ -2575,9 +2594,9 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
return;
for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
- __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID);
+ set_dte_bit(&dev_table[devid], DEV_ENTRY_VALID);
if (!amd_iommu_snp_en)
- __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION);
+ set_dte_bit(&dev_table[devid], DEV_ENTRY_TRANSLATION);
}
}
@@ -2605,8 +2624,7 @@ static void init_device_table(void)
for_each_pci_segment(pci_seg) {
for (devid = 0; devid <= pci_seg->last_bdf; ++devid)
- __set_dev_entry_bit(pci_seg->dev_table,
- devid, DEV_ENTRY_IRQ_TBL_EN);
+ set_dte_bit(&pci_seg->dev_table[devid], DEV_ENTRY_IRQ_TBL_EN);
}
}
@@ -3033,6 +3051,11 @@ static int __init early_amd_iommu_init(void)
return -EINVAL;
}
+ if (!boot_cpu_has(X86_FEATURE_CX16)) {
+ pr_err("Failed to initialize. The CMPXCHG16B feature is required.\n");
+ return -EINVAL;
+ }
+
/*
* Validate checksum here so we don't need to do it when
* we actually parse the table
@@ -3059,10 +3082,10 @@ static int __init early_amd_iommu_init(void)
FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL)
amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;
- if (amd_iommu_pgtable == AMD_IOMMU_V2) {
+ if (amd_iommu_pgtable == PD_MODE_V2) {
if (!amd_iommu_v2_pgtbl_supported()) {
pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
- amd_iommu_pgtable = AMD_IOMMU_V1;
+ amd_iommu_pgtable = PD_MODE_V1;
}
}
@@ -3185,7 +3208,7 @@ static void iommu_snp_enable(void)
goto disable_snp;
}
- if (amd_iommu_pgtable != AMD_IOMMU_V1) {
+ if (amd_iommu_pgtable != PD_MODE_V1) {
pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n");
goto disable_snp;
}
@@ -3398,25 +3421,23 @@ static bool amd_iommu_sme_check(void)
* IOMMUs
*
****************************************************************************/
-int __init amd_iommu_detect(void)
+void __init amd_iommu_detect(void)
{
int ret;
if (no_iommu || (iommu_detected && !gart_iommu_aperture))
- return -ENODEV;
+ return;
if (!amd_iommu_sme_check())
- return -ENODEV;
+ return;
ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
if (ret)
- return ret;
+ return;
amd_iommu_detected = true;
iommu_detected = 1;
x86_init.iommu.iommu_init = amd_iommu_init;
-
- return 1;
}
/****************************************************************************
@@ -3464,9 +3485,9 @@ static int __init parse_amd_iommu_options(char *str)
} else if (strncmp(str, "force_isolation", 15) == 0) {
amd_iommu_force_isolation = true;
} else if (strncmp(str, "pgtbl_v1", 8) == 0) {
- amd_iommu_pgtable = AMD_IOMMU_V1;
+ amd_iommu_pgtable = PD_MODE_V1;
} else if (strncmp(str, "pgtbl_v2", 8) == 0) {
- amd_iommu_pgtable = AMD_IOMMU_V2;
+ amd_iommu_pgtable = PD_MODE_V2;
} else if (strncmp(str, "irtcachedis", 11) == 0) {
amd_iommu_irtcachedis = true;
} else if (strncmp(str, "nohugepages", 11) == 0) {
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 5aaeda77eef2..b48a72bd7b23 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -83,12 +83,142 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
static void set_dte_entry(struct amd_iommu *iommu,
struct iommu_dev_data *dev_data);
+static void iommu_flush_dte_sync(struct amd_iommu *iommu, u16 devid);
+
+static struct iommu_dev_data *find_dev_data(struct amd_iommu *iommu, u16 devid);
+
/****************************************************************************
*
* Helper functions
*
****************************************************************************/
+static __always_inline void amd_iommu_atomic128_set(__int128 *ptr, __int128 val)
+{
+ /*
+ * Note:
+ * We use arch_cmpxchg128_local() because:
+ * - Need cmpxchg16b instruction mainly for 128-bit store to DTE
+ * (not necessary for cmpxchg since this function is already
+ * protected by a spin_lock for this DTE).
+ * - Neither need LOCK_PREFIX nor try loop because of the spin_lock.
+ */
+ arch_cmpxchg128_local(ptr, *ptr, val);
+}
+
+static void write_dte_upper128(struct dev_table_entry *ptr, struct dev_table_entry *new)
+{
+ struct dev_table_entry old;
+
+ old.data128[1] = ptr->data128[1];
+ /*
+ * Preserve DTE_DATA2_INTR_MASK. This needs to be
+ * done here since it must happen inside the
+ * spin_lock(&dev_data->dte_lock) context.
+ */
+ new->data[2] &= ~DTE_DATA2_INTR_MASK;
+ new->data[2] |= old.data[2] & DTE_DATA2_INTR_MASK;
+
+ amd_iommu_atomic128_set(&ptr->data128[1], new->data128[1]);
+}
+
+static void write_dte_lower128(struct dev_table_entry *ptr, struct dev_table_entry *new)
+{
+ amd_iommu_atomic128_set(&ptr->data128[0], new->data128[0]);
+}
+
+/*
+ * Note:
+ * IOMMU reads the entire Device Table entry in a single 256-bit transaction
+ * but the driver is programming DTE using 2 128-bit cmpxchg. So, the driver
+ * needs to ensure the following:
+ * - DTE[V|GV] bit is being written last when setting.
+ * - DTE[V|GV] bit is being written first when clearing.
+ *
+ * This function is used only by code, which updates DMA translation part of the DTE.
+ * So, only consider control bits related to DMA when updating the entry.
+ */
+static void update_dte256(struct amd_iommu *iommu, struct iommu_dev_data *dev_data,
+ struct dev_table_entry *new)
+{
+ unsigned long flags;
+ struct dev_table_entry *dev_table = get_dev_table(iommu);
+ struct dev_table_entry *ptr = &dev_table[dev_data->devid];
+
+ spin_lock_irqsave(&dev_data->dte_lock, flags);
+
+ if (!(ptr->data[0] & DTE_FLAG_V)) {
+ /* Existing DTE is not valid. */
+ write_dte_upper128(ptr, new);
+ write_dte_lower128(ptr, new);
+ iommu_flush_dte_sync(iommu, dev_data->devid);
+ } else if (!(new->data[0] & DTE_FLAG_V)) {
+ /* Existing DTE is valid. New DTE is not valid. */
+ write_dte_lower128(ptr, new);
+ write_dte_upper128(ptr, new);
+ iommu_flush_dte_sync(iommu, dev_data->devid);
+ } else if (!FIELD_GET(DTE_FLAG_GV, ptr->data[0])) {
+ /*
+ * Both DTEs are valid.
+ * Existing DTE has no guest page table.
+ */
+ write_dte_upper128(ptr, new);
+ write_dte_lower128(ptr, new);
+ iommu_flush_dte_sync(iommu, dev_data->devid);
+ } else if (!FIELD_GET(DTE_FLAG_GV, new->data[0])) {
+ /*
+ * Both DTEs are valid.
+ * Existing DTE has guest page table,
+ * new DTE has no guest page table,
+ */
+ write_dte_lower128(ptr, new);
+ write_dte_upper128(ptr, new);
+ iommu_flush_dte_sync(iommu, dev_data->devid);
+ } else if (FIELD_GET(DTE_GPT_LEVEL_MASK, ptr->data[2]) !=
+ FIELD_GET(DTE_GPT_LEVEL_MASK, new->data[2])) {
+ /*
+ * Both DTEs are valid and have guest page table,
+ * but have different number of levels. So, we need
+ * to update both upper and lower 128-bit value, which
+ * requires disabling and flushing.
+ */
+ struct dev_table_entry clear = {};
+
+ /* First disable DTE */
+ write_dte_lower128(ptr, &clear);
+ iommu_flush_dte_sync(iommu, dev_data->devid);
+
+ /* Then update DTE */
+ write_dte_upper128(ptr, new);
+ write_dte_lower128(ptr, new);
+ iommu_flush_dte_sync(iommu, dev_data->devid);
+ } else {
+ /*
+ * Both DTEs are valid and have guest page table,
+ * and same number of levels. We just need to only
+ * update the lower 128-bit. So no need to disable DTE.
+ */
+ write_dte_lower128(ptr, new);
+ }
+
+ spin_unlock_irqrestore(&dev_data->dte_lock, flags);
+}
+
+static void get_dte256(struct amd_iommu *iommu, struct iommu_dev_data *dev_data,
+ struct dev_table_entry *dte)
+{
+ unsigned long flags;
+ struct dev_table_entry *ptr;
+ struct dev_table_entry *dev_table = get_dev_table(iommu);
+
+ ptr = &dev_table[dev_data->devid];
+
+ spin_lock_irqsave(&dev_data->dte_lock, flags);
+ dte->data128[0] = ptr->data128[0];
+ dte->data128[1] = ptr->data128[1];
+ spin_unlock_irqrestore(&dev_data->dte_lock, flags);
+}
+
static inline bool pdom_is_v2_pgtbl_mode(struct protection_domain *pdom)
{
return (pdom && (pdom->pd_mode == PD_MODE_V2));
@@ -209,6 +339,7 @@ static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid)
return NULL;
mutex_init(&dev_data->mutex);
+ spin_lock_init(&dev_data->dte_lock);
dev_data->devid = devid;
ratelimit_default_init(&dev_data->rs);
@@ -216,7 +347,7 @@ static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid)
return dev_data;
}
-static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid)
+struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid)
{
struct iommu_dev_data *dev_data;
struct llist_node *node;
@@ -236,9 +367,11 @@ static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid
static int clone_alias(struct pci_dev *pdev, u16 alias, void *data)
{
+ struct dev_table_entry new;
struct amd_iommu *iommu;
- struct dev_table_entry *dev_table;
+ struct iommu_dev_data *dev_data, *alias_data;
u16 devid = pci_dev_id(pdev);
+ int ret = 0;
if (devid == alias)
return 0;
@@ -247,13 +380,27 @@ static int clone_alias(struct pci_dev *pdev, u16 alias, void *data)
if (!iommu)
return 0;
- amd_iommu_set_rlookup_table(iommu, alias);
- dev_table = get_dev_table(iommu);
- memcpy(dev_table[alias].data,
- dev_table[devid].data,
- sizeof(dev_table[alias].data));
+ /* Copy the data from pdev */
+ dev_data = dev_iommu_priv_get(&pdev->dev);
+ if (!dev_data) {
+ pr_err("%s : Failed to get dev_data for 0x%x\n", __func__, devid);
+ ret = -EINVAL;
+ goto out;
+ }
+ get_dte256(iommu, dev_data, &new);
- return 0;
+ /* Setup alias */
+ alias_data = find_dev_data(iommu, alias);
+ if (!alias_data) {
+ pr_err("%s : Failed to get alias dev_data for 0x%x\n", __func__, alias);
+ ret = -EINVAL;
+ goto out;
+ }
+ update_dte256(iommu, alias_data, &new);
+
+ amd_iommu_set_rlookup_table(iommu, alias);
+out:
+ return ret;
}
static void clone_aliases(struct amd_iommu *iommu, struct device *dev)
@@ -526,6 +673,12 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev)
return -ENOMEM;
dev_data->dev = dev;
+
+ /*
+ * dev_iommu_priv_set() needs to be called before setup_aliases().
+ * Otherwise, the subsequent call to dev_iommu_priv_get() will fail.
+ */
+ dev_iommu_priv_set(dev, dev_data);
setup_aliases(iommu, dev);
/*
@@ -539,8 +692,6 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev)
dev_data->flags = pdev_get_caps(to_pci_dev(dev));
}
- dev_iommu_priv_set(dev, dev_data);
-
return 0;
}
@@ -571,10 +722,13 @@ static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev)
+/*
+ * Print the four 64-bit words of the device table entry for devid.
+ * The entry is copied out with get_dte256() before it is printed.
+ */
static void dump_dte_entry(struct amd_iommu *iommu, u16 devid)
{
int i;
- struct dev_table_entry *dev_table = get_dev_table(iommu);
+ struct dev_table_entry dte;
+ struct iommu_dev_data *dev_data = find_dev_data(iommu, devid);
+
+ /*
+ * find_dev_data() can return NULL (clone_alias() checks for that too)
+ * and get_dte256() dereferences dev_data unconditionally.
+ */
+ if (!dev_data) {
+ pr_err("%s : Failed to get dev_data for 0x%x\n", __func__, devid);
+ return;
+ }
+
+ get_dte256(iommu, dev_data, &dte);
for (i = 0; i < 4; ++i)
- pr_err("DTE[%d]: %016llx\n", i, dev_table[devid].data[i]);
+ pr_err("DTE[%d]: %016llx\n", i, dte.data[i]);
}
static void dump_command(unsigned long phys_addr)
@@ -1261,6 +1415,15 @@ static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
return iommu_queue_command(iommu, &cmd);
}
+static void iommu_flush_dte_sync(struct amd_iommu *iommu, u16 devid)
+{
+ int ret;
+
+ ret = iommu_flush_dte(iommu, devid);
+ if (!ret)
+ iommu_completion_wait(iommu);
+}
+
static void amd_iommu_flush_dte_all(struct amd_iommu *iommu)
{
u32 devid;
@@ -1603,15 +1766,6 @@ void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
domain_flush_complete(domain);
}
-void amd_iommu_domain_update(struct protection_domain *domain)
-{
- /* Update device table */
- amd_iommu_update_and_flush_device_table(domain);
-
- /* Flush domain TLB(s) and wait for completion */
- amd_iommu_domain_flush_all(domain);
-}
-
int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag)
{
struct iommu_dev_data *dev_data;
@@ -1826,90 +1980,109 @@ int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid)
return ret;
}
+static void make_clear_dte(struct iommu_dev_data *dev_data, struct dev_table_entry *ptr,
+ struct dev_table_entry *new)
+{
+ /* All existing DTE must have V bit set */
+ new->data128[0] = DTE_FLAG_V;
+ new->data128[1] = 0;
+}
+
+/*
+ * Note:
+ * The old values for the GCR3 table and GPT have already been cleared by the caller.
+ */
+static void set_dte_gcr3_table(struct amd_iommu *iommu,
+ struct iommu_dev_data *dev_data,
+ struct dev_table_entry *target)
+{
+ struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
+ u64 gcr3;
+
+ if (!gcr3_info->gcr3_tbl)
+ return;
+
+ pr_debug("%s: devid=%#x, glx=%#x, gcr3_tbl=%#llx\n",
+ __func__, dev_data->devid, gcr3_info->glx,
+ (unsigned long long)gcr3_info->gcr3_tbl);
+
+ gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);
+
+ target->data[0] |= DTE_FLAG_GV |
+ FIELD_PREP(DTE_GLX, gcr3_info->glx) |
+ FIELD_PREP(DTE_GCR3_14_12, gcr3 >> 12);
+ if (pdom_is_v2_pgtbl_mode(dev_data->domain))
+ target->data[0] |= DTE_FLAG_GIOV;
+
+ target->data[1] |= FIELD_PREP(DTE_GCR3_30_15, gcr3 >> 15) |
+ FIELD_PREP(DTE_GCR3_51_31, gcr3 >> 31);
+
+ /* Guest page table can only support 4 and 5 levels */
+ if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL)
+ target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_5_LEVEL);
+ else
+ target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_4_LEVEL);
+}
+
static void set_dte_entry(struct amd_iommu *iommu,
struct iommu_dev_data *dev_data)
{
- u64 pte_root = 0;
- u64 flags = 0;
- u32 old_domid;
- u16 devid = dev_data->devid;
u16 domid;
+ u32 old_domid;
+ struct dev_table_entry *initial_dte;
+ struct dev_table_entry new = {};
struct protection_domain *domain = dev_data->domain;
- struct dev_table_entry *dev_table = get_dev_table(iommu);
struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
+ struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];
if (gcr3_info && gcr3_info->gcr3_tbl)
domid = dev_data->gcr3_info.domid;
else
domid = domain->id;
+ make_clear_dte(dev_data, dte, &new);
+
if (domain->iop.mode != PAGE_MODE_NONE)
- pte_root = iommu_virt_to_phys(domain->iop.root);
+ new.data[0] = iommu_virt_to_phys(domain->iop.root);
- pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
+ new.data[0] |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
- pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V;
+ new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V;
/*
- * When SNP is enabled, Only set TV bit when IOMMU
- * page translation is in use.
+ * When SNP is enabled, we can only support TV=1 with non-zero domain ID.
+ * This is prevented by the SNP-enable and IOMMU_DOMAIN_IDENTITY check in
+ * do_iommu_domain_alloc().
*/
- if (!amd_iommu_snp_en || (domid != 0))
- pte_root |= DTE_FLAG_TV;
-
- flags = dev_table[devid].data[1];
-
- if (dev_data->ats_enabled)
- flags |= DTE_FLAG_IOTLB;
+ WARN_ON(amd_iommu_snp_en && (domid == 0));
+ new.data[0] |= DTE_FLAG_TV;
if (dev_data->ppr)
- pte_root |= 1ULL << DEV_ENTRY_PPR;
+ new.data[0] |= 1ULL << DEV_ENTRY_PPR;
if (domain->dirty_tracking)
- pte_root |= DTE_FLAG_HAD;
-
- if (gcr3_info && gcr3_info->gcr3_tbl) {
- u64 gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);
- u64 glx = gcr3_info->glx;
- u64 tmp;
+ new.data[0] |= DTE_FLAG_HAD;
- pte_root |= DTE_FLAG_GV;
- pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT;
-
- /* First mask out possible old values for GCR3 table */
- tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
- flags &= ~tmp;
-
- tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
- flags &= ~tmp;
-
- /* Encode GCR3 table into DTE */
- tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A;
- pte_root |= tmp;
-
- tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B;
- flags |= tmp;
-
- tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
- flags |= tmp;
+ if (dev_data->ats_enabled)
+ new.data[1] |= DTE_FLAG_IOTLB;
- if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL) {
- dev_table[devid].data[2] |=
- ((u64)GUEST_PGTABLE_5_LEVEL << DTE_GPT_LEVEL_SHIFT);
- }
+ old_domid = READ_ONCE(dte->data[1]) & DEV_DOMID_MASK;
+ new.data[1] |= domid;
- /* GIOV is supported with V2 page table mode only */
- if (pdom_is_v2_pgtbl_mode(domain))
- pte_root |= DTE_FLAG_GIOV;
+ /*
+ * Restore cached persistent DTE bits, which can be set by information
+ * in IVRS table. See set_dev_entry_from_acpi().
+ */
+ initial_dte = amd_iommu_get_ivhd_dte_flags(iommu->pci_seg->id, dev_data->devid);
+ if (initial_dte) {
+ new.data128[0] |= initial_dte->data128[0];
+ new.data128[1] |= initial_dte->data128[1];
}
- flags &= ~DEV_DOMID_MASK;
- flags |= domid;
+ set_dte_gcr3_table(iommu, dev_data, &new);
- old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK;
- dev_table[devid].data[1] = flags;
- dev_table[devid].data[0] = pte_root;
+ update_dte256(iommu, dev_data, &new);
/*
* A kdump kernel might be replacing a domain ID that was copied from
@@ -1921,19 +2094,16 @@ static void set_dte_entry(struct amd_iommu *iommu,
}
}
-static void clear_dte_entry(struct amd_iommu *iommu, u16 devid)
+/*
+ * Clear DMA-remap related flags to block all DMA (blocked domain)
+ */
+static void clear_dte_entry(struct amd_iommu *iommu, struct iommu_dev_data *dev_data)
{
- struct dev_table_entry *dev_table = get_dev_table(iommu);
-
- /* remove entry from the device table seen by the hardware */
- dev_table[devid].data[0] = DTE_FLAG_V;
-
- if (!amd_iommu_snp_en)
- dev_table[devid].data[0] |= DTE_FLAG_TV;
+ struct dev_table_entry new = {};
+ struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];
- dev_table[devid].data[1] &= DTE_FLAG_MASK;
-
- amd_iommu_apply_erratum_63(iommu, devid);
+ make_clear_dte(dev_data, dte, &new);
+ update_dte256(iommu, dev_data, &new);
}
/* Update and flush DTE for the given device */
@@ -1944,7 +2114,7 @@ static void dev_update_dte(struct iommu_dev_data *dev_data, bool set)
if (set)
set_dte_entry(iommu, dev_data);
else
- clear_dte_entry(iommu, dev_data->devid);
+ clear_dte_entry(iommu, dev_data);
clone_aliases(iommu, dev_data->dev);
device_flush_dte(dev_data);
@@ -2007,7 +2177,6 @@ static int pdom_attach_iommu(struct amd_iommu *iommu,
struct protection_domain *pdom)
{
struct pdom_iommu_info *pdom_iommu_info, *curr;
- struct io_pgtable_cfg *cfg = &pdom->iop.pgtbl.cfg;
unsigned long flags;
int ret = 0;
@@ -2036,10 +2205,6 @@ static int pdom_attach_iommu(struct amd_iommu *iommu,
goto out_unlock;
}
- /* Update NUMA Node ID */
- if (cfg->amd.nid == NUMA_NO_NODE)
- cfg->amd.nid = dev_to_node(&iommu->dev->dev);
-
out_unlock:
spin_unlock_irqrestore(&pdom->lock, flags);
return ret;
@@ -2276,16 +2441,15 @@ void protection_domain_free(struct protection_domain *domain)
kfree(domain);
}
-static void protection_domain_init(struct protection_domain *domain, int nid)
+static void protection_domain_init(struct protection_domain *domain)
{
spin_lock_init(&domain->lock);
INIT_LIST_HEAD(&domain->dev_list);
INIT_LIST_HEAD(&domain->dev_data_list);
xa_init(&domain->iommu_array);
- domain->iop.pgtbl.cfg.amd.nid = nid;
}
-struct protection_domain *protection_domain_alloc(unsigned int type, int nid)
+struct protection_domain *protection_domain_alloc(void)
{
struct protection_domain *domain;
int domid;
@@ -2301,42 +2465,37 @@ struct protection_domain *protection_domain_alloc(unsigned int type, int nid)
}
domain->id = domid;
- protection_domain_init(domain, nid);
+ protection_domain_init(domain);
return domain;
}
+/*
+ * Allocate the io-pgtable ops that match domain->pd_mode.
+ *
+ * The NUMA node of dev is stored in the page table config before the
+ * allocation. Returns 0 on success, -EINVAL if pd_mode is neither
+ * PD_MODE_V1 nor PD_MODE_V2, and -ENOMEM if alloc_io_pgtable_ops() fails.
+ */
static int pdom_setup_pgtable(struct protection_domain *domain,
- unsigned int type, int pgtable)
+ struct device *dev)
{
struct io_pgtable_ops *pgtbl_ops;
+ enum io_pgtable_fmt fmt;
- /* No need to allocate io pgtable ops in passthrough mode */
- if (!(type & __IOMMU_DOMAIN_PAGING))
- return 0;
-
- switch (pgtable) {
- case AMD_IOMMU_V1:
- domain->pd_mode = PD_MODE_V1;
+ switch (domain->pd_mode) {
+ case PD_MODE_V1:
+ fmt = AMD_IOMMU_V1;
break;
- case AMD_IOMMU_V2:
- domain->pd_mode = PD_MODE_V2;
+ case PD_MODE_V2:
+ fmt = AMD_IOMMU_V2;
break;
default:
+ /* Unknown page table mode: never pass an uninitialized fmt on */
return -EINVAL;
}
- pgtbl_ops =
- alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl.cfg, domain);
+ domain->iop.pgtbl.cfg.amd.nid = dev_to_node(dev);
+ pgtbl_ops = alloc_io_pgtable_ops(fmt, &domain->iop.pgtbl.cfg, domain);
if (!pgtbl_ops)
return -ENOMEM;
return 0;
}
-static inline u64 dma_max_address(int pgtable)
+static inline u64 dma_max_address(enum protection_domain_mode pgtable)
{
- if (pgtable == AMD_IOMMU_V1)
+ if (pgtable == PD_MODE_V1)
return ~0ULL;
/* V2 with 4/5 level page table */
@@ -2348,31 +2507,21 @@ static bool amd_iommu_hd_support(struct amd_iommu *iommu)
return iommu && (iommu->features & FEATURE_HDSUP);
}
-static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
- struct device *dev,
- u32 flags, int pgtable)
+static struct iommu_domain *
+do_iommu_domain_alloc(struct device *dev, u32 flags,
+ enum protection_domain_mode pgtable)
{
bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
+ struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);
struct protection_domain *domain;
- struct amd_iommu *iommu = NULL;
int ret;
- if (dev)
- iommu = get_amd_iommu_from_dev(dev);
-
- /*
- * Since DTE[Mode]=0 is prohibited on SNP-enabled system,
- * default to use IOMMU_DOMAIN_DMA[_FQ].
- */
- if (amd_iommu_snp_en && (type == IOMMU_DOMAIN_IDENTITY))
- return ERR_PTR(-EINVAL);
-
- domain = protection_domain_alloc(type,
- dev ? dev_to_node(dev) : NUMA_NO_NODE);
+ domain = protection_domain_alloc();
if (!domain)
return ERR_PTR(-ENOMEM);
- ret = pdom_setup_pgtable(domain, type, pgtable);
+ domain->pd_mode = pgtable;
+ ret = pdom_setup_pgtable(domain, dev);
if (ret) {
pdom_id_free(domain->id);
kfree(domain);
@@ -2384,72 +2533,45 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
domain->domain.geometry.force_aperture = true;
domain->domain.pgsize_bitmap = domain->iop.pgtbl.cfg.pgsize_bitmap;
- if (iommu) {
- domain->domain.type = type;
- domain->domain.ops = iommu->iommu.ops->default_domain_ops;
+ domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
+ domain->domain.ops = iommu->iommu.ops->default_domain_ops;
- if (dirty_tracking)
- domain->domain.dirty_ops = &amd_dirty_ops;
- }
+ if (dirty_tracking)
+ domain->domain.dirty_ops = &amd_dirty_ops;
return &domain->domain;
}
-static struct iommu_domain *amd_iommu_domain_alloc(unsigned int type)
-{
- struct iommu_domain *domain;
- int pgtable = amd_iommu_pgtable;
-
- /*
- * Force IOMMU v1 page table when allocating
- * domain for pass-through devices.
- */
- if (type == IOMMU_DOMAIN_UNMANAGED)
- pgtable = AMD_IOMMU_V1;
-
- domain = do_iommu_domain_alloc(type, NULL, 0, pgtable);
- if (IS_ERR(domain))
- return NULL;
-
- return domain;
-}
-
static struct iommu_domain *
amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
const struct iommu_user_data *user_data)
{
- unsigned int type = IOMMU_DOMAIN_UNMANAGED;
- struct amd_iommu *iommu = NULL;
+ struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);
const u32 supported_flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
IOMMU_HWPT_ALLOC_PASID;
- if (dev)
- iommu = get_amd_iommu_from_dev(dev);
-
if ((flags & ~supported_flags) || user_data)
return ERR_PTR(-EOPNOTSUPP);
- /* Allocate domain with v2 page table if IOMMU supports PASID. */
- if (flags & IOMMU_HWPT_ALLOC_PASID) {
+ switch (flags & supported_flags) {
+ case IOMMU_HWPT_ALLOC_DIRTY_TRACKING:
+ /* Allocate domain with v1 page table for dirty tracking */
+ if (!amd_iommu_hd_support(iommu))
+ break;
+ return do_iommu_domain_alloc(dev, flags, PD_MODE_V1);
+ case IOMMU_HWPT_ALLOC_PASID:
+ /* Allocate domain with v2 page table if IOMMU supports PASID. */
if (!amd_iommu_pasid_supported())
- return ERR_PTR(-EOPNOTSUPP);
-
- return do_iommu_domain_alloc(type, dev, flags, AMD_IOMMU_V2);
- }
-
- /* Allocate domain with v1 page table for dirty tracking */
- if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) {
- if (iommu && amd_iommu_hd_support(iommu)) {
- return do_iommu_domain_alloc(type, dev,
- flags, AMD_IOMMU_V1);
- }
-
- return ERR_PTR(-EOPNOTSUPP);
+ break;
+ return do_iommu_domain_alloc(dev, flags, PD_MODE_V2);
+ case 0:
+ /* If nothing specific is required use the kernel commandline default */
+ return do_iommu_domain_alloc(dev, 0, amd_iommu_pgtable);
+ default:
+ break;
}
-
- /* If nothing specific is required use the kernel commandline default */
- return do_iommu_domain_alloc(type, dev, 0, amd_iommu_pgtable);
+ return ERR_PTR(-EOPNOTSUPP);
}
void amd_iommu_domain_free(struct iommu_domain *dom)
@@ -2475,10 +2597,19 @@ static int blocked_domain_attach_device(struct iommu_domain *domain,
return 0;
}
+static int blocked_domain_set_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
+{
+ amd_iommu_remove_dev_pasid(dev, pasid, old);
+ return 0;
+}
+
static struct iommu_domain blocked_domain = {
.type = IOMMU_DOMAIN_BLOCKED,
.ops = &(const struct iommu_domain_ops) {
.attach_dev = blocked_domain_attach_device,
+ .set_dev_pasid = blocked_domain_set_dev_pasid,
}
};
@@ -2498,7 +2629,7 @@ void amd_iommu_init_identity_domain(void)
identity_domain.id = pdom_id_alloc();
- protection_domain_init(&identity_domain, NUMA_NO_NODE);
+ protection_domain_init(&identity_domain);
}
/* Same as blocked domain except it supports only ops->attach_dev() */
@@ -2666,12 +2797,12 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
bool enable)
{
struct protection_domain *pdomain = to_pdomain(domain);
- struct dev_table_entry *dev_table;
+ struct dev_table_entry *dte;
struct iommu_dev_data *dev_data;
bool domain_flush = false;
struct amd_iommu *iommu;
unsigned long flags;
- u64 pte_root;
+ u64 new;
spin_lock_irqsave(&pdomain->lock, flags);
if (!(pdomain->dirty_tracking ^ enable)) {
@@ -2680,16 +2811,15 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
}
list_for_each_entry(dev_data, &pdomain->dev_list, list) {
+ spin_lock(&dev_data->dte_lock);
iommu = get_amd_iommu_from_dev_data(dev_data);
-
- dev_table = get_dev_table(iommu);
- pte_root = dev_table[dev_data->devid].data[0];
-
- pte_root = (enable ? pte_root | DTE_FLAG_HAD :
- pte_root & ~DTE_FLAG_HAD);
+ dte = &get_dev_table(iommu)[dev_data->devid];
+ new = dte->data[0];
+ new = (enable ? new | DTE_FLAG_HAD : new & ~DTE_FLAG_HAD);
+ dte->data[0] = new;
+ spin_unlock(&dev_data->dte_lock);
/* Flush device DTE */
- dev_table[dev_data->devid].data[0] = pte_root;
device_flush_dte(dev_data);
domain_flush = true;
}
@@ -2890,7 +3020,6 @@ const struct iommu_ops amd_iommu_ops = {
.blocked_domain = &blocked_domain,
.release_domain = &release_domain,
.identity_domain = &identity_domain.domain,
- .domain_alloc = amd_iommu_domain_alloc,
.domain_alloc_paging_flags = amd_iommu_domain_alloc_paging_flags,
.domain_alloc_sva = amd_iommu_domain_alloc_sva,
.probe_device = amd_iommu_probe_device,
@@ -2901,7 +3030,6 @@ const struct iommu_ops amd_iommu_ops = {
.def_domain_type = amd_iommu_def_domain_type,
.dev_enable_feat = amd_iommu_dev_enable_feature,
.dev_disable_feat = amd_iommu_dev_disable_feature,
- .remove_dev_pasid = amd_iommu_remove_dev_pasid,
.page_response = amd_iommu_page_response,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = amd_iommu_attach_device,
@@ -2956,17 +3084,23 @@ out:
static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid,
struct irq_remap_table *table)
{
- u64 dte;
- struct dev_table_entry *dev_table = get_dev_table(iommu);
+ u64 new;
+ struct dev_table_entry *dte = &get_dev_table(iommu)[devid];
+ struct iommu_dev_data *dev_data = search_dev_data(iommu, devid);
+
+ if (dev_data)
+ spin_lock(&dev_data->dte_lock);
- dte = dev_table[devid].data[2];
- dte &= ~DTE_IRQ_PHYS_ADDR_MASK;
- dte |= iommu_virt_to_phys(table->table);
- dte |= DTE_IRQ_REMAP_INTCTL;
- dte |= DTE_INTTABLEN;
- dte |= DTE_IRQ_REMAP_ENABLE;
+ new = READ_ONCE(dte->data[2]);
+ new &= ~DTE_IRQ_PHYS_ADDR_MASK;
+ new |= iommu_virt_to_phys(table->table);
+ new |= DTE_IRQ_REMAP_INTCTL;
+ new |= DTE_INTTABLEN;
+ new |= DTE_IRQ_REMAP_ENABLE;
+ WRITE_ONCE(dte->data[2], new);
- dev_table[devid].data[2] = dte;
+ if (dev_data)
+ spin_unlock(&dev_data->dte_lock);
}
static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid)
diff --git a/drivers/iommu/amd/pasid.c b/drivers/iommu/amd/pasid.c
index 8c73a30c2800..11150cfd6718 100644
--- a/drivers/iommu/amd/pasid.c
+++ b/drivers/iommu/amd/pasid.c
@@ -185,12 +185,13 @@ struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
struct protection_domain *pdom;
int ret;
- pdom = protection_domain_alloc(IOMMU_DOMAIN_SVA, dev_to_node(dev));
+ pdom = protection_domain_alloc();
if (!pdom)
return ERR_PTR(-ENOMEM);
pdom->domain.ops = &amd_sva_domain_ops;
pdom->mn.ops = &sva_mn;
+ pdom->domain.type = IOMMU_DOMAIN_SVA;
ret = mmu_notifier_register(&pdom->mn, mm);
if (ret) {
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 1d3e71569775..9ba596430e7c 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -112,6 +112,15 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
* from the current CPU register
*/
target->data[3] = cpu_to_le64(read_sysreg(mair_el1));
+
+ /*
+ * Note that we don't bother with S1PIE on the SMMU, we just rely on
+ * our default encoding scheme matching direct permissions anyway.
+ * SMMU has no notion of S1POE nor GCS, so make sure that is clear if
+ * either is enabled for CPUs, just in case anyone imagines otherwise.
+ */
+ if (system_supports_poe() || system_supports_gcs())
+ dev_warn_once(master->smmu->dev, "SVA devices ignore permission overlays and GCS\n");
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_sva_cd);
@@ -206,8 +215,12 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
unsigned long asid_bits;
u32 feat_mask = ARM_SMMU_FEAT_COHERENCY;
- if (vabits_actual == 52)
+ if (vabits_actual == 52) {
+ /* We don't support LPA2 */
+ if (PAGE_SIZE != SZ_64K)
+ return false;
feat_mask |= ARM_SMMU_FEAT_VAX;
+ }
if ((smmu->features & feat_mask) != feat_mask)
return false;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index a5c7002ff75b..358072b4e293 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -26,6 +26,7 @@
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/platform_device.h>
+#include <linux/string_choices.h>
#include <kunit/visibility.h>
#include <uapi/linux/iommufd.h>
@@ -83,8 +84,28 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
{ 0, NULL},
};
-static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
- struct arm_smmu_device *smmu, u32 flags);
+static const char * const event_str[] = {
+ [EVT_ID_BAD_STREAMID_CONFIG] = "C_BAD_STREAMID",
+ [EVT_ID_STE_FETCH_FAULT] = "F_STE_FETCH",
+ [EVT_ID_BAD_STE_CONFIG] = "C_BAD_STE",
+ [EVT_ID_STREAM_DISABLED_FAULT] = "F_STREAM_DISABLED",
+ [EVT_ID_BAD_SUBSTREAMID_CONFIG] = "C_BAD_SUBSTREAMID",
+ [EVT_ID_CD_FETCH_FAULT] = "F_CD_FETCH",
+ [EVT_ID_BAD_CD_CONFIG] = "C_BAD_CD",
+ [EVT_ID_TRANSLATION_FAULT] = "F_TRANSLATION",
+ [EVT_ID_ADDR_SIZE_FAULT] = "F_ADDR_SIZE",
+ [EVT_ID_ACCESS_FAULT] = "F_ACCESS",
+ [EVT_ID_PERMISSION_FAULT] = "F_PERMISSION",
+ [EVT_ID_VMS_FETCH_FAULT] = "F_VMS_FETCH",
+};
+
+static const char * const event_class_str[] = {
+ [0] = "CD fetch",
+ [1] = "Stage 1 translation table fetch",
+ [2] = "Input address caused fault",
+ [3] = "Reserved",
+};
+
static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
static void parse_driver_options(struct arm_smmu_device *smmu)
@@ -1759,17 +1780,49 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
}
/* IRQ and event handlers */
-static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
+static void arm_smmu_decode_event(struct arm_smmu_device *smmu, u64 *raw,
+ struct arm_smmu_event *event)
+{
+ struct arm_smmu_master *master;
+
+ event->id = FIELD_GET(EVTQ_0_ID, raw[0]);
+ event->sid = FIELD_GET(EVTQ_0_SID, raw[0]);
+ event->ssv = FIELD_GET(EVTQ_0_SSV, raw[0]);
+ event->ssid = event->ssv ? FIELD_GET(EVTQ_0_SSID, raw[0]) : IOMMU_NO_PASID;
+ event->privileged = FIELD_GET(EVTQ_1_PnU, raw[1]);
+ event->instruction = FIELD_GET(EVTQ_1_InD, raw[1]);
+ event->s2 = FIELD_GET(EVTQ_1_S2, raw[1]);
+ event->read = FIELD_GET(EVTQ_1_RnW, raw[1]);
+ event->stag = FIELD_GET(EVTQ_1_STAG, raw[1]);
+ event->stall = FIELD_GET(EVTQ_1_STALL, raw[1]);
+ event->class = FIELD_GET(EVTQ_1_CLASS, raw[1]);
+ event->iova = FIELD_GET(EVTQ_2_ADDR, raw[2]);
+ event->ipa = raw[3] & EVTQ_3_IPA;
+ event->fetch_addr = raw[3] & EVTQ_3_FETCH_ADDR;
+ event->ttrnw = FIELD_GET(EVTQ_1_TT_READ, raw[1]);
+ event->class_tt = false;
+ event->dev = NULL;
+
+ if (event->id == EVT_ID_PERMISSION_FAULT)
+ event->class_tt = (event->class == EVTQ_1_CLASS_TT);
+
+ mutex_lock(&smmu->streams_mutex);
+ master = arm_smmu_find_master(smmu, event->sid);
+ if (master)
+ event->dev = get_device(master->dev);
+ mutex_unlock(&smmu->streams_mutex);
+}
+
+static int arm_smmu_handle_event(struct arm_smmu_device *smmu,
+ struct arm_smmu_event *event)
{
int ret = 0;
u32 perm = 0;
struct arm_smmu_master *master;
- bool ssid_valid = evt[0] & EVTQ_0_SSV;
- u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
struct iopf_fault fault_evt = { };
struct iommu_fault *flt = &fault_evt.fault;
- switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
+ switch (event->id) {
case EVT_ID_TRANSLATION_FAULT:
case EVT_ID_ADDR_SIZE_FAULT:
case EVT_ID_ACCESS_FAULT:
@@ -1779,35 +1832,35 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
return -EOPNOTSUPP;
}
- if (!(evt[1] & EVTQ_1_STALL))
+ if (!event->stall)
return -EOPNOTSUPP;
- if (evt[1] & EVTQ_1_RnW)
+ if (event->read)
perm |= IOMMU_FAULT_PERM_READ;
else
perm |= IOMMU_FAULT_PERM_WRITE;
- if (evt[1] & EVTQ_1_InD)
+ if (event->instruction)
perm |= IOMMU_FAULT_PERM_EXEC;
- if (evt[1] & EVTQ_1_PnU)
+ if (event->privileged)
perm |= IOMMU_FAULT_PERM_PRIV;
flt->type = IOMMU_FAULT_PAGE_REQ;
flt->prm = (struct iommu_fault_page_request) {
.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
- .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
+ .grpid = event->stag,
.perm = perm,
- .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
+ .addr = event->iova,
};
- if (ssid_valid) {
+ if (event->ssv) {
flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
- flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
+ flt->prm.pasid = event->ssid;
}
mutex_lock(&smmu->streams_mutex);
- master = arm_smmu_find_master(smmu, sid);
+ master = arm_smmu_find_master(smmu, event->sid);
if (!master) {
ret = -EINVAL;
goto out_unlock;
@@ -1819,29 +1872,82 @@ out_unlock:
return ret;
}
+static void arm_smmu_dump_raw_event(struct arm_smmu_device *smmu, u64 *raw,
+ struct arm_smmu_event *event)
+{
+ int i;
+
+ dev_err(smmu->dev, "event 0x%02x received:\n", event->id);
+
+ for (i = 0; i < EVTQ_ENT_DWORDS; ++i)
+ dev_err(smmu->dev, "\t0x%016llx\n", raw[i]);
+}
+
+/*
+ * Logging helpers for a decoded event. Every expansion is fully
+ * parenthesised so it stays a single expression wherever it is used.
+ */
+#define ARM_SMMU_EVT_KNOWN(e) ((e)->id < ARRAY_SIZE(event_str) && event_str[(e)->id])
+#define ARM_SMMU_LOG_EVT_STR(e) (ARM_SMMU_EVT_KNOWN(e) ? event_str[(e)->id] : "UNKNOWN")
+#define ARM_SMMU_LOG_CLIENT(e) ((e)->dev ? dev_name((e)->dev) : "(unassigned sid)")
+
+/*
+ * Log a decoded event, subject to the ratelimit state rs.
+ *
+ * The raw queue entry is dumped first, followed by a decoded summary whose
+ * fields depend on the event ID: translation, address size, access and
+ * permission faults print the addresses and access attributes; STE, CD and
+ * VMS fetch faults print the fetch address; anything else prints only the
+ * event name, client and stream IDs.
+ */
+static void arm_smmu_dump_event(struct arm_smmu_device *smmu, u64 *raw,
+ struct arm_smmu_event *evt,
+ struct ratelimit_state *rs)
+{
+ if (!__ratelimit(rs))
+ return;
+
+ arm_smmu_dump_raw_event(smmu, raw, evt);
+
+ /* Each message ends in a newline, like arm_smmu_dump_raw_event() */
+ switch (evt->id) {
+ case EVT_ID_TRANSLATION_FAULT:
+ case EVT_ID_ADDR_SIZE_FAULT:
+ case EVT_ID_ACCESS_FAULT:
+ case EVT_ID_PERMISSION_FAULT:
+ dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x iova: %#llx ipa: %#llx\n",
+ ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
+ evt->sid, evt->ssid, evt->iova, evt->ipa);
+
+ dev_err(smmu->dev, "%s %s %s %s \"%s\"%s%s stag: %#x\n",
+ evt->privileged ? "priv" : "unpriv",
+ evt->instruction ? "inst" : "data",
+ str_read_write(evt->read),
+ evt->s2 ? "s2" : "s1", event_class_str[evt->class],
+ evt->class_tt ? (evt->ttrnw ? " ttd_read" : " ttd_write") : "",
+ evt->stall ? " stall" : "", evt->stag);
+
+ break;
+
+ case EVT_ID_STE_FETCH_FAULT:
+ case EVT_ID_CD_FETCH_FAULT:
+ case EVT_ID_VMS_FETCH_FAULT:
+ dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x fetch_addr: %#llx\n",
+ ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
+ evt->sid, evt->ssid, evt->fetch_addr);
+
+ break;
+
+ default:
+ dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x\n",
+ ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
+ evt->sid, evt->ssid);
+ break;
+ }
+}
+
static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
{
- int i, ret;
+ u64 evt[EVTQ_ENT_DWORDS];
+ struct arm_smmu_event event = {0};
struct arm_smmu_device *smmu = dev;
struct arm_smmu_queue *q = &smmu->evtq.q;
struct arm_smmu_ll_queue *llq = &q->llq;
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
- u64 evt[EVTQ_ENT_DWORDS];
do {
while (!queue_remove_raw(q, evt)) {
- u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
-
- ret = arm_smmu_handle_evt(smmu, evt);
- if (!ret || !__ratelimit(&rs))
- continue;
-
- dev_info(smmu->dev, "event 0x%02x received:\n", id);
- for (i = 0; i < ARRAY_SIZE(evt); ++i)
- dev_info(smmu->dev, "\t0x%016llx\n",
- (unsigned long long)evt[i]);
+ arm_smmu_decode_event(smmu, evt, &event);
+ if (arm_smmu_handle_event(smmu, &event))
+ arm_smmu_dump_event(smmu, evt, &event, &rs);
+ put_device(event.dev);
cond_resched();
}
@@ -2353,39 +2459,12 @@ struct arm_smmu_domain *arm_smmu_domain_alloc(void)
if (!smmu_domain)
return ERR_PTR(-ENOMEM);
- mutex_init(&smmu_domain->init_mutex);
INIT_LIST_HEAD(&smmu_domain->devices);
spin_lock_init(&smmu_domain->devices_lock);
return smmu_domain;
}
-static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
-{
- struct arm_smmu_domain *smmu_domain;
-
- /*
- * Allocate the domain and initialise some of its data structures.
- * We can't really do anything meaningful until we've added a
- * master.
- */
- smmu_domain = arm_smmu_domain_alloc();
- if (IS_ERR(smmu_domain))
- return ERR_CAST(smmu_domain);
-
- if (dev) {
- struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- int ret;
-
- ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0);
- if (ret) {
- kfree(smmu_domain);
- return ERR_PTR(ret);
- }
- }
- return &smmu_domain->domain;
-}
-
static void arm_smmu_domain_free_paging(struct iommu_domain *domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -2451,12 +2530,6 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
struct arm_smmu_domain *smmu_domain);
bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
- /* Restrict the stage to what we can actually support */
- if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
- smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
- if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
- smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
-
pgtbl_cfg = (struct io_pgtable_cfg) {
.pgsize_bitmap = smmu->pgsize_bitmap,
.coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
@@ -2745,9 +2818,14 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
* Translation Requests and Translated transactions are denied
* as though ATS is disabled for the stream (STE.EATS == 0b00),
* causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events
- * (IHI0070Ea 5.2 Stream Table Entry). Thus ATS can only be
- * enabled if we have arm_smmu_domain, those always have page
- * tables.
+ * (IHI0070Ea 5.2 Stream Table Entry).
+ *
+ * However, if we have installed a CD table and are using S1DSS
+ * then ATS will work in S1DSS bypass. See "13.6.4 Full ATS
+ * skipping stage 1".
+ *
+ * Disable ATS if we are going to create a normal 0b100 bypass
+ * STE.
*/
state->ats_enabled = !state->disable_ats &&
arm_smmu_ats_supported(master);
@@ -2853,15 +2931,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
state.master = master = dev_iommu_priv_get(dev);
smmu = master->smmu;
- mutex_lock(&smmu_domain->init_mutex);
-
- if (!smmu_domain->smmu) {
- ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0);
- } else if (smmu_domain->smmu != smmu)
- ret = -EINVAL;
-
- mutex_unlock(&smmu_domain->init_mutex);
- if (ret)
- return ret;
+ /*
+ * A domain is tied to one SMMU instance, so a device behind a
+ * different instance must be rejected. 'ret' is no longer assigned
+ * on this path, so return the error code directly, as
+ * arm_smmu_s1_set_dev_pasid() does.
+ */
+ if (smmu_domain->smmu != smmu)
+ return -EINVAL;
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
@@ -2918,16 +2988,9 @@ static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_cd target_cd;
- int ret = 0;
- mutex_lock(&smmu_domain->init_mutex);
- if (!smmu_domain->smmu)
- ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0);
- else if (smmu_domain->smmu != smmu)
- ret = -EINVAL;
- mutex_unlock(&smmu_domain->init_mutex);
- if (ret)
- return ret;
+ if (smmu_domain->smmu != smmu)
+ return -EINVAL;
if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
return -EINVAL;
@@ -3016,13 +3079,12 @@ out_unlock:
return ret;
}
-static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
- struct iommu_domain *domain)
+static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old_domain)
{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(old_domain);
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- struct arm_smmu_domain *smmu_domain;
-
- smmu_domain = to_smmu_domain(domain);
mutex_lock(&arm_smmu_asid_lock);
arm_smmu_clear_cd(master, pasid);
@@ -3043,6 +3105,7 @@ static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
sid_domain->type == IOMMU_DOMAIN_BLOCKED)
sid_domain->ops->attach_dev(sid_domain, dev);
}
+ return 0;
}
static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
@@ -3070,8 +3133,10 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
if (arm_smmu_ssids_in_use(&master->cd_table)) {
/*
* If a CD table has to be present then we need to run with ATS
- * on even though the RID will fail ATS queries with UR. This is
- * because we have no idea what the PASID's need.
+ * on because we have to assume a PASID is using ATS. For
+ * IDENTITY this will setup things so that S1DSS=bypass which
+ * follows the explanation in "13.6.4 Full ATS skipping stage 1"
+ * and allows for ATS on the RID to work.
*/
state.cd_needs_ats = true;
arm_smmu_attach_prepare(&state, domain);
@@ -3124,6 +3189,7 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
static const struct iommu_domain_ops arm_smmu_blocked_ops = {
.attach_dev = arm_smmu_attach_dev_blocked,
+ .set_dev_pasid = arm_smmu_blocking_set_dev_pasid,
};
static struct iommu_domain arm_smmu_blocked_domain = {
@@ -3136,6 +3202,7 @@ arm_smmu_domain_alloc_paging_flags(struct device *dev, u32 flags,
const struct iommu_user_data *user_data)
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ struct arm_smmu_device *smmu = master->smmu;
const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
IOMMU_HWPT_ALLOC_PASID |
IOMMU_HWPT_ALLOC_NEST_PARENT;
@@ -3147,25 +3214,43 @@ arm_smmu_domain_alloc_paging_flags(struct device *dev, u32 flags,
if (user_data)
return ERR_PTR(-EOPNOTSUPP);
- if (flags & IOMMU_HWPT_ALLOC_PASID)
- return arm_smmu_domain_alloc_paging(dev);
-
smmu_domain = arm_smmu_domain_alloc();
if (IS_ERR(smmu_domain))
return ERR_CAST(smmu_domain);
- if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT) {
- if (!(master->smmu->features & ARM_SMMU_FEAT_NESTING)) {
+ switch (flags) {
+ case 0:
+ /* Prefer S1 if available */
+ if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ else
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
+ break;
+ case IOMMU_HWPT_ALLOC_NEST_PARENT:
+ if (!(smmu->features & ARM_SMMU_FEAT_NESTING)) {
ret = -EOPNOTSUPP;
goto err_free;
}
smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
smmu_domain->nest_parent = true;
+ break;
+ case IOMMU_HWPT_ALLOC_DIRTY_TRACKING:
+ case IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_PASID:
+ case IOMMU_HWPT_ALLOC_PASID:
+ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) {
+ ret = -EOPNOTSUPP;
+ goto err_free;
+ }
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ goto err_free;
}
smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
- ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, flags);
+ ret = arm_smmu_domain_finalise(smmu_domain, smmu, flags);
if (ret)
goto err_free;
return &smmu_domain->domain;
@@ -3237,8 +3322,8 @@ static struct platform_driver arm_smmu_driver;
static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
- struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
- fwnode);
+ struct device *dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode);
+
put_device(dev);
return dev ? dev_get_drvdata(dev) : NULL;
}
@@ -3543,7 +3628,6 @@ static struct iommu_ops arm_smmu_ops = {
.blocked_domain = &arm_smmu_blocked_domain,
.capable = arm_smmu_capable,
.hw_info = arm_smmu_hw_info,
- .domain_alloc_paging = arm_smmu_domain_alloc_paging,
.domain_alloc_sva = arm_smmu_sva_domain_alloc,
.domain_alloc_paging_flags = arm_smmu_domain_alloc_paging_flags,
.probe_device = arm_smmu_probe_device,
@@ -3551,7 +3635,6 @@ static struct iommu_ops arm_smmu_ops = {
.device_group = arm_smmu_device_group,
.of_xlate = arm_smmu_of_xlate,
.get_resv_regions = arm_smmu_get_resv_regions,
- .remove_dev_pasid = arm_smmu_remove_dev_pasid,
.dev_enable_feat = arm_smmu_dev_enable_feature,
.dev_disable_feat = arm_smmu_dev_disable_feature,
.page_response = arm_smmu_page_response,
@@ -4239,7 +4322,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
*/
if (!!(reg & IDR0_COHACC) != coherent)
dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
- coherent ? "true" : "false");
+ str_true_false(coherent));
switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
case IDR0_STALL_MODEL_FORCE:
@@ -4663,7 +4746,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
/* Initialise in-memory data structures */
ret = arm_smmu_init_structures(smmu);
if (ret)
- return ret;
+ goto err_free_iopf;
/* Record our private device structure */
platform_set_drvdata(pdev, smmu);
@@ -4674,22 +4757,29 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
/* Reset the device */
ret = arm_smmu_device_reset(smmu);
if (ret)
- return ret;
+ goto err_disable;
/* And we're up. Go go go! */
ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
"smmu3.%pa", &ioaddr);
if (ret)
- return ret;
+ goto err_disable;
ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
if (ret) {
dev_err(dev, "Failed to register iommu\n");
- iommu_device_sysfs_remove(&smmu->iommu);
- return ret;
+ goto err_free_sysfs;
}
return 0;
+
+err_free_sysfs:
+ iommu_device_sysfs_remove(&smmu->iommu);
+err_disable:
+ arm_smmu_device_disable(smmu);
+err_free_iopf:
+ iopf_queue_free(smmu->evtq.iopf);
+ return ret;
}
static void arm_smmu_device_remove(struct platform_device *pdev)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 0107d3f333a1..bd9d7c85576a 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -452,10 +452,18 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid)
#define EVTQ_0_ID GENMASK_ULL(7, 0)
+#define EVT_ID_BAD_STREAMID_CONFIG 0x02
+#define EVT_ID_STE_FETCH_FAULT 0x03
+#define EVT_ID_BAD_STE_CONFIG 0x04
+#define EVT_ID_STREAM_DISABLED_FAULT 0x06
+#define EVT_ID_BAD_SUBSTREAMID_CONFIG 0x08
+#define EVT_ID_CD_FETCH_FAULT 0x09
+#define EVT_ID_BAD_CD_CONFIG 0x0a
#define EVT_ID_TRANSLATION_FAULT 0x10
#define EVT_ID_ADDR_SIZE_FAULT 0x11
#define EVT_ID_ACCESS_FAULT 0x12
#define EVT_ID_PERMISSION_FAULT 0x13
+#define EVT_ID_VMS_FETCH_FAULT 0x25
#define EVTQ_0_SSV (1UL << 11)
#define EVTQ_0_SSID GENMASK_ULL(31, 12)
@@ -467,9 +475,11 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid)
#define EVTQ_1_RnW (1UL << 35)
#define EVTQ_1_S2 (1UL << 39)
#define EVTQ_1_CLASS GENMASK_ULL(41, 40)
+#define EVTQ_1_CLASS_TT 0x01
#define EVTQ_1_TT_READ (1UL << 44)
#define EVTQ_2_ADDR GENMASK_ULL(63, 0)
#define EVTQ_3_IPA GENMASK_ULL(51, 12)
+#define EVTQ_3_FETCH_ADDR GENMASK_ULL(51, 3)
/* PRI queue */
#define PRIQ_ENT_SZ_SHIFT 4
@@ -789,6 +799,26 @@ struct arm_smmu_stream {
struct rb_node node;
};
+struct arm_smmu_event {
+ u8 stall : 1,
+ ssv : 1,
+ privileged : 1,
+ instruction : 1,
+ s2 : 1,
+ read : 1,
+ ttrnw : 1,
+ class_tt : 1;
+ u8 id;
+ u8 class;
+ u16 stag;
+ u32 sid;
+ u32 ssid;
+ u64 iova;
+ u64 ipa;
+ u64 fetch_addr;
+ struct device *dev;
+};
+
/* SMMU private data for each master */
struct arm_smmu_master {
struct arm_smmu_device *smmu;
@@ -813,7 +843,6 @@ enum arm_smmu_domain_stage {
struct arm_smmu_domain {
struct arm_smmu_device *smmu;
- struct mutex init_mutex; /* Protects smmu pointer */
struct io_pgtable_ops *pgtbl_ops;
atomic_t nr_ats_masters;
diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
index 6e41ddaa24d6..d525ab43a4ae 100644
--- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
+++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
@@ -79,7 +79,6 @@
#define TEGRA241_VCMDQ_PAGE1(q) (TEGRA241_VCMDQ_PAGE1_BASE + 0x80*(q))
#define VCMDQ_ADDR GENMASK(47, 5)
#define VCMDQ_LOG2SIZE GENMASK(4, 0)
-#define VCMDQ_LOG2SIZE_MAX 19
#define TEGRA241_VCMDQ_BASE 0x00000
#define TEGRA241_VCMDQ_CONS_INDX_BASE 0x00008
@@ -505,12 +504,15 @@ static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq)
struct arm_smmu_cmdq *cmdq = &vcmdq->cmdq;
struct arm_smmu_queue *q = &cmdq->q;
char name[16];
+ u32 regval;
int ret;
snprintf(name, 16, "vcmdq%u", vcmdq->idx);
- /* Queue size, capped to ensure natural alignment */
- q->llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, VCMDQ_LOG2SIZE_MAX);
+ /* Cap queue size to SMMU's IDR1.CMDQS and ensure natural alignment */
+ regval = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
+ q->llq.max_n_shift =
+ min_t(u32, CMDQ_MAX_SZ_SHIFT, FIELD_GET(IDR1_CMDQS, regval));
/* Use the common helper to init the VCMDQ, and then... */
ret = arm_smmu_init_one_queue(smmu, q, vcmdq->page0,
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
index 99030e6b16e7..db9b9a8e139c 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
@@ -110,7 +110,6 @@ static struct arm_smmu_device *cavium_smmu_impl_init(struct arm_smmu_device *smm
int arm_mmu500_reset(struct arm_smmu_device *smmu)
{
u32 reg, major;
- int i;
/*
* On MMU-500 r2p0 onwards we need to clear ACR.CACHE_LOCK before
* writes to the context bank ACTLRs will stick. And we just hope that
@@ -128,11 +127,12 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu)
reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sACR, reg);
+#ifdef CONFIG_ARM_SMMU_MMU_500_CPRE_ERRATA
/*
* Disable MMU-500's not-particularly-beneficial next-page
* prefetcher for the sake of at least 5 known errata.
*/
- for (i = 0; i < smmu->num_context_banks; ++i) {
+ for (int i = 0; i < smmu->num_context_banks; ++i) {
reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR);
reg &= ~ARM_MMU500_ACTLR_CPRE;
arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, reg);
@@ -140,6 +140,7 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu)
if (reg & ARM_MMU500_ACTLR_CPRE)
dev_warn_once(smmu->dev, "Failed to disable prefetcher for errata workarounds, check SACR.CACHE_LOCK\n");
}
+#endif
return 0;
}
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
index 548783f3f8e8..d03b2239baad 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
@@ -73,7 +73,7 @@ void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu)
if (__ratelimit(&rs)) {
dev_err(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n");
- cfg = qsmmu->cfg;
+ cfg = qsmmu->data->cfg;
if (!cfg)
return;
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 6372f3e25c4b..59d02687280e 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -16,6 +16,40 @@
#define QCOM_DUMMY_VAL -1
+/*
+ * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
+ * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch
+ * buffer). The remaining bits are implementation defined and vary across
+ * SoCs.
+ */
+
+#define CPRE (1 << 1)
+#define CMTLB (1 << 0)
+#define PREFETCH_SHIFT 8
+#define PREFETCH_DEFAULT 0
+#define PREFETCH_SHALLOW (1 << PREFETCH_SHIFT)
+#define PREFETCH_MODERATE (2 << PREFETCH_SHIFT)
+#define PREFETCH_DEEP (3 << PREFETCH_SHIFT)
+#define GFX_ACTLR_PRR (1 << 5)
+
+static const struct of_device_id qcom_smmu_actlr_client_of_match[] = {
+ { .compatible = "qcom,adreno",
+ .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
+ { .compatible = "qcom,adreno-gmu",
+ .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
+ { .compatible = "qcom,adreno-smmu",
+ .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
+ { .compatible = "qcom,fastrpc",
+ .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
+ { .compatible = "qcom,sc7280-mdss",
+ .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
+ { .compatible = "qcom,sc7280-venus",
+ .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
+ { .compatible = "qcom,sm8550-mdss",
+ .data = (const void *) (PREFETCH_DEFAULT | CMTLB) },
+ { }
+};
+
static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
{
return container_of(smmu, struct qcom_smmu, smmu);
@@ -99,6 +133,47 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
}
+static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
+{
+ struct arm_smmu_domain *smmu_domain = (void *)cookie;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+ u32 reg = 0;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(smmu->dev);
+ if (ret < 0) {
+ dev_err(smmu->dev, "failed to get runtime PM: %d\n", ret);
+ return;
+ }
+
+ reg = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
+ reg &= ~GFX_ACTLR_PRR;
+ if (set)
+ reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
+ arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
+ pm_runtime_put_autosuspend(smmu->dev);
+}
+
+static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
+{
+ struct arm_smmu_domain *smmu_domain = (void *)cookie;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(smmu->dev);
+ if (ret < 0) {
+ dev_err(smmu->dev, "failed to get runtime PM: %d\n", ret);
+ return;
+ }
+
+ writel_relaxed(lower_32_bits(page_addr),
+ smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
+ writel_relaxed(upper_32_bits(page_addr),
+ smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
+ pm_runtime_put_autosuspend(smmu->dev);
+}
+
#define QCOM_ADRENO_SMMU_GPU_SID 0
static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
@@ -207,13 +282,37 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
return true;
}
+static void qcom_smmu_set_actlr_dev(struct device *dev, struct arm_smmu_device *smmu, int cbndx,
+ const struct of_device_id *client_match)
+{
+ const struct of_device_id *match =
+ of_match_device(client_match, dev);
+
+ if (!match) {
+ dev_dbg(dev, "no ACTLR settings present\n");
+ return;
+ }
+
+ arm_smmu_cb_write(smmu, cbndx, ARM_SMMU_CB_ACTLR, (unsigned long)match->data);
+}
+
static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
{
+ const struct device_node *np = smmu_domain->smmu->dev->of_node;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
+ const struct of_device_id *client_match;
+ int cbndx = smmu_domain->cfg.cbndx;
struct adreno_smmu_priv *priv;
smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
+ client_match = qsmmu->data->client_match;
+
+ if (client_match)
+ qcom_smmu_set_actlr_dev(dev, smmu, cbndx, client_match);
+
/* Only enable split pagetables for the GPU device (SID 0) */
if (!qcom_adreno_smmu_is_gpu_device(dev))
return 0;
@@ -239,6 +338,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
priv->set_stall = qcom_adreno_smmu_set_stall;
priv->resume_translation = qcom_adreno_smmu_resume_translation;
+ priv->set_prr_bit = NULL;
+ priv->set_prr_addr = NULL;
+
+ if (of_device_is_compatible(np, "qcom,smmu-500") &&
+ of_device_is_compatible(np, "qcom,adreno-smmu")) {
+ priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
+ priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
+ }
return 0;
}
@@ -269,8 +376,18 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
{
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
+ const struct of_device_id *client_match;
+ int cbndx = smmu_domain->cfg.cbndx;
+
smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
+ client_match = qsmmu->data->client_match;
+
+ if (client_match)
+ qcom_smmu_set_actlr_dev(dev, smmu, cbndx, client_match);
+
return 0;
}
@@ -507,7 +624,7 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
return ERR_PTR(-ENOMEM);
qsmmu->smmu.impl = impl;
- qsmmu->cfg = data->cfg;
+ qsmmu->data = data;
return &qsmmu->smmu;
}
@@ -550,6 +667,7 @@ static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = {
.impl = &qcom_smmu_500_impl,
.adreno_impl = &qcom_adreno_smmu_500_impl,
.cfg = &qcom_smmu_impl0_cfg,
+ .client_match = qcom_smmu_actlr_client_of_match,
};
/*
@@ -567,6 +685,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
{ .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_v2_data },
+ { .compatible = "qcom,sdm670-smmu-v2", .data = &qcom_smmu_v2_data },
{ .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_v2_data },
{ .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data },
{ .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_500_impl0_data},
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
index 3c134d1a6277..8addd453f5f1 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
@@ -8,7 +8,7 @@
struct qcom_smmu {
struct arm_smmu_device smmu;
- const struct qcom_smmu_config *cfg;
+ const struct qcom_smmu_match_data *data;
bool bypass_quirk;
u8 bypass_cbndx;
u32 stall_enabled;
@@ -28,6 +28,7 @@ struct qcom_smmu_match_data {
const struct qcom_smmu_config *cfg;
const struct arm_smmu_impl *impl;
const struct arm_smmu_impl *adreno_impl;
+ const struct of_device_id * const client_match;
};
irqreturn_t qcom_smmu_context_fault(int irq, void *dev);
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 650664e0f6e3..de205a34ffc6 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -34,6 +34,7 @@
#include <linux/pm_runtime.h>
#include <linux/ratelimit.h>
#include <linux/slab.h>
+#include <linux/string_choices.h>
#include <linux/fsl/mc.h>
@@ -1411,8 +1412,8 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
- struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
- fwnode);
+ struct device *dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode);
+
put_device(dev);
return dev ? dev_get_drvdata(dev) : NULL;
}
@@ -1437,17 +1438,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
goto out_free;
} else {
smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
-
- /*
- * Defer probe if the relevant SMMU instance hasn't finished
- * probing yet. This is a fragile hack and we'd ideally
- * avoid this race in the core code. Until that's ironed
- * out, however, this is the most pragmatic option on the
- * table.
- */
- if (!smmu)
- return ERR_PTR(dev_err_probe(dev, -EPROBE_DEFER,
- "smmu dev has not bound yet\n"));
}
ret = -EINVAL;
@@ -2117,7 +2107,7 @@ static void arm_smmu_rmr_install_bypass_smr(struct arm_smmu_device *smmu)
}
dev_notice(smmu->dev, "\tpreserved %d boot mapping%s\n", cnt,
- cnt == 1 ? "" : "s");
+ str_plural(cnt));
iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
}
@@ -2227,29 +2217,26 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
i, irq);
}
+ platform_set_drvdata(pdev, smmu);
+
+ /* Check for RMRs and install bypass SMRs if any */
+ arm_smmu_rmr_install_bypass_smr(smmu);
+
+ arm_smmu_device_reset(smmu);
+ arm_smmu_test_smr_masks(smmu);
+
err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
"smmu.%pa", &smmu->ioaddr);
- if (err) {
- dev_err(dev, "Failed to register iommu in sysfs\n");
- return err;
- }
+ if (err)
+ return dev_err_probe(dev, err, "Failed to register iommu in sysfs\n");
err = iommu_device_register(&smmu->iommu, &arm_smmu_ops,
using_legacy_binding ? NULL : dev);
if (err) {
- dev_err(dev, "Failed to register iommu\n");
iommu_device_sysfs_remove(&smmu->iommu);
- return err;
+ return dev_err_probe(dev, err, "Failed to register iommu\n");
}
- platform_set_drvdata(pdev, smmu);
-
- /* Check for RMRs and install bypass SMRs if any */
- arm_smmu_rmr_install_bypass_smr(smmu);
-
- arm_smmu_device_reset(smmu);
- arm_smmu_test_smr_masks(smmu);
-
/*
* We want to avoid touching dev->power.lock in fastpaths unless
* it's really going to do something useful - pm_runtime_enabled()
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index e2aeb511ae90..2dbf3243b5ad 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
#define ARM_SMMU_SCTLR_M BIT(0)
#define ARM_SMMU_CB_ACTLR 0x4
+#define ARM_SMMU_GFX_PRR_CFG_LADDR 0x6008
+#define ARM_SMMU_GFX_PRR_CFG_UADDR 0x600C
#define ARM_SMMU_CB_RESUME 0x8
#define ARM_SMMU_RESUME_TERMINATE BIT(0)
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index d3bb0798092d..6c7528130cf9 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o cache.o prq.o
-obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
+obj-$(CONFIG_DMAR_TABLE) += trace.o
obj-$(CONFIG_DMAR_PERF) += perf.o
obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index 09694cca8752..fc35cba59145 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -47,6 +47,7 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did,
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
struct cache_tag *tag, *temp;
+ struct list_head *prev;
unsigned long flags;
tag = kzalloc(sizeof(*tag), GFP_KERNEL);
@@ -65,6 +66,7 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did,
tag->dev = iommu->iommu.dev;
spin_lock_irqsave(&domain->cache_lock, flags);
+ prev = &domain->cache_tags;
list_for_each_entry(temp, &domain->cache_tags, node) {
if (cache_tage_match(temp, did, iommu, dev, pasid, type)) {
temp->users++;
@@ -73,8 +75,15 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did,
trace_cache_tag_assign(temp);
return 0;
}
+ if (temp->iommu == iommu)
+ prev = &temp->node;
}
- list_add_tail(&tag->node, &domain->cache_tags);
+ /*
+ * Link cache tags of same iommu unit together, so corresponding
+ * flush ops can be batched for iommu unit.
+ */
+ list_add(&tag->node, prev);
+
spin_unlock_irqrestore(&domain->cache_lock, flags);
trace_cache_tag_assign(tag);
diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c
deleted file mode 100644
index 9862dc20b35e..000000000000
--- a/drivers/iommu/intel/cap_audit.c
+++ /dev/null
@@ -1,217 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * cap_audit.c - audit iommu capabilities for boot time and hot plug
- *
- * Copyright (C) 2021 Intel Corporation
- *
- * Author: Kyung Min Park <kyung.min.park@intel.com>
- * Lu Baolu <baolu.lu@linux.intel.com>
- */
-
-#define pr_fmt(fmt) "DMAR: " fmt
-
-#include "iommu.h"
-#include "cap_audit.h"
-
-static u64 intel_iommu_cap_sanity;
-static u64 intel_iommu_ecap_sanity;
-
-static inline void check_irq_capabilities(struct intel_iommu *a,
- struct intel_iommu *b)
-{
- CHECK_FEATURE_MISMATCH(a, b, cap, pi_support, CAP_PI_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, eim_support, ECAP_EIM_MASK);
-}
-
-static inline void check_dmar_capabilities(struct intel_iommu *a,
- struct intel_iommu *b)
-{
- MINIMAL_FEATURE_IOMMU(b, cap, CAP_MAMV_MASK);
- MINIMAL_FEATURE_IOMMU(b, cap, CAP_NFR_MASK);
- MINIMAL_FEATURE_IOMMU(b, cap, CAP_SLLPS_MASK);
- MINIMAL_FEATURE_IOMMU(b, cap, CAP_FRO_MASK);
- MINIMAL_FEATURE_IOMMU(b, cap, CAP_MGAW_MASK);
- MINIMAL_FEATURE_IOMMU(b, cap, CAP_SAGAW_MASK);
- MINIMAL_FEATURE_IOMMU(b, cap, CAP_NDOMS_MASK);
- MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_PSS_MASK);
- MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_MHMV_MASK);
- MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_IRO_MASK);
-
- CHECK_FEATURE_MISMATCH(a, b, cap, fl5lp_support, CAP_FL5LP_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, fl1gp_support, CAP_FL1GP_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, read_drain, CAP_RD_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, write_drain, CAP_WD_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, pgsel_inv, CAP_PSI_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, zlr, CAP_ZLR_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, caching_mode, CAP_CM_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, phmr, CAP_PHMR_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, plmr, CAP_PLMR_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, rwbf, CAP_RWBF_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, afl, CAP_AFL_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, rps, ECAP_RPS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, smpwc, ECAP_SMPWC_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, flts, ECAP_FLTS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, slts, ECAP_SLTS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, nwfs, ECAP_NWFS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, slads, ECAP_SLADS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, smts, ECAP_SMTS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, pds, ECAP_PDS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, dit, ECAP_DIT_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, pasid, ECAP_PASID_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, eafs, ECAP_EAFS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, srs, ECAP_SRS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, ers, ECAP_ERS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, prs, ECAP_PRS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, nest, ECAP_NEST_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, mts, ECAP_MTS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, sc_support, ECAP_SC_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, pass_through, ECAP_PT_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, dev_iotlb_support, ECAP_DT_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, qis, ECAP_QI_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, coherent, ECAP_C_MASK);
-}
-
-static int cap_audit_hotplug(struct intel_iommu *iommu, enum cap_audit_type type)
-{
- bool mismatch = false;
- u64 old_cap = intel_iommu_cap_sanity;
- u64 old_ecap = intel_iommu_ecap_sanity;
-
- if (type == CAP_AUDIT_HOTPLUG_IRQR) {
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, pi_support, CAP_PI_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, eim_support, ECAP_EIM_MASK);
- goto out;
- }
-
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, fl5lp_support, CAP_FL5LP_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, fl1gp_support, CAP_FL1GP_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, read_drain, CAP_RD_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, write_drain, CAP_WD_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, pgsel_inv, CAP_PSI_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, zlr, CAP_ZLR_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, caching_mode, CAP_CM_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, phmr, CAP_PHMR_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, plmr, CAP_PLMR_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, rwbf, CAP_RWBF_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, afl, CAP_AFL_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, rps, ECAP_RPS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smpwc, ECAP_SMPWC_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, flts, ECAP_FLTS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slts, ECAP_SLTS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nwfs, ECAP_NWFS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slads, ECAP_SLADS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smts, ECAP_SMTS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pds, ECAP_PDS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dit, ECAP_DIT_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pasid, ECAP_PASID_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, eafs, ECAP_EAFS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, srs, ECAP_SRS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, ers, ECAP_ERS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, prs, ECAP_PRS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nest, ECAP_NEST_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, mts, ECAP_MTS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, sc_support, ECAP_SC_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pass_through, ECAP_PT_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dev_iotlb_support, ECAP_DT_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, qis, ECAP_QI_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, coherent, ECAP_C_MASK);
-
- /* Abort hot plug if the hot plug iommu feature is smaller than global */
- MINIMAL_FEATURE_HOTPLUG(iommu, cap, max_amask_val, CAP_MAMV_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, cap, num_fault_regs, CAP_NFR_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, cap, super_page_val, CAP_SLLPS_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, cap, fault_reg_offset, CAP_FRO_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, cap, mgaw, CAP_MGAW_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, cap, sagaw, CAP_SAGAW_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, cap, ndoms, CAP_NDOMS_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, ecap, pss, ECAP_PSS_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, ecap, max_handle_mask, ECAP_MHMV_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, ecap, iotlb_offset, ECAP_IRO_MASK, mismatch);
-
-out:
- if (mismatch) {
- intel_iommu_cap_sanity = old_cap;
- intel_iommu_ecap_sanity = old_ecap;
- return -EFAULT;
- }
-
- return 0;
-}
-
-static int cap_audit_static(struct intel_iommu *iommu, enum cap_audit_type type)
-{
- struct dmar_drhd_unit *d;
- struct intel_iommu *i;
- int rc = 0;
-
- rcu_read_lock();
- if (list_empty(&dmar_drhd_units))
- goto out;
-
- for_each_active_iommu(i, d) {
- if (!iommu) {
- intel_iommu_ecap_sanity = i->ecap;
- intel_iommu_cap_sanity = i->cap;
- iommu = i;
- continue;
- }
-
- if (type == CAP_AUDIT_STATIC_DMAR)
- check_dmar_capabilities(iommu, i);
- else
- check_irq_capabilities(iommu, i);
- }
-
- /*
- * If the system is sane to support scalable mode, either SL or FL
- * should be sane.
- */
- if (intel_cap_smts_sanity() &&
- !intel_cap_flts_sanity() && !intel_cap_slts_sanity())
- rc = -EOPNOTSUPP;
-
-out:
- rcu_read_unlock();
- return rc;
-}
-
-int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu)
-{
- switch (type) {
- case CAP_AUDIT_STATIC_DMAR:
- case CAP_AUDIT_STATIC_IRQR:
- return cap_audit_static(iommu, type);
- case CAP_AUDIT_HOTPLUG_DMAR:
- case CAP_AUDIT_HOTPLUG_IRQR:
- return cap_audit_hotplug(iommu, type);
- default:
- break;
- }
-
- return -EFAULT;
-}
-
-bool intel_cap_smts_sanity(void)
-{
- return ecap_smts(intel_iommu_ecap_sanity);
-}
-
-bool intel_cap_pasid_sanity(void)
-{
- return ecap_pasid(intel_iommu_ecap_sanity);
-}
-
-bool intel_cap_nest_sanity(void)
-{
- return ecap_nest(intel_iommu_ecap_sanity);
-}
-
-bool intel_cap_flts_sanity(void)
-{
- return ecap_flts(intel_iommu_ecap_sanity);
-}
-
-bool intel_cap_slts_sanity(void)
-{
- return ecap_slts(intel_iommu_ecap_sanity);
-}
diff --git a/drivers/iommu/intel/cap_audit.h b/drivers/iommu/intel/cap_audit.h
deleted file mode 100644
index d07b75938961..000000000000
--- a/drivers/iommu/intel/cap_audit.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * cap_audit.h - audit iommu capabilities header
- *
- * Copyright (C) 2021 Intel Corporation
- *
- * Author: Kyung Min Park <kyung.min.park@intel.com>
- */
-
-/*
- * Capability Register Mask
- */
-#define CAP_FL5LP_MASK BIT_ULL(60)
-#define CAP_PI_MASK BIT_ULL(59)
-#define CAP_FL1GP_MASK BIT_ULL(56)
-#define CAP_RD_MASK BIT_ULL(55)
-#define CAP_WD_MASK BIT_ULL(54)
-#define CAP_MAMV_MASK GENMASK_ULL(53, 48)
-#define CAP_NFR_MASK GENMASK_ULL(47, 40)
-#define CAP_PSI_MASK BIT_ULL(39)
-#define CAP_SLLPS_MASK GENMASK_ULL(37, 34)
-#define CAP_FRO_MASK GENMASK_ULL(33, 24)
-#define CAP_ZLR_MASK BIT_ULL(22)
-#define CAP_MGAW_MASK GENMASK_ULL(21, 16)
-#define CAP_SAGAW_MASK GENMASK_ULL(12, 8)
-#define CAP_CM_MASK BIT_ULL(7)
-#define CAP_PHMR_MASK BIT_ULL(6)
-#define CAP_PLMR_MASK BIT_ULL(5)
-#define CAP_RWBF_MASK BIT_ULL(4)
-#define CAP_AFL_MASK BIT_ULL(3)
-#define CAP_NDOMS_MASK GENMASK_ULL(2, 0)
-
-/*
- * Extended Capability Register Mask
- */
-#define ECAP_RPS_MASK BIT_ULL(49)
-#define ECAP_SMPWC_MASK BIT_ULL(48)
-#define ECAP_FLTS_MASK BIT_ULL(47)
-#define ECAP_SLTS_MASK BIT_ULL(46)
-#define ECAP_SLADS_MASK BIT_ULL(45)
-#define ECAP_VCS_MASK BIT_ULL(44)
-#define ECAP_SMTS_MASK BIT_ULL(43)
-#define ECAP_PDS_MASK BIT_ULL(42)
-#define ECAP_DIT_MASK BIT_ULL(41)
-#define ECAP_PASID_MASK BIT_ULL(40)
-#define ECAP_PSS_MASK GENMASK_ULL(39, 35)
-#define ECAP_EAFS_MASK BIT_ULL(34)
-#define ECAP_NWFS_MASK BIT_ULL(33)
-#define ECAP_SRS_MASK BIT_ULL(31)
-#define ECAP_ERS_MASK BIT_ULL(30)
-#define ECAP_PRS_MASK BIT_ULL(29)
-#define ECAP_NEST_MASK BIT_ULL(26)
-#define ECAP_MTS_MASK BIT_ULL(25)
-#define ECAP_MHMV_MASK GENMASK_ULL(23, 20)
-#define ECAP_IRO_MASK GENMASK_ULL(17, 8)
-#define ECAP_SC_MASK BIT_ULL(7)
-#define ECAP_PT_MASK BIT_ULL(6)
-#define ECAP_EIM_MASK BIT_ULL(4)
-#define ECAP_DT_MASK BIT_ULL(2)
-#define ECAP_QI_MASK BIT_ULL(1)
-#define ECAP_C_MASK BIT_ULL(0)
-
-/*
- * u64 intel_iommu_cap_sanity, intel_iommu_ecap_sanity will be adjusted as each
- * IOMMU gets audited.
- */
-#define DO_CHECK_FEATURE_MISMATCH(a, b, cap, feature, MASK) \
-do { \
- if (cap##_##feature(a) != cap##_##feature(b)) { \
- intel_iommu_##cap##_sanity &= ~(MASK); \
- pr_info("IOMMU feature %s inconsistent", #feature); \
- } \
-} while (0)
-
-#define CHECK_FEATURE_MISMATCH(a, b, cap, feature, MASK) \
- DO_CHECK_FEATURE_MISMATCH((a)->cap, (b)->cap, cap, feature, MASK)
-
-#define CHECK_FEATURE_MISMATCH_HOTPLUG(b, cap, feature, MASK) \
-do { \
- if (cap##_##feature(intel_iommu_##cap##_sanity)) \
- DO_CHECK_FEATURE_MISMATCH(intel_iommu_##cap##_sanity, \
- (b)->cap, cap, feature, MASK); \
-} while (0)
-
-#define MINIMAL_FEATURE_IOMMU(iommu, cap, MASK) \
-do { \
- u64 min_feature = intel_iommu_##cap##_sanity & (MASK); \
- min_feature = min_t(u64, min_feature, (iommu)->cap & (MASK)); \
- intel_iommu_##cap##_sanity = (intel_iommu_##cap##_sanity & ~(MASK)) | \
- min_feature; \
-} while (0)
-
-#define MINIMAL_FEATURE_HOTPLUG(iommu, cap, feature, MASK, mismatch) \
-do { \
- if ((intel_iommu_##cap##_sanity & (MASK)) > \
- (cap##_##feature((iommu)->cap))) \
- mismatch = true; \
- else \
- (iommu)->cap = ((iommu)->cap & ~(MASK)) | \
- (intel_iommu_##cap##_sanity & (MASK)); \
-} while (0)
-
-enum cap_audit_type {
- CAP_AUDIT_STATIC_DMAR,
- CAP_AUDIT_STATIC_IRQR,
- CAP_AUDIT_HOTPLUG_DMAR,
- CAP_AUDIT_HOTPLUG_IRQR,
-};
-
-bool intel_cap_smts_sanity(void);
-bool intel_cap_pasid_sanity(void);
-bool intel_cap_nest_sanity(void);
-bool intel_cap_flts_sanity(void);
-bool intel_cap_slts_sanity(void);
-
-static inline bool scalable_mode_support(void)
-{
- return (intel_iommu_sm && intel_cap_smts_sanity());
-}
-
-static inline bool pasid_mode_support(void)
-{
- return scalable_mode_support() && intel_cap_pasid_sanity();
-}
-
-static inline bool nested_mode_support(void)
-{
- return scalable_mode_support() && intel_cap_nest_sanity();
-}
-
-int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 79e0da9eb626..85173fe0a18a 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -29,7 +29,6 @@
#include "../irq_remapping.h"
#include "../iommu-pages.h"
#include "pasid.h"
-#include "cap_audit.h"
#include "perfmon.h"
#define ROOT_SIZE VTD_PAGE_SIZE
@@ -2118,10 +2117,6 @@ static int __init init_dmars(void)
struct intel_iommu *iommu;
int ret;
- ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL);
- if (ret)
- goto free_iommu;
-
for_each_iommu(iommu, drhd) {
if (drhd->ignored) {
iommu_disable_translation(iommu);
@@ -2617,10 +2612,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
struct intel_iommu *iommu = dmaru->iommu;
int ret;
- ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
- if (ret)
- goto out;
-
/*
* Disable translation if already enabled prior to OS handover.
*/
@@ -3250,10 +3241,15 @@ static int blocking_domain_attach_dev(struct iommu_domain *domain,
return 0;
}
+static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old);
+
static struct iommu_domain blocking_domain = {
.type = IOMMU_DOMAIN_BLOCKED,
.ops = &(const struct iommu_domain_ops) {
.attach_dev = blocking_domain_attach_dev,
+ .set_dev_pasid = blocking_domain_set_dev_pasid,
}
};
@@ -4090,22 +4086,26 @@ void domain_remove_dev_pasid(struct iommu_domain *domain,
break;
}
}
- WARN_ON_ONCE(!dev_pasid);
spin_unlock_irqrestore(&dmar_domain->lock, flags);
cache_tag_unassign_domain(dmar_domain, dev, pasid);
domain_detach_iommu(dmar_domain, iommu);
- intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
- kfree(dev_pasid);
+ if (!WARN_ON_ONCE(!dev_pasid)) {
+ intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
+ kfree(dev_pasid);
+ }
}
-static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
- struct iommu_domain *domain)
+static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
intel_pasid_tear_down_entry(info->iommu, dev, pasid, false);
- domain_remove_dev_pasid(domain, dev, pasid);
+ domain_remove_dev_pasid(old, dev, pasid);
+
+ return 0;
}
struct dev_pasid_info *
@@ -4445,21 +4445,6 @@ static struct iommu_domain identity_domain = {
},
};
-static struct iommu_domain *intel_iommu_domain_alloc_paging(struct device *dev)
-{
- struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct intel_iommu *iommu = info->iommu;
- struct dmar_domain *dmar_domain;
- bool first_stage;
-
- first_stage = first_level_by_default(iommu);
- dmar_domain = paging_domain_alloc(dev, first_stage);
- if (IS_ERR(dmar_domain))
- return ERR_CAST(dmar_domain);
-
- return &dmar_domain->domain;
-}
-
const struct iommu_ops intel_iommu_ops = {
.blocked_domain = &blocking_domain,
.release_domain = &blocking_domain,
@@ -4468,7 +4453,6 @@ const struct iommu_ops intel_iommu_ops = {
.hw_info = intel_iommu_hw_info,
.domain_alloc_paging_flags = intel_iommu_domain_alloc_paging_flags,
.domain_alloc_sva = intel_svm_domain_alloc,
- .domain_alloc_paging = intel_iommu_domain_alloc_paging,
.domain_alloc_nested = intel_iommu_domain_alloc_nested,
.probe_device = intel_iommu_probe_device,
.release_device = intel_iommu_release_device,
@@ -4478,7 +4462,6 @@ const struct iommu_ops intel_iommu_ops = {
.dev_disable_feat = intel_iommu_dev_disable_feat,
.is_attach_deferred = intel_iommu_is_attach_deferred,
.def_domain_type = device_def_domain_type,
- .remove_dev_pasid = intel_iommu_remove_dev_pasid,
.pgsize_bitmap = SZ_4K,
.page_response = intel_iommu_page_response,
.default_domain_ops = &(const struct iommu_domain_ops) {
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index f5402df72a9b..ad795c772f21 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -24,7 +24,6 @@
#include "iommu.h"
#include "../irq_remapping.h"
#include "../iommu-pages.h"
-#include "cap_audit.h"
enum irq_mode {
IRQ_REMAPPING,
@@ -727,9 +726,6 @@ static int __init intel_prepare_irq_remapping(void)
if (dmar_table_init() < 0)
return -ENODEV;
- if (intel_cap_audit(CAP_AUDIT_STATIC_IRQR, NULL))
- return -ENODEV;
-
if (!dmar_ir_support())
return -ENODEV;
@@ -1533,10 +1529,6 @@ static int dmar_ir_add(struct dmar_drhd_unit *dmaru, struct intel_iommu *iommu)
int ret;
int eim = x2apic_enabled();
- ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_IRQR, iommu);
- if (ret)
- return ret;
-
if (eim && !ecap_eim_support(iommu->ecap)) {
pr_info("DRHD %Lx: EIM not supported by DRHD, ecap %Lx\n",
iommu->reg_phys, iommu->ecap);
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 5b7d85f1e143..fb59a7d35958 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -244,11 +244,31 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
- if (WARN_ON(!pte) || !pasid_pte_is_present(pte)) {
+ if (WARN_ON(!pte)) {
spin_unlock(&iommu->lock);
return;
}
+ if (!pasid_pte_is_present(pte)) {
+ if (!pasid_pte_is_fault_disabled(pte)) {
+ WARN_ON(READ_ONCE(pte->val[0]) != 0);
+ spin_unlock(&iommu->lock);
+ return;
+ }
+
+ /*
+ * When a PASID is used for SVA by a device, it's possible
+ * that the pasid entry is non-present with the Fault
+ * Processing Disabled bit set. Clear the pasid entry and
+ * drain the PRQ for the PASID before return.
+ */
+ pasid_clear_entry(pte);
+ spin_unlock(&iommu->lock);
+ intel_iommu_drain_pasid_prq(dev, pasid);
+
+ return;
+ }
+
did = pasid_get_domain_id(pte);
pgtt = pasid_pte_get_pgtt(pte);
intel_pasid_clear_entry(dev, pasid, fault_ignore);
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index 082f4fe20216..668d8ece6b14 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -73,6 +73,12 @@ static inline bool pasid_pte_is_present(struct pasid_entry *pte)
return READ_ONCE(pte->val[0]) & PASID_PTE_PRESENT;
}
+/* Get FPD(Fault Processing Disable) bit of a PASID table entry */
+static inline bool pasid_pte_is_fault_disabled(struct pasid_entry *pte)
+{
+ return READ_ONCE(pte->val[0]) & PASID_PTE_FPD;
+}
+
/* Get PGTT field of a PASID table entry */
static inline u16 pasid_pte_get_pgtt(struct pasid_entry *pte)
{
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 6b9bb58a414f..7632c80edea6 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -223,6 +223,34 @@ static inline int arm_lpae_max_entries(int i, struct arm_lpae_io_pgtable *data)
return ptes_per_table - (i & (ptes_per_table - 1));
}
+/*
+ * Check if concatenated PGDs are mandatory according to Arm DDI0487 (K.a)
+ * 1) R_DXBSH: For 16KB, and 48-bit input size, use level 1 instead of 0.
+ * 2) R_SRKBC: After de-ciphering the table for PA size and valid initial lookup
+ * a) 40 bits PA size with 4K: use level 1 instead of level 0 (2 tables for ias = oas)
+ * b) 40 bits PA size with 16K: use level 2 instead of level 1 (16 tables for ias = oas)
+ * c) 42 bits PA size with 4K: use level 1 instead of level 0 (8 tables for ias = oas)
+ * d) 48 bits PA size with 16K: use level 1 instead of level 0 (2 tables for ias = oas)
+ */
+static inline bool arm_lpae_concat_mandatory(struct io_pgtable_cfg *cfg,
+ struct arm_lpae_io_pgtable *data)
+{
+ unsigned int ias = cfg->ias;
+ unsigned int oas = cfg->oas;
+
+ /* Covers 1 and 2.d */
+ if ((ARM_LPAE_GRANULE(data) == SZ_16K) && (data->start_level == 0))
+ return (oas == 48) || (ias == 48);
+
+ /* Covers 2.a and 2.c */
+ if ((ARM_LPAE_GRANULE(data) == SZ_4K) && (data->start_level == 0))
+ return (oas == 40) || (oas == 42);
+
+ /* Case 2.b */
+ return (ARM_LPAE_GRANULE(data) == SZ_16K) &&
+ (data->start_level == 1) && (oas == 40);
+}
+
static bool selftest_running = false;
static dma_addr_t __arm_lpae_dma_addr(void *pages)
@@ -676,85 +704,107 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov
data->start_level, ptep);
}
+struct io_pgtable_walk_data {
+ struct io_pgtable *iop;
+ void *data;
+ int (*visit)(struct io_pgtable_walk_data *walk_data, int lvl,
+ arm_lpae_iopte *ptep, size_t size);
+ unsigned long flags;
+ u64 addr;
+ const u64 end;
+};
+
+static int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data,
+ struct io_pgtable_walk_data *walk_data,
+ arm_lpae_iopte *ptep,
+ int lvl);
+
+struct iova_to_phys_data {
+ arm_lpae_iopte pte;
+ int lvl;
+};
+
+static int visit_iova_to_phys(struct io_pgtable_walk_data *walk_data, int lvl,
+ arm_lpae_iopte *ptep, size_t size)
+{
+ struct iova_to_phys_data *data = walk_data->data;
+ data->pte = *ptep;
+ data->lvl = lvl;
+ return 0;
+}
+
static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
unsigned long iova)
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
- arm_lpae_iopte pte, *ptep = data->pgd;
- int lvl = data->start_level;
-
- do {
- /* Valid IOPTE pointer? */
- if (!ptep)
- return 0;
-
- /* Grab the IOPTE we're interested in */
- ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
- pte = READ_ONCE(*ptep);
-
- /* Valid entry? */
- if (!pte)
- return 0;
+ struct iova_to_phys_data d;
+ struct io_pgtable_walk_data walk_data = {
+ .data = &d,
+ .visit = visit_iova_to_phys,
+ .addr = iova,
+ .end = iova + 1,
+ };
+ int ret;
- /* Leaf entry? */
- if (iopte_leaf(pte, lvl, data->iop.fmt))
- goto found_translation;
+ ret = __arm_lpae_iopte_walk(data, &walk_data, data->pgd, data->start_level);
+ if (ret)
+ return 0;
- /* Take it to the next level */
- ptep = iopte_deref(pte, data);
- } while (++lvl < ARM_LPAE_MAX_LEVELS);
+ iova &= (ARM_LPAE_BLOCK_SIZE(d.lvl, data) - 1);
+ return iopte_to_paddr(d.pte, data) | iova;
+}
- /* Ran out of page tables to walk */
+static int visit_pgtable_walk(struct io_pgtable_walk_data *walk_data, int lvl,
+ arm_lpae_iopte *ptep, size_t size)
+{
+ struct arm_lpae_io_pgtable_walk_data *data = walk_data->data;
+ data->ptes[lvl] = *ptep;
return 0;
-
-found_translation:
- iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
- return iopte_to_paddr(pte, data) | iova;
}
-struct io_pgtable_walk_data {
- struct iommu_dirty_bitmap *dirty;
- unsigned long flags;
- u64 addr;
- const u64 end;
-};
+static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops, unsigned long iova,
+ void *wd)
+{
+ struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+ struct io_pgtable_walk_data walk_data = {
+ .data = wd,
+ .visit = visit_pgtable_walk,
+ .addr = iova,
+ .end = iova + 1,
+ };
-static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
- struct io_pgtable_walk_data *walk_data,
- arm_lpae_iopte *ptep,
- int lvl);
+ return __arm_lpae_iopte_walk(data, &walk_data, data->pgd, data->start_level);
+}
-static int io_pgtable_visit_dirty(struct arm_lpae_io_pgtable *data,
- struct io_pgtable_walk_data *walk_data,
- arm_lpae_iopte *ptep, int lvl)
+static int io_pgtable_visit(struct arm_lpae_io_pgtable *data,
+ struct io_pgtable_walk_data *walk_data,
+ arm_lpae_iopte *ptep, int lvl)
{
struct io_pgtable *iop = &data->iop;
arm_lpae_iopte pte = READ_ONCE(*ptep);
- if (iopte_leaf(pte, lvl, iop->fmt)) {
- size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data);
+ size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data);
+ int ret = walk_data->visit(walk_data, lvl, ptep, size);
+ if (ret)
+ return ret;
- if (iopte_writeable_dirty(pte)) {
- iommu_dirty_bitmap_record(walk_data->dirty,
- walk_data->addr, size);
- if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR))
- iopte_set_writeable_clean(ptep);
- }
+ if (iopte_leaf(pte, lvl, iop->fmt)) {
walk_data->addr += size;
return 0;
}
- if (WARN_ON(!iopte_table(pte, lvl)))
+ if (!iopte_table(pte, lvl)) {
return -EINVAL;
+ }
ptep = iopte_deref(pte, data);
- return __arm_lpae_iopte_walk_dirty(data, walk_data, ptep, lvl + 1);
+ return __arm_lpae_iopte_walk(data, walk_data, ptep, lvl + 1);
}
-static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
- struct io_pgtable_walk_data *walk_data,
- arm_lpae_iopte *ptep,
- int lvl)
+static int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data,
+ struct io_pgtable_walk_data *walk_data,
+ arm_lpae_iopte *ptep,
+ int lvl)
{
u32 idx;
int max_entries, ret;
@@ -769,7 +819,7 @@ static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
for (idx = ARM_LPAE_LVL_IDX(walk_data->addr, lvl, data);
(idx < max_entries) && (walk_data->addr < walk_data->end); ++idx) {
- ret = io_pgtable_visit_dirty(data, walk_data, ptep + idx, lvl);
+ ret = io_pgtable_visit(data, walk_data, ptep + idx, lvl);
if (ret)
return ret;
}
@@ -777,6 +827,23 @@ static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
return 0;
}
+static int visit_dirty(struct io_pgtable_walk_data *walk_data, int lvl,
+ arm_lpae_iopte *ptep, size_t size)
+{
+ struct iommu_dirty_bitmap *dirty = walk_data->data;
+
+ if (!iopte_leaf(*ptep, lvl, walk_data->iop->fmt))
+ return 0;
+
+ if (iopte_writeable_dirty(*ptep)) {
+ iommu_dirty_bitmap_record(dirty, walk_data->addr, size);
+ if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR))
+ iopte_set_writeable_clean(ptep);
+ }
+
+ return 0;
+}
+
static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops,
unsigned long iova, size_t size,
unsigned long flags,
@@ -785,7 +852,9 @@ static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops,
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
struct io_pgtable_cfg *cfg = &data->iop.cfg;
struct io_pgtable_walk_data walk_data = {
- .dirty = dirty,
+ .iop = &data->iop,
+ .data = dirty,
+ .visit = visit_dirty,
.flags = flags,
.addr = iova,
.end = iova + size,
@@ -800,7 +869,7 @@ static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops,
if (data->iop.fmt != ARM_64_LPAE_S1)
return -EINVAL;
- return __arm_lpae_iopte_walk_dirty(data, &walk_data, ptep, lvl);
+ return __arm_lpae_iopte_walk(data, &walk_data, ptep, lvl);
}
static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
@@ -882,6 +951,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
.unmap_pages = arm_lpae_unmap_pages,
.iova_to_phys = arm_lpae_iova_to_phys,
.read_and_clear_dirty = arm_lpae_read_and_clear_dirty,
+ .pgtable_walk = arm_lpae_pgtable_walk,
};
return data;
@@ -1006,18 +1076,12 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
if (!data)
return NULL;
- /*
- * Concatenate PGDs at level 1 if possible in order to reduce
- * the depth of the stage-2 walk.
- */
- if (data->start_level == 0) {
- unsigned long pgd_pages;
-
- pgd_pages = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
- if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) {
- data->pgd_bits += data->bits_per_level;
- data->start_level++;
- }
+ if (arm_lpae_concat_mandatory(cfg, data)) {
+ if (WARN_ON((ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte)) >
+ ARM_LPAE_S2_MAX_CONCAT_PAGES))
+ return NULL;
+ data->pgd_bits += data->bits_per_level;
+ data->start_level++;
}
/* VTCR */
@@ -1364,15 +1428,14 @@ static int __init arm_lpae_do_selftests(void)
SZ_64K | SZ_512M,
};
- static const unsigned int ias[] __initconst = {
+ static const unsigned int address_size[] __initconst = {
32, 36, 40, 42, 44, 48,
};
- int i, j, pass = 0, fail = 0;
+ int i, j, k, pass = 0, fail = 0;
struct device dev;
struct io_pgtable_cfg cfg = {
.tlb = &dummy_tlb_ops,
- .oas = 48,
.coherent_walk = true,
.iommu_dev = &dev,
};
@@ -1381,15 +1444,19 @@ static int __init arm_lpae_do_selftests(void)
set_dev_node(&dev, NUMA_NO_NODE);
for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
- for (j = 0; j < ARRAY_SIZE(ias); ++j) {
- cfg.pgsize_bitmap = pgsize[i];
- cfg.ias = ias[j];
- pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u\n",
- pgsize[i], ias[j]);
- if (arm_lpae_run_tests(&cfg))
- fail++;
- else
- pass++;
+ for (j = 0; j < ARRAY_SIZE(address_size); ++j) {
+ /* Don't use ias > oas as it is not valid for stage-2. */
+ for (k = 0; k <= j; ++k) {
+ cfg.pgsize_bitmap = pgsize[i];
+ cfg.ias = address_size[k];
+ cfg.oas = address_size[j];
+ pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u OAS %u\n",
+ pgsize[i], cfg.ias, cfg.oas);
+ if (arm_lpae_run_tests(&cfg))
+ fail++;
+ else
+ pass++;
+ }
}
}
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 599030e1e890..870c3cdbd0f6 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2819,7 +2819,7 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode)
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
if (!ops)
- return -EPROBE_DEFER;
+ return driver_deferred_probe_check_state(dev);
if (fwspec)
return ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL;
@@ -3312,6 +3312,16 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group)
}
EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
+static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
+ struct iommu_domain *domain)
+{
+ const struct iommu_ops *ops = dev_iommu_ops(dev);
+ struct iommu_domain *blocked_domain = ops->blocked_domain;
+
+ WARN_ON(blocked_domain->ops->set_dev_pasid(blocked_domain,
+ dev, pasid, domain));
+}
+
static int __iommu_set_group_pasid(struct iommu_domain *domain,
struct iommu_group *group, ioasid_t pasid)
{
@@ -3330,11 +3340,9 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain,
err_revert:
last_gdev = device;
for_each_group_device(group, device) {
- const struct iommu_ops *ops = dev_iommu_ops(device->dev);
-
if (device == last_gdev)
break;
- ops->remove_dev_pasid(device->dev, pasid, domain);
+ iommu_remove_dev_pasid(device->dev, pasid, domain);
}
return ret;
}
@@ -3344,12 +3352,9 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
struct iommu_domain *domain)
{
struct group_device *device;
- const struct iommu_ops *ops;
- for_each_group_device(group, device) {
- ops = dev_iommu_ops(device->dev);
- ops->remove_dev_pasid(device->dev, pasid, domain);
- }
+ for_each_group_device(group, device)
+ iommu_remove_dev_pasid(device->dev, pasid, domain);
}
/*
@@ -3368,16 +3373,20 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
struct group_device *device;
+ const struct iommu_ops *ops;
int ret;
- if (!domain->ops->set_dev_pasid)
- return -EOPNOTSUPP;
-
if (!group)
return -ENODEV;
- if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner ||
- pasid == IOMMU_NO_PASID)
+ ops = dev_iommu_ops(dev);
+
+ if (!domain->ops->set_dev_pasid ||
+ !ops->blocked_domain ||
+ !ops->blocked_domain->ops->set_dev_pasid)
+ return -EOPNOTSUPP;
+
+ if (ops != domain->owner || pasid == IOMMU_NO_PASID)
return -EINVAL;
mutex_lock(&group->mutex);
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index ce40f0a419ea..2769e4544038 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -725,47 +725,32 @@ static int msm_iommu_probe(struct platform_device *pdev)
iommu->dev = &pdev->dev;
INIT_LIST_HEAD(&iommu->ctx_list);
- iommu->pclk = devm_clk_get(iommu->dev, "smmu_pclk");
+ iommu->pclk = devm_clk_get_prepared(iommu->dev, "smmu_pclk");
if (IS_ERR(iommu->pclk))
return dev_err_probe(iommu->dev, PTR_ERR(iommu->pclk),
"could not get smmu_pclk\n");
- ret = clk_prepare(iommu->pclk);
- if (ret)
- return dev_err_probe(iommu->dev, ret,
- "could not prepare smmu_pclk\n");
-
- iommu->clk = devm_clk_get(iommu->dev, "iommu_clk");
- if (IS_ERR(iommu->clk)) {
- clk_unprepare(iommu->pclk);
+ iommu->clk = devm_clk_get_prepared(iommu->dev, "iommu_clk");
+ if (IS_ERR(iommu->clk))
return dev_err_probe(iommu->dev, PTR_ERR(iommu->clk),
"could not get iommu_clk\n");
- }
-
- ret = clk_prepare(iommu->clk);
- if (ret) {
- clk_unprepare(iommu->pclk);
- return dev_err_probe(iommu->dev, ret, "could not prepare iommu_clk\n");
- }
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
iommu->base = devm_ioremap_resource(iommu->dev, r);
if (IS_ERR(iommu->base)) {
ret = dev_err_probe(iommu->dev, PTR_ERR(iommu->base), "could not get iommu base\n");
- goto fail;
+ return ret;
}
ioaddr = r->start;
iommu->irq = platform_get_irq(pdev, 0);
- if (iommu->irq < 0) {
- ret = -ENODEV;
- goto fail;
- }
+ if (iommu->irq < 0)
+ return -ENODEV;
ret = of_property_read_u32(iommu->dev->of_node, "qcom,ncb", &val);
if (ret) {
dev_err(iommu->dev, "could not get ncb\n");
- goto fail;
+ return ret;
}
iommu->ncb = val;
@@ -780,8 +765,7 @@ static int msm_iommu_probe(struct platform_device *pdev)
if (!par) {
pr_err("Invalid PAR value detected\n");
- ret = -ENODEV;
- goto fail;
+ return -ENODEV;
}
ret = devm_request_threaded_irq(iommu->dev, iommu->irq, NULL,
@@ -791,7 +775,7 @@ static int msm_iommu_probe(struct platform_device *pdev)
iommu);
if (ret) {
pr_err("Request IRQ %d failed with ret=%d\n", iommu->irq, ret);
- goto fail;
+ return ret;
}
list_add(&iommu->dev_node, &qcom_iommu_devices);
@@ -800,23 +784,19 @@ static int msm_iommu_probe(struct platform_device *pdev)
"msm-smmu.%pa", &ioaddr);
if (ret) {
pr_err("Could not add msm-smmu at %pa to sysfs\n", &ioaddr);
- goto fail;
+ return ret;
}
ret = iommu_device_register(&iommu->iommu, &msm_iommu_ops, &pdev->dev);
if (ret) {
pr_err("Could not register msm-smmu at %pa\n", &ioaddr);
- goto fail;
+ return ret;
}
pr_info("device mapped at %p, irq %d with %d ctx banks\n",
iommu->base, iommu->irq, iommu->ncb);
return ret;
-fail:
- clk_unprepare(iommu->clk);
- clk_unprepare(iommu->pclk);
- return ret;
}
static const struct of_device_id msm_iommu_dt_match[] = {
@@ -824,20 +804,11 @@ static const struct of_device_id msm_iommu_dt_match[] = {
{}
};
-static void msm_iommu_remove(struct platform_device *pdev)
-{
- struct msm_iommu_dev *iommu = platform_get_drvdata(pdev);
-
- clk_unprepare(iommu->clk);
- clk_unprepare(iommu->pclk);
-}
-
static struct platform_driver msm_iommu_driver = {
.driver = {
.name = "msm_iommu",
.of_match_table = msm_iommu_dt_match,
},
.probe = msm_iommu_probe,
- .remove = msm_iommu_remove,
};
builtin_platform_driver(msm_iommu_driver);
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index ab60901f8f92..034b0e670384 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -29,6 +29,7 @@
#include <linux/spinlock.h>
#include <linux/soc/mediatek/infracfg.h>
#include <linux/soc/mediatek/mtk_sip_svc.h>
+#include <linux/string_choices.h>
#include <asm/barrier.h>
#include <soc/mediatek/smi.h>
@@ -510,7 +511,7 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
bank->parent_dev,
"fault type=0x%x iova=0x%llx pa=0x%llx master=0x%x(larb=%d port=%d) layer=%d %s\n",
int_state, fault_iova, fault_pa, regval, fault_larb, fault_port,
- layer, write ? "write" : "read");
+ layer, str_write_read(write));
}
/* Interrupt clear */
@@ -602,7 +603,7 @@ static int mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev,
larb_mmu->bank[portid] = upper_32_bits(region->iova_base);
dev_dbg(dev, "%s iommu for larb(%s) port 0x%lx region %d rgn-bank %d.\n",
- enable ? "enable" : "disable", dev_name(larb_mmu->dev),
+ str_enable_disable(enable), dev_name(larb_mmu->dev),
portid_msk, regionid, upper_32_bits(region->iova_base));
if (enable)
@@ -630,8 +631,8 @@ static int mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev,
}
if (ret)
dev_err(dev, "%s iommu(%s) inframaster 0x%lx fail(%d).\n",
- enable ? "enable" : "disable",
- dev_name(data->dev), portid_msk, ret);
+ str_enable_disable(enable), dev_name(data->dev),
+ portid_msk, ret);
}
return ret;
}
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index b6de1ca00cef..a565b9e40f4a 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -25,6 +25,7 @@
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/string_choices.h>
#include <asm/barrier.h>
#include <asm/dma-iommu.h>
#include <dt-bindings/memory/mtk-memory-port.h>
@@ -243,7 +244,7 @@ static void mtk_iommu_v1_config(struct mtk_iommu_v1_data *data,
larb_mmu = &data->larb_imu[larbid];
dev_dbg(dev, "%s iommu port: %d\n",
- enable ? "enable" : "disable", portid);
+ str_enable_disable(enable), portid);
if (enable)
larb_mmu->mmu |= MTK_SMI_MMU_EN(portid);
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index e7a6a1611d19..97987cd78da9 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -29,8 +29,6 @@ static int of_iommu_xlate(struct device *dev,
return -ENODEV;
ret = iommu_fwspec_init(dev, of_fwnode_handle(iommu_spec->np));
- if (ret == -EPROBE_DEFER)
- return driver_deferred_probe_check_state(dev);
if (ret)
return ret;
diff --git a/drivers/iommu/riscv/iommu-pci.c b/drivers/iommu/riscv/iommu-pci.c
index c7a89143014c..d82d2b00904c 100644
--- a/drivers/iommu/riscv/iommu-pci.c
+++ b/drivers/iommu/riscv/iommu-pci.c
@@ -101,6 +101,13 @@ static void riscv_iommu_pci_remove(struct pci_dev *pdev)
riscv_iommu_remove(iommu);
}
+static void riscv_iommu_pci_shutdown(struct pci_dev *pdev)
+{
+ struct riscv_iommu_device *iommu = dev_get_drvdata(&pdev->dev);
+
+ riscv_iommu_disable(iommu);
+}
+
static const struct pci_device_id riscv_iommu_pci_tbl[] = {
{PCI_VDEVICE(REDHAT, PCI_DEVICE_ID_REDHAT_RISCV_IOMMU), 0},
{PCI_VDEVICE(RIVOS, PCI_DEVICE_ID_RIVOS_RISCV_IOMMU_GA), 0},
@@ -112,6 +119,7 @@ static struct pci_driver riscv_iommu_pci_driver = {
.id_table = riscv_iommu_pci_tbl,
.probe = riscv_iommu_pci_probe,
.remove = riscv_iommu_pci_remove,
+ .shutdown = riscv_iommu_pci_shutdown,
.driver = {
.suppress_bind_attrs = true,
},
diff --git a/drivers/iommu/riscv/iommu-platform.c b/drivers/iommu/riscv/iommu-platform.c
index 382ba2841849..725e919b97ef 100644
--- a/drivers/iommu/riscv/iommu-platform.c
+++ b/drivers/iommu/riscv/iommu-platform.c
@@ -11,18 +11,43 @@
*/
#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include "iommu-bits.h"
#include "iommu.h"
+static void riscv_iommu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+ struct device *dev = msi_desc_to_dev(desc);
+ struct riscv_iommu_device *iommu = dev_get_drvdata(dev);
+ u16 idx = desc->msi_index;
+ u64 addr;
+
+ addr = ((u64)msg->address_hi << 32) | msg->address_lo;
+
+ if (addr != (addr & RISCV_IOMMU_MSI_CFG_TBL_ADDR)) {
+ dev_err_once(dev,
+ "uh oh, the IOMMU can't send MSIs to 0x%llx, sending to 0x%llx instead\n",
+ addr, addr & RISCV_IOMMU_MSI_CFG_TBL_ADDR);
+ }
+
+ addr &= RISCV_IOMMU_MSI_CFG_TBL_ADDR;
+
+ riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_MSI_CFG_TBL_ADDR(idx), addr);
+ riscv_iommu_writel(iommu, RISCV_IOMMU_REG_MSI_CFG_TBL_DATA(idx), msg->data);
+ riscv_iommu_writel(iommu, RISCV_IOMMU_REG_MSI_CFG_TBL_CTRL(idx), 0);
+}
+
static int riscv_iommu_platform_probe(struct platform_device *pdev)
{
+ enum riscv_iommu_igs_settings igs;
struct device *dev = &pdev->dev;
struct riscv_iommu_device *iommu = NULL;
struct resource *res = NULL;
- int vec;
+ int vec, ret;
iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
if (!iommu)
@@ -40,16 +65,6 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
iommu->caps = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_CAPABILITIES);
iommu->fctl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_FCTL);
- /* For now we only support WSI */
- switch (FIELD_GET(RISCV_IOMMU_CAPABILITIES_IGS, iommu->caps)) {
- case RISCV_IOMMU_CAPABILITIES_IGS_WSI:
- case RISCV_IOMMU_CAPABILITIES_IGS_BOTH:
- break;
- default:
- return dev_err_probe(dev, -ENODEV,
- "unable to use wire-signaled interrupts\n");
- }
-
iommu->irqs_count = platform_irq_count(pdev);
if (iommu->irqs_count <= 0)
return dev_err_probe(dev, -ENODEV,
@@ -57,13 +72,58 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
if (iommu->irqs_count > RISCV_IOMMU_INTR_COUNT)
iommu->irqs_count = RISCV_IOMMU_INTR_COUNT;
- for (vec = 0; vec < iommu->irqs_count; vec++)
- iommu->irqs[vec] = platform_get_irq(pdev, vec);
+ igs = FIELD_GET(RISCV_IOMMU_CAPABILITIES_IGS, iommu->caps);
+ switch (igs) {
+ case RISCV_IOMMU_CAPABILITIES_IGS_BOTH:
+ case RISCV_IOMMU_CAPABILITIES_IGS_MSI:
+ if (is_of_node(dev->fwnode))
+ of_msi_configure(dev, to_of_node(dev->fwnode));
+
+ if (!dev_get_msi_domain(dev)) {
+ dev_warn(dev, "failed to find an MSI domain\n");
+ goto msi_fail;
+ }
+
+ ret = platform_device_msi_init_and_alloc_irqs(dev, iommu->irqs_count,
+ riscv_iommu_write_msi_msg);
+ if (ret) {
+ dev_warn(dev, "failed to allocate MSIs\n");
+ goto msi_fail;
+ }
+
+ for (vec = 0; vec < iommu->irqs_count; vec++)
+ iommu->irqs[vec] = msi_get_virq(dev, vec);
+
+ /* Enable message-signaled interrupts, fctl.WSI */
+ if (iommu->fctl & RISCV_IOMMU_FCTL_WSI) {
+ iommu->fctl ^= RISCV_IOMMU_FCTL_WSI;
+ riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, iommu->fctl);
+ }
+
+ dev_info(dev, "using MSIs\n");
+ break;
+
+msi_fail:
+ if (igs != RISCV_IOMMU_CAPABILITIES_IGS_BOTH) {
+ return dev_err_probe(dev, -ENODEV,
+ "unable to use wire-signaled interrupts\n");
+ }
- /* Enable wire-signaled interrupts, fctl.WSI */
- if (!(iommu->fctl & RISCV_IOMMU_FCTL_WSI)) {
- iommu->fctl |= RISCV_IOMMU_FCTL_WSI;
- riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, iommu->fctl);
+ fallthrough;
+
+ case RISCV_IOMMU_CAPABILITIES_IGS_WSI:
+ for (vec = 0; vec < iommu->irqs_count; vec++)
+ iommu->irqs[vec] = platform_get_irq(pdev, vec);
+
+ /* Enable wire-signaled interrupts, fctl.WSI */
+ if (!(iommu->fctl & RISCV_IOMMU_FCTL_WSI)) {
+ iommu->fctl |= RISCV_IOMMU_FCTL_WSI;
+ riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, iommu->fctl);
+ }
+ dev_info(dev, "using wire-signaled interrupts\n");
+ break;
+ default:
+ return dev_err_probe(dev, -ENODEV, "invalid IGS\n");
}
return riscv_iommu_init(iommu);
@@ -71,7 +131,18 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
static void riscv_iommu_platform_remove(struct platform_device *pdev)
{
- riscv_iommu_remove(dev_get_drvdata(&pdev->dev));
+ struct riscv_iommu_device *iommu = dev_get_drvdata(&pdev->dev);
+ bool msi = !(iommu->fctl & RISCV_IOMMU_FCTL_WSI);
+ /* msi is taken from the cached fctl above, before the common teardown runs. */
+ riscv_iommu_remove(iommu);
+ /* fctl.WSI clear matches the MSI path in probe (which clears it), so free the MSI interrupts. */
+ if (msi)
+ platform_device_msi_free_irqs_all(&pdev->dev);
+};
+/* .shutdown hook: passes the driver data to riscv_iommu_disable() and does nothing else. */
+static void riscv_iommu_platform_shutdown(struct platform_device *pdev)
+{
+ riscv_iommu_disable(dev_get_drvdata(&pdev->dev));
};
static const struct of_device_id riscv_iommu_of_match[] = {
@@ -82,6 +153,7 @@ static const struct of_device_id riscv_iommu_of_match[] = {
static struct platform_driver riscv_iommu_platform_driver = {
.probe = riscv_iommu_platform_probe,
.remove = riscv_iommu_platform_remove,
+ .shutdown = riscv_iommu_platform_shutdown,
.driver = {
.name = "riscv,iommu",
.of_match_table = riscv_iommu_of_match,
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index 8a05def774bd..8f049d4a0e2c 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -240,6 +240,12 @@ static int riscv_iommu_queue_enable(struct riscv_iommu_device *iommu,
return rc;
}
+ /* Empty queue before enabling it */
+ if (queue->qid == RISCV_IOMMU_INTR_CQ)
+ riscv_iommu_writel(queue->iommu, Q_TAIL(queue), 0);
+ else
+ riscv_iommu_writel(queue->iommu, Q_HEAD(queue), 0);
+
/*
* Enable queue with interrupts, clear any memory fault if any.
* Wait for the hardware to acknowledge request and activate queue
@@ -645,9 +651,11 @@ static struct riscv_iommu_dc *riscv_iommu_get_dc(struct riscv_iommu_device *iomm
* This is best effort IOMMU translation shutdown flow.
* Disable IOMMU without waiting for hardware response.
*/
-static void riscv_iommu_disable(struct riscv_iommu_device *iommu)
+void riscv_iommu_disable(struct riscv_iommu_device *iommu)
{
- riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP, 0);
+ riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP,
+ FIELD_PREP(RISCV_IOMMU_DDTP_IOMMU_MODE,
+ RISCV_IOMMU_DDTP_IOMMU_MODE_BARE));
riscv_iommu_writel(iommu, RISCV_IOMMU_REG_CQCSR, 0);
riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FQCSR, 0);
riscv_iommu_writel(iommu, RISCV_IOMMU_REG_PQCSR, 0);
@@ -1270,7 +1278,7 @@ static phys_addr_t riscv_iommu_iova_to_phys(struct iommu_domain *iommu_domain,
dma_addr_t iova)
{
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
- unsigned long pte_size;
+ size_t pte_size;
unsigned long *ptr;
ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size);
diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
index b1c4664542b4..46df79dd5495 100644
--- a/drivers/iommu/riscv/iommu.h
+++ b/drivers/iommu/riscv/iommu.h
@@ -64,6 +64,7 @@ struct riscv_iommu_device {
int riscv_iommu_init(struct riscv_iommu_device *iommu);
void riscv_iommu_remove(struct riscv_iommu_device *iommu);
+void riscv_iommu_disable(struct riscv_iommu_device *iommu);
#define riscv_iommu_readl(iommu, addr) \
readl_relaxed((iommu)->reg + (addr))
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 4b369419b32c..323cc665c357 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -25,6 +25,7 @@
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/string_choices.h>
#include "iommu-pages.h"
@@ -611,7 +612,7 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
dev_err(iommu->dev, "Page fault at %pad of type %s\n",
&iova,
- (flags == IOMMU_FAULT_WRITE) ? "write" : "read");
+ str_write_read(flags == IOMMU_FAULT_WRITE));
log_iova(iommu, i, iova);
diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
index c637e0997f6d..abec23c7744f 100644
--- a/include/linux/adreno-smmu-priv.h
+++ b/include/linux/adreno-smmu-priv.h
@@ -50,6 +50,11 @@ struct adreno_smmu_fault_info {
* the GPU driver must call resume_translation()
* @resume_translation: Resume translation after a fault
*
+ * @set_prr_bit: [optional] Configure the GPU's Partially Resident
+ * Region (PRR) bit in the ACTLR register.
+ * @set_prr_addr: [optional] Configure the PRR_CFG_*ADDR register with
+ * the physical address of PRR page passed from GPU
+ * driver.
*
* The GPU driver (drm/msm) and adreno-smmu work together for controlling
* the GPU's SMMU instance. This is by necessity, as the GPU is directly
@@ -67,6 +72,8 @@ struct adreno_smmu_priv {
void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
void (*set_stall)(const void *cookie, bool enabled);
void (*resume_translation)(const void *cookie, bool terminate);
+ void (*set_prr_bit)(const void *cookie, bool set);
+ void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
};
#endif /* __ADRENO_SMMU_PRIV_H */
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 2b90c48a6a87..062fbd4c9b77 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -31,11 +31,11 @@ struct amd_iommu_pi_data {
struct task_struct;
struct pci_dev;
-extern int amd_iommu_detect(void);
+extern void amd_iommu_detect(void);
#else /* CONFIG_AMD_IOMMU */
-static inline int amd_iommu_detect(void) { return -ENODEV; }
+static inline void amd_iommu_detect(void) { }
#endif /* CONFIG_AMD_IOMMU */
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index ce86b09ae80f..bba2a51c87d2 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -181,11 +181,21 @@ struct io_pgtable_cfg {
};
/**
+ * struct arm_lpae_io_pgtable_walk_data - information from a pgtable walk
+ *
+ * @ptes: The recorded PTE values from the walk (fixed array of four u64 slots)
+ */
+struct arm_lpae_io_pgtable_walk_data {
+ u64 ptes[4];
+};
+
+/**
* struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
*
* @map_pages: Map a physically contiguous range of pages of the same size.
* @unmap_pages: Unmap a range of virtually contiguous pages of the same size.
* @iova_to_phys: Translate iova to physical address.
+ * @pgtable_walk: (optional) Perform a page table walk for a given iova.
*
* These functions map directly onto the iommu_ops member functions with
* the same names.
@@ -199,6 +209,7 @@ struct io_pgtable_ops {
struct iommu_iotlb_gather *gather);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
unsigned long iova);
+ int (*pgtable_walk)(struct io_pgtable_ops *ops, unsigned long iova, void *wd);
int (*read_and_clear_dirty)(struct io_pgtable_ops *ops,
unsigned long iova, size_t size,
unsigned long flags,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 318d27841130..38c65e92ecd0 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -587,9 +587,6 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size,
* - IOMMU_DOMAIN_DMA: must use a dma domain
* - 0: use the default setting
* @default_domain_ops: the default ops for domains
- * @remove_dev_pasid: Remove any translation configurations of a specific
- * pasid, so that any DMA transactions with this pasid
- * will be blocked by the hardware.
* @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind
* the @dev, as the set of virtualization resources shared/passed
* to user space IOMMU instance. And associate it with a nesting
@@ -647,8 +644,6 @@ struct iommu_ops {
struct iommu_page_response *msg);
int (*def_domain_type)(struct device *dev);
- void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid,
- struct iommu_domain *domain);
struct iommufd_viommu *(*viommu_alloc)(
struct device *dev, struct iommu_domain *parent_domain,