43 files changed, 1304 insertions, 1036 deletions
diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index b42fea07c5ce..f074f6219f5c 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -198,7 +198,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ -| ARM | MMU-500 | #841119,826419 | N/A | +| ARM | MMU-500 | #841119,826419 | ARM_SMMU_MMU_500_CPRE_ERRATA| +| | | #562869,1047329 | | +----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-600 | #1076982,1209401| N/A | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index c1e11bc6b7a0..032fdc27127b 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -61,6 +61,7 @@ properties: - qcom,sm8450-smmu-500 - qcom,sm8550-smmu-500 - qcom,sm8650-smmu-500 + - qcom,sm8750-smmu-500 - qcom,x1e80100-smmu-500 - const: qcom,smmu-500 - const: arm,mmu-500 @@ -88,6 +89,7 @@ properties: items: - enum: - qcom,qcm2290-smmu-500 + - qcom,qcs615-smmu-500 - qcom,sa8255p-smmu-500 - qcom,sa8775p-smmu-500 - qcom,sar2130p-smmu-500 @@ -102,6 +104,7 @@ properties: - qcom,sm8450-smmu-500 - qcom,sm8550-smmu-500 - qcom,sm8650-smmu-500 + - qcom,sm8750-smmu-500 - qcom,x1e80100-smmu-500 - const: qcom,adreno-smmu - const: qcom,smmu-500 @@ -122,6 +125,7 @@ properties: - qcom,msm8996-smmu-v2 - qcom,sc7180-smmu-v2 - qcom,sdm630-smmu-v2 + - qcom,sdm670-smmu-v2 - qcom,sdm845-smmu-v2 - qcom,sm6350-smmu-v2 - qcom,sm7150-smmu-v2 @@ -474,6 +478,7 @@ allOf: items: - enum: - qcom,qcm2290-smmu-500 + - qcom,qcs615-smmu-500 - qcom,sm6115-smmu-500 - qcom,sm6125-smmu-500 - const: qcom,adreno-smmu @@ -550,6 +555,23 @@ allOf: - description: GPU SNoC bus clock - description: GPU AHB clock + - if: + properties: + compatible: + items: + - const: qcom,sm8750-smmu-500 + - const: qcom,adreno-smmu + - const: qcom,smmu-500 + - const: arm,mmu-500 + then: + properties: + clock-names: + items: + - const: hlos + clocks: + items: + - description: HLOS vote clock + # Disallow clocks for all other platforms with specific compatibles - if: properties: @@ -559,7 +581,6 @@ allOf: - cavium,smmu-v2 - marvell,ap806-smmu-500 - nvidia,smmu-500 - - qcom,qcs615-smmu-500 - qcom,qcs8300-smmu-500 - qcom,qdu1000-smmu-500 - qcom,sa8255p-smmu-500 diff --git a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml index f8cebc9e8cd9..5ae9a628261f 100644 --- a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml @@ -21,6 +21,7 @@ properties: - items: - enum: - qcom,msm8916-iommu + - qcom,msm8917-iommu - qcom,msm8953-iommu - const: qcom,msm-iommu-v1 - items: diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml index 621dde0e45d8..6ce41d11ff5e 100644 --- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml @@ -25,6 +25,7 @@ properties: - rockchip,rk3568-iommu - items: - enum: + - rockchip,rk3576-iommu - rockchip,rk3588-iommu - const: 
rockchip,rk3568-iommu diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 47c46e4b739e..ec1b5e32b972 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -367,6 +367,18 @@ config ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT 'arm-smmu.disable_bypass' will continue to override this config. +config ARM_SMMU_MMU_500_CPRE_ERRATA + bool "Enable errata workaround for CPRE in SMMU reset path" + depends on ARM_SMMU + default y + help + Say Y here (the default) to apply a workaround that disables the + MMU-500's next-page prefetcher, for the sake of 4 known errata. + + Say N here only if you are certain that none of the prefetch-related + errata apply to your platform. See silicon-errata.rst for the + errata IDs. + config ARM_SMMU_QCOM def_tristate y depends on ARM_SMMU && ARCH_QCOM diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 1bef5d55b2f9..68debf5ee2d7 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -16,7 +16,6 @@ irqreturn_t amd_iommu_int_thread_evtlog(int irq, void *data); irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data); irqreturn_t amd_iommu_int_thread_galog(int irq, void *data); irqreturn_t amd_iommu_int_handler(int irq, void *data); -void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid); void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type, u8 cntrl_intr, u8 cntrl_log, u32 status_run_mask, u32 status_overflow_mask); @@ -41,13 +40,13 @@ void amd_iommu_disable(void); int amd_iommu_reenable(int mode); int amd_iommu_enable_faulting(unsigned int cpu); extern int amd_iommu_guest_ir; -extern enum io_pgtable_fmt amd_iommu_pgtable; +extern enum protection_domain_mode amd_iommu_pgtable; extern int amd_iommu_gpt_level; extern unsigned long amd_iommu_pgsize_bitmap; /* Protection domain ops */ void amd_iommu_init_identity_domain(void); -struct protection_domain *protection_domain_alloc(unsigned int type, int nid); +struct protection_domain *protection_domain_alloc(void); void protection_domain_free(struct protection_domain *domain); struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev, struct mm_struct *mm); @@ -89,7 +88,6 @@ int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag); */ void amd_iommu_flush_all_caches(struct amd_iommu *iommu); void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); -void amd_iommu_domain_update(struct protection_domain *domain); void amd_iommu_domain_flush_pages(struct protection_domain *domain, u64 address, size_t size); void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data, @@ -184,3 +182,6 @@ void amd_iommu_domain_set_pgtable(struct protection_domain *domain, struct dev_table_entry *get_dev_table(struct amd_iommu *iommu); #endif + +struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid); +struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index fdb0357e0bb9..0bbda60d3cdc 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -220,6 +220,8 @@ #define DEV_ENTRY_EX 0x67 #define DEV_ENTRY_SYSMGT1 0x68 #define DEV_ENTRY_SYSMGT2 0x69 +#define DTE_DATA1_SYSMGT_MASK GENMASK_ULL(41, 40) + #define DEV_ENTRY_IRQ_TBL_EN 0x80 #define DEV_ENTRY_INIT_PASS 0xb8 #define DEV_ENTRY_EINT_PASS 0xb9 @@ -407,8 +409,7 @@ #define DTE_FLAG_HAD (3ULL << 7) #define DTE_FLAG_GIOV BIT_ULL(54) #define DTE_FLAG_GV BIT_ULL(55)
-#define DTE_GLX_SHIFT (56) -#define DTE_GLX_MASK (3) +#define DTE_GLX GENMASK_ULL(57, 56) #define DTE_FLAG_IR BIT_ULL(61) #define DTE_FLAG_IW BIT_ULL(62) @@ -416,18 +417,18 @@ #define DTE_FLAG_MASK (0x3ffULL << 32) #define DEV_DOMID_MASK 0xffffULL -#define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL) -#define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL) -#define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0x1fffffULL) - -#define DTE_GCR3_SHIFT_A 58 -#define DTE_GCR3_SHIFT_B 16 -#define DTE_GCR3_SHIFT_C 43 +#define DTE_GCR3_14_12 GENMASK_ULL(60, 58) +#define DTE_GCR3_30_15 GENMASK_ULL(31, 16) +#define DTE_GCR3_51_31 GENMASK_ULL(63, 43) #define DTE_GPT_LEVEL_SHIFT 54 +#define DTE_GPT_LEVEL_MASK GENMASK_ULL(55, 54) #define GCR3_VALID 0x01ULL +/* DTE[128:179] | DTE[184:191] */ +#define DTE_DATA2_INTR_MASK ~GENMASK_ULL(55, 52) + #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR) #define IOMMU_PTE_DIRTY(pte) ((pte) & IOMMU_PTE_HD) @@ -468,7 +469,7 @@ extern bool amd_iommu_dump; #define DUMP_printk(format, arg...) \ do { \ if (amd_iommu_dump) \ - pr_info("AMD-Vi: " format, ## arg); \ + pr_info(format, ## arg); \ } while(0); /* global flag if IOMMUs cache non-present entries */ @@ -516,6 +517,9 @@ extern struct kmem_cache *amd_iommu_irq_cache; #define for_each_pdom_dev_data_safe(pdom_dev_data, next, pdom) \ list_for_each_entry_safe((pdom_dev_data), (next), &pdom->dev_data_list, list) +#define for_each_ivhd_dte_flags(entry) \ + list_for_each_entry((entry), &amd_ivhd_dev_flags_list, list) + struct amd_iommu; struct iommu_domain; struct irq_domain; @@ -837,6 +841,7 @@ struct devid_map { struct iommu_dev_data { /*Protect against attach/detach races */ struct mutex mutex; + spinlock_t dte_lock; /* DTE lock for 256-bit access */ struct list_head list; /* For domain->dev_list */ struct llist_node dev_data_list; /* For global dev_data_list */ @@ -881,7 +886,21 @@ extern struct list_head amd_iommu_list; * Structure defining one entry in the device table */ struct dev_table_entry { - u64 data[4]; + union { + u64 data[4]; + u128 data128[2]; + }; +}; + +/* + * Structure to store persistent DTE flags from IVHD + */ +struct ivhd_dte_flags { + struct list_head list; + u16 segid; + u16 devid_first; + u16 devid_last; + struct dev_table_entry dte; }; /*
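[Editor's note: the amd_iommu_types.h hunk above replaces open-coded shift/mask pairs with single GENMASK_ULL() field definitions. For instance, DEV_ENTRY_SYSMGT1 (byte-oriented DTE bit 0x68 = 104 = 64 + 40) lands in data[1] bit 40, which is exactly what DTE_DATA1_SYSMGT_MASK (GENMASK_ULL(41, 40)) covers together with SYSMGT2. Below is a minimal sketch of how the new GCR3 masks compose with FIELD_PREP(); demo_encode_gcr3() is a hypothetical helper, not part of the patch:

	#include <linux/types.h>
	#include <linux/bits.h>
	#include <linux/bitfield.h>

	/* masks exactly as defined in the hunk above */
	#define DTE_GCR3_14_12	GENMASK_ULL(60, 58)	/* lives in DTE data[0] */
	#define DTE_GCR3_30_15	GENMASK_ULL(31, 16)	/* lives in DTE data[1] */
	#define DTE_GCR3_51_31	GENMASK_ULL(63, 43)	/* lives in DTE data[1] */

	/* hypothetical: scatter a 4K-aligned GCR3 table address into a DTE */
	static void demo_encode_gcr3(u64 *data, u64 gcr3)
	{
		data[0] |= FIELD_PREP(DTE_GCR3_14_12, gcr3 >> 12);	/* GCR3[14:12] */
		data[1] |= FIELD_PREP(DTE_GCR3_30_15, gcr3 >> 15) |	/* GCR3[30:15] */
			   FIELD_PREP(DTE_GCR3_51_31, gcr3 >> 31);	/* GCR3[51:31] */
	}

FIELD_PREP() masks the value down to the field width, so this is behaviorally equivalent to the removed DTE_GCR3_VAL_x()/DTE_GCR3_SHIFT_x pairs; set_dte_gcr3_table() in the iommu.c changes further down does this for real.]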
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 614f216215ea..c5cd92edada0 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -152,7 +152,7 @@ struct ivmd_header { bool amd_iommu_dump; bool amd_iommu_irq_remap __read_mostly; -enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1; +enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1; /* Guest page table level */ int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL; @@ -174,8 +174,8 @@ bool amd_iommu_snp_en; EXPORT_SYMBOL(amd_iommu_snp_en); LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */ -LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the - system */ +LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ +LIST_HEAD(amd_ivhd_dev_flags_list); /* list of all IVHD device entry settings */ /* Number of IOMMUs present in the system */ static int amd_iommus_present; @@ -984,36 +984,12 @@ static void iommu_enable_gt(struct amd_iommu *iommu) } /* sets a specific bit in the device table entry. */ -static void __set_dev_entry_bit(struct dev_table_entry *dev_table, - u16 devid, u8 bit) +static void set_dte_bit(struct dev_table_entry *dte, u8 bit) { int i = (bit >> 6) & 0x03; int _bit = bit & 0x3f; - dev_table[devid].data[i] |= (1UL << _bit); -} - -static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) -{ - struct dev_table_entry *dev_table = get_dev_table(iommu); - - return __set_dev_entry_bit(dev_table, devid, bit); -} - -static int __get_dev_entry_bit(struct dev_table_entry *dev_table, - u16 devid, u8 bit) -{ - int i = (bit >> 6) & 0x03; - int _bit = bit & 0x3f; - - return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit; -} - -static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) -{ - struct dev_table_entry *dev_table = get_dev_table(iommu); - - return __get_dev_entry_bit(dev_table, devid, bit); + dte->data[i] |= (1UL << _bit); } static bool __copy_device_table(struct amd_iommu *iommu) @@ -1081,11 +1057,9 @@ } /* If gcr3 table existed, mask it out */ if (old_devtb[devid].data[0] & DTE_FLAG_GV) { - tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; - tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; + tmp = (DTE_GCR3_30_15 | DTE_GCR3_51_31); pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp; - tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A; - tmp |= DTE_FLAG_GV; + tmp = (DTE_GCR3_14_12 | DTE_FLAG_GV); pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp; } } @@ -1136,42 +1110,107 @@ static bool copy_device_table(void) return true; } -void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid) +struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid) { - int sysmgt; + struct ivhd_dte_flags *e; + unsigned int best_len = UINT_MAX; + struct dev_table_entry *dte = NULL; - sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) | - (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1); + for_each_ivhd_dte_flags(e) { + /* + * Need to go through the whole list to find the smallest range + * that contains the devid.
+ */ + if ((e->segid == segid) && + (e->devid_first <= devid) && (devid <= e->devid_last)) { + unsigned int len = e->devid_last - e->devid_first; + + if (len < best_len) { + dte = &(e->dte); + best_len = len; + } + } + } + return dte; +} + +static bool search_ivhd_dte_flags(u16 segid, u16 first, u16 last) +{ + struct ivhd_dte_flags *e; - if (sysmgt == 0x01) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW); + for_each_ivhd_dte_flags(e) { + if ((e->segid == segid) && + (e->devid_first == first) && + (e->devid_last == last)) + return true; + } + return false; } /* * This function takes the device specific flags read from the ACPI * table and sets up the device table entry with that information */ -static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, - u16 devid, u32 flags, u32 ext_flags) +static void __init +set_dev_entry_from_acpi_range(struct amd_iommu *iommu, u16 first, u16 last, + u32 flags, u32 ext_flags) { - if (flags & ACPI_DEVFLAG_INITPASS) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS); - if (flags & ACPI_DEVFLAG_EXTINT) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS); - if (flags & ACPI_DEVFLAG_NMI) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS); - if (flags & ACPI_DEVFLAG_SYSMGT1) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1); - if (flags & ACPI_DEVFLAG_SYSMGT2) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2); - if (flags & ACPI_DEVFLAG_LINT0) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS); - if (flags & ACPI_DEVFLAG_LINT1) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS); + int i; + struct dev_table_entry dte = {}; - amd_iommu_apply_erratum_63(iommu, devid); + /* Parse IVHD DTE setting flags and store information */ + if (flags) { + struct ivhd_dte_flags *d; - amd_iommu_set_rlookup_table(iommu, devid); + if (search_ivhd_dte_flags(iommu->pci_seg->id, first, last)) + return; + + d = kzalloc(sizeof(struct ivhd_dte_flags), GFP_KERNEL); + if (!d) + return; + + pr_debug("%s: devid range %#x:%#x\n", __func__, first, last); + + if (flags & ACPI_DEVFLAG_INITPASS) + set_dte_bit(&dte, DEV_ENTRY_INIT_PASS); + if (flags & ACPI_DEVFLAG_EXTINT) + set_dte_bit(&dte, DEV_ENTRY_EINT_PASS); + if (flags & ACPI_DEVFLAG_NMI) + set_dte_bit(&dte, DEV_ENTRY_NMI_PASS); + if (flags & ACPI_DEVFLAG_SYSMGT1) + set_dte_bit(&dte, DEV_ENTRY_SYSMGT1); + if (flags & ACPI_DEVFLAG_SYSMGT2) + set_dte_bit(&dte, DEV_ENTRY_SYSMGT2); + if (flags & ACPI_DEVFLAG_LINT0) + set_dte_bit(&dte, DEV_ENTRY_LINT0_PASS); + if (flags & ACPI_DEVFLAG_LINT1) + set_dte_bit(&dte, DEV_ENTRY_LINT1_PASS); + + /* Apply erratum 63, which needs info in initial_dte */ + if (FIELD_GET(DTE_DATA1_SYSMGT_MASK, dte.data[1]) == 0x1) + dte.data[0] |= DTE_FLAG_IW; + + memcpy(&d->dte, &dte, sizeof(dte)); + d->segid = iommu->pci_seg->id; + d->devid_first = first; + d->devid_last = last; + list_add_tail(&d->list, &amd_ivhd_dev_flags_list); + } + + for (i = first; i <= last; i++) { + if (flags) { + struct dev_table_entry *dev_table = get_dev_table(iommu); + + memcpy(&dev_table[i], &dte, sizeof(dte)); + } + amd_iommu_set_rlookup_table(iommu, i); + } +} + +static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, + u16 devid, u32 flags, u32 ext_flags) +{ + set_dev_entry_from_acpi_range(iommu, devid, devid, flags, ext_flags); } int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line) @@ -1239,7 +1278,7 @@ static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid, entry->cmd_line = cmd_line; entry->root_devid = (entry->devid & (~0x7)); - pr_info("%s, add hid:%s, 
uid:%s, rdevid:%d\n", + pr_info("%s, add hid:%s, uid:%s, rdevid:%#x\n", entry->cmd_line ? "cmd" : "ivrs", entry->hid, entry->uid, entry->root_devid); @@ -1331,15 +1370,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, switch (e->type) { case IVHD_DEV_ALL: - DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags); - - for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i) - set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); + DUMP_printk(" DEV_ALL\t\t\tsetting: %#02x\n", e->flags); + set_dev_entry_from_acpi_range(iommu, 0, pci_seg->last_bdf, e->flags, 0); break; case IVHD_DEV_SELECT: - DUMP_printk(" DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x " - "flags: %02x\n", + DUMP_printk(" DEV_SELECT\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1350,8 +1386,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_SELECT_RANGE_START: - DUMP_printk(" DEV_SELECT_RANGE_START\t " - "devid: %04x:%02x:%02x.%x flags: %02x\n", + DUMP_printk(" DEV_SELECT_RANGE_START\tdevid: %04x:%02x:%02x.%x flags: %#02x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1364,8 +1399,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_ALIAS: - DUMP_printk(" DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x " - "flags: %02x devid_to: %02x:%02x.%x\n", + DUMP_printk(" DEV_ALIAS\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %02x:%02x.%x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1382,9 +1416,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_ALIAS_RANGE: - DUMP_printk(" DEV_ALIAS_RANGE\t\t " - "devid: %04x:%02x:%02x.%x flags: %02x " - "devid_to: %04x:%02x:%02x.%x\n", + DUMP_printk(" DEV_ALIAS_RANGE\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %04x:%02x:%02x.%x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1401,8 +1433,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_EXT_SELECT: - DUMP_printk(" DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x " - "flags: %02x ext: %08x\n", + DUMP_printk(" DEV_EXT_SELECT\t\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1414,8 +1445,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_EXT_SELECT_RANGE: - DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " - "%04x:%02x:%02x.%x flags: %02x ext: %08x\n", + DUMP_printk(" DEV_EXT_SELECT_RANGE\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1428,21 +1458,18 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_RANGE_END: - DUMP_printk(" DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n", + DUMP_printk(" DEV_RANGE_END\t\tdevid: %04x:%02x:%02x.%x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid)); devid = e->devid; for (dev_i = devid_start; dev_i <= devid; ++dev_i) { - if (alias) { + if (alias) pci_seg->alias_table[dev_i] = devid_to; - set_dev_entry_from_acpi(iommu, - devid_to, flags, ext_flags); - } - set_dev_entry_from_acpi(iommu, dev_i, - flags, ext_flags); } + set_dev_entry_from_acpi_range(iommu, devid_start, devid, flags, ext_flags); + set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags); break; case IVHD_DEV_SPECIAL: { u8 handle, type; @@ -1461,11 +1488,12 @@ static int 
__init init_iommu_from_acpi(struct amd_iommu *iommu, else var = "UNKNOWN"; - DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n", + DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n", var, (int)handle, seg_id, PCI_BUS_NUM(devid), PCI_SLOT(devid), - PCI_FUNC(devid)); + PCI_FUNC(devid), + e->flags); ret = add_special_device(type, handle, &devid, false); if (ret) @@ -1525,11 +1553,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, } devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid); - DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n", + DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n", hid, uid, seg_id, PCI_BUS_NUM(devid), PCI_SLOT(devid), - PCI_FUNC(devid)); + PCI_FUNC(devid), + e->flags); flags = e->flags; @@ -1757,13 +1786,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, else iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; - /* - * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports. - * GAM also requires GA mode. Therefore, we need to - * check cmpxchg16b support before enabling it. - */ - if (!boot_cpu_has(X86_FEATURE_CX16) || - ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)) + /* GAM requires GA mode. */ + if ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0) amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; break; case 0x11: @@ -1773,13 +1797,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, else iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; - /* - * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports. - * XT, GAM also requires GA mode. Therefore, we need to - * check cmpxchg16b support before enabling them. - */ - if (!boot_cpu_has(X86_FEATURE_CX16) || - ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) { + /* XT and GAM require GA mode. */ + if ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0) { amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; break; } @@ -2145,7 +2164,7 @@ static void print_iommu_info(void) if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) pr_info("X2APIC enabled\n"); } - if (amd_iommu_pgtable == AMD_IOMMU_V2) { + if (amd_iommu_pgtable == PD_MODE_V2) { pr_info("V2 page table enabled (Paging mode : %d level)\n", amd_iommu_gpt_level); } @@ -2575,9 +2594,9 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) return; for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { - __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID); + set_dte_bit(&dev_table[devid], DEV_ENTRY_VALID); if (!amd_iommu_snp_en) - __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION); + set_dte_bit(&dev_table[devid], DEV_ENTRY_TRANSLATION); } } @@ -2605,8 +2624,7 @@ static void init_device_table(void) for_each_pci_segment(pci_seg) { for (devid = 0; devid <= pci_seg->last_bdf; ++devid) - __set_dev_entry_bit(pci_seg->dev_table, - devid, DEV_ENTRY_IRQ_TBL_EN); + set_dte_bit(&pci_seg->dev_table[devid], DEV_ENTRY_IRQ_TBL_EN); } } @@ -3033,6 +3051,11 @@ static int __init early_amd_iommu_init(void) return -EINVAL; } + if (!boot_cpu_has(X86_FEATURE_CX16)) { + pr_err("Failed to initialize. 
The CMPXCHG16B feature is required.\n"); + return -EINVAL; + } + /* * Validate checksum here so we don't need to do it when * we actually parse the table @@ -3059,10 +3082,10 @@ static int __init early_amd_iommu_init(void) FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL) amd_iommu_gpt_level = PAGE_MODE_5_LEVEL; - if (amd_iommu_pgtable == AMD_IOMMU_V2) { + if (amd_iommu_pgtable == PD_MODE_V2) { if (!amd_iommu_v2_pgtbl_supported()) { pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n"); - amd_iommu_pgtable = AMD_IOMMU_V1; + amd_iommu_pgtable = PD_MODE_V1; } } @@ -3185,7 +3208,7 @@ static void iommu_snp_enable(void) goto disable_snp; } - if (amd_iommu_pgtable != AMD_IOMMU_V1) { + if (amd_iommu_pgtable != PD_MODE_V1) { pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n"); goto disable_snp; } @@ -3398,25 +3421,23 @@ static bool amd_iommu_sme_check(void) * IOMMUs * ****************************************************************************/ -int __init amd_iommu_detect(void) +void __init amd_iommu_detect(void) { int ret; if (no_iommu || (iommu_detected && !gart_iommu_aperture)) - return -ENODEV; + return; if (!amd_iommu_sme_check()) - return -ENODEV; + return; ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); if (ret) - return ret; + return; amd_iommu_detected = true; iommu_detected = 1; x86_init.iommu.iommu_init = amd_iommu_init; - - return 1; } /**************************************************************************** @@ -3464,9 +3485,9 @@ static int __init parse_amd_iommu_options(char *str) } else if (strncmp(str, "force_isolation", 15) == 0) { amd_iommu_force_isolation = true; } else if (strncmp(str, "pgtbl_v1", 8) == 0) { - amd_iommu_pgtable = AMD_IOMMU_V1; + amd_iommu_pgtable = PD_MODE_V1; } else if (strncmp(str, "pgtbl_v2", 8) == 0) { - amd_iommu_pgtable = AMD_IOMMU_V2; + amd_iommu_pgtable = PD_MODE_V2; } else if (strncmp(str, "irtcachedis", 11) == 0) { amd_iommu_irtcachedis = true; } else if (strncmp(str, "nohugepages", 11) == 0) {
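[Editor's note: the iommu.c changes that follow program the 256-bit DTE as two 128-bit halves, each written with a single 16-byte store. A quick layout sanity check of the union introduced in amd_iommu_types.h above; this sketch assumes an architecture with CONFIG_ARCH_SUPPORTS_INT128 providing the u128 type (true on x86-64, where CMPXCHG16B is now mandatory per the early_amd_iommu_init() check above):

	#include <linux/build_bug.h>
	#include <linux/types.h>

	struct demo_dev_table_entry {
		union {
			u64 data[4];		/* the four 64-bit words of a DTE */
			u128 data128[2];	/* two halves, one cmpxchg16b each */
		};
	};

	/* data128[0] aliases data[0..1] (the DMA-remap half of the entry);
	 * data128[1] aliases data[2..3] (interrupt remapping plus the
	 * guest page-table level field) */
	static_assert(sizeof(struct demo_dev_table_entry) == 32);
]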
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 5aaeda77eef2..b48a72bd7b23 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -83,12 +83,142 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, static void set_dte_entry(struct amd_iommu *iommu, struct iommu_dev_data *dev_data); +static void iommu_flush_dte_sync(struct amd_iommu *iommu, u16 devid); + +static struct iommu_dev_data *find_dev_data(struct amd_iommu *iommu, u16 devid); + /**************************************************************************** * * Helper functions * ****************************************************************************/ +static __always_inline void amd_iommu_atomic128_set(__int128 *ptr, __int128 val) +{ + /* + * Note: + * We use arch_cmpxchg128_local() because: + * - The cmpxchg16b instruction is needed mainly as a 128-bit store to + * the DTE (the compare-exchange itself is not needed, since this + * function is already protected by a spin_lock for this DTE). + * - Neither LOCK_PREFIX nor a retry loop is needed, because of the + * spin_lock. + */ + arch_cmpxchg128_local(ptr, *ptr, val); +} + +static void write_dte_upper128(struct dev_table_entry *ptr, struct dev_table_entry *new) +{ + struct dev_table_entry old; + + old.data128[1] = ptr->data128[1]; + /* + * Preserve DTE_DATA2_INTR_MASK. This needs to be + * done here since it must happen inside the + * spin_lock(&dev_data->dte_lock) context. + */ + new->data[2] &= ~DTE_DATA2_INTR_MASK; + new->data[2] |= old.data[2] & DTE_DATA2_INTR_MASK; + + amd_iommu_atomic128_set(&ptr->data128[1], new->data128[1]); +} + +static void write_dte_lower128(struct dev_table_entry *ptr, struct dev_table_entry *new) +{ + amd_iommu_atomic128_set(&ptr->data128[0], new->data128[0]); +} + +/* + * Note: + * The IOMMU reads the entire Device Table entry in a single 256-bit transaction, + * but the driver programs the DTE using 2 128-bit cmpxchg operations. So, the + * driver needs to ensure the following: + * - DTE[V|GV] bit is being written last when setting. + * - DTE[V|GV] bit is being written first when clearing. + * + * This function is used only by code that updates the DMA translation part of + * the DTE, so only control bits related to DMA are considered when updating + * the entry. + */ +static void update_dte256(struct amd_iommu *iommu, struct iommu_dev_data *dev_data, + struct dev_table_entry *new) +{ + unsigned long flags; + struct dev_table_entry *dev_table = get_dev_table(iommu); + struct dev_table_entry *ptr = &dev_table[dev_data->devid]; + + spin_lock_irqsave(&dev_data->dte_lock, flags); + + if (!(ptr->data[0] & DTE_FLAG_V)) { + /* Existing DTE is not valid. */ + write_dte_upper128(ptr, new); + write_dte_lower128(ptr, new); + iommu_flush_dte_sync(iommu, dev_data->devid); + } else if (!(new->data[0] & DTE_FLAG_V)) { + /* Existing DTE is valid. New DTE is not valid. */ + write_dte_lower128(ptr, new); + write_dte_upper128(ptr, new); + iommu_flush_dte_sync(iommu, dev_data->devid); + } else if (!FIELD_GET(DTE_FLAG_GV, ptr->data[0])) { + /* + * Both DTEs are valid. + * Existing DTE has no guest page table. + */ + write_dte_upper128(ptr, new); + write_dte_lower128(ptr, new); + iommu_flush_dte_sync(iommu, dev_data->devid); + } else if (!FIELD_GET(DTE_FLAG_GV, new->data[0])) { + /* + * Both DTEs are valid. + * Existing DTE has guest page table, + * new DTE has no guest page table. + */ + write_dte_lower128(ptr, new); + write_dte_upper128(ptr, new); + iommu_flush_dte_sync(iommu, dev_data->devid); + } else if (FIELD_GET(DTE_GPT_LEVEL_MASK, ptr->data[2]) != + FIELD_GET(DTE_GPT_LEVEL_MASK, new->data[2])) { + /* + * Both DTEs are valid and have guest page table, + * but have different number of levels. So, we need + * to update both the upper and lower 128-bit values, + * which requires disabling and flushing. + */ + struct dev_table_entry clear = {}; + + /* First disable DTE */ + write_dte_lower128(ptr, &clear); + iommu_flush_dte_sync(iommu, dev_data->devid); + + /* Then update DTE */ + write_dte_upper128(ptr, new); + write_dte_lower128(ptr, new); + iommu_flush_dte_sync(iommu, dev_data->devid); + } else { + /* + * Both DTEs are valid and have guest page table, + * and the same number of levels. We only need to + * update the lower 128 bits, so there is no need to + * disable the DTE. + */ + write_dte_lower128(ptr, new); + } + + spin_unlock_irqrestore(&dev_data->dte_lock, flags); +}
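[Editor's note: a hypothetical caller-side sketch of how these helpers compose (get_dte256() is defined immediately below); demo_set_ppr() is illustrative and not part of the patch:

	static void demo_set_ppr(struct amd_iommu *iommu,
				 struct iommu_dev_data *dev_data)
	{
		struct dev_table_entry new;

		get_dte256(iommu, dev_data, &new);	/* coherent 256-bit snapshot */
		new.data[0] |= 1ULL << DEV_ENTRY_PPR;	/* touch the DMA half only */
		update_dte256(iommu, dev_data, &new);	/* ordered, V-bit-safe publish */
	}

Because both the old and new entries stay valid and nothing in the guest-paging configuration changes, update_dte256() would take its cheapest path here: a single lower-128-bit store with no intermediate disable.]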
+ +static void get_dte256(struct amd_iommu *iommu, struct iommu_dev_data *dev_data, + struct dev_table_entry *dte) +{ + unsigned long flags; + struct dev_table_entry *ptr; + struct dev_table_entry *dev_table = get_dev_table(iommu); + + ptr = &dev_table[dev_data->devid]; + + spin_lock_irqsave(&dev_data->dte_lock, flags); + dte->data128[0] = ptr->data128[0]; + dte->data128[1] = ptr->data128[1]; + spin_unlock_irqrestore(&dev_data->dte_lock, flags); +} + static inline bool pdom_is_v2_pgtbl_mode(struct protection_domain *pdom) { return (pdom && (pdom->pd_mode == PD_MODE_V2)); @@ -209,6 +339,7 @@ static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid) return NULL; mutex_init(&dev_data->mutex); + spin_lock_init(&dev_data->dte_lock); dev_data->devid = devid; ratelimit_default_init(&dev_data->rs); @@ -216,7 +347,7 @@ static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid) return dev_data; } -static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid) +struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid) { struct iommu_dev_data *dev_data; struct llist_node *node; @@ -236,9 +367,11 @@ static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid static int clone_alias(struct pci_dev *pdev, u16 alias, void *data) { + struct dev_table_entry new; struct amd_iommu *iommu; - struct dev_table_entry *dev_table; + struct iommu_dev_data *dev_data, *alias_data; u16 devid = pci_dev_id(pdev); + int ret = 0; if (devid == alias) return 0; @@ -247,13 +380,27 @@ static int clone_alias(struct pci_dev *pdev, u16 alias, void *data) if (!iommu) return 0; - amd_iommu_set_rlookup_table(iommu, alias); - dev_table = get_dev_table(iommu); - memcpy(dev_table[alias].data, - dev_table[devid].data, - sizeof(dev_table[alias].data)); + /* Copy the data from pdev */ + dev_data = dev_iommu_priv_get(&pdev->dev); + if (!dev_data) { + pr_err("%s : Failed to get dev_data for 0x%x\n", __func__, devid); + ret = -EINVAL; + goto out; + } + get_dte256(iommu, dev_data, &new); - return 0; + /* Setup alias */ + alias_data = find_dev_data(iommu, alias); + if (!alias_data) { + pr_err("%s : Failed to get alias dev_data for 0x%x\n", __func__, alias); + ret = -EINVAL; + goto out; + } + update_dte256(iommu, alias_data, &new); + + amd_iommu_set_rlookup_table(iommu, alias); +out: + return ret; } static void clone_aliases(struct amd_iommu *iommu, struct device *dev)
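[Editor's note: for context, clone_alias() above is a callback; the driver's setup_aliases() walks every possible requester ID of a function with the PCI core helper, roughly like this sketch (abridged from the existing driver, not changed by the patch):

	static void demo_clone_all_aliases(struct amd_iommu *iommu, struct device *dev)
	{
		if (!dev_is_pci(dev))
			return;

		/* invokes clone_alias(pdev, alias, NULL) once per DMA alias */
		pci_for_each_dma_alias(to_pci_dev(dev), clone_alias, NULL);
	}

With the patch, each clone now goes through get_dte256()/update_dte256() under the per-device dte_lock instead of a raw memcpy(), so the IOMMU can never walk a half-updated 256-bit entry for the alias.]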
@@ -526,6 +673,12 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev) return -ENOMEM; dev_data->dev = dev; + + /* + * The dev_iommu_priv_set() needs to be called before setup_aliases. + * Otherwise, a subsequent call to dev_iommu_priv_get() will fail. + */ + dev_iommu_priv_set(dev, dev_data); setup_aliases(iommu, dev); /* @@ -539,8 +692,6 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev) dev_data->flags = pdev_get_caps(to_pci_dev(dev)); } - dev_iommu_priv_set(dev, dev_data); - return 0; } @@ -571,10 +722,13 @@ static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev) static void dump_dte_entry(struct amd_iommu *iommu, u16 devid) { int i; - struct dev_table_entry *dev_table = get_dev_table(iommu); + struct dev_table_entry dte; + struct iommu_dev_data *dev_data = find_dev_data(iommu, devid); + + get_dte256(iommu, dev_data, &dte); for (i = 0; i < 4; ++i) - pr_err("DTE[%d]: %016llx\n", i, dev_table[devid].data[i]); + pr_err("DTE[%d]: %016llx\n", i, dte.data[i]); } static void dump_command(unsigned long phys_addr) @@ -1261,6 +1415,15 @@ static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) return iommu_queue_command(iommu, &cmd); } +static void iommu_flush_dte_sync(struct amd_iommu *iommu, u16 devid) +{ + int ret; + + ret = iommu_flush_dte(iommu, devid); + if (!ret) + iommu_completion_wait(iommu); +} + static void amd_iommu_flush_dte_all(struct amd_iommu *iommu) { u32 devid; @@ -1603,15 +1766,6 @@ void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) domain_flush_complete(domain); } -void amd_iommu_domain_update(struct protection_domain *domain) -{ - /* Update device table */ - amd_iommu_update_and_flush_device_table(domain); - - /* Flush domain TLB(s) and wait for completion */ - amd_iommu_domain_flush_all(domain); -} - int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag) { struct iommu_dev_data *dev_data; @@ -1826,90 +1980,109 @@ int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid) return ret; } +static void make_clear_dte(struct iommu_dev_data *dev_data, struct dev_table_entry *ptr, + struct dev_table_entry *new) +{ + /* All existing DTEs must have the V bit set */ + new->data128[0] = DTE_FLAG_V; + new->data128[1] = 0; +} + +/* + * Note: + * The old values for the GCR3 table and GPT have been cleared by the caller. 
+ */ +static void set_dte_gcr3_table(struct amd_iommu *iommu, + struct iommu_dev_data *dev_data, + struct dev_table_entry *target) +{ + struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + u64 gcr3; + + if (!gcr3_info->gcr3_tbl) + return; + + pr_debug("%s: devid=%#x, glx=%#x, gcr3_tbl=%#llx\n", + __func__, dev_data->devid, gcr3_info->glx, + (unsigned long long)gcr3_info->gcr3_tbl); + + gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl); + + target->data[0] |= DTE_FLAG_GV | + FIELD_PREP(DTE_GLX, gcr3_info->glx) | + FIELD_PREP(DTE_GCR3_14_12, gcr3 >> 12); + if (pdom_is_v2_pgtbl_mode(dev_data->domain)) + target->data[0] |= DTE_FLAG_GIOV; + + target->data[1] |= FIELD_PREP(DTE_GCR3_30_15, gcr3 >> 15) | + FIELD_PREP(DTE_GCR3_51_31, gcr3 >> 31); + + /* Guest page table can only support 4 and 5 levels */ + if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL) + target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_5_LEVEL); + else + target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_4_LEVEL); +} + static void set_dte_entry(struct amd_iommu *iommu, struct iommu_dev_data *dev_data) { - u64 pte_root = 0; - u64 flags = 0; - u32 old_domid; - u16 devid = dev_data->devid; u16 domid; + u32 old_domid; + struct dev_table_entry *initial_dte; + struct dev_table_entry new = {}; struct protection_domain *domain = dev_data->domain; - struct dev_table_entry *dev_table = get_dev_table(iommu); struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid]; if (gcr3_info && gcr3_info->gcr3_tbl) domid = dev_data->gcr3_info.domid; else domid = domain->id; + make_clear_dte(dev_data, dte, &new); + if (domain->iop.mode != PAGE_MODE_NONE) - pte_root = iommu_virt_to_phys(domain->iop.root); + new.data[0] = iommu_virt_to_phys(domain->iop.root); - pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) + new.data[0] |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; - pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V; + new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V; /* - * When SNP is enabled, Only set TV bit when IOMMU - * page translation is in use. + * When SNP is enabled, we can only support TV=1 with non-zero domain ID. + * This is prevented by the SNP-enable and IOMMU_DOMAIN_IDENTITY check in + * do_iommu_domain_alloc(). 
*/ - if (!amd_iommu_snp_en || (domid != 0)) - pte_root |= DTE_FLAG_TV; - - flags = dev_table[devid].data[1]; - - if (dev_data->ats_enabled) - flags |= DTE_FLAG_IOTLB; + WARN_ON(amd_iommu_snp_en && (domid == 0)); + new.data[0] |= DTE_FLAG_TV; if (dev_data->ppr) - pte_root |= 1ULL << DEV_ENTRY_PPR; + new.data[0] |= 1ULL << DEV_ENTRY_PPR; if (domain->dirty_tracking) - pte_root |= DTE_FLAG_HAD; - - if (gcr3_info && gcr3_info->gcr3_tbl) { - u64 gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl); - u64 glx = gcr3_info->glx; - u64 tmp; + new.data[0] |= DTE_FLAG_HAD; - pte_root |= DTE_FLAG_GV; - pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT; - - /* First mask out possible old values for GCR3 table */ - tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; - flags &= ~tmp; - - tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; - flags &= ~tmp; - - /* Encode GCR3 table into DTE */ - tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A; - pte_root |= tmp; - - tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B; - flags |= tmp; - - tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C; - flags |= tmp; + if (dev_data->ats_enabled) + new.data[1] |= DTE_FLAG_IOTLB; - if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL) { - dev_table[devid].data[2] |= - ((u64)GUEST_PGTABLE_5_LEVEL << DTE_GPT_LEVEL_SHIFT); - } + old_domid = READ_ONCE(dte->data[1]) & DEV_DOMID_MASK; + new.data[1] |= domid; - /* GIOV is supported with V2 page table mode only */ - if (pdom_is_v2_pgtbl_mode(domain)) - pte_root |= DTE_FLAG_GIOV; + /* + * Restore cached persistent DTE bits, which can be set by information + * in the IVRS table. See set_dev_entry_from_acpi(). + */ + initial_dte = amd_iommu_get_ivhd_dte_flags(iommu->pci_seg->id, dev_data->devid); + if (initial_dte) { + new.data128[0] |= initial_dte->data128[0]; + new.data128[1] |= initial_dte->data128[1]; } - flags &= ~DEV_DOMID_MASK; - flags |= domid; + set_dte_gcr3_table(iommu, dev_data, &new); - old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK; - dev_table[devid].data[1] = flags; - dev_table[devid].data[0] = pte_root; + update_dte256(iommu, dev_data, &new); /* * A kdump kernel might be replacing a domain ID that was copied from @@ -1921,19 +2094,16 @@ static void set_dte_entry(struct amd_iommu *iommu, } } -static void clear_dte_entry(struct amd_iommu *iommu, u16 devid) +/* + * Clear DMA-remap related flags to block all DMA (blocked domain) + */ +static void clear_dte_entry(struct amd_iommu *iommu, struct iommu_dev_data *dev_data) { - struct dev_table_entry *dev_table = get_dev_table(iommu); - - /* remove entry from the device table seen by the hardware */ - dev_table[devid].data[0] = DTE_FLAG_V; - - if (!amd_iommu_snp_en) - dev_table[devid].data[0] |= DTE_FLAG_TV; + struct dev_table_entry new = {}; + struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid]; - dev_table[devid].data[1] &= DTE_FLAG_MASK; - - amd_iommu_apply_erratum_63(iommu, devid); + make_clear_dte(dev_data, dte, &new); + update_dte256(iommu, dev_data, &new); } /* Update and flush DTE for the given device */ @@ -1944,7 +2114,7 @@ static void dev_update_dte(struct iommu_dev_data *dev_data, bool set) if (set) set_dte_entry(iommu, dev_data); else - clear_dte_entry(iommu, dev_data->devid); + clear_dte_entry(iommu, dev_data); clone_aliases(iommu, dev_data->dev); device_flush_dte(dev_data); @@ -2007,7 +2177,6 @@ static int pdom_attach_iommu(struct amd_iommu *iommu, struct protection_domain *pdom) { struct pdom_iommu_info *pdom_iommu_info, *curr; - struct io_pgtable_cfg *cfg = &pdom->iop.pgtbl.cfg; unsigned long flags; int ret = 0; @@ 
-2036,10 +2205,6 @@ static int pdom_attach_iommu(struct amd_iommu *iommu, goto out_unlock; } - /* Update NUMA Node ID */ - if (cfg->amd.nid == NUMA_NO_NODE) - cfg->amd.nid = dev_to_node(&iommu->dev->dev); - out_unlock: spin_unlock_irqrestore(&pdom->lock, flags); return ret; @@ -2276,16 +2441,15 @@ void protection_domain_free(struct protection_domain *domain) kfree(domain); } -static void protection_domain_init(struct protection_domain *domain, int nid) +static void protection_domain_init(struct protection_domain *domain) { spin_lock_init(&domain->lock); INIT_LIST_HEAD(&domain->dev_list); INIT_LIST_HEAD(&domain->dev_data_list); xa_init(&domain->iommu_array); - domain->iop.pgtbl.cfg.amd.nid = nid; } -struct protection_domain *protection_domain_alloc(unsigned int type, int nid) +struct protection_domain *protection_domain_alloc(void) { struct protection_domain *domain; int domid; @@ -2301,42 +2465,37 @@ struct protection_domain *protection_domain_alloc(unsigned int type, int nid) } domain->id = domid; - protection_domain_init(domain, nid); + protection_domain_init(domain); return domain; } static int pdom_setup_pgtable(struct protection_domain *domain, - unsigned int type, int pgtable) + struct device *dev) { struct io_pgtable_ops *pgtbl_ops; + enum io_pgtable_fmt fmt; - /* No need to allocate io pgtable ops in passthrough mode */ - if (!(type & __IOMMU_DOMAIN_PAGING)) - return 0; - - switch (pgtable) { - case AMD_IOMMU_V1: - domain->pd_mode = PD_MODE_V1; + switch (domain->pd_mode) { + case PD_MODE_V1: + fmt = AMD_IOMMU_V1; break; - case AMD_IOMMU_V2: - domain->pd_mode = PD_MODE_V2; + case PD_MODE_V2: + fmt = AMD_IOMMU_V2; break; - default: - return -EINVAL; } - pgtbl_ops = - alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl.cfg, domain); + domain->iop.pgtbl.cfg.amd.nid = dev_to_node(dev); + pgtbl_ops = alloc_io_pgtable_ops(fmt, &domain->iop.pgtbl.cfg, domain); if (!pgtbl_ops) return -ENOMEM; return 0; } -static inline u64 dma_max_address(int pgtable) +static inline u64 dma_max_address(enum protection_domain_mode pgtable) { - if (pgtable == AMD_IOMMU_V1) + if (pgtable == PD_MODE_V1) return ~0ULL; /* V2 with 4/5 level page table */ @@ -2348,31 +2507,21 @@ static bool amd_iommu_hd_support(struct amd_iommu *iommu) return iommu && (iommu->features & FEATURE_HDSUP); } -static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, - struct device *dev, - u32 flags, int pgtable) +static struct iommu_domain * +do_iommu_domain_alloc(struct device *dev, u32 flags, + enum protection_domain_mode pgtable) { bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev); struct protection_domain *domain; - struct amd_iommu *iommu = NULL; int ret; - if (dev) - iommu = get_amd_iommu_from_dev(dev); - - /* - * Since DTE[Mode]=0 is prohibited on SNP-enabled system, - * default to use IOMMU_DOMAIN_DMA[_FQ]. - */ - if (amd_iommu_snp_en && (type == IOMMU_DOMAIN_IDENTITY)) - return ERR_PTR(-EINVAL); - - domain = protection_domain_alloc(type, - dev ? 
dev_to_node(dev) : NUMA_NO_NODE); + domain = protection_domain_alloc(); if (!domain) return ERR_PTR(-ENOMEM); - ret = pdom_setup_pgtable(domain, type, pgtable); + domain->pd_mode = pgtable; + ret = pdom_setup_pgtable(domain, dev); if (ret) { pdom_id_free(domain->id); kfree(domain); @@ -2384,72 +2533,45 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, domain->domain.geometry.force_aperture = true; domain->domain.pgsize_bitmap = domain->iop.pgtbl.cfg.pgsize_bitmap; - if (iommu) { - domain->domain.type = type; - domain->domain.ops = iommu->iommu.ops->default_domain_ops; + domain->domain.type = IOMMU_DOMAIN_UNMANAGED; + domain->domain.ops = iommu->iommu.ops->default_domain_ops; - if (dirty_tracking) - domain->domain.dirty_ops = &amd_dirty_ops; - } + if (dirty_tracking) + domain->domain.dirty_ops = &amd_dirty_ops; return &domain->domain; } -static struct iommu_domain *amd_iommu_domain_alloc(unsigned int type) -{ - struct iommu_domain *domain; - int pgtable = amd_iommu_pgtable; - - /* - * Force IOMMU v1 page table when allocating - * domain for pass-through devices. - */ - if (type == IOMMU_DOMAIN_UNMANAGED) - pgtable = AMD_IOMMU_V1; - - domain = do_iommu_domain_alloc(type, NULL, 0, pgtable); - if (IS_ERR(domain)) - return NULL; - - return domain; -} - static struct iommu_domain * amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags, const struct iommu_user_data *user_data) { - unsigned int type = IOMMU_DOMAIN_UNMANAGED; - struct amd_iommu *iommu = NULL; + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev); const u32 supported_flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_PASID; - if (dev) - iommu = get_amd_iommu_from_dev(dev); - if ((flags & ~supported_flags) || user_data) return ERR_PTR(-EOPNOTSUPP); - /* Allocate domain with v2 page table if IOMMU supports PASID. */ - if (flags & IOMMU_HWPT_ALLOC_PASID) { + switch (flags & supported_flags) { + case IOMMU_HWPT_ALLOC_DIRTY_TRACKING: + /* Allocate domain with v1 page table for dirty tracking */ + if (!amd_iommu_hd_support(iommu)) + break; + return do_iommu_domain_alloc(dev, flags, PD_MODE_V1); + case IOMMU_HWPT_ALLOC_PASID: + /* Allocate domain with v2 page table if IOMMU supports PASID. 
*/ if (!amd_iommu_pasid_supported()) - return ERR_PTR(-EOPNOTSUPP); - - return do_iommu_domain_alloc(type, dev, flags, AMD_IOMMU_V2); - } - - /* Allocate domain with v1 page table for dirty tracking */ - if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) { - if (iommu && amd_iommu_hd_support(iommu)) { - return do_iommu_domain_alloc(type, dev, - flags, AMD_IOMMU_V1); - } - - return ERR_PTR(-EOPNOTSUPP); + break; + return do_iommu_domain_alloc(dev, flags, PD_MODE_V2); + case 0: + /* If nothing specific is required use the kernel commandline default */ + return do_iommu_domain_alloc(dev, 0, amd_iommu_pgtable); + default: + break; } - - /* If nothing specific is required use the kernel commandline default */ - return do_iommu_domain_alloc(type, dev, 0, amd_iommu_pgtable); + return ERR_PTR(-EOPNOTSUPP); } void amd_iommu_domain_free(struct iommu_domain *dom) @@ -2475,10 +2597,19 @@ static int blocked_domain_attach_device(struct iommu_domain *domain, return 0; } +static int blocked_domain_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid, + struct iommu_domain *old) +{ + amd_iommu_remove_dev_pasid(dev, pasid, old); + return 0; +} + static struct iommu_domain blocked_domain = { .type = IOMMU_DOMAIN_BLOCKED, .ops = &(const struct iommu_domain_ops) { .attach_dev = blocked_domain_attach_device, + .set_dev_pasid = blocked_domain_set_dev_pasid, } }; @@ -2498,7 +2629,7 @@ void amd_iommu_init_identity_domain(void) identity_domain.id = pdom_id_alloc(); - protection_domain_init(&identity_domain, NUMA_NO_NODE); + protection_domain_init(&identity_domain); } /* Same as blocked domain except it supports only ops->attach_dev() */ @@ -2666,12 +2797,12 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain, bool enable) { struct protection_domain *pdomain = to_pdomain(domain); - struct dev_table_entry *dev_table; + struct dev_table_entry *dte; struct iommu_dev_data *dev_data; bool domain_flush = false; struct amd_iommu *iommu; unsigned long flags; - u64 pte_root; + u64 new; spin_lock_irqsave(&pdomain->lock, flags); if (!(pdomain->dirty_tracking ^ enable)) { @@ -2680,16 +2811,15 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain, } list_for_each_entry(dev_data, &pdomain->dev_list, list) { + spin_lock(&dev_data->dte_lock); iommu = get_amd_iommu_from_dev_data(dev_data); - - dev_table = get_dev_table(iommu); - pte_root = dev_table[dev_data->devid].data[0]; - - pte_root = (enable ? pte_root | DTE_FLAG_HAD : - pte_root & ~DTE_FLAG_HAD); + dte = &get_dev_table(iommu)[dev_data->devid]; + new = dte->data[0]; + new = (enable ? 
new | DTE_FLAG_HAD : new & ~DTE_FLAG_HAD); + dte->data[0] = new; + spin_unlock(&dev_data->dte_lock); /* Flush device DTE */ - dev_table[dev_data->devid].data[0] = pte_root; device_flush_dte(dev_data); domain_flush = true; } @@ -2890,7 +3020,6 @@ const struct iommu_ops amd_iommu_ops = { .blocked_domain = &blocked_domain, .release_domain = &release_domain, .identity_domain = &identity_domain.domain, - .domain_alloc = amd_iommu_domain_alloc, .domain_alloc_paging_flags = amd_iommu_domain_alloc_paging_flags, .domain_alloc_sva = amd_iommu_domain_alloc_sva, .probe_device = amd_iommu_probe_device, @@ -2901,7 +3030,6 @@ const struct iommu_ops amd_iommu_ops = { .def_domain_type = amd_iommu_def_domain_type, .dev_enable_feat = amd_iommu_dev_enable_feature, .dev_disable_feat = amd_iommu_dev_disable_feature, - .remove_dev_pasid = amd_iommu_remove_dev_pasid, .page_response = amd_iommu_page_response, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = amd_iommu_attach_device, @@ -2956,17 +3084,23 @@ out: static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid, struct irq_remap_table *table) { - u64 dte; - struct dev_table_entry *dev_table = get_dev_table(iommu); + u64 new; + struct dev_table_entry *dte = &get_dev_table(iommu)[devid]; + struct iommu_dev_data *dev_data = search_dev_data(iommu, devid); + + if (dev_data) + spin_lock(&dev_data->dte_lock); - dte = dev_table[devid].data[2]; - dte &= ~DTE_IRQ_PHYS_ADDR_MASK; - dte |= iommu_virt_to_phys(table->table); - dte |= DTE_IRQ_REMAP_INTCTL; - dte |= DTE_INTTABLEN; - dte |= DTE_IRQ_REMAP_ENABLE; + new = READ_ONCE(dte->data[2]); + new &= ~DTE_IRQ_PHYS_ADDR_MASK; + new |= iommu_virt_to_phys(table->table); + new |= DTE_IRQ_REMAP_INTCTL; + new |= DTE_INTTABLEN; + new |= DTE_IRQ_REMAP_ENABLE; + WRITE_ONCE(dte->data[2], new); - dev_table[devid].data[2] = dte; + if (dev_data) + spin_unlock(&dev_data->dte_lock); } static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid) diff --git a/drivers/iommu/amd/pasid.c b/drivers/iommu/amd/pasid.c index 8c73a30c2800..11150cfd6718 100644 --- a/drivers/iommu/amd/pasid.c +++ b/drivers/iommu/amd/pasid.c @@ -185,12 +185,13 @@ struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev, struct protection_domain *pdom; int ret; - pdom = protection_domain_alloc(IOMMU_DOMAIN_SVA, dev_to_node(dev)); + pdom = protection_domain_alloc(); if (!pdom) return ERR_PTR(-ENOMEM); pdom->domain.ops = &amd_sva_domain_ops; pdom->mn.ops = &sva_mn; + pdom->domain.type = IOMMU_DOMAIN_SVA; ret = mmu_notifier_register(&pdom->mn, mm); if (ret) { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 1d3e71569775..9ba596430e7c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -112,6 +112,15 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, * from the current CPU register */ target->data[3] = cpu_to_le64(read_sysreg(mair_el1)); + + /* + * Note that we don't bother with S1PIE on the SMMU, we just rely on + * our default encoding scheme matching direct permissions anyway. + * SMMU has no notion of S1POE nor GCS, so make sure that is clear if + * either is enabled for CPUs, just in case anyone imagines otherwise. 
+ */ + if (system_supports_poe() || system_supports_gcs()) + dev_warn_once(master->smmu->dev, "SVA devices ignore permission overlays and GCS\n"); } EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_sva_cd); @@ -206,8 +215,12 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) unsigned long asid_bits; u32 feat_mask = ARM_SMMU_FEAT_COHERENCY; - if (vabits_actual == 52) + if (vabits_actual == 52) { + /* We don't support LPA2 */ + if (PAGE_SIZE != SZ_64K) + return false; feat_mask |= ARM_SMMU_FEAT_VAX; + } if ((smmu->features & feat_mask) != feat_mask) return false; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index a5c7002ff75b..358072b4e293 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -26,6 +26,7 @@ #include <linux/pci.h> #include <linux/pci-ats.h> #include <linux/platform_device.h> +#include <linux/string_choices.h> #include <kunit/visibility.h> #include <uapi/linux/iommufd.h> @@ -83,8 +84,28 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { { 0, NULL}, }; -static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_device *smmu, u32 flags); +static const char * const event_str[] = { + [EVT_ID_BAD_STREAMID_CONFIG] = "C_BAD_STREAMID", + [EVT_ID_STE_FETCH_FAULT] = "F_STE_FETCH", + [EVT_ID_BAD_STE_CONFIG] = "C_BAD_STE", + [EVT_ID_STREAM_DISABLED_FAULT] = "F_STREAM_DISABLED", + [EVT_ID_BAD_SUBSTREAMID_CONFIG] = "C_BAD_SUBSTREAMID", + [EVT_ID_CD_FETCH_FAULT] = "F_CD_FETCH", + [EVT_ID_BAD_CD_CONFIG] = "C_BAD_CD", + [EVT_ID_TRANSLATION_FAULT] = "F_TRANSLATION", + [EVT_ID_ADDR_SIZE_FAULT] = "F_ADDR_SIZE", + [EVT_ID_ACCESS_FAULT] = "F_ACCESS", + [EVT_ID_PERMISSION_FAULT] = "F_PERMISSION", + [EVT_ID_VMS_FETCH_FAULT] = "F_VMS_FETCH", +}; + +static const char * const event_class_str[] = { + [0] = "CD fetch", + [1] = "Stage 1 translation table fetch", + [2] = "Input address caused fault", + [3] = "Reserved", +}; + static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master); static void parse_driver_options(struct arm_smmu_device *smmu) @@ -1759,17 +1780,49 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) } /* IRQ and event handlers */ -static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) +static void arm_smmu_decode_event(struct arm_smmu_device *smmu, u64 *raw, + struct arm_smmu_event *event) +{ + struct arm_smmu_master *master; + + event->id = FIELD_GET(EVTQ_0_ID, raw[0]); + event->sid = FIELD_GET(EVTQ_0_SID, raw[0]); + event->ssv = FIELD_GET(EVTQ_0_SSV, raw[0]); + event->ssid = event->ssv ? 
FIELD_GET(EVTQ_0_SSID, raw[0]) : IOMMU_NO_PASID; + event->privileged = FIELD_GET(EVTQ_1_PnU, raw[1]); + event->instruction = FIELD_GET(EVTQ_1_InD, raw[1]); + event->s2 = FIELD_GET(EVTQ_1_S2, raw[1]); + event->read = FIELD_GET(EVTQ_1_RnW, raw[1]); + event->stag = FIELD_GET(EVTQ_1_STAG, raw[1]); + event->stall = FIELD_GET(EVTQ_1_STALL, raw[1]); + event->class = FIELD_GET(EVTQ_1_CLASS, raw[1]); + event->iova = FIELD_GET(EVTQ_2_ADDR, raw[2]); + event->ipa = raw[3] & EVTQ_3_IPA; + event->fetch_addr = raw[3] & EVTQ_3_FETCH_ADDR; + event->ttrnw = FIELD_GET(EVTQ_1_TT_READ, raw[1]); + event->class_tt = false; + event->dev = NULL; + + if (event->id == EVT_ID_PERMISSION_FAULT) + event->class_tt = (event->class == EVTQ_1_CLASS_TT); + + mutex_lock(&smmu->streams_mutex); + master = arm_smmu_find_master(smmu, event->sid); + if (master) + event->dev = get_device(master->dev); + mutex_unlock(&smmu->streams_mutex); +} + +static int arm_smmu_handle_event(struct arm_smmu_device *smmu, + struct arm_smmu_event *event) { int ret = 0; u32 perm = 0; struct arm_smmu_master *master; - bool ssid_valid = evt[0] & EVTQ_0_SSV; - u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]); struct iopf_fault fault_evt = { }; struct iommu_fault *flt = &fault_evt.fault; - switch (FIELD_GET(EVTQ_0_ID, evt[0])) { + switch (event->id) { case EVT_ID_TRANSLATION_FAULT: case EVT_ID_ADDR_SIZE_FAULT: case EVT_ID_ACCESS_FAULT: @@ -1779,35 +1832,35 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) return -EOPNOTSUPP; } - if (!(evt[1] & EVTQ_1_STALL)) + if (!event->stall) return -EOPNOTSUPP; - if (evt[1] & EVTQ_1_RnW) + if (event->read) perm |= IOMMU_FAULT_PERM_READ; else perm |= IOMMU_FAULT_PERM_WRITE; - if (evt[1] & EVTQ_1_InD) + if (event->instruction) perm |= IOMMU_FAULT_PERM_EXEC; - if (evt[1] & EVTQ_1_PnU) + if (event->privileged) perm |= IOMMU_FAULT_PERM_PRIV; flt->type = IOMMU_FAULT_PAGE_REQ; flt->prm = (struct iommu_fault_page_request) { .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE, - .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]), + .grpid = event->stag, .perm = perm, - .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]), + .addr = event->iova, }; - if (ssid_valid) { + if (event->ssv) { flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; - flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]); + flt->prm.pasid = event->ssid; } mutex_lock(&smmu->streams_mutex); - master = arm_smmu_find_master(smmu, sid); + master = arm_smmu_find_master(smmu, event->sid); if (!master) { ret = -EINVAL; goto out_unlock; @@ -1819,29 +1872,82 @@ out_unlock: return ret; } +static void arm_smmu_dump_raw_event(struct arm_smmu_device *smmu, u64 *raw, + struct arm_smmu_event *event) +{ + int i; + + dev_err(smmu->dev, "event 0x%02x received:\n", event->id); + + for (i = 0; i < EVTQ_ENT_DWORDS; ++i) + dev_err(smmu->dev, "\t0x%016llx\n", raw[i]); +} + +#define ARM_SMMU_EVT_KNOWN(e) ((e)->id < ARRAY_SIZE(event_str) && event_str[(e)->id]) +#define ARM_SMMU_LOG_EVT_STR(e) ARM_SMMU_EVT_KNOWN(e) ? event_str[(e)->id] : "UNKNOWN" +#define ARM_SMMU_LOG_CLIENT(e) (e)->dev ? 
dev_name((e)->dev) : "(unassigned sid)" + +static void arm_smmu_dump_event(struct arm_smmu_device *smmu, u64 *raw, + struct arm_smmu_event *evt, + struct ratelimit_state *rs) +{ + if (!__ratelimit(rs)) + return; + + arm_smmu_dump_raw_event(smmu, raw, evt); + + switch (evt->id) { + case EVT_ID_TRANSLATION_FAULT: + case EVT_ID_ADDR_SIZE_FAULT: + case EVT_ID_ACCESS_FAULT: + case EVT_ID_PERMISSION_FAULT: + dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x iova: %#llx ipa: %#llx", + ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt), + evt->sid, evt->ssid, evt->iova, evt->ipa); + + dev_err(smmu->dev, "%s %s %s %s \"%s\"%s%s stag: %#x", + evt->privileged ? "priv" : "unpriv", + evt->instruction ? "inst" : "data", + str_read_write(evt->read), + evt->s2 ? "s2" : "s1", event_class_str[evt->class], + evt->class_tt ? (evt->ttrnw ? " ttd_read" : " ttd_write") : "", + evt->stall ? " stall" : "", evt->stag); + + break; + + case EVT_ID_STE_FETCH_FAULT: + case EVT_ID_CD_FETCH_FAULT: + case EVT_ID_VMS_FETCH_FAULT: + dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x fetch_addr: %#llx", + ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt), + evt->sid, evt->ssid, evt->fetch_addr); + + break; + + default: + dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x", + ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt), + evt->sid, evt->ssid); + } +} + static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) { - int i, ret; + u64 evt[EVTQ_ENT_DWORDS]; + struct arm_smmu_event event = {0}; struct arm_smmu_device *smmu = dev; struct arm_smmu_queue *q = &smmu->evtq.q; struct arm_smmu_ll_queue *llq = &q->llq; static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - u64 evt[EVTQ_ENT_DWORDS]; do { while (!queue_remove_raw(q, evt)) { - u8 id = FIELD_GET(EVTQ_0_ID, evt[0]); - - ret = arm_smmu_handle_evt(smmu, evt); - if (!ret || !__ratelimit(&rs)) - continue; - - dev_info(smmu->dev, "event 0x%02x received:\n", id); - for (i = 0; i < ARRAY_SIZE(evt); ++i) - dev_info(smmu->dev, "\t0x%016llx\n", - (unsigned long long)evt[i]); + arm_smmu_decode_event(smmu, evt, &event); + if (arm_smmu_handle_event(smmu, &event)) + arm_smmu_dump_event(smmu, evt, &event, &rs); + put_device(event.dev); cond_resched(); } @@ -2353,39 +2459,12 @@ struct arm_smmu_domain *arm_smmu_domain_alloc(void) if (!smmu_domain) return ERR_PTR(-ENOMEM); - mutex_init(&smmu_domain->init_mutex); INIT_LIST_HEAD(&smmu_domain->devices); spin_lock_init(&smmu_domain->devices_lock); return smmu_domain; } -static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) -{ - struct arm_smmu_domain *smmu_domain; - - /* - * Allocate the domain and initialise some of its data structures. - * We can't really do anything meaningful until we've added a - * master. 
- */ - smmu_domain = arm_smmu_domain_alloc(); - if (IS_ERR(smmu_domain)) - return ERR_CAST(smmu_domain); - - if (dev) { - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - int ret; - - ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0); - if (ret) { - kfree(smmu_domain); - return ERR_PTR(ret); - } - } - return &smmu_domain->domain; -} - static void arm_smmu_domain_free_paging(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); @@ -2451,12 +2530,6 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, struct arm_smmu_domain *smmu_domain); bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; - /* Restrict the stage to what we can actually support */ - if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) - smmu_domain->stage = ARM_SMMU_DOMAIN_S2; - if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2)) - smmu_domain->stage = ARM_SMMU_DOMAIN_S1; - pgtbl_cfg = (struct io_pgtable_cfg) { .pgsize_bitmap = smmu->pgsize_bitmap, .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, @@ -2745,9 +2818,14 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, * Translation Requests and Translated transactions are denied * as though ATS is disabled for the stream (STE.EATS == 0b00), * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events - * (IHI0070Ea 5.2 Stream Table Entry). Thus ATS can only be - * enabled if we have arm_smmu_domain, those always have page - * tables. + * (IHI0070Ea 5.2 Stream Table Entry). + * + * However, if we have installed a CD table and are using S1DSS + * then ATS will work in S1DSS bypass. See "13.6.4 Full ATS + * skipping stage 1". + * + * Disable ATS if we are going to create a normal 0b100 bypass + * STE. */ state->ats_enabled = !state->disable_ats && arm_smmu_ats_supported(master); @@ -2853,15 +2931,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) state.master = master = dev_iommu_priv_get(dev); smmu = master->smmu; - mutex_lock(&smmu_domain->init_mutex); - - if (!smmu_domain->smmu) { - ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0); - } else if (smmu_domain->smmu != smmu) - ret = -EINVAL; - - mutex_unlock(&smmu_domain->init_mutex); - if (ret) + if (smmu_domain->smmu != smmu) return ret; if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { @@ -2918,16 +2988,9 @@ static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain, struct arm_smmu_master *master = dev_iommu_priv_get(dev); struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_cd target_cd; - int ret = 0; - mutex_lock(&smmu_domain->init_mutex); - if (!smmu_domain->smmu) - ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0); - else if (smmu_domain->smmu != smmu) - ret = -EINVAL; - mutex_unlock(&smmu_domain->init_mutex); - if (ret) - return ret; + if (smmu_domain->smmu != smmu) + return -EINVAL; if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) return -EINVAL; @@ -3016,13 +3079,12 @@ out_unlock: return ret; } -static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid, - struct iommu_domain *domain) +static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain, + struct device *dev, ioasid_t pasid, + struct iommu_domain *old_domain) { + struct arm_smmu_domain *smmu_domain = to_smmu_domain(old_domain); struct arm_smmu_master *master = dev_iommu_priv_get(dev); - struct arm_smmu_domain *smmu_domain; - - smmu_domain = to_smmu_domain(domain); mutex_lock(&arm_smmu_asid_lock); arm_smmu_clear_cd(master, pasid); @@ -3043,6 +3105,7 @@ static void 
arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid, sid_domain->type == IOMMU_DOMAIN_BLOCKED) sid_domain->ops->attach_dev(sid_domain, dev); } + return 0; } static void arm_smmu_attach_dev_ste(struct iommu_domain *domain, @@ -3070,8 +3133,10 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain, if (arm_smmu_ssids_in_use(&master->cd_table)) { /* * If a CD table has to be present then we need to run with ATS - * on even though the RID will fail ATS queries with UR. This is - * because we have no idea what the PASID's need. + * on because we have to assume a PASID is using ATS. For + * IDENTITY this will setup things so that S1DSS=bypass which + * follows the explanation in "13.6.4 Full ATS skipping stage 1" + * and allows for ATS on the RID to work. */ state.cd_needs_ats = true; arm_smmu_attach_prepare(&state, domain); @@ -3124,6 +3189,7 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain, static const struct iommu_domain_ops arm_smmu_blocked_ops = { .attach_dev = arm_smmu_attach_dev_blocked, + .set_dev_pasid = arm_smmu_blocking_set_dev_pasid, }; static struct iommu_domain arm_smmu_blocked_domain = { @@ -3136,6 +3202,7 @@ arm_smmu_domain_alloc_paging_flags(struct device *dev, u32 flags, const struct iommu_user_data *user_data) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_device *smmu = master->smmu; const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_PASID | IOMMU_HWPT_ALLOC_NEST_PARENT; @@ -3147,25 +3214,43 @@ arm_smmu_domain_alloc_paging_flags(struct device *dev, u32 flags, if (user_data) return ERR_PTR(-EOPNOTSUPP); - if (flags & IOMMU_HWPT_ALLOC_PASID) - return arm_smmu_domain_alloc_paging(dev); - smmu_domain = arm_smmu_domain_alloc(); if (IS_ERR(smmu_domain)) return ERR_CAST(smmu_domain); - if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT) { - if (!(master->smmu->features & ARM_SMMU_FEAT_NESTING)) { + switch (flags) { + case 0: + /* Prefer S1 if available */ + if (smmu->features & ARM_SMMU_FEAT_TRANS_S1) + smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + else + smmu_domain->stage = ARM_SMMU_DOMAIN_S2; + break; + case IOMMU_HWPT_ALLOC_NEST_PARENT: + if (!(smmu->features & ARM_SMMU_FEAT_NESTING)) { ret = -EOPNOTSUPP; goto err_free; } smmu_domain->stage = ARM_SMMU_DOMAIN_S2; smmu_domain->nest_parent = true; + break; + case IOMMU_HWPT_ALLOC_DIRTY_TRACKING: + case IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_PASID: + case IOMMU_HWPT_ALLOC_PASID: + if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) { + ret = -EOPNOTSUPP; + goto err_free; + } + smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + break; + default: + ret = -EOPNOTSUPP; + goto err_free; } smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED; smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops; - ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, flags); + ret = arm_smmu_domain_finalise(smmu_domain, smmu, flags); if (ret) goto err_free; return &smmu_domain->domain; @@ -3237,8 +3322,8 @@ static struct platform_driver arm_smmu_driver; static struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode) { - struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver, - fwnode); + struct device *dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode); + put_device(dev); return dev ? 
dev_get_drvdata(dev) : NULL; } @@ -3543,7 +3628,6 @@ static struct iommu_ops arm_smmu_ops = { .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, .hw_info = arm_smmu_hw_info, - .domain_alloc_paging = arm_smmu_domain_alloc_paging, .domain_alloc_sva = arm_smmu_sva_domain_alloc, .domain_alloc_paging_flags = arm_smmu_domain_alloc_paging_flags, .probe_device = arm_smmu_probe_device, @@ -3551,7 +3635,6 @@ static struct iommu_ops arm_smmu_ops = { .device_group = arm_smmu_device_group, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, - .remove_dev_pasid = arm_smmu_remove_dev_pasid, .dev_enable_feat = arm_smmu_dev_enable_feature, .dev_disable_feat = arm_smmu_dev_disable_feature, .page_response = arm_smmu_page_response, @@ -4239,7 +4322,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) */ if (!!(reg & IDR0_COHACC) != coherent) dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n", - coherent ? "true" : "false"); + str_true_false(coherent)); switch (FIELD_GET(IDR0_STALL_MODEL, reg)) { case IDR0_STALL_MODEL_FORCE: @@ -4663,7 +4746,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) /* Initialise in-memory data structures */ ret = arm_smmu_init_structures(smmu); if (ret) - return ret; + goto err_free_iopf; /* Record our private device structure */ platform_set_drvdata(pdev, smmu); @@ -4674,22 +4757,29 @@ static int arm_smmu_device_probe(struct platform_device *pdev) /* Reset the device */ ret = arm_smmu_device_reset(smmu); if (ret) - return ret; + goto err_disable; /* And we're up. Go go go! */ ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL, "smmu3.%pa", &ioaddr); if (ret) - return ret; + goto err_disable; ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev); if (ret) { dev_err(dev, "Failed to register iommu\n"); - iommu_device_sysfs_remove(&smmu->iommu); - return ret; + goto err_free_sysfs; } return 0; + +err_free_sysfs: + iommu_device_sysfs_remove(&smmu->iommu); +err_disable: + arm_smmu_device_disable(smmu); +err_free_iopf: + iopf_queue_free(smmu->evtq.iopf); + return ret; } static void arm_smmu_device_remove(struct platform_device *pdev) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 0107d3f333a1..bd9d7c85576a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -452,10 +452,18 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid) #define EVTQ_0_ID GENMASK_ULL(7, 0) +#define EVT_ID_BAD_STREAMID_CONFIG 0x02 +#define EVT_ID_STE_FETCH_FAULT 0x03 +#define EVT_ID_BAD_STE_CONFIG 0x04 +#define EVT_ID_STREAM_DISABLED_FAULT 0x06 +#define EVT_ID_BAD_SUBSTREAMID_CONFIG 0x08 +#define EVT_ID_CD_FETCH_FAULT 0x09 +#define EVT_ID_BAD_CD_CONFIG 0x0a #define EVT_ID_TRANSLATION_FAULT 0x10 #define EVT_ID_ADDR_SIZE_FAULT 0x11 #define EVT_ID_ACCESS_FAULT 0x12 #define EVT_ID_PERMISSION_FAULT 0x13 +#define EVT_ID_VMS_FETCH_FAULT 0x25 #define EVTQ_0_SSV (1UL << 11) #define EVTQ_0_SSID GENMASK_ULL(31, 12) @@ -467,9 +475,11 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid) #define EVTQ_1_RnW (1UL << 35) #define EVTQ_1_S2 (1UL << 39) #define EVTQ_1_CLASS GENMASK_ULL(41, 40) +#define EVTQ_1_CLASS_TT 0x01 #define EVTQ_1_TT_READ (1UL << 44) #define EVTQ_2_ADDR GENMASK_ULL(63, 0) #define EVTQ_3_IPA GENMASK_ULL(51, 12) +#define EVTQ_3_FETCH_ADDR GENMASK_ULL(51, 3) /* PRI queue */ #define PRIQ_ENT_SZ_SHIFT 4 @@ -789,6 +799,26 @@ struct arm_smmu_stream { struct 
rb_node node; }; +struct arm_smmu_event { + u8 stall : 1, + ssv : 1, + privileged : 1, + instruction : 1, + s2 : 1, + read : 1, + ttrnw : 1, + class_tt : 1; + u8 id; + u8 class; + u16 stag; + u32 sid; + u32 ssid; + u64 iova; + u64 ipa; + u64 fetch_addr; + struct device *dev; +}; + /* SMMU private data for each master */ struct arm_smmu_master { struct arm_smmu_device *smmu; @@ -813,7 +843,6 @@ enum arm_smmu_domain_stage { struct arm_smmu_domain { struct arm_smmu_device *smmu; - struct mutex init_mutex; /* Protects smmu pointer */ struct io_pgtable_ops *pgtbl_ops; atomic_t nr_ats_masters; diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 6e41ddaa24d6..d525ab43a4ae 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -79,7 +79,6 @@ #define TEGRA241_VCMDQ_PAGE1(q) (TEGRA241_VCMDQ_PAGE1_BASE + 0x80*(q)) #define VCMDQ_ADDR GENMASK(47, 5) #define VCMDQ_LOG2SIZE GENMASK(4, 0) -#define VCMDQ_LOG2SIZE_MAX 19 #define TEGRA241_VCMDQ_BASE 0x00000 #define TEGRA241_VCMDQ_CONS_INDX_BASE 0x00008 @@ -505,12 +504,15 @@ static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq) struct arm_smmu_cmdq *cmdq = &vcmdq->cmdq; struct arm_smmu_queue *q = &cmdq->q; char name[16]; + u32 regval; int ret; snprintf(name, 16, "vcmdq%u", vcmdq->idx); - /* Queue size, capped to ensure natural alignment */ - q->llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, VCMDQ_LOG2SIZE_MAX); + /* Cap queue size to SMMU's IDR1.CMDQS and ensure natural alignment */ + regval = readl_relaxed(smmu->base + ARM_SMMU_IDR1); + q->llq.max_n_shift = + min_t(u32, CMDQ_MAX_SZ_SHIFT, FIELD_GET(IDR1_CMDQS, regval)); /* Use the common helper to init the VCMDQ, and then... */ ret = arm_smmu_init_one_queue(smmu, q, vcmdq->page0, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c index 99030e6b16e7..db9b9a8e139c 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c @@ -110,7 +110,6 @@ static struct arm_smmu_device *cavium_smmu_impl_init(struct arm_smmu_device *smm int arm_mmu500_reset(struct arm_smmu_device *smmu) { u32 reg, major; - int i; /* * On MMU-500 r2p0 onwards we need to clear ACR.CACHE_LOCK before * writes to the context bank ACTLRs will stick. And we just hope that @@ -128,11 +127,12 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu) reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN; arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sACR, reg); +#ifdef CONFIG_ARM_SMMU_MMU_500_CPRE_ERRATA /* * Disable MMU-500's not-particularly-beneficial next-page * prefetcher for the sake of at least 5 known errata. 
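The loop that follows this comment clears ACTLR.CPRE in every context bank and then reads the register back, because a still-set SACR.CACHE_LOCK makes the write silently stick at the old value; the new CONFIG_ARM_SMMU_MMU_500_CPRE_ERRATA guard makes the whole workaround compile-time optional. A standalone sketch of that clear-and-verify pattern, with stub cb_read()/cb_write() and an invented latch playing the hardware:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ACTLR_CPRE (1u << 1)

/* Invented latch playing the part of one context bank's ACTLR. */
static uint32_t fake_actlr = ACTLR_CPRE | 0x1;

static uint32_t cb_read(int cb)              { (void)cb; return fake_actlr; }
static void     cb_write(int cb, uint32_t v) { (void)cb; fake_actlr = v; }

static bool disable_prefetch(int num_context_banks)
{
	for (int i = 0; i < num_context_banks; ++i) {
		uint32_t reg = cb_read(i);

		cb_write(i, reg & ~ACTLR_CPRE);

		/* If a lock bit vetoed the write, the readback still
		 * has CPRE set and we must warn rather than assume. */
		if (cb_read(i) & ACTLR_CPRE)
			return false;
	}
	return true;
}

int main(void)
{
	printf("prefetch disabled: %s\n", disable_prefetch(4) ? "yes" : "no");
	return 0;
}
```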
*/ - for (i = 0; i < smmu->num_context_banks; ++i) { + for (int i = 0; i < smmu->num_context_banks; ++i) { reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR); reg &= ~ARM_MMU500_ACTLR_CPRE; arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, reg); @@ -140,6 +140,7 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu) if (reg & ARM_MMU500_ACTLR_CPRE) dev_warn_once(smmu->dev, "Failed to disable prefetcher for errata workarounds, check SACR.CACHE_LOCK\n"); } +#endif return 0; } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index 548783f3f8e8..d03b2239baad 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -73,7 +73,7 @@ void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) if (__ratelimit(&rs)) { dev_err(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n"); - cfg = qsmmu->cfg; + cfg = qsmmu->data->cfg; if (!cfg) return; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 6372f3e25c4b..59d02687280e 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -16,6 +16,40 @@ #define QCOM_DUMMY_VAL -1 +/* + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the + * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch + * buffer). The remaining bits are implementation defined and vary across + * SoCs. + */ + +#define CPRE (1 << 1) +#define CMTLB (1 << 0) +#define PREFETCH_SHIFT 8 +#define PREFETCH_DEFAULT 0 +#define PREFETCH_SHALLOW (1 << PREFETCH_SHIFT) +#define PREFETCH_MODERATE (2 << PREFETCH_SHIFT) +#define PREFETCH_DEEP (3 << PREFETCH_SHIFT) +#define GFX_ACTLR_PRR (1 << 5) + +static const struct of_device_id qcom_smmu_actlr_client_of_match[] = { + { .compatible = "qcom,adreno", + .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) }, + { .compatible = "qcom,adreno-gmu", + .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) }, + { .compatible = "qcom,adreno-smmu", + .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) }, + { .compatible = "qcom,fastrpc", + .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) }, + { .compatible = "qcom,sc7280-mdss", + .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) }, + { .compatible = "qcom,sc7280-venus", + .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) }, + { .compatible = "qcom,sm8550-mdss", + .data = (const void *) (PREFETCH_DEFAULT | CMTLB) }, + { } +}; + static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu) { return container_of(smmu, struct qcom_smmu, smmu); @@ -99,6 +133,47 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg); } +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set) +{ + struct arm_smmu_domain *smmu_domain = (void *)cookie; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + u32 reg = 0; + int ret; + + ret = pm_runtime_resume_and_get(smmu->dev); + if (ret < 0) { + dev_err(smmu->dev, "failed to get runtime PM: %d\n", ret); + return; + } + + reg = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR); + reg &= ~GFX_ACTLR_PRR; + if (set) + reg |= FIELD_PREP(GFX_ACTLR_PRR, 1); + arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg); + pm_runtime_put_autosuspend(smmu->dev); +} + +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr) +{ + 
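qcom_adreno_smmu_set_prr_bit() above, and qcom_adreno_smmu_set_prr_addr() whose body continues below, both bracket their register access between pm_runtime_resume_and_get() and pm_runtime_put_autosuspend(), so the SMMU is guaranteed to be powered while its registers are touched. The shape of that bracket as a compilable sketch; the stub names here are inventions of the sketch, not the kernel API:

```c
#include <stdio.h>

static int pm_refcount;

static int runtime_resume_and_get(void)
{
	pm_refcount++;
	printf("resumed (refs=%d)\n", pm_refcount);
	return 0;	/* a negative value would mean resume failed */
}

static void runtime_put(void)
{
	pm_refcount--;
	printf("put (refs=%d)\n", pm_refcount);
}

static void set_prr_bit(int set)
{
	if (runtime_resume_and_get() < 0)
		return;		/* never touch MMIO on a suspended device */

	printf("ACTLR.PRR <- %d\n", set);	/* the MMIO RMW goes here */

	runtime_put();
}

int main(void)
{
	set_prr_bit(1);
	return 0;
}
```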
struct arm_smmu_domain *smmu_domain = (void *)cookie; + struct arm_smmu_device *smmu = smmu_domain->smmu; + int ret; + + ret = pm_runtime_resume_and_get(smmu->dev); + if (ret < 0) { + dev_err(smmu->dev, "failed to get runtime PM: %d\n", ret); + return; + } + + writel_relaxed(lower_32_bits(page_addr), + smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR); + writel_relaxed(upper_32_bits(page_addr), + smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR); + pm_runtime_put_autosuspend(smmu->dev); +} + #define QCOM_ADRENO_SMMU_GPU_SID 0 static bool qcom_adreno_smmu_is_gpu_device(struct device *dev) @@ -207,13 +282,37 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu) return true; } +static void qcom_smmu_set_actlr_dev(struct device *dev, struct arm_smmu_device *smmu, int cbndx, + const struct of_device_id *client_match) +{ + const struct of_device_id *match = + of_match_device(client_match, dev); + + if (!match) { + dev_dbg(dev, "no ACTLR settings present\n"); + return; + } + + arm_smmu_cb_write(smmu, cbndx, ARM_SMMU_CB_ACTLR, (unsigned long)match->data); +} + static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg, struct device *dev) { + const struct device_node *np = smmu_domain->smmu->dev->of_node; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); + const struct of_device_id *client_match; + int cbndx = smmu_domain->cfg.cbndx; struct adreno_smmu_priv *priv; smmu_domain->cfg.flush_walk_prefer_tlbiasid = true; + client_match = qsmmu->data->client_match; + + if (client_match) + qcom_smmu_set_actlr_dev(dev, smmu, cbndx, client_match); + /* Only enable split pagetables for the GPU device (SID 0) */ if (!qcom_adreno_smmu_is_gpu_device(dev)) return 0; @@ -239,6 +338,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, priv->get_fault_info = qcom_adreno_smmu_get_fault_info; priv->set_stall = qcom_adreno_smmu_set_stall; priv->resume_translation = qcom_adreno_smmu_resume_translation; + priv->set_prr_bit = NULL; + priv->set_prr_addr = NULL; + + if (of_device_is_compatible(np, "qcom,smmu-500") && + of_device_is_compatible(np, "qcom,adreno-smmu")) { + priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit; + priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr; + } return 0; } @@ -269,8 +376,18 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg, struct device *dev) { + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); + const struct of_device_id *client_match; + int cbndx = smmu_domain->cfg.cbndx; + smmu_domain->cfg.flush_walk_prefer_tlbiasid = true; + client_match = qsmmu->data->client_match; + + if (client_match) + qcom_smmu_set_actlr_dev(dev, smmu, cbndx, client_match); + return 0; } @@ -507,7 +624,7 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, return ERR_PTR(-ENOMEM); qsmmu->smmu.impl = impl; - qsmmu->cfg = data->cfg; + qsmmu->data = data; return &qsmmu->smmu; } @@ -550,6 +667,7 @@ static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = { .impl = &qcom_smmu_500_impl, .adreno_impl = &qcom_adreno_smmu_500_impl, .cfg = &qcom_smmu_impl0_cfg, + .client_match = qcom_smmu_actlr_client_of_match, }; /* @@ -567,6 +685,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sc8180x-smmu-500", .data = 
&qcom_smmu_500_impl0_data }, { .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_v2_data }, + { .compatible = "qcom,sdm670-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data }, { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_500_impl0_data}, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h index 3c134d1a6277..8addd453f5f1 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h @@ -8,7 +8,7 @@ struct qcom_smmu { struct arm_smmu_device smmu; - const struct qcom_smmu_config *cfg; + const struct qcom_smmu_match_data *data; bool bypass_quirk; u8 bypass_cbndx; u32 stall_enabled; @@ -28,6 +28,7 @@ struct qcom_smmu_match_data { const struct qcom_smmu_config *cfg; const struct arm_smmu_impl *impl; const struct arm_smmu_impl *adreno_impl; + const struct of_device_id * const client_match; }; irqreturn_t qcom_smmu_context_fault(int irq, void *dev); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 650664e0f6e3..de205a34ffc6 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -34,6 +34,7 @@ #include <linux/pm_runtime.h> #include <linux/ratelimit.h> #include <linux/slab.h> +#include <linux/string_choices.h> #include <linux/fsl/mc.h> @@ -1411,8 +1412,8 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) static struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode) { - struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver, - fwnode); + struct device *dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode); + put_device(dev); return dev ? dev_get_drvdata(dev) : NULL; } @@ -1437,17 +1438,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) goto out_free; } else { smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode); - - /* - * Defer probe if the relevant SMMU instance hasn't finished - * probing yet. This is a fragile hack and we'd ideally - * avoid this race in the core code. Until that's ironed - * out, however, this is the most pragmatic option on the - * table. - */ - if (!smmu) - return ERR_PTR(dev_err_probe(dev, -EPROBE_DEFER, - "smmu dev has not bound yet\n")); } ret = -EINVAL; @@ -2117,7 +2107,7 @@ static void arm_smmu_rmr_install_bypass_smr(struct arm_smmu_device *smmu) } dev_notice(smmu->dev, "\tpreserved %d boot mapping%s\n", cnt, - cnt == 1 ? "" : "s"); + str_plural(cnt)); iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); } @@ -2227,29 +2217,26 @@ static int arm_smmu_device_probe(struct platform_device *pdev) i, irq); } + platform_set_drvdata(pdev, smmu); + + /* Check for RMRs and install bypass SMRs if any */ + arm_smmu_rmr_install_bypass_smr(smmu); + + arm_smmu_device_reset(smmu); + arm_smmu_test_smr_masks(smmu); + err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL, "smmu.%pa", &smmu->ioaddr); - if (err) { - dev_err(dev, "Failed to register iommu in sysfs\n"); - return err; - } + if (err) + return dev_err_probe(dev, err, "Failed to register iommu in sysfs\n"); err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, using_legacy_binding ? 
NULL : dev); if (err) { - dev_err(dev, "Failed to register iommu\n"); iommu_device_sysfs_remove(&smmu->iommu); - return err; + return dev_err_probe(dev, err, "Failed to register iommu\n"); } - platform_set_drvdata(pdev, smmu); - - /* Check for RMRs and install bypass SMRs if any */ - arm_smmu_rmr_install_bypass_smr(smmu); - - arm_smmu_device_reset(smmu); - arm_smmu_test_smr_masks(smmu); - /* * We want to avoid touching dev->power.lock in fastpaths unless * it's really going to do something useful - pm_runtime_enabled() diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index e2aeb511ae90..2dbf3243b5ad 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_SCTLR_M BIT(0) #define ARM_SMMU_CB_ACTLR 0x4 +#define ARM_SMMU_GFX_PRR_CFG_LADDR 0x6008 +#define ARM_SMMU_GFX_PRR_CFG_UADDR 0x600C #define ARM_SMMU_CB_RESUME 0x8 #define ARM_SMMU_RESUME_TERMINATE BIT(0) diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index d3bb0798092d..6c7528130cf9 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o cache.o prq.o -obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o +obj-$(CONFIG_DMAR_TABLE) += trace.o obj-$(CONFIG_DMAR_PERF) += perf.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index 09694cca8752..fc35cba59145 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -47,6 +47,7 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; struct cache_tag *tag, *temp; + struct list_head *prev; unsigned long flags; tag = kzalloc(sizeof(*tag), GFP_KERNEL); @@ -65,6 +66,7 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did, tag->dev = iommu->iommu.dev; spin_lock_irqsave(&domain->cache_lock, flags); + prev = &domain->cache_tags; list_for_each_entry(temp, &domain->cache_tags, node) { if (cache_tage_match(temp, did, iommu, dev, pasid, type)) { temp->users++; @@ -73,8 +75,15 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did, trace_cache_tag_assign(temp); return 0; } + if (temp->iommu == iommu) + prev = &temp->node; } - list_add_tail(&tag->node, &domain->cache_tags); + /* + * Link cache tags of same iommu unit together, so corresponding + * flush ops can be batched for iommu unit. 
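The cache.c change above records, while scanning for an existing tag, the last node that belongs to the same IOMMU unit, and the list_add() right after this comment inserts behind it, so one unit's tags stay adjacent and its flush operations can be batched. The same placement logic on a self-contained doubly linked list, with invented names throughout:

```c
#include <stdio.h>
#include <stdlib.h>

struct tag {
	int iommu_id;
	struct tag *prev, *next;
};

/* head is a dummy anchor node, like a kernel list_head. */
static void insert_grouped(struct tag *head, struct tag *new_tag)
{
	struct tag *prev = head;

	/* Remember the last existing tag of the same iommu, if any. */
	for (struct tag *t = head->next; t != head; t = t->next)
		if (t->iommu_id == new_tag->iommu_id)
			prev = t;

	/* list_add(new, prev): link new_tag right after prev. */
	new_tag->next = prev->next;
	new_tag->prev = prev;
	prev->next->prev = new_tag;
	prev->next = new_tag;
}

int main(void)
{
	struct tag head = { .iommu_id = -1 };
	int ids[] = { 0, 1, 0, 1, 0 };

	head.next = head.prev = &head;
	for (unsigned i = 0; i < sizeof(ids) / sizeof(ids[0]); i++) {
		struct tag *t = calloc(1, sizeof(*t));

		if (!t)
			return 1;
		t->iommu_id = ids[i];
		insert_grouped(&head, t);
	}

	for (struct tag *t = head.next; t != &head; t = t->next)
		printf("%d ", t->iommu_id);	/* prints: 1 1 0 0 0 */
	printf("\n");
	return 0;
}
```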
+ */ + list_add(&tag->node, prev); + spin_unlock_irqrestore(&domain->cache_lock, flags); trace_cache_tag_assign(tag); diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c deleted file mode 100644 index 9862dc20b35e..000000000000 --- a/drivers/iommu/intel/cap_audit.c +++ /dev/null @@ -1,217 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * cap_audit.c - audit iommu capabilities for boot time and hot plug - * - * Copyright (C) 2021 Intel Corporation - * - * Author: Kyung Min Park <kyung.min.park@intel.com> - * Lu Baolu <baolu.lu@linux.intel.com> - */ - -#define pr_fmt(fmt) "DMAR: " fmt - -#include "iommu.h" -#include "cap_audit.h" - -static u64 intel_iommu_cap_sanity; -static u64 intel_iommu_ecap_sanity; - -static inline void check_irq_capabilities(struct intel_iommu *a, - struct intel_iommu *b) -{ - CHECK_FEATURE_MISMATCH(a, b, cap, pi_support, CAP_PI_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, eim_support, ECAP_EIM_MASK); -} - -static inline void check_dmar_capabilities(struct intel_iommu *a, - struct intel_iommu *b) -{ - MINIMAL_FEATURE_IOMMU(b, cap, CAP_MAMV_MASK); - MINIMAL_FEATURE_IOMMU(b, cap, CAP_NFR_MASK); - MINIMAL_FEATURE_IOMMU(b, cap, CAP_SLLPS_MASK); - MINIMAL_FEATURE_IOMMU(b, cap, CAP_FRO_MASK); - MINIMAL_FEATURE_IOMMU(b, cap, CAP_MGAW_MASK); - MINIMAL_FEATURE_IOMMU(b, cap, CAP_SAGAW_MASK); - MINIMAL_FEATURE_IOMMU(b, cap, CAP_NDOMS_MASK); - MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_PSS_MASK); - MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_MHMV_MASK); - MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_IRO_MASK); - - CHECK_FEATURE_MISMATCH(a, b, cap, fl5lp_support, CAP_FL5LP_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, fl1gp_support, CAP_FL1GP_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, read_drain, CAP_RD_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, write_drain, CAP_WD_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, pgsel_inv, CAP_PSI_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, zlr, CAP_ZLR_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, caching_mode, CAP_CM_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, phmr, CAP_PHMR_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, plmr, CAP_PLMR_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, rwbf, CAP_RWBF_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, afl, CAP_AFL_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, rps, ECAP_RPS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, smpwc, ECAP_SMPWC_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, flts, ECAP_FLTS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, slts, ECAP_SLTS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, nwfs, ECAP_NWFS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, slads, ECAP_SLADS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, smts, ECAP_SMTS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, pds, ECAP_PDS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, dit, ECAP_DIT_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, pasid, ECAP_PASID_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, eafs, ECAP_EAFS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, srs, ECAP_SRS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, ers, ECAP_ERS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, prs, ECAP_PRS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, nest, ECAP_NEST_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, mts, ECAP_MTS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, sc_support, ECAP_SC_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, pass_through, ECAP_PT_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, dev_iotlb_support, ECAP_DT_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, qis, ECAP_QI_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, coherent, ECAP_C_MASK); -} - -static int cap_audit_hotplug(struct intel_iommu 
*iommu, enum cap_audit_type type) -{ - bool mismatch = false; - u64 old_cap = intel_iommu_cap_sanity; - u64 old_ecap = intel_iommu_ecap_sanity; - - if (type == CAP_AUDIT_HOTPLUG_IRQR) { - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, pi_support, CAP_PI_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, eim_support, ECAP_EIM_MASK); - goto out; - } - - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, fl5lp_support, CAP_FL5LP_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, fl1gp_support, CAP_FL1GP_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, read_drain, CAP_RD_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, write_drain, CAP_WD_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, pgsel_inv, CAP_PSI_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, zlr, CAP_ZLR_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, caching_mode, CAP_CM_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, phmr, CAP_PHMR_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, plmr, CAP_PLMR_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, rwbf, CAP_RWBF_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, afl, CAP_AFL_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, rps, ECAP_RPS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smpwc, ECAP_SMPWC_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, flts, ECAP_FLTS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slts, ECAP_SLTS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nwfs, ECAP_NWFS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slads, ECAP_SLADS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smts, ECAP_SMTS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pds, ECAP_PDS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dit, ECAP_DIT_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pasid, ECAP_PASID_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, eafs, ECAP_EAFS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, srs, ECAP_SRS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, ers, ECAP_ERS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, prs, ECAP_PRS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nest, ECAP_NEST_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, mts, ECAP_MTS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, sc_support, ECAP_SC_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pass_through, ECAP_PT_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dev_iotlb_support, ECAP_DT_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, qis, ECAP_QI_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, coherent, ECAP_C_MASK); - - /* Abort hot plug if the hot plug iommu feature is smaller than global */ - MINIMAL_FEATURE_HOTPLUG(iommu, cap, max_amask_val, CAP_MAMV_MASK, mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, cap, num_fault_regs, CAP_NFR_MASK, mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, cap, super_page_val, CAP_SLLPS_MASK, mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, cap, fault_reg_offset, CAP_FRO_MASK, mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, cap, mgaw, CAP_MGAW_MASK, mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, cap, sagaw, CAP_SAGAW_MASK, mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, cap, ndoms, CAP_NDOMS_MASK, mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, ecap, pss, ECAP_PSS_MASK, mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, ecap, max_handle_mask, ECAP_MHMV_MASK, mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, ecap, iotlb_offset, ECAP_IRO_MASK, mismatch); - -out: - if (mismatch) { - intel_iommu_cap_sanity = old_cap; - intel_iommu_ecap_sanity = old_ecap; - return -EFAULT; - } - - 
return 0; -} - -static int cap_audit_static(struct intel_iommu *iommu, enum cap_audit_type type) -{ - struct dmar_drhd_unit *d; - struct intel_iommu *i; - int rc = 0; - - rcu_read_lock(); - if (list_empty(&dmar_drhd_units)) - goto out; - - for_each_active_iommu(i, d) { - if (!iommu) { - intel_iommu_ecap_sanity = i->ecap; - intel_iommu_cap_sanity = i->cap; - iommu = i; - continue; - } - - if (type == CAP_AUDIT_STATIC_DMAR) - check_dmar_capabilities(iommu, i); - else - check_irq_capabilities(iommu, i); - } - - /* - * If the system is sane to support scalable mode, either SL or FL - * should be sane. - */ - if (intel_cap_smts_sanity() && - !intel_cap_flts_sanity() && !intel_cap_slts_sanity()) - rc = -EOPNOTSUPP; - -out: - rcu_read_unlock(); - return rc; -} - -int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu) -{ - switch (type) { - case CAP_AUDIT_STATIC_DMAR: - case CAP_AUDIT_STATIC_IRQR: - return cap_audit_static(iommu, type); - case CAP_AUDIT_HOTPLUG_DMAR: - case CAP_AUDIT_HOTPLUG_IRQR: - return cap_audit_hotplug(iommu, type); - default: - break; - } - - return -EFAULT; -} - -bool intel_cap_smts_sanity(void) -{ - return ecap_smts(intel_iommu_ecap_sanity); -} - -bool intel_cap_pasid_sanity(void) -{ - return ecap_pasid(intel_iommu_ecap_sanity); -} - -bool intel_cap_nest_sanity(void) -{ - return ecap_nest(intel_iommu_ecap_sanity); -} - -bool intel_cap_flts_sanity(void) -{ - return ecap_flts(intel_iommu_ecap_sanity); -} - -bool intel_cap_slts_sanity(void) -{ - return ecap_slts(intel_iommu_ecap_sanity); -} diff --git a/drivers/iommu/intel/cap_audit.h b/drivers/iommu/intel/cap_audit.h deleted file mode 100644 index d07b75938961..000000000000 --- a/drivers/iommu/intel/cap_audit.h +++ /dev/null @@ -1,131 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * cap_audit.h - audit iommu capabilities header - * - * Copyright (C) 2021 Intel Corporation - * - * Author: Kyung Min Park <kyung.min.park@intel.com> - */ - -/* - * Capability Register Mask - */ -#define CAP_FL5LP_MASK BIT_ULL(60) -#define CAP_PI_MASK BIT_ULL(59) -#define CAP_FL1GP_MASK BIT_ULL(56) -#define CAP_RD_MASK BIT_ULL(55) -#define CAP_WD_MASK BIT_ULL(54) -#define CAP_MAMV_MASK GENMASK_ULL(53, 48) -#define CAP_NFR_MASK GENMASK_ULL(47, 40) -#define CAP_PSI_MASK BIT_ULL(39) -#define CAP_SLLPS_MASK GENMASK_ULL(37, 34) -#define CAP_FRO_MASK GENMASK_ULL(33, 24) -#define CAP_ZLR_MASK BIT_ULL(22) -#define CAP_MGAW_MASK GENMASK_ULL(21, 16) -#define CAP_SAGAW_MASK GENMASK_ULL(12, 8) -#define CAP_CM_MASK BIT_ULL(7) -#define CAP_PHMR_MASK BIT_ULL(6) -#define CAP_PLMR_MASK BIT_ULL(5) -#define CAP_RWBF_MASK BIT_ULL(4) -#define CAP_AFL_MASK BIT_ULL(3) -#define CAP_NDOMS_MASK GENMASK_ULL(2, 0) - -/* - * Extended Capability Register Mask - */ -#define ECAP_RPS_MASK BIT_ULL(49) -#define ECAP_SMPWC_MASK BIT_ULL(48) -#define ECAP_FLTS_MASK BIT_ULL(47) -#define ECAP_SLTS_MASK BIT_ULL(46) -#define ECAP_SLADS_MASK BIT_ULL(45) -#define ECAP_VCS_MASK BIT_ULL(44) -#define ECAP_SMTS_MASK BIT_ULL(43) -#define ECAP_PDS_MASK BIT_ULL(42) -#define ECAP_DIT_MASK BIT_ULL(41) -#define ECAP_PASID_MASK BIT_ULL(40) -#define ECAP_PSS_MASK GENMASK_ULL(39, 35) -#define ECAP_EAFS_MASK BIT_ULL(34) -#define ECAP_NWFS_MASK BIT_ULL(33) -#define ECAP_SRS_MASK BIT_ULL(31) -#define ECAP_ERS_MASK BIT_ULL(30) -#define ECAP_PRS_MASK BIT_ULL(29) -#define ECAP_NEST_MASK BIT_ULL(26) -#define ECAP_MTS_MASK BIT_ULL(25) -#define ECAP_MHMV_MASK GENMASK_ULL(23, 20) -#define ECAP_IRO_MASK GENMASK_ULL(17, 8) -#define ECAP_SC_MASK BIT_ULL(7) -#define ECAP_PT_MASK BIT_ULL(6) 
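Everything being deleted in cap_audit reduced to one idea: keep global cap/ecap "sanity" values, AND away any feature bit that some unit lacks, and log each downgrade. Compressed into a standalone sketch; the two mask values match the removed header, but the function and flow are this sketch's own framing, not the kernel code:

```c
#include <stdint.h>
#include <stdio.h>

#define ECAP_PASID (1ULL << 40)
#define ECAP_NEST  (1ULL << 26)

static uint64_t ecap_sanity = ~0ULL;

static void audit_one(uint64_t ecap)
{
	uint64_t lost = ecap_sanity & ~ecap;

	if (lost & ECAP_PASID)
		printf("IOMMU feature pasid inconsistent\n");
	if (lost & ECAP_NEST)
		printf("IOMMU feature nest inconsistent\n");

	/* Keep only capabilities every audited unit advertises. */
	ecap_sanity &= ecap;
}

int main(void)
{
	audit_one(ECAP_PASID | ECAP_NEST);	/* first unit sets baseline */
	audit_one(ECAP_PASID);			/* second lacks nesting */

	printf("nest usable: %s\n",
	       (ecap_sanity & ECAP_NEST) ? "yes" : "no");
	return 0;
}
```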
-#define ECAP_EIM_MASK BIT_ULL(4) -#define ECAP_DT_MASK BIT_ULL(2) -#define ECAP_QI_MASK BIT_ULL(1) -#define ECAP_C_MASK BIT_ULL(0) - -/* - * u64 intel_iommu_cap_sanity, intel_iommu_ecap_sanity will be adjusted as each - * IOMMU gets audited. - */ -#define DO_CHECK_FEATURE_MISMATCH(a, b, cap, feature, MASK) \ -do { \ - if (cap##_##feature(a) != cap##_##feature(b)) { \ - intel_iommu_##cap##_sanity &= ~(MASK); \ - pr_info("IOMMU feature %s inconsistent", #feature); \ - } \ -} while (0) - -#define CHECK_FEATURE_MISMATCH(a, b, cap, feature, MASK) \ - DO_CHECK_FEATURE_MISMATCH((a)->cap, (b)->cap, cap, feature, MASK) - -#define CHECK_FEATURE_MISMATCH_HOTPLUG(b, cap, feature, MASK) \ -do { \ - if (cap##_##feature(intel_iommu_##cap##_sanity)) \ - DO_CHECK_FEATURE_MISMATCH(intel_iommu_##cap##_sanity, \ - (b)->cap, cap, feature, MASK); \ -} while (0) - -#define MINIMAL_FEATURE_IOMMU(iommu, cap, MASK) \ -do { \ - u64 min_feature = intel_iommu_##cap##_sanity & (MASK); \ - min_feature = min_t(u64, min_feature, (iommu)->cap & (MASK)); \ - intel_iommu_##cap##_sanity = (intel_iommu_##cap##_sanity & ~(MASK)) | \ - min_feature; \ -} while (0) - -#define MINIMAL_FEATURE_HOTPLUG(iommu, cap, feature, MASK, mismatch) \ -do { \ - if ((intel_iommu_##cap##_sanity & (MASK)) > \ - (cap##_##feature((iommu)->cap))) \ - mismatch = true; \ - else \ - (iommu)->cap = ((iommu)->cap & ~(MASK)) | \ - (intel_iommu_##cap##_sanity & (MASK)); \ -} while (0) - -enum cap_audit_type { - CAP_AUDIT_STATIC_DMAR, - CAP_AUDIT_STATIC_IRQR, - CAP_AUDIT_HOTPLUG_DMAR, - CAP_AUDIT_HOTPLUG_IRQR, -}; - -bool intel_cap_smts_sanity(void); -bool intel_cap_pasid_sanity(void); -bool intel_cap_nest_sanity(void); -bool intel_cap_flts_sanity(void); -bool intel_cap_slts_sanity(void); - -static inline bool scalable_mode_support(void) -{ - return (intel_iommu_sm && intel_cap_smts_sanity()); -} - -static inline bool pasid_mode_support(void) -{ - return scalable_mode_support() && intel_cap_pasid_sanity(); -} - -static inline bool nested_mode_support(void) -{ - return scalable_mode_support() && intel_cap_nest_sanity(); -} - -int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 79e0da9eb626..85173fe0a18a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -29,7 +29,6 @@ #include "../irq_remapping.h" #include "../iommu-pages.h" #include "pasid.h" -#include "cap_audit.h" #include "perfmon.h" #define ROOT_SIZE VTD_PAGE_SIZE @@ -2118,10 +2117,6 @@ static int __init init_dmars(void) struct intel_iommu *iommu; int ret; - ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL); - if (ret) - goto free_iommu; - for_each_iommu(iommu, drhd) { if (drhd->ignored) { iommu_disable_translation(iommu); @@ -2617,10 +2612,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) struct intel_iommu *iommu = dmaru->iommu; int ret; - ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu); - if (ret) - goto out; - /* * Disable translation if already enabled prior to OS handover. 
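A hunk further down declares blocking_domain_set_dev_pasid() ahead of the const blocking_domain ops table that references it, with the definition following later in the file. That forward-declaration idiom in miniature, all names invented for the sketch:

```c
#include <stdio.h>

struct dom_ops {
	int (*set_dev_pasid)(int pasid);
};

/* Forward declaration lets the const initializer below name the
 * function before its body appears. */
static int blocking_set_dev_pasid(int pasid);

static const struct dom_ops blocking_ops = {
	.set_dev_pasid = blocking_set_dev_pasid,
};

static int blocking_set_dev_pasid(int pasid)
{
	printf("tearing down pasid %d\n", pasid);
	return 0;
}

int main(void)
{
	return blocking_ops.set_dev_pasid(42);
}
```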
*/ @@ -3250,10 +3241,15 @@ static int blocking_domain_attach_dev(struct iommu_domain *domain, return 0; } +static int blocking_domain_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid, + struct iommu_domain *old); + static struct iommu_domain blocking_domain = { .type = IOMMU_DOMAIN_BLOCKED, .ops = &(const struct iommu_domain_ops) { .attach_dev = blocking_domain_attach_dev, + .set_dev_pasid = blocking_domain_set_dev_pasid, } }; @@ -4090,22 +4086,26 @@ void domain_remove_dev_pasid(struct iommu_domain *domain, break; } } - WARN_ON_ONCE(!dev_pasid); spin_unlock_irqrestore(&dmar_domain->lock, flags); cache_tag_unassign_domain(dmar_domain, dev, pasid); domain_detach_iommu(dmar_domain, iommu); - intel_iommu_debugfs_remove_dev_pasid(dev_pasid); - kfree(dev_pasid); + if (!WARN_ON_ONCE(!dev_pasid)) { + intel_iommu_debugfs_remove_dev_pasid(dev_pasid); + kfree(dev_pasid); + } } -static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, - struct iommu_domain *domain) +static int blocking_domain_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid, + struct iommu_domain *old) { struct device_domain_info *info = dev_iommu_priv_get(dev); intel_pasid_tear_down_entry(info->iommu, dev, pasid, false); - domain_remove_dev_pasid(domain, dev, pasid); + domain_remove_dev_pasid(old, dev, pasid); + + return 0; } struct dev_pasid_info * @@ -4445,21 +4445,6 @@ static struct iommu_domain identity_domain = { }, }; -static struct iommu_domain *intel_iommu_domain_alloc_paging(struct device *dev) -{ - struct device_domain_info *info = dev_iommu_priv_get(dev); - struct intel_iommu *iommu = info->iommu; - struct dmar_domain *dmar_domain; - bool first_stage; - - first_stage = first_level_by_default(iommu); - dmar_domain = paging_domain_alloc(dev, first_stage); - if (IS_ERR(dmar_domain)) - return ERR_CAST(dmar_domain); - - return &dmar_domain->domain; -} - const struct iommu_ops intel_iommu_ops = { .blocked_domain = &blocking_domain, .release_domain = &blocking_domain, @@ -4468,7 +4453,6 @@ const struct iommu_ops intel_iommu_ops = { .hw_info = intel_iommu_hw_info, .domain_alloc_paging_flags = intel_iommu_domain_alloc_paging_flags, .domain_alloc_sva = intel_svm_domain_alloc, - .domain_alloc_paging = intel_iommu_domain_alloc_paging, .domain_alloc_nested = intel_iommu_domain_alloc_nested, .probe_device = intel_iommu_probe_device, .release_device = intel_iommu_release_device, @@ -4478,7 +4462,6 @@ const struct iommu_ops intel_iommu_ops = { .dev_disable_feat = intel_iommu_dev_disable_feat, .is_attach_deferred = intel_iommu_is_attach_deferred, .def_domain_type = device_def_domain_type, - .remove_dev_pasid = intel_iommu_remove_dev_pasid, .pgsize_bitmap = SZ_4K, .page_response = intel_iommu_page_response, .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index f5402df72a9b..ad795c772f21 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -24,7 +24,6 @@ #include "iommu.h" #include "../irq_remapping.h" #include "../iommu-pages.h" -#include "cap_audit.h" enum irq_mode { IRQ_REMAPPING, @@ -727,9 +726,6 @@ static int __init intel_prepare_irq_remapping(void) if (dmar_table_init() < 0) return -ENODEV; - if (intel_cap_audit(CAP_AUDIT_STATIC_IRQR, NULL)) - return -ENODEV; - if (!dmar_ir_support()) return -ENODEV; @@ -1533,10 +1529,6 @@ static int dmar_ir_add(struct dmar_drhd_unit *dmaru, struct intel_iommu *iommu) int ret; int eim = 
x2apic_enabled(); - ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_IRQR, iommu); - if (ret) - return ret; - if (eim && !ecap_eim_support(iommu->ecap)) { pr_info("DRHD %Lx: EIM not supported by DRHD, ecap %Lx\n", iommu->reg_phys, iommu->ecap); diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 5b7d85f1e143..fb59a7d35958 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -244,11 +244,31 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, spin_lock(&iommu->lock); pte = intel_pasid_get_entry(dev, pasid); - if (WARN_ON(!pte) || !pasid_pte_is_present(pte)) { + if (WARN_ON(!pte)) { spin_unlock(&iommu->lock); return; } + if (!pasid_pte_is_present(pte)) { + if (!pasid_pte_is_fault_disabled(pte)) { + WARN_ON(READ_ONCE(pte->val[0]) != 0); + spin_unlock(&iommu->lock); + return; + } + + /* + * When a PASID is used for SVA by a device, it's possible + * that the pasid entry is non-present with the Fault + * Processing Disabled bit set. Clear the pasid entry and + * drain the PRQ for the PASID before return. + */ + pasid_clear_entry(pte); + spin_unlock(&iommu->lock); + intel_iommu_drain_pasid_prq(dev, pasid); + + return; + } + did = pasid_get_domain_id(pte); pgtt = pasid_pte_get_pgtt(pte); intel_pasid_clear_entry(dev, pasid, fault_ignore); diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 082f4fe20216..668d8ece6b14 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -73,6 +73,12 @@ static inline bool pasid_pte_is_present(struct pasid_entry *pte) return READ_ONCE(pte->val[0]) & PASID_PTE_PRESENT; } +/* Get FPD(Fault Processing Disable) bit of a PASID table entry */ +static inline bool pasid_pte_is_fault_disabled(struct pasid_entry *pte) +{ + return READ_ONCE(pte->val[0]) & PASID_PTE_FPD; +} + /* Get PGTT field of a PASID table entry */ static inline u16 pasid_pte_get_pgtt(struct pasid_entry *pte) { diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 6b9bb58a414f..7632c80edea6 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -223,6 +223,34 @@ static inline int arm_lpae_max_entries(int i, struct arm_lpae_io_pgtable *data) return ptes_per_table - (i & (ptes_per_table - 1)); } +/* + * Check if concatenated PGDs are mandatory according to Arm DDI0487 (K.a) + * 1) R_DXBSH: For 16KB, and 48-bit input size, use level 1 instead of 0. 
+ * 2) R_SRKBC: After de-ciphering the table for PA size and valid initial lookup + * a) 40 bits PA size with 4K: use level 1 instead of level 0 (2 tables for ias = oas) + * b) 40 bits PA size with 16K: use level 2 instead of level 1 (16 tables for ias = oas) + * c) 42 bits PA size with 4K: use level 1 instead of level 0 (8 tables for ias = oas) + * d) 48 bits PA size with 16K: use level 1 instead of level 0 (2 tables for ias = oas) + */ +static inline bool arm_lpae_concat_mandatory(struct io_pgtable_cfg *cfg, + struct arm_lpae_io_pgtable *data) +{ + unsigned int ias = cfg->ias; + unsigned int oas = cfg->oas; + + /* Covers 1 and 2.d */ + if ((ARM_LPAE_GRANULE(data) == SZ_16K) && (data->start_level == 0)) + return (oas == 48) || (ias == 48); + + /* Covers 2.a and 2.c */ + if ((ARM_LPAE_GRANULE(data) == SZ_4K) && (data->start_level == 0)) + return (oas == 40) || (oas == 42); + + /* Case 2.b */ + return (ARM_LPAE_GRANULE(data) == SZ_16K) && + (data->start_level == 1) && (oas == 40); +} + static bool selftest_running = false; static dma_addr_t __arm_lpae_dma_addr(void *pages) @@ -676,85 +704,107 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov data->start_level, ptep); } +struct io_pgtable_walk_data { + struct io_pgtable *iop; + void *data; + int (*visit)(struct io_pgtable_walk_data *walk_data, int lvl, + arm_lpae_iopte *ptep, size_t size); + unsigned long flags; + u64 addr; + const u64 end; +}; + +static int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, + int lvl); + +struct iova_to_phys_data { + arm_lpae_iopte pte; + int lvl; +}; + +static int visit_iova_to_phys(struct io_pgtable_walk_data *walk_data, int lvl, + arm_lpae_iopte *ptep, size_t size) +{ + struct iova_to_phys_data *data = walk_data->data; + data->pte = *ptep; + data->lvl = lvl; + return 0; +} + static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) { struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); - arm_lpae_iopte pte, *ptep = data->pgd; - int lvl = data->start_level; - - do { - /* Valid IOPTE pointer? */ - if (!ptep) - return 0; - - /* Grab the IOPTE we're interested in */ - ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); - pte = READ_ONCE(*ptep); - - /* Valid entry? */ - if (!pte) - return 0; + struct iova_to_phys_data d; + struct io_pgtable_walk_data walk_data = { + .data = &d, + .visit = visit_iova_to_phys, + .addr = iova, + .end = iova + 1, + }; + int ret; - /* Leaf entry? 
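From this point the patch folds the old special-purpose walks, iova_to_phys and dirty tracking, into a single __arm_lpae_iopte_walk() that drives a visit callback carried in io_pgtable_walk_data. The same shape on a toy two-level table, fully standalone and with invented types:

```c
#include <stdio.h>

struct walk_data {
	void *data;
	int (*visit)(struct walk_data *wd, int lvl, int pte);
};

/* Toy two-level "page table": level-0 entries >= 100 point at level 1. */
static int level1[4] = { 7, 8, 9, 10 };

static int walk(struct walk_data *wd, int lvl, int *table, int n)
{
	for (int i = 0; i < n; i++) {
		int ret = wd->visit(wd, lvl, table[i]);

		if (ret)
			return ret;
		if (lvl == 0 && table[i] >= 100) {	/* a "table" entry */
			ret = walk(wd, 1, level1, 4);
			if (ret)
				return ret;
		}
	}
	return 0;
}

/* One possible visitor: count leaves, much as visit_dirty or
 * visit_iova_to_phys record dirty bits or a translation. */
static int visit_count_leaves(struct walk_data *wd, int lvl, int pte)
{
	(void)lvl;
	if (pte < 100)
		++*(int *)wd->data;
	return 0;
}

int main(void)
{
	int level0[2] = { 5, 100 };
	int leaves = 0;
	struct walk_data wd = { .data = &leaves, .visit = visit_count_leaves };

	walk(&wd, 0, level0, 2);
	printf("leaves: %d\n", leaves);	/* 5 */
	return 0;
}
```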
*/ - if (iopte_leaf(pte, lvl, data->iop.fmt)) - goto found_translation; + ret = __arm_lpae_iopte_walk(data, &walk_data, data->pgd, data->start_level); + if (ret) + return 0; - /* Take it to the next level */ - ptep = iopte_deref(pte, data); - } while (++lvl < ARM_LPAE_MAX_LEVELS); + iova &= (ARM_LPAE_BLOCK_SIZE(d.lvl, data) - 1); + return iopte_to_paddr(d.pte, data) | iova; +} - /* Ran out of page tables to walk */ +static int visit_pgtable_walk(struct io_pgtable_walk_data *walk_data, int lvl, + arm_lpae_iopte *ptep, size_t size) +{ + struct arm_lpae_io_pgtable_walk_data *data = walk_data->data; + data->ptes[lvl] = *ptep; return 0; - -found_translation: - iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1); - return iopte_to_paddr(pte, data) | iova; } -struct io_pgtable_walk_data { - struct iommu_dirty_bitmap *dirty; - unsigned long flags; - u64 addr; - const u64 end; -}; +static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops, unsigned long iova, + void *wd) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_walk_data walk_data = { + .data = wd, + .visit = visit_pgtable_walk, + .addr = iova, + .end = iova + 1, + }; -static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, - struct io_pgtable_walk_data *walk_data, - arm_lpae_iopte *ptep, - int lvl); + return __arm_lpae_iopte_walk(data, &walk_data, data->pgd, data->start_level); +} -static int io_pgtable_visit_dirty(struct arm_lpae_io_pgtable *data, - struct io_pgtable_walk_data *walk_data, - arm_lpae_iopte *ptep, int lvl) +static int io_pgtable_visit(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, int lvl) { struct io_pgtable *iop = &data->iop; arm_lpae_iopte pte = READ_ONCE(*ptep); - if (iopte_leaf(pte, lvl, iop->fmt)) { - size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data); + size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data); + int ret = walk_data->visit(walk_data, lvl, ptep, size); + if (ret) + return ret; - if (iopte_writeable_dirty(pte)) { - iommu_dirty_bitmap_record(walk_data->dirty, - walk_data->addr, size); - if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR)) - iopte_set_writeable_clean(ptep); - } + if (iopte_leaf(pte, lvl, iop->fmt)) { walk_data->addr += size; return 0; } - if (WARN_ON(!iopte_table(pte, lvl))) + if (!iopte_table(pte, lvl)) { return -EINVAL; + } ptep = iopte_deref(pte, data); - return __arm_lpae_iopte_walk_dirty(data, walk_data, ptep, lvl + 1); + return __arm_lpae_iopte_walk(data, walk_data, ptep, lvl + 1); } -static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, - struct io_pgtable_walk_data *walk_data, - arm_lpae_iopte *ptep, - int lvl) +static int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, + int lvl) { u32 idx; int max_entries, ret; @@ -769,7 +819,7 @@ static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, for (idx = ARM_LPAE_LVL_IDX(walk_data->addr, lvl, data); (idx < max_entries) && (walk_data->addr < walk_data->end); ++idx) { - ret = io_pgtable_visit_dirty(data, walk_data, ptep + idx, lvl); + ret = io_pgtable_visit(data, walk_data, ptep + idx, lvl); if (ret) return ret; } @@ -777,6 +827,23 @@ static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, return 0; } +static int visit_dirty(struct io_pgtable_walk_data *walk_data, int lvl, + arm_lpae_iopte *ptep, size_t size) +{ + struct iommu_dirty_bitmap *dirty = walk_data->data; + + if (!iopte_leaf(*ptep, lvl, walk_data->iop->fmt)) + return 0; + + 
if (iopte_writeable_dirty(*ptep)) { + iommu_dirty_bitmap_record(dirty, walk_data->addr, size); + if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR)) + iopte_set_writeable_clean(ptep); + } + + return 0; +} + static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops, unsigned long iova, size_t size, unsigned long flags, @@ -785,7 +852,9 @@ static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops, struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); struct io_pgtable_cfg *cfg = &data->iop.cfg; struct io_pgtable_walk_data walk_data = { - .dirty = dirty, + .iop = &data->iop, + .data = dirty, + .visit = visit_dirty, .flags = flags, .addr = iova, .end = iova + size, @@ -800,7 +869,7 @@ static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops, if (data->iop.fmt != ARM_64_LPAE_S1) return -EINVAL; - return __arm_lpae_iopte_walk_dirty(data, &walk_data, ptep, lvl); + return __arm_lpae_iopte_walk(data, &walk_data, ptep, lvl); } static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) @@ -882,6 +951,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) .unmap_pages = arm_lpae_unmap_pages, .iova_to_phys = arm_lpae_iova_to_phys, .read_and_clear_dirty = arm_lpae_read_and_clear_dirty, + .pgtable_walk = arm_lpae_pgtable_walk, }; return data; @@ -1006,18 +1076,12 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) if (!data) return NULL; - /* - * Concatenate PGDs at level 1 if possible in order to reduce - * the depth of the stage-2 walk. - */ - if (data->start_level == 0) { - unsigned long pgd_pages; - - pgd_pages = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte); - if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) { - data->pgd_bits += data->bits_per_level; - data->start_level++; - } + if (arm_lpae_concat_mandatory(cfg, data)) { + if (WARN_ON((ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte)) > + ARM_LPAE_S2_MAX_CONCAT_PAGES)) + return NULL; + data->pgd_bits += data->bits_per_level; + data->start_level++; } /* VTCR */ @@ -1364,15 +1428,14 @@ static int __init arm_lpae_do_selftests(void) SZ_64K | SZ_512M, }; - static const unsigned int ias[] __initconst = { + static const unsigned int address_size[] __initconst = { 32, 36, 40, 42, 44, 48, }; - int i, j, pass = 0, fail = 0; + int i, j, k, pass = 0, fail = 0; struct device dev; struct io_pgtable_cfg cfg = { .tlb = &dummy_tlb_ops, - .oas = 48, .coherent_walk = true, .iommu_dev = &dev, }; @@ -1381,15 +1444,19 @@ static int __init arm_lpae_do_selftests(void) set_dev_node(&dev, NUMA_NO_NODE); for (i = 0; i < ARRAY_SIZE(pgsize); ++i) { - for (j = 0; j < ARRAY_SIZE(ias); ++j) { - cfg.pgsize_bitmap = pgsize[i]; - cfg.ias = ias[j]; - pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u\n", - pgsize[i], ias[j]); - if (arm_lpae_run_tests(&cfg)) - fail++; - else - pass++; + for (j = 0; j < ARRAY_SIZE(address_size); ++j) { + /* Don't use ias > oas as it is not valid for stage-2. 
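arm_lpae_concat_mandatory(), quoted in full above, encodes the Arm DDI0487 concatenation rules as three guarded returns, and the reworked stage-2 allocator below relies on it instead of concatenating opportunistically. The rules restated as a standalone predicate you can poke at; granule sizes are in bytes and all names are local to the sketch:

```c
#include <stdbool.h>
#include <stdio.h>

#define SZ_4K  0x1000u
#define SZ_16K 0x4000u

static bool concat_mandatory(unsigned granule, int start_level,
			     unsigned ias, unsigned oas)
{
	/* Rules 1 and 2.d: 16K granule starting at level 0. */
	if (granule == SZ_16K && start_level == 0)
		return oas == 48 || ias == 48;

	/* Rules 2.a and 2.c: 4K granule starting at level 0. */
	if (granule == SZ_4K && start_level == 0)
		return oas == 40 || oas == 42;

	/* Rule 2.b: 16K granule, level 1, 40-bit PA. */
	return granule == SZ_16K && start_level == 1 && oas == 40;
}

int main(void)
{
	printf("%d\n", concat_mandatory(SZ_4K, 0, 40, 40));	/* 1 */
	printf("%d\n", concat_mandatory(SZ_16K, 1, 40, 40));	/* 1 */
	printf("%d\n", concat_mandatory(SZ_4K, 1, 36, 36));	/* 0 */
	return 0;
}
```

The selftest rework that follows, sweeping every ias <= oas pair, is what exercises these combinations instead of the old fixed oas = 48.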
*/ + for (k = 0; k <= j; ++k) { + cfg.pgsize_bitmap = pgsize[i]; + cfg.ias = address_size[k]; + cfg.oas = address_size[j]; + pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u OAS %u\n", + pgsize[i], cfg.ias, cfg.oas); + if (arm_lpae_run_tests(&cfg)) + fail++; + else + pass++; + } } } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 599030e1e890..870c3cdbd0f6 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2819,7 +2819,7 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); if (!ops) - return -EPROBE_DEFER; + return driver_deferred_probe_check_state(dev); if (fwspec) return ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL; @@ -3312,6 +3312,16 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); +static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, + struct iommu_domain *domain) +{ + const struct iommu_ops *ops = dev_iommu_ops(dev); + struct iommu_domain *blocked_domain = ops->blocked_domain; + + WARN_ON(blocked_domain->ops->set_dev_pasid(blocked_domain, + dev, pasid, domain)); +} + static int __iommu_set_group_pasid(struct iommu_domain *domain, struct iommu_group *group, ioasid_t pasid) { @@ -3330,11 +3340,9 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain, err_revert: last_gdev = device; for_each_group_device(group, device) { - const struct iommu_ops *ops = dev_iommu_ops(device->dev); - if (device == last_gdev) break; - ops->remove_dev_pasid(device->dev, pasid, domain); + iommu_remove_dev_pasid(device->dev, pasid, domain); } return ret; } @@ -3344,12 +3352,9 @@ static void __iommu_remove_group_pasid(struct iommu_group *group, struct iommu_domain *domain) { struct group_device *device; - const struct iommu_ops *ops; - for_each_group_device(group, device) { - ops = dev_iommu_ops(device->dev); - ops->remove_dev_pasid(device->dev, pasid, domain); - } + for_each_group_device(group, device) + iommu_remove_dev_pasid(device->dev, pasid, domain); } /* @@ -3368,16 +3373,20 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, /* Caller must be a probed driver on dev */ struct iommu_group *group = dev->iommu_group; struct group_device *device; + const struct iommu_ops *ops; int ret; - if (!domain->ops->set_dev_pasid) - return -EOPNOTSUPP; - if (!group) return -ENODEV; - if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner || - pasid == IOMMU_NO_PASID) + ops = dev_iommu_ops(dev); + + if (!domain->ops->set_dev_pasid || + !ops->blocked_domain || + !ops->blocked_domain->ops->set_dev_pasid) + return -EOPNOTSUPP; + + if (ops != domain->owner || pasid == IOMMU_NO_PASID) return -EINVAL; mutex_lock(&group->mutex); diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index ce40f0a419ea..2769e4544038 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -725,47 +725,32 @@ static int msm_iommu_probe(struct platform_device *pdev) iommu->dev = &pdev->dev; INIT_LIST_HEAD(&iommu->ctx_list); - iommu->pclk = devm_clk_get(iommu->dev, "smmu_pclk"); + iommu->pclk = devm_clk_get_prepared(iommu->dev, "smmu_pclk"); if (IS_ERR(iommu->pclk)) return dev_err_probe(iommu->dev, PTR_ERR(iommu->pclk), "could not get smmu_pclk\n"); - ret = clk_prepare(iommu->pclk); - if (ret) - return dev_err_probe(iommu->dev, ret, - "could not prepare smmu_pclk\n"); - - iommu->clk = devm_clk_get(iommu->dev, "iommu_clk"); - if (IS_ERR(iommu->clk)) { - clk_unprepare(iommu->pclk); + 
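msm_iommu_probe(), being converted here (the iommu_clk assignment continues just below), drops its manual clk_prepare()/clk_unprepare() pairing in favour of devm_clk_get_prepared(), which is why every error path can become a plain return and the remove callback disappears. A toy model of why managed teardown makes that safe, with an invented cleanup stack standing in for the devres machinery:

```c
#include <stdio.h>

typedef void (*cleanup_fn)(const char *);

static struct { cleanup_fn fn; const char *arg; } devres[8];
static int devres_top;

static void clk_unprepare_action(const char *name)
{
	printf("unprepare %s\n", name);
}

/* Sketch only: records an undo action the way a managed get+prepare
 * would, so probe never needs an explicit cleanup label. */
static int get_prepared_clk(const char *name)
{
	printf("get+prepare %s\n", name);
	devres[devres_top].fn = clk_unprepare_action;
	devres[devres_top].arg = name;
	devres_top++;
	return 0;
}

static void device_unbind(void)
{
	/* Managed teardown runs in reverse registration order. */
	while (devres_top--)
		devres[devres_top].fn(devres[devres_top].arg);
}

int main(void)
{
	get_prepared_clk("smmu_pclk");
	get_prepared_clk("iommu_clk");
	device_unbind();
	return 0;
}
```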
iommu->clk = devm_clk_get_prepared(iommu->dev, "iommu_clk"); + if (IS_ERR(iommu->clk)) return dev_err_probe(iommu->dev, PTR_ERR(iommu->clk), "could not get iommu_clk\n"); - } - - ret = clk_prepare(iommu->clk); - if (ret) { - clk_unprepare(iommu->pclk); - return dev_err_probe(iommu->dev, ret, "could not prepare iommu_clk\n"); - } r = platform_get_resource(pdev, IORESOURCE_MEM, 0); iommu->base = devm_ioremap_resource(iommu->dev, r); if (IS_ERR(iommu->base)) { ret = dev_err_probe(iommu->dev, PTR_ERR(iommu->base), "could not get iommu base\n"); - goto fail; + return ret; } ioaddr = r->start; iommu->irq = platform_get_irq(pdev, 0); - if (iommu->irq < 0) { - ret = -ENODEV; - goto fail; - } + if (iommu->irq < 0) + return -ENODEV; ret = of_property_read_u32(iommu->dev->of_node, "qcom,ncb", &val); if (ret) { dev_err(iommu->dev, "could not get ncb\n"); - goto fail; + return ret; } iommu->ncb = val; @@ -780,8 +765,7 @@ static int msm_iommu_probe(struct platform_device *pdev) if (!par) { pr_err("Invalid PAR value detected\n"); - ret = -ENODEV; - goto fail; + return -ENODEV; } ret = devm_request_threaded_irq(iommu->dev, iommu->irq, NULL, @@ -791,7 +775,7 @@ static int msm_iommu_probe(struct platform_device *pdev) iommu); if (ret) { pr_err("Request IRQ %d failed with ret=%d\n", iommu->irq, ret); - goto fail; + return ret; } list_add(&iommu->dev_node, &qcom_iommu_devices); @@ -800,23 +784,19 @@ static int msm_iommu_probe(struct platform_device *pdev) "msm-smmu.%pa", &ioaddr); if (ret) { pr_err("Could not add msm-smmu at %pa to sysfs\n", &ioaddr); - goto fail; + return ret; } ret = iommu_device_register(&iommu->iommu, &msm_iommu_ops, &pdev->dev); if (ret) { pr_err("Could not register msm-smmu at %pa\n", &ioaddr); - goto fail; + return ret; } pr_info("device mapped at %p, irq %d with %d ctx banks\n", iommu->base, iommu->irq, iommu->ncb); return ret; -fail: - clk_unprepare(iommu->clk); - clk_unprepare(iommu->pclk); - return ret; } static const struct of_device_id msm_iommu_dt_match[] = { @@ -824,20 +804,11 @@ static const struct of_device_id msm_iommu_dt_match[] = { {} }; -static void msm_iommu_remove(struct platform_device *pdev) -{ - struct msm_iommu_dev *iommu = platform_get_drvdata(pdev); - - clk_unprepare(iommu->clk); - clk_unprepare(iommu->pclk); -} - static struct platform_driver msm_iommu_driver = { .driver = { .name = "msm_iommu", .of_match_table = msm_iommu_dt_match, }, .probe = msm_iommu_probe, - .remove = msm_iommu_remove, }; builtin_platform_driver(msm_iommu_driver); diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index ab60901f8f92..034b0e670384 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -29,6 +29,7 @@ #include <linux/spinlock.h> #include <linux/soc/mediatek/infracfg.h> #include <linux/soc/mediatek/mtk_sip_svc.h> +#include <linux/string_choices.h> #include <asm/barrier.h> #include <soc/mediatek/smi.h> @@ -510,7 +511,7 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) bank->parent_dev, "fault type=0x%x iova=0x%llx pa=0x%llx master=0x%x(larb=%d port=%d) layer=%d %s\n", int_state, fault_iova, fault_pa, regval, fault_larb, fault_port, - layer, write ? "write" : "read"); + layer, str_write_read(write)); } /* Interrupt clear */ @@ -602,7 +603,7 @@ static int mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, larb_mmu->bank[portid] = upper_32_bits(region->iova_base); dev_dbg(dev, "%s iommu for larb(%s) port 0x%lx region %d rgn-bank %d.\n", - enable ? 
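The msm_iommu rework here hinges on devm_clk_get_prepared(): the clock comes back already prepared and devres unprepares it automatically on unbind, which is what lets every manual clk_unprepare(), the fail: label, and the whole msm_iommu_remove() callback go away. A minimal sketch of the pattern, using a hypothetical demo_probe() rather than the driver itself:

#include <linux/clk.h>
#include <linux/err.h>
#include <linux/platform_device.h>

static int demo_probe(struct platform_device *pdev)
{
	struct clk *clk;

	/*
	 * Returned already prepared; devres calls clk_unprepare() when
	 * the device unbinds, so no unwind label or remove() hook.
	 */
	clk = devm_clk_get_prepared(&pdev->dev, "iommu_clk");
	if (IS_ERR(clk))
		return dev_err_probe(&pdev->dev, PTR_ERR(clk),
				     "could not get iommu_clk\n");

	return 0;
}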
"enable" : "disable", dev_name(larb_mmu->dev), + str_enable_disable(enable), dev_name(larb_mmu->dev), portid_msk, regionid, upper_32_bits(region->iova_base)); if (enable) @@ -630,8 +631,8 @@ static int mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, } if (ret) dev_err(dev, "%s iommu(%s) inframaster 0x%lx fail(%d).\n", - enable ? "enable" : "disable", - dev_name(data->dev), portid_msk, ret); + str_enable_disable(enable), dev_name(data->dev), + portid_msk, ret); } return ret; } diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index b6de1ca00cef..a565b9e40f4a 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -25,6 +25,7 @@ #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/string_choices.h> #include <asm/barrier.h> #include <asm/dma-iommu.h> #include <dt-bindings/memory/mtk-memory-port.h> @@ -243,7 +244,7 @@ static void mtk_iommu_v1_config(struct mtk_iommu_v1_data *data, larb_mmu = &data->larb_imu[larbid]; dev_dbg(dev, "%s iommu port: %d\n", - enable ? "enable" : "disable", portid); + str_enable_disable(enable), portid); if (enable) larb_mmu->mmu |= MTK_SMI_MMU_EN(portid); diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index e7a6a1611d19..97987cd78da9 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -29,8 +29,6 @@ static int of_iommu_xlate(struct device *dev, return -ENODEV; ret = iommu_fwspec_init(dev, of_fwnode_handle(iommu_spec->np)); - if (ret == -EPROBE_DEFER) - return driver_deferred_probe_check_state(dev); if (ret) return ret; diff --git a/drivers/iommu/riscv/iommu-pci.c b/drivers/iommu/riscv/iommu-pci.c index c7a89143014c..d82d2b00904c 100644 --- a/drivers/iommu/riscv/iommu-pci.c +++ b/drivers/iommu/riscv/iommu-pci.c @@ -101,6 +101,13 @@ static void riscv_iommu_pci_remove(struct pci_dev *pdev) riscv_iommu_remove(iommu); } +static void riscv_iommu_pci_shutdown(struct pci_dev *pdev) +{ + struct riscv_iommu_device *iommu = dev_get_drvdata(&pdev->dev); + + riscv_iommu_disable(iommu); +} + static const struct pci_device_id riscv_iommu_pci_tbl[] = { {PCI_VDEVICE(REDHAT, PCI_DEVICE_ID_REDHAT_RISCV_IOMMU), 0}, {PCI_VDEVICE(RIVOS, PCI_DEVICE_ID_RIVOS_RISCV_IOMMU_GA), 0}, @@ -112,6 +119,7 @@ static struct pci_driver riscv_iommu_pci_driver = { .id_table = riscv_iommu_pci_tbl, .probe = riscv_iommu_pci_probe, .remove = riscv_iommu_pci_remove, + .shutdown = riscv_iommu_pci_shutdown, .driver = { .suppress_bind_attrs = true, }, diff --git a/drivers/iommu/riscv/iommu-platform.c b/drivers/iommu/riscv/iommu-platform.c index 382ba2841849..725e919b97ef 100644 --- a/drivers/iommu/riscv/iommu-platform.c +++ b/drivers/iommu/riscv/iommu-platform.c @@ -11,18 +11,43 @@ */ #include <linux/kernel.h> +#include <linux/msi.h> +#include <linux/of_irq.h> #include <linux/of_platform.h> #include <linux/platform_device.h> #include "iommu-bits.h" #include "iommu.h" +static void riscv_iommu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) +{ + struct device *dev = msi_desc_to_dev(desc); + struct riscv_iommu_device *iommu = dev_get_drvdata(dev); + u16 idx = desc->msi_index; + u64 addr; + + addr = ((u64)msg->address_hi << 32) | msg->address_lo; + + if (addr != (addr & RISCV_IOMMU_MSI_CFG_TBL_ADDR)) { + dev_err_once(dev, + "uh oh, the IOMMU can't send MSIs to 0x%llx, sending to 0x%llx instead\n", + addr, addr & RISCV_IOMMU_MSI_CFG_TBL_ADDR); + } + + addr &= RISCV_IOMMU_MSI_CFG_TBL_ADDR; + + riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_MSI_CFG_TBL_ADDR(idx), 
addr); + riscv_iommu_writel(iommu, RISCV_IOMMU_REG_MSI_CFG_TBL_DATA(idx), msg->data); + riscv_iommu_writel(iommu, RISCV_IOMMU_REG_MSI_CFG_TBL_CTRL(idx), 0); +} + static int riscv_iommu_platform_probe(struct platform_device *pdev) { + enum riscv_iommu_igs_settings igs; struct device *dev = &pdev->dev; struct riscv_iommu_device *iommu = NULL; struct resource *res = NULL; - int vec; + int vec, ret; iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL); if (!iommu) @@ -40,16 +65,6 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev) iommu->caps = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_CAPABILITIES); iommu->fctl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_FCTL); - /* For now we only support WSI */ - switch (FIELD_GET(RISCV_IOMMU_CAPABILITIES_IGS, iommu->caps)) { - case RISCV_IOMMU_CAPABILITIES_IGS_WSI: - case RISCV_IOMMU_CAPABILITIES_IGS_BOTH: - break; - default: - return dev_err_probe(dev, -ENODEV, - "unable to use wire-signaled interrupts\n"); - } - iommu->irqs_count = platform_irq_count(pdev); if (iommu->irqs_count <= 0) return dev_err_probe(dev, -ENODEV, @@ -57,13 +72,58 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev) if (iommu->irqs_count > RISCV_IOMMU_INTR_COUNT) iommu->irqs_count = RISCV_IOMMU_INTR_COUNT; - for (vec = 0; vec < iommu->irqs_count; vec++) - iommu->irqs[vec] = platform_get_irq(pdev, vec); + igs = FIELD_GET(RISCV_IOMMU_CAPABILITIES_IGS, iommu->caps); + switch (igs) { + case RISCV_IOMMU_CAPABILITIES_IGS_BOTH: + case RISCV_IOMMU_CAPABILITIES_IGS_MSI: + if (is_of_node(dev->fwnode)) + of_msi_configure(dev, to_of_node(dev->fwnode)); + + if (!dev_get_msi_domain(dev)) { + dev_warn(dev, "failed to find an MSI domain\n"); + goto msi_fail; + } + + ret = platform_device_msi_init_and_alloc_irqs(dev, iommu->irqs_count, + riscv_iommu_write_msi_msg); + if (ret) { + dev_warn(dev, "failed to allocate MSIs\n"); + goto msi_fail; + } + + for (vec = 0; vec < iommu->irqs_count; vec++) + iommu->irqs[vec] = msi_get_virq(dev, vec); + + /* Enable message-signaled interrupts, fctl.WSI */ + if (iommu->fctl & RISCV_IOMMU_FCTL_WSI) { + iommu->fctl ^= RISCV_IOMMU_FCTL_WSI; + riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, iommu->fctl); + } + + dev_info(dev, "using MSIs\n"); + break; + +msi_fail: + if (igs != RISCV_IOMMU_CAPABILITIES_IGS_BOTH) { + return dev_err_probe(dev, -ENODEV, + "unable to use wire-signaled interrupts\n"); + } - /* Enable wire-signaled interrupts, fctl.WSI */ - if (!(iommu->fctl & RISCV_IOMMU_FCTL_WSI)) { - iommu->fctl |= RISCV_IOMMU_FCTL_WSI; - riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, iommu->fctl); + fallthrough; + + case RISCV_IOMMU_CAPABILITIES_IGS_WSI: + for (vec = 0; vec < iommu->irqs_count; vec++) + iommu->irqs[vec] = platform_get_irq(pdev, vec); + + /* Enable wire-signaled interrupts, fctl.WSI */ + if (!(iommu->fctl & RISCV_IOMMU_FCTL_WSI)) { + iommu->fctl |= RISCV_IOMMU_FCTL_WSI; + riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, iommu->fctl); + } + dev_info(dev, "using wire-signaled interrupts\n"); + break; + default: + return dev_err_probe(dev, -ENODEV, "invalid IGS\n"); } return riscv_iommu_init(iommu); @@ -71,7 +131,18 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev) static void riscv_iommu_platform_remove(struct platform_device *pdev) { - riscv_iommu_remove(dev_get_drvdata(&pdev->dev)); + struct riscv_iommu_device *iommu = dev_get_drvdata(&pdev->dev); + bool msi = !(iommu->fctl & RISCV_IOMMU_FCTL_WSI); + + riscv_iommu_remove(iommu); + + if (msi) + platform_device_msi_free_irqs_all(&pdev->dev); +}; + 
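The RISC-V probe flow above relies on the platform-MSI core: platform_device_msi_init_and_alloc_irqs() allocates one MSI descriptor per vector and invokes the driver's write-message callback to program each doorbell, after which the Linux IRQ numbers are looked up by index with msi_get_virq(). A hedged sketch of that contract, with my_write_msg() and demo_setup_msis() as placeholder names:

#include <linux/device.h>
#include <linux/msi.h>

static void my_write_msg(struct msi_desc *desc, struct msi_msg *msg)
{
	/* program the doorbell address/data for desc->msi_index here */
}

static int demo_setup_msis(struct device *dev, unsigned int nvec,
			   unsigned int *virqs)
{
	unsigned int i;
	int ret;

	ret = platform_device_msi_init_and_alloc_irqs(dev, nvec,
						      my_write_msg);
	if (ret)
		return ret;

	for (i = 0; i < nvec; i++)
		virqs[i] = msi_get_virq(dev, i);	/* 0 when absent */

	return 0;
}

The matching teardown call is platform_device_msi_free_irqs_all(), which is why the remove path above frees the vectors only when fctl.WSI is clear, i.e. when MSIs were actually chosen at probe time.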
+static void riscv_iommu_platform_shutdown(struct platform_device *pdev) +{ + riscv_iommu_disable(dev_get_drvdata(&pdev->dev)); }; static const struct of_device_id riscv_iommu_of_match[] = { @@ -82,6 +153,7 @@ static const struct of_device_id riscv_iommu_of_match[] = { static struct platform_driver riscv_iommu_platform_driver = { .probe = riscv_iommu_platform_probe, .remove = riscv_iommu_platform_remove, + .shutdown = riscv_iommu_platform_shutdown, .driver = { .name = "riscv,iommu", .of_match_table = riscv_iommu_of_match, diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c index 8a05def774bd..8f049d4a0e2c 100644 --- a/drivers/iommu/riscv/iommu.c +++ b/drivers/iommu/riscv/iommu.c @@ -240,6 +240,12 @@ static int riscv_iommu_queue_enable(struct riscv_iommu_device *iommu, return rc; } + /* Empty queue before enabling it */ + if (queue->qid == RISCV_IOMMU_INTR_CQ) + riscv_iommu_writel(queue->iommu, Q_TAIL(queue), 0); + else + riscv_iommu_writel(queue->iommu, Q_HEAD(queue), 0); + /* * Enable queue with interrupts, clear any memory fault if any. * Wait for the hardware to acknowledge request and activate queue @@ -645,9 +651,11 @@ static struct riscv_iommu_dc *riscv_iommu_get_dc(struct riscv_iommu_device *iomm * This is best effort IOMMU translation shutdown flow. * Disable IOMMU without waiting for hardware response. */ -static void riscv_iommu_disable(struct riscv_iommu_device *iommu) +void riscv_iommu_disable(struct riscv_iommu_device *iommu) { - riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP, 0); + riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP, + FIELD_PREP(RISCV_IOMMU_DDTP_IOMMU_MODE, + RISCV_IOMMU_DDTP_IOMMU_MODE_BARE)); riscv_iommu_writel(iommu, RISCV_IOMMU_REG_CQCSR, 0); riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FQCSR, 0); riscv_iommu_writel(iommu, RISCV_IOMMU_REG_PQCSR, 0); @@ -1270,7 +1278,7 @@ static phys_addr_t riscv_iommu_iova_to_phys(struct iommu_domain *iommu_domain, dma_addr_t iova) { struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); - unsigned long pte_size; + size_t pte_size; unsigned long *ptr; ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size); diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h index b1c4664542b4..46df79dd5495 100644 --- a/drivers/iommu/riscv/iommu.h +++ b/drivers/iommu/riscv/iommu.h @@ -64,6 +64,7 @@ struct riscv_iommu_device { int riscv_iommu_init(struct riscv_iommu_device *iommu); void riscv_iommu_remove(struct riscv_iommu_device *iommu); +void riscv_iommu_disable(struct riscv_iommu_device *iommu); #define riscv_iommu_readl(iommu, addr) \ readl_relaxed((iommu)->reg + (addr)) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 4b369419b32c..323cc665c357 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -25,6 +25,7 @@ #include <linux/pm_runtime.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/string_choices.h> #include "iommu-pages.h" @@ -611,7 +612,7 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id) dev_err(iommu->dev, "Page fault at %pad of type %s\n", &iova, - (flags == IOMMU_FAULT_WRITE) ? 
"write" : "read"); + str_write_read(flags == IOMMU_FAULT_WRITE)); log_iova(iommu, i, iova); diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h index c637e0997f6d..abec23c7744f 100644 --- a/include/linux/adreno-smmu-priv.h +++ b/include/linux/adreno-smmu-priv.h @@ -50,6 +50,11 @@ struct adreno_smmu_fault_info { * the GPU driver must call resume_translation() * @resume_translation: Resume translation after a fault * + * @set_prr_bit: [optional] Configure the GPU's Partially Resident + * Region (PRR) bit in the ACTLR register. + * @set_prr_addr: [optional] Configure the PRR_CFG_*ADDR register with + * the physical address of PRR page passed from GPU + * driver. * * The GPU driver (drm/msm) and adreno-smmu work together for controlling * the GPU's SMMU instance. This is by necessity, as the GPU is directly @@ -67,6 +72,8 @@ struct adreno_smmu_priv { void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info); void (*set_stall)(const void *cookie, bool enabled); void (*resume_translation)(const void *cookie, bool terminate); + void (*set_prr_bit)(const void *cookie, bool set); + void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr); }; #endif /* __ADRENO_SMMU_PRIV_H */ diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 2b90c48a6a87..062fbd4c9b77 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -31,11 +31,11 @@ struct amd_iommu_pi_data { struct task_struct; struct pci_dev; -extern int amd_iommu_detect(void); +extern void amd_iommu_detect(void); #else /* CONFIG_AMD_IOMMU */ -static inline int amd_iommu_detect(void) { return -ENODEV; } +static inline void amd_iommu_detect(void) { } #endif /* CONFIG_AMD_IOMMU */ diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index ce86b09ae80f..bba2a51c87d2 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -181,11 +181,21 @@ struct io_pgtable_cfg { }; /** + * struct arm_lpae_io_pgtable_walk_data - information from a pgtable walk + * + * @ptes: The recorded PTE values from the walk + */ +struct arm_lpae_io_pgtable_walk_data { + u64 ptes[4]; +}; + +/** * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers. * * @map_pages: Map a physically contiguous range of pages of the same size. * @unmap_pages: Unmap a range of virtually contiguous pages of the same size. * @iova_to_phys: Translate iova to physical address. + * @pgtable_walk: (optional) Perform a page table walk for a given iova. * * These functions map directly onto the iommu_ops member functions with * the same names. @@ -199,6 +209,7 @@ struct io_pgtable_ops { struct iommu_iotlb_gather *gather); phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, unsigned long iova); + int (*pgtable_walk)(struct io_pgtable_ops *ops, unsigned long iova, void *wd); int (*read_and_clear_dirty)(struct io_pgtable_ops *ops, unsigned long iova, size_t size, unsigned long flags, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 318d27841130..38c65e92ecd0 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -587,9 +587,6 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * - IOMMU_DOMAIN_DMA: must use a dma domain * - 0: use the default setting * @default_domain_ops: the default ops for domains - * @remove_dev_pasid: Remove any translation configurations of a specific - * pasid, so that any DMA transactions with this pasid - * will be blocked by the hardware. 
 * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind
 *                the @dev, as the set of virtualization resources shared/passed
 *                to user space IOMMU instance. And associate it with a nesting
@@ -647,8 +644,6 @@ struct iommu_ops {
 			     struct iommu_page_response *msg);
 	int (*def_domain_type)(struct device *dev);
 
-	void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid,
-				 struct iommu_domain *domain);
 	struct iommufd_viommu *(*viommu_alloc)(
 		struct device *dev, struct iommu_domain *parent_domain,
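With ->remove_dev_pasid() gone from struct iommu_ops, detaching a PASID is now expressed as attaching the driver's blocking domain to that PASID: iommu_remove_dev_pasid() calls blocked_domain->ops->set_dev_pasid(), and iommu_attach_device_pasid() returns -EOPNOTSUPP for drivers that cannot provide such a domain. A hedged sketch of what a PASID-capable driver supplies, where the mydrv_* names and the clear-entry helper are illustrative:

#include <linux/iommu.h>

static int mydrv_blocked_set_dev_pasid(struct iommu_domain *domain,
				       struct device *dev, ioasid_t pasid,
				       struct iommu_domain *old)
{
	/*
	 * Hypothetical helper: invalidate and clear this PASID's entry
	 * so any further DMA tagged with it is blocked by the hardware.
	 */
	mydrv_clear_pasid(dev, pasid);
	return 0;
}

static const struct iommu_domain_ops mydrv_blocked_ops = {
	.set_dev_pasid	= mydrv_blocked_set_dev_pasid,
};

static struct iommu_domain mydrv_blocked_domain = {
	.type	= IOMMU_DOMAIN_BLOCKED,
	.ops	= &mydrv_blocked_ops,
};

static const struct iommu_ops mydrv_iommu_ops = {
	/* ... */
	.blocked_domain = &mydrv_blocked_domain,
};

Centralizing teardown this way removes a per-driver callback and guarantees that the "detached" state is the same blocking translation the core uses everywhere else.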