diff options
Diffstat (limited to 'arch/s390')
| -rw-r--r-- | arch/s390/Kconfig | 2 | ||||
| -rw-r--r-- | arch/s390/boot/vmem.c | 16 | ||||
| -rw-r--r-- | arch/s390/include/asm/bug.h | 5 | ||||
| -rw-r--r-- | arch/s390/include/asm/page.h | 2 | ||||
| -rw-r--r-- | arch/s390/include/asm/pci.h | 5 | ||||
| -rw-r--r-- | arch/s390/include/uapi/asm/ipl.h | 1 | ||||
| -rw-r--r-- | arch/s390/kernel/ipl.c | 48 | ||||
| -rw-r--r-- | arch/s390/kernel/stacktrace.c | 18 | ||||
| -rw-r--r-- | arch/s390/mm/gmap_helpers.c | 9 | ||||
| -rw-r--r-- | arch/s390/mm/pageattr.c | 2 | ||||
| -rw-r--r-- | arch/s390/mm/vmem.c | 14 | ||||
| -rw-r--r-- | arch/s390/pci/pci.c | 13 | ||||
| -rw-r--r-- | arch/s390/pci/pci_bus.c | 116 | ||||
| -rw-r--r-- | arch/s390/pci/pci_bus.h | 15 | ||||
| -rw-r--r-- | arch/s390/pci/pci_irq.c | 332 |
15 files changed, 407 insertions, 191 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 938e5df75b2d..0e5fad5f06ca 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -238,6 +238,7 @@ config S390 select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_PREEMPT_DYNAMIC_KEY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE @@ -254,6 +255,7 @@ config S390 select HOTPLUG_SMT select IOMMU_HELPER if PCI select IOMMU_SUPPORT if PCI + select IRQ_MSI_LIB if PCI select KASAN_VMALLOC if KASAN select LOCK_MM_AND_FIND_VMA select MMU_GATHER_MERGE_VMAS diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index fbe64ffdfb96..7d6cc4c85af0 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -244,22 +244,10 @@ static void *boot_crst_alloc(unsigned long val) static pte_t *boot_pte_alloc(void) { - static void *pte_leftover; pte_t *pte; - /* - * handling pte_leftovers this way helps to avoid memory fragmentation - * during POPULATE_KASAN_MAP_SHADOW when EDAT is off - */ - if (!pte_leftover) { - pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE); - pte = pte_leftover + _PAGE_TABLE_SIZE; - __arch_set_page_dat(pte, 1); - } else { - pte = pte_leftover; - pte_leftover = NULL; - } - + pte = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE); + __arch_set_page_dat(pte, 1); memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); return pte; } diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index acb4b13d98c5..ee9221bb5d18 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -4,11 +4,14 @@ #include <linux/stringify.h> +#ifdef CONFIG_BUG + #ifndef CONFIG_DEBUG_BUGVERBOSE #define _BUGVERBOSE_LOCATION(file, line) #else #define __BUGVERBOSE_LOCATION(file, line) \ .pushsection .rodata.str, "aMS", @progbits, 1; \ + .align 2; \ 10002: .ascii file "\0"; \ .popsection; \ \ @@ -52,6 +55,8 @@ do { \ #define HAVE_ARCH_BUG +#endif /* CONFIG_BUG */ + #include <asm-generic/bug.h> #endif /* _ASM_S390_BUG_H */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 9240a363c893..c1d63b613bf9 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -166,6 +166,8 @@ static inline int page_reset_referenced(unsigned long addr) return CC_TRANSFORM(cc); } +int split_pud_page(pud_t *pudp, unsigned long addr); + /* Bits int the storage key */ #define _PAGE_CHANGED 0x02 /* HW changed bit */ #define _PAGE_REFERENCED 0x04 /* HW referenced bit */ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index a32f465ecf73..c0ff19dab580 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -5,6 +5,7 @@ #include <linux/pci.h> #include <linux/mutex.h> #include <linux/iommu.h> +#include <linux/irqdomain.h> #include <linux/pci_hotplug.h> #include <asm/pci_clp.h> #include <asm/pci_debug.h> @@ -109,6 +110,7 @@ struct zpci_bus { struct list_head resources; struct list_head bus_next; struct resource bus_resource; + struct irq_domain *msi_parent_domain; int topo; /* TID if topo_is_tid, PCHID otherwise */ int domain_nr; u8 multifunction : 1; @@ -310,6 +312,9 @@ int zpci_dma_exit_device(struct zpci_dev *zdev); /* IRQ */ int __init zpci_irq_init(void); void __init zpci_irq_exit(void); +int zpci_set_irq(struct zpci_dev *zdev); +int zpci_create_parent_msi_domain(struct zpci_bus *zbus); +void zpci_remove_parent_msi_domain(struct zpci_bus *zbus); /* FMB */ int zpci_fmb_enable_device(struct zpci_dev *); diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h index 2cd28af50dd4..3d64a2251699 100644 --- a/arch/s390/include/uapi/asm/ipl.h +++ b/arch/s390/include/uapi/asm/ipl.h @@ -15,6 +15,7 @@ struct ipl_pl_hdr { #define IPL_PL_FLAG_IPLPS 0x80 #define IPL_PL_FLAG_SIPL 0x40 #define IPL_PL_FLAG_IPLSR 0x20 +#define IPL_PL_FLAG_SBP 0x10 /* IPL Parameter Block header */ struct ipl_pb_hdr { diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 961a3d60a4dd..dcdc7e274848 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -262,6 +262,24 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ sys_##_prefix##_##_name##_show, \ sys_##_prefix##_##_name##_store) +#define DEFINE_IPL_ATTR_BOOTPROG_RW(_prefix, _name, _fmt_out, _fmt_in, _hdr, _value) \ + IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value) \ +static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + unsigned long long value; \ + if (sscanf(buf, _fmt_in, &value) != 1) \ + return -EINVAL; \ + (_value) = value; \ + (_hdr).flags &= ~IPL_PL_FLAG_SBP; \ + return len; \ +} \ +static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name, 0644, \ + sys_##_prefix##_##_name##_show, \ + sys_##_prefix##_##_name##_store) + #define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\ IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, _value) \ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ @@ -818,12 +836,13 @@ DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n", reipl_block_fcp->fcp.wwpn); DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n", reipl_block_fcp->fcp.lun); -DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n", - reipl_block_fcp->fcp.bootprog); DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n", reipl_block_fcp->fcp.br_lba); DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", reipl_block_fcp->fcp.devno); +DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n", + reipl_block_fcp->hdr, + reipl_block_fcp->fcp.bootprog); static void reipl_get_ascii_loadparm(char *loadparm, struct ipl_parameter_block *ibp) @@ -942,10 +961,11 @@ DEFINE_IPL_ATTR_RW(reipl_nvme, fid, "0x%08llx\n", "%llx\n", reipl_block_nvme->nvme.fid); DEFINE_IPL_ATTR_RW(reipl_nvme, nsid, "0x%08llx\n", "%llx\n", reipl_block_nvme->nvme.nsid); -DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n", - reipl_block_nvme->nvme.bootprog); DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n", reipl_block_nvme->nvme.br_lba); +DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n", + reipl_block_nvme->hdr, + reipl_block_nvme->nvme.bootprog); static struct attribute *reipl_nvme_attrs[] = { &sys_reipl_nvme_fid_attr.attr, @@ -1038,8 +1058,9 @@ static const struct bin_attribute *const reipl_eckd_bin_attrs[] = { }; DEFINE_IPL_CCW_ATTR_RW(reipl_eckd, device, reipl_block_eckd->eckd); -DEFINE_IPL_ATTR_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n", - reipl_block_eckd->eckd.bootprog); +DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n", + reipl_block_eckd->hdr, + reipl_block_eckd->eckd.bootprog); static struct attribute *reipl_eckd_attrs[] = { &sys_reipl_eckd_device_attr.attr, @@ -1567,12 +1588,13 @@ DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n", dump_block_fcp->fcp.wwpn); DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n", dump_block_fcp->fcp.lun); -DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n", - dump_block_fcp->fcp.bootprog); DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n", dump_block_fcp->fcp.br_lba); DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", dump_block_fcp->fcp.devno); +DEFINE_IPL_ATTR_BOOTPROG_RW(dump_fcp, bootprog, "%lld\n", "%lld\n", + dump_block_fcp->hdr, + dump_block_fcp->fcp.bootprog); DEFINE_IPL_ATTR_SCP_DATA_RW(dump_fcp, dump_block_fcp->hdr, dump_block_fcp->fcp, @@ -1604,10 +1626,11 @@ DEFINE_IPL_ATTR_RW(dump_nvme, fid, "0x%08llx\n", "%llx\n", dump_block_nvme->nvme.fid); DEFINE_IPL_ATTR_RW(dump_nvme, nsid, "0x%08llx\n", "%llx\n", dump_block_nvme->nvme.nsid); -DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n", - dump_block_nvme->nvme.bootprog); DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n", dump_block_nvme->nvme.br_lba); +DEFINE_IPL_ATTR_BOOTPROG_RW(dump_nvme, bootprog, "%lld\n", "%llx\n", + dump_block_nvme->hdr, + dump_block_nvme->nvme.bootprog); DEFINE_IPL_ATTR_SCP_DATA_RW(dump_nvme, dump_block_nvme->hdr, dump_block_nvme->nvme, @@ -1635,8 +1658,9 @@ static const struct attribute_group dump_nvme_attr_group = { /* ECKD dump device attributes */ DEFINE_IPL_CCW_ATTR_RW(dump_eckd, device, dump_block_eckd->eckd); -DEFINE_IPL_ATTR_RW(dump_eckd, bootprog, "%lld\n", "%llx\n", - dump_block_eckd->eckd.bootprog); +DEFINE_IPL_ATTR_BOOTPROG_RW(dump_eckd, bootprog, "%lld\n", "%llx\n", + dump_block_eckd->hdr, + dump_block_eckd->eckd.bootprog); IPL_ATTR_BR_CHR_SHOW_FN(dump, dump_block_eckd->eckd); IPL_ATTR_BR_CHR_STORE_FN(dump, dump_block_eckd->eckd); diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 3aae7f70e6ab..18520d333058 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -104,7 +104,6 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo struct stack_frame_vdso_wrapper __user *sf_vdso; struct stack_frame_user __user *sf; unsigned long ip, sp; - bool first = true; if (!current->mm) return; @@ -133,24 +132,11 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo if (__get_user(ip, &sf->gprs[8])) break; } - /* Sanity check: ABI requires SP to be 8 byte aligned. */ - if (sp & 0x7) + /* Validate SP and RA (ABI requires SP to be 8 byte aligned). */ + if (sp & 0x7 || ip_invalid(ip)) break; - if (ip_invalid(ip)) { - /* - * If the instruction address is invalid, and this - * is the first stack frame, assume r14 has not - * been written to the stack yet. Otherwise exit. - */ - if (!first) - break; - ip = regs->gprs[14]; - if (ip_invalid(ip)) - break; - } if (!store_ip(consume_entry, cookie, entry, perf, ip)) break; - first = false; } pagefault_enable(); } diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c index 549f14ad08af..d41b19925a5a 100644 --- a/arch/s390/mm/gmap_helpers.c +++ b/arch/s390/mm/gmap_helpers.c @@ -47,6 +47,7 @@ static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry) void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) { struct vm_area_struct *vma; + unsigned long pgstev; spinlock_t *ptl; pgste_t pgste; pte_t *ptep; @@ -65,9 +66,13 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) if (pte_swap(*ptep)) { preempt_disable(); pgste = pgste_get_lock(ptep); + pgstev = pgste_val(pgste); - ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep)); - pte_clear(mm, vmaddr, ptep); + if ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || + (pgstev & _PGSTE_GPS_ZERO)) { + ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep)); + pte_clear(mm, vmaddr, ptep); + } pgste_set_unlock(ptep, pgste); preempt_enable(); diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 3042647c9dbf..d3ce04a4b248 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -204,7 +204,7 @@ static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end, return rc; } -static int split_pud_page(pud_t *pudp, unsigned long addr) +int split_pud_page(pud_t *pudp, unsigned long addr) { unsigned long pmd_addr, prot; pmd_t *pm_dir, *pmdp; diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index d96587b84e81..eeadff45e0e1 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -330,10 +330,14 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, if (pud_leaf(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && IS_ALIGNED(next, PUD_SIZE)) { + if (!direct) + vmem_free_pages(pud_deref(*pud), get_order(PUD_SIZE), altmap); pud_clear(pud); pages++; + continue; + } else { + split_pud_page(pud, addr & PUD_MASK); } - continue; } } else if (pud_none(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && @@ -433,9 +437,15 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add, if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end))) return -EINVAL; - /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ + /* Don't mess with any tables not fully in 1:1 mapping, vmemmap & kasan area */ +#ifdef CONFIG_KASAN + if (WARN_ON_ONCE(!(start >= KASAN_SHADOW_START && end <= KASAN_SHADOW_END) && + end > __abs_lowcore)) + return -EINVAL; +#else if (WARN_ON_ONCE(end > __abs_lowcore)) return -EINVAL; +#endif for (addr = start; addr < end; addr = next) { next = pgd_addr_end(addr, end); pgd = pgd_offset_k(addr); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 93d2c9c780fc..57f3980b98a9 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -708,6 +708,12 @@ int zpci_reenable_device(struct zpci_dev *zdev) if (rc) return rc; + if (zdev->msi_nr_irqs > 0) { + rc = zpci_set_irq(zdev); + if (rc) + return rc; + } + rc = zpci_iommu_register_ioat(zdev, &status); if (rc) zpci_disable_device(zdev); @@ -955,6 +961,7 @@ void zpci_device_reserved(struct zpci_dev *zdev) } void zpci_release_device(struct kref *kref) + __releases(&zpci_list_lock) { struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); @@ -1142,6 +1149,7 @@ static void zpci_add_devices(struct list_head *scan_list) int zpci_scan_devices(void) { + struct zpci_bus *zbus; LIST_HEAD(scan_list); int rc; @@ -1150,7 +1158,10 @@ int zpci_scan_devices(void) return rc; zpci_add_devices(&scan_list); - zpci_bus_scan_busses(); + zpci_bus_for_each(zbus) { + zpci_bus_scan_bus(zbus); + cond_resched(); + } return 0; } diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 72adc8f6e94f..42a13e451f64 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -14,6 +14,7 @@ #include <linux/err.h> #include <linux/delay.h> #include <linux/seq_file.h> +#include <linux/irqdomain.h> #include <linux/jump_label.h> #include <linux/pci.h> #include <linux/printk.h> @@ -152,23 +153,6 @@ int zpci_bus_scan_bus(struct zpci_bus *zbus) return ret; } -/* zpci_bus_scan_busses - Scan all registered busses - * - * Scan all available zbusses - * - */ -void zpci_bus_scan_busses(void) -{ - struct zpci_bus *zbus = NULL; - - mutex_lock(&zbus_list_lock); - list_for_each_entry(zbus, &zbus_list, bus_next) { - zpci_bus_scan_bus(zbus); - cond_resched(); - } - mutex_unlock(&zbus_list_lock); -} - static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev) { return !s390_pci_no_rid && zdev->rid_available && @@ -198,25 +182,52 @@ static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *fr, s zbus->multifunction = zpci_bus_is_multifunction_root(fr); zbus->max_bus_speed = fr->max_bus_speed; + if (zpci_create_parent_msi_domain(zbus)) + goto out_free_domain; + /* * Note that the zbus->resources are taken over and zbus->resources * is empty after a successful call */ bus = pci_create_root_bus(NULL, ZPCI_BUS_NR, ops, zbus, &zbus->resources); - if (!bus) { - zpci_free_domain(zbus->domain_nr); - return -EFAULT; - } + if (!bus) + goto out_remove_msi_domain; zbus->bus = bus; + dev_set_msi_domain(&zbus->bus->dev, zbus->msi_parent_domain); return 0; + +out_remove_msi_domain: + zpci_remove_parent_msi_domain(zbus); +out_free_domain: + zpci_free_domain(zbus->domain_nr); + return -ENOMEM; } -static void zpci_bus_release(struct kref *kref) +/** + * zpci_bus_release - Un-initialize resources associated with the zbus and + * free memory + * @kref: refcount * that is part of struct zpci_bus + * + * MUST be called with `zbus_list_lock` held, but the lock is released during + * run of the function. + */ +static inline void zpci_bus_release(struct kref *kref) + __releases(&zbus_list_lock) { struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref); + lockdep_assert_held(&zbus_list_lock); + + list_del(&zbus->bus_next); + mutex_unlock(&zbus_list_lock); + + /* + * At this point no-one should see this object, or be able to get a new + * reference to it. + */ + if (zbus->bus) { pci_lock_rescan_remove(); pci_stop_root_bus(zbus->bus); @@ -228,15 +239,19 @@ static void zpci_bus_release(struct kref *kref) pci_unlock_rescan_remove(); } - mutex_lock(&zbus_list_lock); - list_del(&zbus->bus_next); - mutex_unlock(&zbus_list_lock); + zpci_remove_parent_msi_domain(zbus); kfree(zbus); } -static void zpci_bus_put(struct zpci_bus *zbus) +static inline void __zpci_bus_get(struct zpci_bus *zbus) +{ + lockdep_assert_held(&zbus_list_lock); + kref_get(&zbus->kref); +} + +static inline void zpci_bus_put(struct zpci_bus *zbus) { - kref_put(&zbus->kref, zpci_bus_release); + kref_put_mutex(&zbus->kref, zpci_bus_release, &zbus_list_lock); } static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid) @@ -248,7 +263,7 @@ static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid) if (!zbus->multifunction) continue; if (topo_is_tid == zbus->topo_is_tid && topo == zbus->topo) { - kref_get(&zbus->kref); + __zpci_bus_get(zbus); goto out_unlock; } } @@ -258,6 +273,44 @@ out_unlock: return zbus; } +/** + * zpci_bus_get_next - get the next zbus object from given position in the list + * @pos: current position/cursor in the global zbus list + * + * Acquires and releases references as the cursor iterates (might also free/ + * release the cursor). Is tolerant of concurrent operations on the list. + * + * To begin the iteration, set *@pos to %NULL before calling the function. + * + * *@pos is set to %NULL in cases where either the list is empty, or *@pos is + * the last element in the list. + * + * Context: Process context. May sleep. + */ +void zpci_bus_get_next(struct zpci_bus **pos) +{ + struct zpci_bus *curp = *pos, *next = NULL; + + mutex_lock(&zbus_list_lock); + if (curp) + next = list_next_entry(curp, bus_next); + else + next = list_first_entry(&zbus_list, typeof(*curp), bus_next); + + if (list_entry_is_head(next, &zbus_list, bus_next)) + next = NULL; + + if (next) + __zpci_bus_get(next); + + *pos = next; + mutex_unlock(&zbus_list_lock); + + /* zpci_bus_put() might drop refcount to 0 and locks zbus_list_lock */ + if (curp) + zpci_bus_put(curp); +} + static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid) { struct zpci_bus *zbus; @@ -269,9 +322,6 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid) zbus->topo = topo; zbus->topo_is_tid = topo_is_tid; INIT_LIST_HEAD(&zbus->bus_next); - mutex_lock(&zbus_list_lock); - list_add_tail(&zbus->bus_next, &zbus_list); - mutex_unlock(&zbus_list_lock); kref_init(&zbus->kref); INIT_LIST_HEAD(&zbus->resources); @@ -281,6 +331,10 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid) zbus->bus_resource.flags = IORESOURCE_BUS; pci_add_resource(&zbus->resources, &zbus->bus_resource); + mutex_lock(&zbus_list_lock); + list_add_tail(&zbus->bus_next, &zbus_list); + mutex_unlock(&zbus_list_lock); + return zbus; } diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index ae3d7a9159bd..e440742e3145 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -15,7 +15,20 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops); void zpci_bus_device_unregister(struct zpci_dev *zdev); int zpci_bus_scan_bus(struct zpci_bus *zbus); -void zpci_bus_scan_busses(void); +void zpci_bus_get_next(struct zpci_bus **pos); + +/** + * zpci_bus_for_each - iterate over all the registered zbus objects + * @pos: a struct zpci_bus * as cursor + * + * Acquires and releases references as the cursor iterates over the registered + * objects. Is tolerant against concurrent removals of objects. + * + * Context: Process context. May sleep. + */ +#define zpci_bus_for_each(pos) \ + for ((pos) = NULL, zpci_bus_get_next(&(pos)); (pos) != NULL; \ + zpci_bus_get_next(&(pos))) int zpci_bus_scan_device(struct zpci_dev *zdev); void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error); diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 2a06df8c2498..e9dd45f3c09d 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -6,6 +6,7 @@ #include <linux/kernel_stat.h> #include <linux/pci.h> #include <linux/msi.h> +#include <linux/irqchip/irq-msi-lib.h> #include <linux/smp.h> #include <asm/isc.h> @@ -97,7 +98,7 @@ static int zpci_clear_directed_irq(struct zpci_dev *zdev) } /* Register adapter interruptions */ -static int zpci_set_irq(struct zpci_dev *zdev) +int zpci_set_irq(struct zpci_dev *zdev) { int rc; @@ -125,27 +126,53 @@ static int zpci_clear_irq(struct zpci_dev *zdev) static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *dest, bool force) { - struct msi_desc *entry = irq_data_get_msi_desc(data); - struct msi_msg msg = entry->msg; - int cpu_addr = smp_cpu_get_cpu_address(cpumask_first(dest)); + irq_data_update_affinity(data, dest); + return IRQ_SET_MASK_OK; +} - msg.address_lo &= 0xff0000ff; - msg.address_lo |= (cpu_addr << 8); - pci_write_msi_msg(data->irq, &msg); +/* + * Encode the hwirq number for the parent domain. The encoding must be unique + * for each IRQ of each device in the parent domain, so it uses the devfn to + * identify the device and the msi_index to identify the IRQ within that device. + */ +static inline u32 zpci_encode_hwirq(u8 devfn, u16 msi_index) +{ + return (devfn << 16) | msi_index; +} - return IRQ_SET_MASK_OK; +static inline u16 zpci_decode_hwirq_msi_index(irq_hw_number_t hwirq) +{ + return hwirq & 0xffff; +} + +static void zpci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) +{ + struct msi_desc *desc = irq_data_get_msi_desc(data); + struct zpci_dev *zdev = to_zpci_dev(desc->dev); + + if (irq_delivery == DIRECTED) { + int cpu = cpumask_first(irq_data_get_affinity_mask(data)); + + msg->address_lo = zdev->msi_addr & 0xff0000ff; + msg->address_lo |= (smp_cpu_get_cpu_address(cpu) << 8); + } else { + msg->address_lo = zdev->msi_addr & 0xffffffff; + } + msg->address_hi = zdev->msi_addr >> 32; + msg->data = zpci_decode_hwirq_msi_index(data->hwirq); } static struct irq_chip zpci_irq_chip = { .name = "PCI-MSI", - .irq_unmask = pci_msi_unmask_irq, - .irq_mask = pci_msi_mask_irq, + .irq_compose_msi_msg = zpci_compose_msi_msg, }; static void zpci_handle_cpu_local_irq(bool rescan) { struct airq_iv *dibv = zpci_ibv[smp_processor_id()]; union zpci_sic_iib iib = {{0}}; + struct irq_domain *msi_domain; + irq_hw_number_t hwirq; unsigned long bit; int irqs_on = 0; @@ -163,7 +190,9 @@ static void zpci_handle_cpu_local_irq(bool rescan) continue; } inc_irq_stat(IRQIO_MSI); - generic_handle_irq(airq_iv_get_data(dibv, bit)); + hwirq = airq_iv_get_data(dibv, bit); + msi_domain = (struct irq_domain *)airq_iv_get_ptr(dibv, bit); + generic_handle_domain_irq(msi_domain, hwirq); } } @@ -228,6 +257,8 @@ static void zpci_floating_irq_handler(struct airq_struct *airq, struct tpi_info *tpi_info) { union zpci_sic_iib iib = {{0}}; + struct irq_domain *msi_domain; + irq_hw_number_t hwirq; unsigned long si, ai; struct airq_iv *aibv; int irqs_on = 0; @@ -255,7 +286,9 @@ static void zpci_floating_irq_handler(struct airq_struct *airq, break; inc_irq_stat(IRQIO_MSI); airq_iv_lock(aibv, ai); - generic_handle_irq(airq_iv_get_data(aibv, ai)); + hwirq = airq_iv_get_data(aibv, ai); + msi_domain = (struct irq_domain *)airq_iv_get_ptr(aibv, ai); + generic_handle_domain_irq(msi_domain, hwirq); airq_iv_unlock(aibv, ai); } } @@ -277,7 +310,9 @@ static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs, zdev->aisb = *bit; /* Create adapter interrupt vector */ - zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL); + zdev->aibv = airq_iv_create(msi_vecs, + AIRQ_IV_PTR | AIRQ_IV_DATA | AIRQ_IV_BITLOCK, + NULL); if (!zdev->aibv) return -ENOMEM; @@ -289,146 +324,220 @@ static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs, return 0; } -int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +bool arch_restore_msi_irqs(struct pci_dev *pdev) { - unsigned int hwirq, msi_vecs, irqs_per_msi, i, cpu; struct zpci_dev *zdev = to_zpci(pdev); - struct msi_desc *msi; - struct msi_msg msg; - unsigned long bit; - int cpu_addr; - int rc, irq; + zpci_set_irq(zdev); + return true; +} + +static struct airq_struct zpci_airq = { + .handler = zpci_floating_irq_handler, + .isc = PCI_ISC, +}; + +static void zpci_msi_teardown_directed(struct zpci_dev *zdev) +{ + airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->max_msi); + zdev->msi_first_bit = -1U; + zdev->msi_nr_irqs = 0; +} + +static void zpci_msi_teardown_floating(struct zpci_dev *zdev) +{ + airq_iv_release(zdev->aibv); + zdev->aibv = NULL; + airq_iv_free_bit(zpci_sbv, zdev->aisb); zdev->aisb = -1UL; zdev->msi_first_bit = -1U; + zdev->msi_nr_irqs = 0; +} + +static void zpci_msi_teardown(struct irq_domain *domain, msi_alloc_info_t *arg) +{ + struct zpci_dev *zdev = to_zpci_dev(domain->dev); + + zpci_clear_irq(zdev); + if (irq_delivery == DIRECTED) + zpci_msi_teardown_directed(zdev); + else + zpci_msi_teardown_floating(zdev); +} + +static int zpci_msi_prepare(struct irq_domain *domain, + struct device *dev, int nvec, + msi_alloc_info_t *info) +{ + struct zpci_dev *zdev = to_zpci_dev(dev); + struct pci_dev *pdev = to_pci_dev(dev); + unsigned long bit; + int msi_vecs, rc; msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); if (msi_vecs < nvec) { - pr_info("%s requested %d irqs, allocate system limit of %d", + pr_info("%s requested %d IRQs, allocate system limit of %d\n", pci_name(pdev), nvec, zdev->max_msi); } rc = __alloc_airq(zdev, msi_vecs, &bit); - if (rc < 0) + if (rc) { + pr_err("Allocating adapter IRQs for %s failed\n", pci_name(pdev)); return rc; + } - /* - * Request MSI interrupts: - * When using MSI, nvec_used interrupt sources and their irq - * descriptors are controlled through one msi descriptor. - * Thus the outer loop over msi descriptors shall run only once, - * while two inner loops iterate over the interrupt vectors. - * When using MSI-X, each interrupt vector/irq descriptor - * is bound to exactly one msi descriptor (nvec_used is one). - * So the inner loops are executed once, while the outer iterates - * over the MSI-X descriptors. - */ - hwirq = bit; - msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) { - if (hwirq - bit >= msi_vecs) - break; - irqs_per_msi = min_t(unsigned int, msi_vecs, msi->nvec_used); - irq = __irq_alloc_descs(-1, 0, irqs_per_msi, 0, THIS_MODULE, - (irq_delivery == DIRECTED) ? - msi->affinity : NULL); - if (irq < 0) - return -ENOMEM; + zdev->msi_first_bit = bit; + zdev->msi_nr_irqs = msi_vecs; + rc = zpci_set_irq(zdev); + if (rc) { + pr_err("Registering adapter IRQs for %s failed\n", + pci_name(pdev)); + + if (irq_delivery == DIRECTED) + zpci_msi_teardown_directed(zdev); + else + zpci_msi_teardown_floating(zdev); + return rc; + } + return 0; +} - for (i = 0; i < irqs_per_msi; i++) { - rc = irq_set_msi_desc_off(irq, i, msi); - if (rc) - return rc; - irq_set_chip_and_handler(irq + i, &zpci_irq_chip, - handle_percpu_irq); - } +static int zpci_msi_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *args) +{ + struct msi_desc *desc = ((msi_alloc_info_t *)args)->desc; + struct zpci_dev *zdev = to_zpci_dev(desc->dev); + struct zpci_bus *zbus = zdev->zbus; + unsigned int cpu, hwirq; + unsigned long bit; + int i; - msg.data = hwirq - bit; - if (irq_delivery == DIRECTED) { - if (msi->affinity) - cpu = cpumask_first(&msi->affinity->mask); - else - cpu = 0; - cpu_addr = smp_cpu_get_cpu_address(cpu); + bit = zdev->msi_first_bit + desc->msi_index; + hwirq = zpci_encode_hwirq(zdev->devfn, desc->msi_index); - msg.address_lo = zdev->msi_addr & 0xff0000ff; - msg.address_lo |= (cpu_addr << 8); + if (desc->msi_index + nr_irqs > zdev->max_msi) + return -EINVAL; + for (i = 0; i < nr_irqs; i++) { + irq_domain_set_info(domain, virq + i, hwirq + i, + &zpci_irq_chip, zdev, + handle_percpu_irq, NULL, NULL); + + if (irq_delivery == DIRECTED) { for_each_possible_cpu(cpu) { - for (i = 0; i < irqs_per_msi; i++) - airq_iv_set_data(zpci_ibv[cpu], - hwirq + i, irq + i); + airq_iv_set_ptr(zpci_ibv[cpu], bit + i, + (unsigned long)zbus->msi_parent_domain); + airq_iv_set_data(zpci_ibv[cpu], bit + i, hwirq + i); } } else { - msg.address_lo = zdev->msi_addr & 0xffffffff; - for (i = 0; i < irqs_per_msi; i++) - airq_iv_set_data(zdev->aibv, hwirq + i, irq + i); + airq_iv_set_ptr(zdev->aibv, bit + i, + (unsigned long)zbus->msi_parent_domain); + airq_iv_set_data(zdev->aibv, bit + i, hwirq + i); } - msg.address_hi = zdev->msi_addr >> 32; - pci_write_msi_msg(irq, &msg); - hwirq += irqs_per_msi; } - zdev->msi_first_bit = bit; - zdev->msi_nr_irqs = hwirq - bit; - - rc = zpci_set_irq(zdev); - if (rc) - return rc; - - return (zdev->msi_nr_irqs == nvec) ? 0 : zdev->msi_nr_irqs; + return 0; } -void arch_teardown_msi_irqs(struct pci_dev *pdev) +static void zpci_msi_clear_airq(struct irq_data *d, int i) { - struct zpci_dev *zdev = to_zpci(pdev); - struct msi_desc *msi; - unsigned int i; - int rc; + struct msi_desc *desc = irq_data_get_msi_desc(d); + struct zpci_dev *zdev = to_zpci_dev(desc->dev); + unsigned long bit; + unsigned int cpu; + u16 msi_index; - /* Disable interrupts */ - rc = zpci_clear_irq(zdev); - if (rc) - return; + msi_index = zpci_decode_hwirq_msi_index(d->hwirq); + bit = zdev->msi_first_bit + msi_index; - /* Release MSI interrupts */ - msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) { - for (i = 0; i < msi->nvec_used; i++) { - irq_set_msi_desc(msi->irq + i, NULL); - irq_free_desc(msi->irq + i); + if (irq_delivery == DIRECTED) { + for_each_possible_cpu(cpu) { + airq_iv_set_ptr(zpci_ibv[cpu], bit + i, 0); + airq_iv_set_data(zpci_ibv[cpu], bit + i, 0); } - msi->msg.address_lo = 0; - msi->msg.address_hi = 0; - msi->msg.data = 0; - msi->irq = 0; + } else { + airq_iv_set_ptr(zdev->aibv, bit + i, 0); + airq_iv_set_data(zdev->aibv, bit + i, 0); } +} - if (zdev->aisb != -1UL) { - zpci_ibv[zdev->aisb] = NULL; - airq_iv_free_bit(zpci_sbv, zdev->aisb); - zdev->aisb = -1UL; - } - if (zdev->aibv) { - airq_iv_release(zdev->aibv); - zdev->aibv = NULL; - } +static void zpci_msi_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *d; + int i; - if ((irq_delivery == DIRECTED) && zdev->msi_first_bit != -1U) - airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->msi_nr_irqs); + for (i = 0; i < nr_irqs; i++) { + d = irq_domain_get_irq_data(domain, virq + i); + zpci_msi_clear_airq(d, i); + irq_domain_reset_irq_data(d); + } } -bool arch_restore_msi_irqs(struct pci_dev *pdev) +static const struct irq_domain_ops zpci_msi_domain_ops = { + .alloc = zpci_msi_domain_alloc, + .free = zpci_msi_domain_free, +}; + +static bool zpci_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *real_parent, + struct msi_domain_info *info) { - struct zpci_dev *zdev = to_zpci(pdev); + if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) + return false; + + info->ops->msi_prepare = zpci_msi_prepare; + info->ops->msi_teardown = zpci_msi_teardown; - zpci_set_irq(zdev); return true; } -static struct airq_struct zpci_airq = { - .handler = zpci_floating_irq_handler, - .isc = PCI_ISC, +static struct msi_parent_ops zpci_msi_parent_ops = { + .supported_flags = MSI_GENERIC_FLAGS_MASK | + MSI_FLAG_PCI_MSIX | + MSI_FLAG_MULTI_PCI_MSI, + .required_flags = MSI_FLAG_USE_DEF_DOM_OPS | + MSI_FLAG_USE_DEF_CHIP_OPS, + .init_dev_msi_info = zpci_init_dev_msi_info, }; +int zpci_create_parent_msi_domain(struct zpci_bus *zbus) +{ + char fwnode_name[18]; + + snprintf(fwnode_name, sizeof(fwnode_name), "ZPCI_MSI_DOM_%04x", zbus->domain_nr); + struct irq_domain_info info = { + .fwnode = irq_domain_alloc_named_fwnode(fwnode_name), + .ops = &zpci_msi_domain_ops, + }; + + if (!info.fwnode) { + pr_err("Failed to allocate fwnode for MSI IRQ domain\n"); + return -ENOMEM; + } + + if (irq_delivery == FLOATING) + zpci_msi_parent_ops.required_flags |= MSI_FLAG_NO_AFFINITY; + + zbus->msi_parent_domain = msi_create_parent_irq_domain(&info, &zpci_msi_parent_ops); + if (!zbus->msi_parent_domain) { + irq_domain_free_fwnode(info.fwnode); + pr_err("Failed to create MSI IRQ domain\n"); + return -ENOMEM; + } + + return 0; +} + +void zpci_remove_parent_msi_domain(struct zpci_bus *zbus) +{ + struct fwnode_handle *fn; + + fn = zbus->msi_parent_domain->fwnode; + irq_domain_remove(zbus->msi_parent_domain); + irq_domain_free_fwnode(fn); +} + static void __init cpu_enable_directed_irq(void *unused) { union zpci_sic_iib iib = {{0}}; @@ -465,6 +574,7 @@ static int __init zpci_directed_irq_init(void) * is only done on the first vector. */ zpci_ibv[cpu] = airq_iv_create(cache_line_size() * BITS_PER_BYTE, + AIRQ_IV_PTR | AIRQ_IV_DATA | AIRQ_IV_CACHELINE | (!cpu ? AIRQ_IV_ALLOC : 0), NULL); |
