diff options
Diffstat (limited to 'arch')
| -rw-r--r-- | arch/alpha/include/asm/console.h | 4 | ||||
| -rw-r--r-- | arch/alpha/include/asm/page.h | 4 | ||||
| -rw-r--r-- | arch/alpha/include/asm/pal.h | 4 | ||||
| -rw-r--r-- | arch/alpha/include/asm/thread_info.h | 8 | ||||
| -rw-r--r-- | arch/alpha/include/uapi/asm/ioctls.h | 8 | ||||
| -rw-r--r-- | arch/arm/Kconfig | 4 | ||||
| -rw-r--r-- | arch/arm/include/asm/word-at-a-time.h | 10 | ||||
| -rw-r--r-- | arch/arm/mm/alignment.c | 6 | ||||
| -rw-r--r-- | arch/arm/mm/fault.c | 100 | ||||
| -rw-r--r-- | arch/s390/Kconfig | 2 | ||||
| -rw-r--r-- | arch/s390/boot/vmem.c | 16 | ||||
| -rw-r--r-- | arch/s390/include/asm/bug.h | 5 | ||||
| -rw-r--r-- | arch/s390/include/asm/page.h | 2 | ||||
| -rw-r--r-- | arch/s390/include/asm/pci.h | 5 | ||||
| -rw-r--r-- | arch/s390/mm/gmap_helpers.c | 9 | ||||
| -rw-r--r-- | arch/s390/mm/pageattr.c | 2 | ||||
| -rw-r--r-- | arch/s390/mm/vmem.c | 14 | ||||
| -rw-r--r-- | arch/s390/pci/pci.c | 6 | ||||
| -rw-r--r-- | arch/s390/pci/pci_bus.c | 18 | ||||
| -rw-r--r-- | arch/s390/pci/pci_irq.c | 332 |
20 files changed, 378 insertions, 181 deletions
diff --git a/arch/alpha/include/asm/console.h b/arch/alpha/include/asm/console.h index 088b7b9eb15a..1cabdb6064bb 100644 --- a/arch/alpha/include/asm/console.h +++ b/arch/alpha/include/asm/console.h @@ -4,7 +4,7 @@ #include <uapi/asm/console.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern long callback_puts(long unit, const char *s, long length); extern long callback_getc(long unit); extern long callback_open_console(void); @@ -26,5 +26,5 @@ struct crb_struct; struct hwrpb_struct; extern int callback_init_done; extern void * callback_init(void *); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __AXP_CONSOLE_H */ diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h index 5ec4c77e432e..d2c6667d73e9 100644 --- a/arch/alpha/include/asm/page.h +++ b/arch/alpha/include/asm/page.h @@ -6,7 +6,7 @@ #include <asm/pal.h> #include <vdso/page.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define STRICT_MM_TYPECHECKS @@ -74,7 +74,7 @@ typedef struct page *pgtable_t; #define PAGE_OFFSET 0xfffffc0000000000 #endif -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define __pa(x) ((unsigned long) (x) - PAGE_OFFSET) #define __va(x) ((void *)((unsigned long) (x) + PAGE_OFFSET)) diff --git a/arch/alpha/include/asm/pal.h b/arch/alpha/include/asm/pal.h index db2b3b18b34c..799a64c05198 100644 --- a/arch/alpha/include/asm/pal.h +++ b/arch/alpha/include/asm/pal.h @@ -4,7 +4,7 @@ #include <uapi/asm/pal.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern void halt(void) __attribute__((noreturn)); #define __halt() __asm__ __volatile__ ("call_pal %0 #halt" : : "i" (PAL_halt)) @@ -183,5 +183,5 @@ qemu_get_vmtime(void) return v0; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ALPHA_PAL_H */ diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 4a4d00b37986..98ccbca64984 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -4,14 +4,14 @@ #ifdef __KERNEL__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/processor.h> #include <asm/types.h> #include <asm/hwrpb.h> #include <asm/sysinfo.h> #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct thread_info { struct pcb_struct pcb; /* palcode state */ @@ -44,7 +44,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); register unsigned long *current_stack_pointer __asm__ ("$30"); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* Thread information allocation. */ #define THREAD_SIZE_ORDER 1 @@ -110,7 +110,7 @@ register unsigned long *current_stack_pointer __asm__ ("$30"); put_user(res, (int __user *)(value)); \ }) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern void __save_fpu(void); static inline void save_fpu(void) diff --git a/arch/alpha/include/uapi/asm/ioctls.h b/arch/alpha/include/uapi/asm/ioctls.h index 971311605288..a09d04b49cc6 100644 --- a/arch/alpha/include/uapi/asm/ioctls.h +++ b/arch/alpha/include/uapi/asm/ioctls.h @@ -23,10 +23,10 @@ #define TCSETSW _IOW('t', 21, struct termios) #define TCSETSF _IOW('t', 22, struct termios) -#define TCGETA _IOR('t', 23, struct termio) -#define TCSETA _IOW('t', 24, struct termio) -#define TCSETAW _IOW('t', 25, struct termio) -#define TCSETAF _IOW('t', 28, struct termio) +#define TCGETA 0x40127417 +#define TCSETA 0x80127418 +#define TCSETAW 0x80127419 +#define TCSETAF 0x8012741c #define TCSBRK _IO('t', 29) #define TCXONC _IO('t', 30) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ff61891abe53..fa83c040ee2d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -82,7 +82,7 @@ config ARM select HAS_IOPORT select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && (!PREEMPT_RT || !SMP) select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL @@ -1213,7 +1213,7 @@ config HIGHMEM config HIGHPTE bool "Allocate 2nd-level pagetables from highmem" if EXPERT - depends on HIGHMEM + depends on HIGHMEM && !PREEMPT_RT default y help The VM uses one page of physical memory for each page table. diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h index f9a3897b06e7..5023f98d8293 100644 --- a/arch/arm/include/asm/word-at-a-time.h +++ b/arch/arm/include/asm/word-at-a-time.h @@ -67,7 +67,7 @@ static inline unsigned long find_zero(unsigned long mask) */ static inline unsigned long load_unaligned_zeropad(const void *addr) { - unsigned long ret, offset; + unsigned long ret, tmp; /* Load word from unaligned pointer addr */ asm( @@ -75,9 +75,9 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) "2:\n" " .pushsection .text.fixup,\"ax\"\n" " .align 2\n" - "3: and %1, %2, #0x3\n" - " bic %2, %2, #0x3\n" - " ldr %0, [%2]\n" + "3: bic %1, %2, #0x3\n" + " ldr %0, [%1]\n" + " and %1, %2, #0x3\n" " lsl %1, %1, #0x3\n" #ifndef __ARMEB__ " lsr %0, %0, %1\n" @@ -90,7 +90,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) " .align 3\n" " .long 1b, 3b\n" " .popsection" - : "=&r" (ret), "=&r" (offset) + : "=&r" (ret), "=&r" (tmp) : "r" (addr), "Qo" (*(unsigned long *)addr)); return ret; diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 3c6ddb1afdc4..812380f30ae3 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -19,10 +19,11 @@ #include <linux/init.h> #include <linux/sched/signal.h> #include <linux/uaccess.h> +#include <linux/unaligned.h> #include <asm/cp15.h> #include <asm/system_info.h> -#include <linux/unaligned.h> +#include <asm/system_misc.h> #include <asm/opcodes.h> #include "fault.h" @@ -809,6 +810,9 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) int thumb2_32b = 0; int fault; + if (addr >= TASK_SIZE && user_mode(regs)) + harden_branch_predictor(); + if (interrupts_enabled(regs)) local_irq_enable(); diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 2bc828a1940c..ed4330cc3f4e 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -128,6 +128,19 @@ static inline bool is_translation_fault(unsigned int fsr) return false; } +static inline bool is_permission_fault(unsigned int fsr) +{ + int fs = fsr_fs(fsr); +#ifdef CONFIG_ARM_LPAE + if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL) + return true; +#else + if (fs == FS_L1_PERM || fs == FS_L2_PERM) + return true; +#endif + return false; +} + static void die_kernel_fault(const char *msg, struct mm_struct *mm, unsigned long addr, unsigned int fsr, struct pt_regs *regs) @@ -162,6 +175,8 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, */ if (addr < PAGE_SIZE) { msg = "NULL pointer dereference"; + } else if (is_permission_fault(fsr) && fsr & FSR_LNX_PF) { + msg = "execution of memory"; } else { if (is_translation_fault(fsr) && kfence_handle_page_fault(addr, is_write_fault(fsr), regs)) @@ -183,9 +198,6 @@ __do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig, { struct task_struct *tsk = current; - if (addr > TASK_SIZE) - harden_branch_predictor(); - #ifdef CONFIG_DEBUG_USER if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) || ((user_debug & UDBG_BUS) && (sig == SIGBUS))) { @@ -225,19 +237,6 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) } #ifdef CONFIG_MMU -static inline bool is_permission_fault(unsigned int fsr) -{ - int fs = fsr_fs(fsr); -#ifdef CONFIG_ARM_LPAE - if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL) - return true; -#else - if (fs == FS_L1_PERM || fs == FS_L2_PERM) - return true; -#endif - return false; -} - #ifdef CONFIG_CPU_TTBR0_PAN static inline bool ttbr0_usermode_access_allowed(struct pt_regs *regs) { @@ -260,6 +259,37 @@ static inline bool ttbr0_usermode_access_allowed(struct pt_regs *regs) #endif static int __kprobes +do_kernel_address_page_fault(struct mm_struct *mm, unsigned long addr, + unsigned int fsr, struct pt_regs *regs) +{ + if (user_mode(regs)) { + /* + * Fault from user mode for a kernel space address. User mode + * should not be faulting in kernel space, which includes the + * vector/khelper page. Handle the branch predictor hardening + * while interrupts are still disabled, then send a SIGSEGV. + */ + harden_branch_predictor(); + __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs); + } else { + /* + * Fault from kernel mode. Enable interrupts if they were + * enabled in the parent context. Section (upper page table) + * translation faults are handled via do_translation_fault(), + * so we will only get here for a non-present kernel space + * PTE or PTE permission fault. This may happen in exceptional + * circumstances and need the fixup tables to be walked. + */ + if (interrupts_enabled(regs)) + local_irq_enable(); + + __do_kernel_fault(mm, addr, fsr, regs); + } + + return 0; +} + +static int __kprobes do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { struct mm_struct *mm = current->mm; @@ -272,6 +302,12 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (kprobe_page_fault(regs, fsr)) return 0; + /* + * Handle kernel addresses faults separately, which avoids touching + * the mmap lock from contexts that are not able to sleep. + */ + if (addr >= TASK_SIZE) + return do_kernel_address_page_fault(mm, addr, fsr, regs); /* Enable interrupts if they were enabled in the parent context. */ if (interrupts_enabled(regs)) @@ -448,16 +484,20 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * We enter here because the first level page table doesn't contain * a valid entry for the address. * - * If the address is in kernel space (>= TASK_SIZE), then we are - * probably faulting in the vmalloc() area. + * If this is a user address (addr < TASK_SIZE), we handle this as a + * normal page fault. This leaves the remainder of the function to handle + * kernel address translation faults. + * + * Since user mode is not permitted to access kernel addresses, pass these + * directly to do_kernel_address_page_fault() to handle. * - * If the init_task's first level page tables contains the relevant - * entry, we copy the it to this task. If not, we send the process - * a signal, fixup the exception, or oops the kernel. + * Otherwise, we're probably faulting in the vmalloc() area, so try to fix + * that up. Note that we must not take any locks or enable interrupts in + * this case. * - * NOTE! We MUST NOT take any locks for this case. We may be in an - * interrupt or a critical region, and should only copy the information - * from the master page table, nothing more. + * If vmalloc() fixup fails, that means the non-leaf page tables did not + * contain an entry for this address, so handle this via + * do_kernel_address_page_fault(). */ #ifdef CONFIG_MMU static int __kprobes @@ -523,7 +563,8 @@ do_translation_fault(unsigned long addr, unsigned int fsr, return 0; bad_area: - do_bad_area(addr, fsr, regs); + do_kernel_address_page_fault(current->mm, addr, fsr, regs); + return 0; } #else /* CONFIG_MMU */ @@ -543,7 +584,16 @@ do_translation_fault(unsigned long addr, unsigned int fsr, static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { + /* + * If this is a kernel address, but from user mode, then userspace + * is trying bad stuff. Invoke the branch predictor handling. + * Interrupts are disabled here. + */ + if (addr >= TASK_SIZE && user_mode(regs)) + harden_branch_predictor(); + do_bad_area(addr, fsr, regs); + return 0; } #endif /* CONFIG_ARM_LPAE */ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 938e5df75b2d..0e5fad5f06ca 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -238,6 +238,7 @@ config S390 select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_PREEMPT_DYNAMIC_KEY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE @@ -254,6 +255,7 @@ config S390 select HOTPLUG_SMT select IOMMU_HELPER if PCI select IOMMU_SUPPORT if PCI + select IRQ_MSI_LIB if PCI select KASAN_VMALLOC if KASAN select LOCK_MM_AND_FIND_VMA select MMU_GATHER_MERGE_VMAS diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index fbe64ffdfb96..7d6cc4c85af0 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -244,22 +244,10 @@ static void *boot_crst_alloc(unsigned long val) static pte_t *boot_pte_alloc(void) { - static void *pte_leftover; pte_t *pte; - /* - * handling pte_leftovers this way helps to avoid memory fragmentation - * during POPULATE_KASAN_MAP_SHADOW when EDAT is off - */ - if (!pte_leftover) { - pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE); - pte = pte_leftover + _PAGE_TABLE_SIZE; - __arch_set_page_dat(pte, 1); - } else { - pte = pte_leftover; - pte_leftover = NULL; - } - + pte = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE); + __arch_set_page_dat(pte, 1); memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); return pte; } diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index acb4b13d98c5..ee9221bb5d18 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -4,11 +4,14 @@ #include <linux/stringify.h> +#ifdef CONFIG_BUG + #ifndef CONFIG_DEBUG_BUGVERBOSE #define _BUGVERBOSE_LOCATION(file, line) #else #define __BUGVERBOSE_LOCATION(file, line) \ .pushsection .rodata.str, "aMS", @progbits, 1; \ + .align 2; \ 10002: .ascii file "\0"; \ .popsection; \ \ @@ -52,6 +55,8 @@ do { \ #define HAVE_ARCH_BUG +#endif /* CONFIG_BUG */ + #include <asm-generic/bug.h> #endif /* _ASM_S390_BUG_H */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 9240a363c893..c1d63b613bf9 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -166,6 +166,8 @@ static inline int page_reset_referenced(unsigned long addr) return CC_TRANSFORM(cc); } +int split_pud_page(pud_t *pudp, unsigned long addr); + /* Bits int the storage key */ #define _PAGE_CHANGED 0x02 /* HW changed bit */ #define _PAGE_REFERENCED 0x04 /* HW referenced bit */ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index a32f465ecf73..c0ff19dab580 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -5,6 +5,7 @@ #include <linux/pci.h> #include <linux/mutex.h> #include <linux/iommu.h> +#include <linux/irqdomain.h> #include <linux/pci_hotplug.h> #include <asm/pci_clp.h> #include <asm/pci_debug.h> @@ -109,6 +110,7 @@ struct zpci_bus { struct list_head resources; struct list_head bus_next; struct resource bus_resource; + struct irq_domain *msi_parent_domain; int topo; /* TID if topo_is_tid, PCHID otherwise */ int domain_nr; u8 multifunction : 1; @@ -310,6 +312,9 @@ int zpci_dma_exit_device(struct zpci_dev *zdev); /* IRQ */ int __init zpci_irq_init(void); void __init zpci_irq_exit(void); +int zpci_set_irq(struct zpci_dev *zdev); +int zpci_create_parent_msi_domain(struct zpci_bus *zbus); +void zpci_remove_parent_msi_domain(struct zpci_bus *zbus); /* FMB */ int zpci_fmb_enable_device(struct zpci_dev *); diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c index 549f14ad08af..d41b19925a5a 100644 --- a/arch/s390/mm/gmap_helpers.c +++ b/arch/s390/mm/gmap_helpers.c @@ -47,6 +47,7 @@ static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry) void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) { struct vm_area_struct *vma; + unsigned long pgstev; spinlock_t *ptl; pgste_t pgste; pte_t *ptep; @@ -65,9 +66,13 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) if (pte_swap(*ptep)) { preempt_disable(); pgste = pgste_get_lock(ptep); + pgstev = pgste_val(pgste); - ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep)); - pte_clear(mm, vmaddr, ptep); + if ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || + (pgstev & _PGSTE_GPS_ZERO)) { + ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep)); + pte_clear(mm, vmaddr, ptep); + } pgste_set_unlock(ptep, pgste); preempt_enable(); diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 3042647c9dbf..d3ce04a4b248 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -204,7 +204,7 @@ static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end, return rc; } -static int split_pud_page(pud_t *pudp, unsigned long addr) +int split_pud_page(pud_t *pudp, unsigned long addr) { unsigned long pmd_addr, prot; pmd_t *pm_dir, *pmdp; diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index d96587b84e81..eeadff45e0e1 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -330,10 +330,14 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, if (pud_leaf(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && IS_ALIGNED(next, PUD_SIZE)) { + if (!direct) + vmem_free_pages(pud_deref(*pud), get_order(PUD_SIZE), altmap); pud_clear(pud); pages++; + continue; + } else { + split_pud_page(pud, addr & PUD_MASK); } - continue; } } else if (pud_none(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && @@ -433,9 +437,15 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add, if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end))) return -EINVAL; - /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ + /* Don't mess with any tables not fully in 1:1 mapping, vmemmap & kasan area */ +#ifdef CONFIG_KASAN + if (WARN_ON_ONCE(!(start >= KASAN_SHADOW_START && end <= KASAN_SHADOW_END) && + end > __abs_lowcore)) + return -EINVAL; +#else if (WARN_ON_ONCE(end > __abs_lowcore)) return -EINVAL; +#endif for (addr = start; addr < end; addr = next) { next = pgd_addr_end(addr, end); pgd = pgd_offset_k(addr); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 93d2c9c780fc..5a6ace9d875a 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -708,6 +708,12 @@ int zpci_reenable_device(struct zpci_dev *zdev) if (rc) return rc; + if (zdev->msi_nr_irqs > 0) { + rc = zpci_set_irq(zdev); + if (rc) + return rc; + } + rc = zpci_iommu_register_ioat(zdev, &status); if (rc) zpci_disable_device(zdev); diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 72adc8f6e94f..66c4bd888b29 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -14,6 +14,7 @@ #include <linux/err.h> #include <linux/delay.h> #include <linux/seq_file.h> +#include <linux/irqdomain.h> #include <linux/jump_label.h> #include <linux/pci.h> #include <linux/printk.h> @@ -198,19 +199,27 @@ static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *fr, s zbus->multifunction = zpci_bus_is_multifunction_root(fr); zbus->max_bus_speed = fr->max_bus_speed; + if (zpci_create_parent_msi_domain(zbus)) + goto out_free_domain; + /* * Note that the zbus->resources are taken over and zbus->resources * is empty after a successful call */ bus = pci_create_root_bus(NULL, ZPCI_BUS_NR, ops, zbus, &zbus->resources); - if (!bus) { - zpci_free_domain(zbus->domain_nr); - return -EFAULT; - } + if (!bus) + goto out_remove_msi_domain; zbus->bus = bus; + dev_set_msi_domain(&zbus->bus->dev, zbus->msi_parent_domain); return 0; + +out_remove_msi_domain: + zpci_remove_parent_msi_domain(zbus); +out_free_domain: + zpci_free_domain(zbus->domain_nr); + return -ENOMEM; } static void zpci_bus_release(struct kref *kref) @@ -231,6 +240,7 @@ static void zpci_bus_release(struct kref *kref) mutex_lock(&zbus_list_lock); list_del(&zbus->bus_next); mutex_unlock(&zbus_list_lock); + zpci_remove_parent_msi_domain(zbus); kfree(zbus); } diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 2a06df8c2498..e9dd45f3c09d 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -6,6 +6,7 @@ #include <linux/kernel_stat.h> #include <linux/pci.h> #include <linux/msi.h> +#include <linux/irqchip/irq-msi-lib.h> #include <linux/smp.h> #include <asm/isc.h> @@ -97,7 +98,7 @@ static int zpci_clear_directed_irq(struct zpci_dev *zdev) } /* Register adapter interruptions */ -static int zpci_set_irq(struct zpci_dev *zdev) +int zpci_set_irq(struct zpci_dev *zdev) { int rc; @@ -125,27 +126,53 @@ static int zpci_clear_irq(struct zpci_dev *zdev) static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *dest, bool force) { - struct msi_desc *entry = irq_data_get_msi_desc(data); - struct msi_msg msg = entry->msg; - int cpu_addr = smp_cpu_get_cpu_address(cpumask_first(dest)); + irq_data_update_affinity(data, dest); + return IRQ_SET_MASK_OK; +} - msg.address_lo &= 0xff0000ff; - msg.address_lo |= (cpu_addr << 8); - pci_write_msi_msg(data->irq, &msg); +/* + * Encode the hwirq number for the parent domain. The encoding must be unique + * for each IRQ of each device in the parent domain, so it uses the devfn to + * identify the device and the msi_index to identify the IRQ within that device. + */ +static inline u32 zpci_encode_hwirq(u8 devfn, u16 msi_index) +{ + return (devfn << 16) | msi_index; +} - return IRQ_SET_MASK_OK; +static inline u16 zpci_decode_hwirq_msi_index(irq_hw_number_t hwirq) +{ + return hwirq & 0xffff; +} + +static void zpci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) +{ + struct msi_desc *desc = irq_data_get_msi_desc(data); + struct zpci_dev *zdev = to_zpci_dev(desc->dev); + + if (irq_delivery == DIRECTED) { + int cpu = cpumask_first(irq_data_get_affinity_mask(data)); + + msg->address_lo = zdev->msi_addr & 0xff0000ff; + msg->address_lo |= (smp_cpu_get_cpu_address(cpu) << 8); + } else { + msg->address_lo = zdev->msi_addr & 0xffffffff; + } + msg->address_hi = zdev->msi_addr >> 32; + msg->data = zpci_decode_hwirq_msi_index(data->hwirq); } static struct irq_chip zpci_irq_chip = { .name = "PCI-MSI", - .irq_unmask = pci_msi_unmask_irq, - .irq_mask = pci_msi_mask_irq, + .irq_compose_msi_msg = zpci_compose_msi_msg, }; static void zpci_handle_cpu_local_irq(bool rescan) { struct airq_iv *dibv = zpci_ibv[smp_processor_id()]; union zpci_sic_iib iib = {{0}}; + struct irq_domain *msi_domain; + irq_hw_number_t hwirq; unsigned long bit; int irqs_on = 0; @@ -163,7 +190,9 @@ static void zpci_handle_cpu_local_irq(bool rescan) continue; } inc_irq_stat(IRQIO_MSI); - generic_handle_irq(airq_iv_get_data(dibv, bit)); + hwirq = airq_iv_get_data(dibv, bit); + msi_domain = (struct irq_domain *)airq_iv_get_ptr(dibv, bit); + generic_handle_domain_irq(msi_domain, hwirq); } } @@ -228,6 +257,8 @@ static void zpci_floating_irq_handler(struct airq_struct *airq, struct tpi_info *tpi_info) { union zpci_sic_iib iib = {{0}}; + struct irq_domain *msi_domain; + irq_hw_number_t hwirq; unsigned long si, ai; struct airq_iv *aibv; int irqs_on = 0; @@ -255,7 +286,9 @@ static void zpci_floating_irq_handler(struct airq_struct *airq, break; inc_irq_stat(IRQIO_MSI); airq_iv_lock(aibv, ai); - generic_handle_irq(airq_iv_get_data(aibv, ai)); + hwirq = airq_iv_get_data(aibv, ai); + msi_domain = (struct irq_domain *)airq_iv_get_ptr(aibv, ai); + generic_handle_domain_irq(msi_domain, hwirq); airq_iv_unlock(aibv, ai); } } @@ -277,7 +310,9 @@ static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs, zdev->aisb = *bit; /* Create adapter interrupt vector */ - zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL); + zdev->aibv = airq_iv_create(msi_vecs, + AIRQ_IV_PTR | AIRQ_IV_DATA | AIRQ_IV_BITLOCK, + NULL); if (!zdev->aibv) return -ENOMEM; @@ -289,146 +324,220 @@ static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs, return 0; } -int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +bool arch_restore_msi_irqs(struct pci_dev *pdev) { - unsigned int hwirq, msi_vecs, irqs_per_msi, i, cpu; struct zpci_dev *zdev = to_zpci(pdev); - struct msi_desc *msi; - struct msi_msg msg; - unsigned long bit; - int cpu_addr; - int rc, irq; + zpci_set_irq(zdev); + return true; +} + +static struct airq_struct zpci_airq = { + .handler = zpci_floating_irq_handler, + .isc = PCI_ISC, +}; + +static void zpci_msi_teardown_directed(struct zpci_dev *zdev) +{ + airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->max_msi); + zdev->msi_first_bit = -1U; + zdev->msi_nr_irqs = 0; +} + +static void zpci_msi_teardown_floating(struct zpci_dev *zdev) +{ + airq_iv_release(zdev->aibv); + zdev->aibv = NULL; + airq_iv_free_bit(zpci_sbv, zdev->aisb); zdev->aisb = -1UL; zdev->msi_first_bit = -1U; + zdev->msi_nr_irqs = 0; +} + +static void zpci_msi_teardown(struct irq_domain *domain, msi_alloc_info_t *arg) +{ + struct zpci_dev *zdev = to_zpci_dev(domain->dev); + + zpci_clear_irq(zdev); + if (irq_delivery == DIRECTED) + zpci_msi_teardown_directed(zdev); + else + zpci_msi_teardown_floating(zdev); +} + +static int zpci_msi_prepare(struct irq_domain *domain, + struct device *dev, int nvec, + msi_alloc_info_t *info) +{ + struct zpci_dev *zdev = to_zpci_dev(dev); + struct pci_dev *pdev = to_pci_dev(dev); + unsigned long bit; + int msi_vecs, rc; msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); if (msi_vecs < nvec) { - pr_info("%s requested %d irqs, allocate system limit of %d", + pr_info("%s requested %d IRQs, allocate system limit of %d\n", pci_name(pdev), nvec, zdev->max_msi); } rc = __alloc_airq(zdev, msi_vecs, &bit); - if (rc < 0) + if (rc) { + pr_err("Allocating adapter IRQs for %s failed\n", pci_name(pdev)); return rc; + } - /* - * Request MSI interrupts: - * When using MSI, nvec_used interrupt sources and their irq - * descriptors are controlled through one msi descriptor. - * Thus the outer loop over msi descriptors shall run only once, - * while two inner loops iterate over the interrupt vectors. - * When using MSI-X, each interrupt vector/irq descriptor - * is bound to exactly one msi descriptor (nvec_used is one). - * So the inner loops are executed once, while the outer iterates - * over the MSI-X descriptors. - */ - hwirq = bit; - msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) { - if (hwirq - bit >= msi_vecs) - break; - irqs_per_msi = min_t(unsigned int, msi_vecs, msi->nvec_used); - irq = __irq_alloc_descs(-1, 0, irqs_per_msi, 0, THIS_MODULE, - (irq_delivery == DIRECTED) ? - msi->affinity : NULL); - if (irq < 0) - return -ENOMEM; + zdev->msi_first_bit = bit; + zdev->msi_nr_irqs = msi_vecs; + rc = zpci_set_irq(zdev); + if (rc) { + pr_err("Registering adapter IRQs for %s failed\n", + pci_name(pdev)); + + if (irq_delivery == DIRECTED) + zpci_msi_teardown_directed(zdev); + else + zpci_msi_teardown_floating(zdev); + return rc; + } + return 0; +} - for (i = 0; i < irqs_per_msi; i++) { - rc = irq_set_msi_desc_off(irq, i, msi); - if (rc) - return rc; - irq_set_chip_and_handler(irq + i, &zpci_irq_chip, - handle_percpu_irq); - } +static int zpci_msi_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *args) +{ + struct msi_desc *desc = ((msi_alloc_info_t *)args)->desc; + struct zpci_dev *zdev = to_zpci_dev(desc->dev); + struct zpci_bus *zbus = zdev->zbus; + unsigned int cpu, hwirq; + unsigned long bit; + int i; - msg.data = hwirq - bit; - if (irq_delivery == DIRECTED) { - if (msi->affinity) - cpu = cpumask_first(&msi->affinity->mask); - else - cpu = 0; - cpu_addr = smp_cpu_get_cpu_address(cpu); + bit = zdev->msi_first_bit + desc->msi_index; + hwirq = zpci_encode_hwirq(zdev->devfn, desc->msi_index); - msg.address_lo = zdev->msi_addr & 0xff0000ff; - msg.address_lo |= (cpu_addr << 8); + if (desc->msi_index + nr_irqs > zdev->max_msi) + return -EINVAL; + for (i = 0; i < nr_irqs; i++) { + irq_domain_set_info(domain, virq + i, hwirq + i, + &zpci_irq_chip, zdev, + handle_percpu_irq, NULL, NULL); + + if (irq_delivery == DIRECTED) { for_each_possible_cpu(cpu) { - for (i = 0; i < irqs_per_msi; i++) - airq_iv_set_data(zpci_ibv[cpu], - hwirq + i, irq + i); + airq_iv_set_ptr(zpci_ibv[cpu], bit + i, + (unsigned long)zbus->msi_parent_domain); + airq_iv_set_data(zpci_ibv[cpu], bit + i, hwirq + i); } } else { - msg.address_lo = zdev->msi_addr & 0xffffffff; - for (i = 0; i < irqs_per_msi; i++) - airq_iv_set_data(zdev->aibv, hwirq + i, irq + i); + airq_iv_set_ptr(zdev->aibv, bit + i, + (unsigned long)zbus->msi_parent_domain); + airq_iv_set_data(zdev->aibv, bit + i, hwirq + i); } - msg.address_hi = zdev->msi_addr >> 32; - pci_write_msi_msg(irq, &msg); - hwirq += irqs_per_msi; } - zdev->msi_first_bit = bit; - zdev->msi_nr_irqs = hwirq - bit; - - rc = zpci_set_irq(zdev); - if (rc) - return rc; - - return (zdev->msi_nr_irqs == nvec) ? 0 : zdev->msi_nr_irqs; + return 0; } -void arch_teardown_msi_irqs(struct pci_dev *pdev) +static void zpci_msi_clear_airq(struct irq_data *d, int i) { - struct zpci_dev *zdev = to_zpci(pdev); - struct msi_desc *msi; - unsigned int i; - int rc; + struct msi_desc *desc = irq_data_get_msi_desc(d); + struct zpci_dev *zdev = to_zpci_dev(desc->dev); + unsigned long bit; + unsigned int cpu; + u16 msi_index; - /* Disable interrupts */ - rc = zpci_clear_irq(zdev); - if (rc) - return; + msi_index = zpci_decode_hwirq_msi_index(d->hwirq); + bit = zdev->msi_first_bit + msi_index; - /* Release MSI interrupts */ - msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) { - for (i = 0; i < msi->nvec_used; i++) { - irq_set_msi_desc(msi->irq + i, NULL); - irq_free_desc(msi->irq + i); + if (irq_delivery == DIRECTED) { + for_each_possible_cpu(cpu) { + airq_iv_set_ptr(zpci_ibv[cpu], bit + i, 0); + airq_iv_set_data(zpci_ibv[cpu], bit + i, 0); } - msi->msg.address_lo = 0; - msi->msg.address_hi = 0; - msi->msg.data = 0; - msi->irq = 0; + } else { + airq_iv_set_ptr(zdev->aibv, bit + i, 0); + airq_iv_set_data(zdev->aibv, bit + i, 0); } +} - if (zdev->aisb != -1UL) { - zpci_ibv[zdev->aisb] = NULL; - airq_iv_free_bit(zpci_sbv, zdev->aisb); - zdev->aisb = -1UL; - } - if (zdev->aibv) { - airq_iv_release(zdev->aibv); - zdev->aibv = NULL; - } +static void zpci_msi_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *d; + int i; - if ((irq_delivery == DIRECTED) && zdev->msi_first_bit != -1U) - airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->msi_nr_irqs); + for (i = 0; i < nr_irqs; i++) { + d = irq_domain_get_irq_data(domain, virq + i); + zpci_msi_clear_airq(d, i); + irq_domain_reset_irq_data(d); + } } -bool arch_restore_msi_irqs(struct pci_dev *pdev) +static const struct irq_domain_ops zpci_msi_domain_ops = { + .alloc = zpci_msi_domain_alloc, + .free = zpci_msi_domain_free, +}; + +static bool zpci_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *real_parent, + struct msi_domain_info *info) { - struct zpci_dev *zdev = to_zpci(pdev); + if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) + return false; + + info->ops->msi_prepare = zpci_msi_prepare; + info->ops->msi_teardown = zpci_msi_teardown; - zpci_set_irq(zdev); return true; } -static struct airq_struct zpci_airq = { - .handler = zpci_floating_irq_handler, - .isc = PCI_ISC, +static struct msi_parent_ops zpci_msi_parent_ops = { + .supported_flags = MSI_GENERIC_FLAGS_MASK | + MSI_FLAG_PCI_MSIX | + MSI_FLAG_MULTI_PCI_MSI, + .required_flags = MSI_FLAG_USE_DEF_DOM_OPS | + MSI_FLAG_USE_DEF_CHIP_OPS, + .init_dev_msi_info = zpci_init_dev_msi_info, }; +int zpci_create_parent_msi_domain(struct zpci_bus *zbus) +{ + char fwnode_name[18]; + + snprintf(fwnode_name, sizeof(fwnode_name), "ZPCI_MSI_DOM_%04x", zbus->domain_nr); + struct irq_domain_info info = { + .fwnode = irq_domain_alloc_named_fwnode(fwnode_name), + .ops = &zpci_msi_domain_ops, + }; + + if (!info.fwnode) { + pr_err("Failed to allocate fwnode for MSI IRQ domain\n"); + return -ENOMEM; + } + + if (irq_delivery == FLOATING) + zpci_msi_parent_ops.required_flags |= MSI_FLAG_NO_AFFINITY; + + zbus->msi_parent_domain = msi_create_parent_irq_domain(&info, &zpci_msi_parent_ops); + if (!zbus->msi_parent_domain) { + irq_domain_free_fwnode(info.fwnode); + pr_err("Failed to create MSI IRQ domain\n"); + return -ENOMEM; + } + + return 0; +} + +void zpci_remove_parent_msi_domain(struct zpci_bus *zbus) +{ + struct fwnode_handle *fn; + + fn = zbus->msi_parent_domain->fwnode; + irq_domain_remove(zbus->msi_parent_domain); + irq_domain_free_fwnode(fn); +} + static void __init cpu_enable_directed_irq(void *unused) { union zpci_sic_iib iib = {{0}}; @@ -465,6 +574,7 @@ static int __init zpci_directed_irq_init(void) * is only done on the first vector. */ zpci_ibv[cpu] = airq_iv_create(cache_line_size() * BITS_PER_BYTE, + AIRQ_IV_PTR | AIRQ_IV_DATA | AIRQ_IV_CACHELINE | (!cpu ? AIRQ_IV_ALLOC : 0), NULL); |
