Diffstat (limited to 'arch/um/kernel')
-rw-r--r--  arch/um/kernel/Makefile        |   1
-rw-r--r--  arch/um/kernel/asm-offsets.c   |  44
-rw-r--r--  arch/um/kernel/irq.c           |  27
-rw-r--r--  arch/um/kernel/kmsg_dump.c     |   2
-rw-r--r--  arch/um/kernel/ksyms.c         |   2
-rw-r--r--  arch/um/kernel/mem.c           | 111
-rw-r--r--  arch/um/kernel/physmem.c       |  71
-rw-r--r--  arch/um/kernel/process.c       |  18
-rw-r--r--  arch/um/kernel/skas/mmu.c      |  33
-rw-r--r--  arch/um/kernel/skas/process.c  |  19
-rw-r--r--  arch/um/kernel/smp.c           | 242
-rw-r--r--  arch/um/kernel/time.c          |  58
-rw-r--r--  arch/um/kernel/tlb.c           |   5
-rw-r--r--  arch/um/kernel/trap.c          |   2
-rw-r--r--  arch/um/kernel/um_arch.c       |  49
15 files changed, 445 insertions(+), 239 deletions(-)
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index b8f4e9281599..be60bc451b3f 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_GPROF) += gprof_syms.o
 obj-$(CONFIG_OF) += dtb.o
 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
+obj-$(CONFIG_SMP) += smp.o
 
 USER_OBJS := config.o
diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c
index a69873aa697f..d38447e39d5e 100644
--- a/arch/um/kernel/asm-offsets.c
+++ b/arch/um/kernel/asm-offsets.c
@@ -1,3 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #define COMPILE_OFFSETS
 
+#include <linux/stddef.h>
+#include <linux/sched.h>
+#include <linux/elf.h>
+#include <linux/crypto.h>
+#include <linux/kbuild.h>
+#include <linux/audit.h>
+#include <linux/fs.h>
+#include <asm/mman.h>
+#include <asm/seccomp.h>
-#include <sysdep/kernel-offsets.h>
+
+/* workaround for a warning with -Wmissing-prototypes */
+void foo(void);
+
+void foo(void)
+{
+	DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE);
+
+	DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE);
+	DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK);
+	DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT);
+
+	DEFINE(UM_GFP_KERNEL, GFP_KERNEL);
+	DEFINE(UM_GFP_ATOMIC, GFP_ATOMIC);
+
+	DEFINE(UM_THREAD_SIZE, THREAD_SIZE);
+
+	DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
+	DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC);
+
+	DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES);
+
+	DEFINE(UM_SECCOMP_ARCH_NATIVE, SECCOMP_ARCH_NATIVE);
+
+	DEFINE(HOSTFS_ATTR_MODE, ATTR_MODE);
+	DEFINE(HOSTFS_ATTR_UID, ATTR_UID);
+	DEFINE(HOSTFS_ATTR_GID, ATTR_GID);
+	DEFINE(HOSTFS_ATTR_SIZE, ATTR_SIZE);
+	DEFINE(HOSTFS_ATTR_ATIME, ATTR_ATIME);
+	DEFINE(HOSTFS_ATTR_MTIME, ATTR_MTIME);
+	DEFINE(HOSTFS_ATTR_CTIME, ATTR_CTIME);
+	DEFINE(HOSTFS_ATTR_ATIME_SET, ATTR_ATIME_SET);
+	DEFINE(HOSTFS_ATTR_MTIME_SET, ATTR_MTIME_SET);
+}
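A note on the DEFINE() calls in the new asm-offsets.c: DEFINE() comes from <linux/kbuild.h> and defines nothing at the C level; it plants a marker string in the compiled assembly, which the kbuild scripts scrape into include/generated/asm-offsets.h so that UML's userspace (os-Linux) side can use kernel constants without including kernel headers. A minimal self-contained sketch of the mechanism (the constant name and value below are illustrative, not from this patch):

    /* The macro as it appears in include/linux/kbuild.h: emit an
     * "->SYM <value>" marker into the assembler output. */
    #define DEFINE(sym, val) \
            asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))

    void foo(void);
    void foo(void)
    {
            /* Compile with "gcc -S"; the .s file then carries a marker
             * that the build scripts rewrite into a #define such as:
             *     #define EXAMPLE_PAGE_SIZE 4096 */
            DEFINE(EXAMPLE_PAGE_SIZE, 4096);
    }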
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index d69d137a0334..f4b13f15a9c1 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -22,6 +22,9 @@
 #include <irq_kern.h>
 #include <linux/time-internal.h>
 
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+
+#define irq_stats(x)	(&per_cpu(irq_stat, x))
 
 /* When epoll triggers we do not know why it did so
  * we can also have different IRQs for read and write.
@@ -683,7 +686,7 @@ void __init init_IRQ(void)
 {
 	int i;
 
-	irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq);
+	irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_percpu_irq);
 
 	for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++)
 		irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
@@ -701,3 +704,25 @@ void sigchld_handler(int sig, struct siginfo *unused_si,
 {
 	do_IRQ(SIGCHLD_IRQ, regs);
 }
+
+/*
+ * /proc/interrupts printing for arch specific interrupts
+ */
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+#if IS_ENABLED(CONFIG_SMP)
+	int cpu;
+
+	seq_printf(p, "%*s: ", prec, "RES");
+	for_each_online_cpu(cpu)
+		seq_printf(p, "%10u ", irq_stats(cpu)->irq_resched_count);
+	seq_puts(p, "  Rescheduling interrupts\n");
+
+	seq_printf(p, "%*s: ", prec, "CAL");
+	for_each_online_cpu(cpu)
+		seq_printf(p, "%10u ", irq_stats(cpu)->irq_call_count);
+	seq_puts(p, "  Function call interrupts\n");
+#endif
+
+	return 0;
+}
diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c
index 419021175272..fc0f543d1d8e 100644
--- a/arch/um/kernel/kmsg_dump.c
+++ b/arch/um/kernel/kmsg_dump.c
@@ -31,7 +31,7 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
 		 * expected to output the crash information.
 		 */
 		if (strcmp(con->name, "ttynull") != 0 &&
-		    (console_srcu_read_flags(con) & CON_ENABLED)) {
+		    console_is_usable(con, console_srcu_read_flags(con), true)) {
 			break;
 		}
 	}
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index f2fb77da08cf..96314c31e61c 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -6,8 +6,8 @@
 #include <linux/module.h>
 #include <os.h>
 
+EXPORT_SYMBOL(um_get_signals);
 EXPORT_SYMBOL(um_set_signals);
-EXPORT_SYMBOL(signals_enabled);
 
 EXPORT_SYMBOL(os_stat_fd);
 EXPORT_SYMBOL(os_stat_file);
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 32e3b1972dc1..39c4a7e21c6f 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -71,7 +71,7 @@ void __init arch_mm_preinit(void)
 	/* Map in the area just after the brk now that kmalloc is about
 	 * to be turned on.
 	 */
-	brk_end = (unsigned long) UML_ROUND_UP(sbrk(0));
+	brk_end = PAGE_ALIGN((unsigned long) sbrk(0));
 	map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0);
 	memblock_free((void *)brk_end, uml_reserved - brk_end);
 	uml_reserved = brk_end;
@@ -84,109 +84,6 @@ void __init mem_init(void)
 	kmalloc_ok = 1;
 }
 
-#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA)
-/*
- * Create a page table and place a pointer to it in a middle page
- * directory entry.
- */
-static void __init one_page_table_init(pmd_t *pmd)
-{
-	if (pmd_none(*pmd)) {
-		pte_t *pte = (pte_t *) memblock_alloc_low(PAGE_SIZE,
-							  PAGE_SIZE);
-		if (!pte)
-			panic("%s: Failed to allocate %lu bytes align=%lx\n",
-			      __func__, PAGE_SIZE, PAGE_SIZE);
-
-		set_pmd(pmd, __pmd(_KERNPG_TABLE +
-				   (unsigned long) __pa(pte)));
-		BUG_ON(pte != pte_offset_kernel(pmd, 0));
-	}
-}
-
-static void __init one_md_table_init(pud_t *pud)
-{
-#if CONFIG_PGTABLE_LEVELS > 2
-	pmd_t *pmd_table = (pmd_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-	if (!pmd_table)
-		panic("%s: Failed to allocate %lu bytes align=%lx\n",
-		      __func__, PAGE_SIZE, PAGE_SIZE);
-
-	set_pud(pud, __pud(_KERNPG_TABLE + (unsigned long) __pa(pmd_table)));
-	BUG_ON(pmd_table != pmd_offset(pud, 0));
-#endif
-}
-
-static void __init one_ud_table_init(p4d_t *p4d)
-{
-#if CONFIG_PGTABLE_LEVELS > 3
-	pud_t *pud_table = (pud_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-	if (!pud_table)
-		panic("%s: Failed to allocate %lu bytes align=%lx\n",
-		      __func__, PAGE_SIZE, PAGE_SIZE);
-
-	set_p4d(p4d, __p4d(_KERNPG_TABLE + (unsigned long) __pa(pud_table)));
-	BUG_ON(pud_table != pud_offset(p4d, 0));
-#endif
-}
-
-static void __init fixrange_init(unsigned long start, unsigned long end,
-				 pgd_t *pgd_base)
-{
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-	int i, j;
-	unsigned long vaddr;
-
-	vaddr = start;
-	i = pgd_index(vaddr);
-	j = pmd_index(vaddr);
-	pgd = pgd_base + i;
-
-	for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
-		p4d = p4d_offset(pgd, vaddr);
-		if (p4d_none(*p4d))
-			one_ud_table_init(p4d);
-		pud = pud_offset(p4d, vaddr);
-		if (pud_none(*pud))
-			one_md_table_init(pud);
-		pmd = pmd_offset(pud, vaddr);
-		for (; (j < PTRS_PER_PMD) && (vaddr < end); pmd++, j++) {
-			one_page_table_init(pmd);
-			vaddr += PMD_SIZE;
-		}
-		j = 0;
-	}
-}
-
-static void __init fixaddr_user_init( void)
-{
-	long size = FIXADDR_USER_END - FIXADDR_USER_START;
-	pte_t *pte;
-	phys_t p;
-	unsigned long v, vaddr = FIXADDR_USER_START;
-
-	if (!size)
-		return;
-
-	fixrange_init( FIXADDR_USER_START, FIXADDR_USER_END, swapper_pg_dir);
-	v = (unsigned long) memblock_alloc_low(size, PAGE_SIZE);
-	if (!v)
-		panic("%s: Failed to allocate %lu bytes align=%lx\n",
-		      __func__, size, PAGE_SIZE);
-
-	memcpy((void *) v , (void *) FIXADDR_USER_START, size);
-	p = __pa(v);
-	for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE,
-		      p += PAGE_SIZE) {
-		pte = virt_to_kpte(vaddr);
-		pte_set_val(*pte, p, PAGE_READONLY);
-	}
-}
-#endif
-
 void __init paging_init(void)
 {
 	unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
@@ -197,12 +94,8 @@ void __init paging_init(void)
 		panic("%s: Failed to allocate %lu bytes align=%lx\n",
 		      __func__, PAGE_SIZE, PAGE_SIZE);
 
-	max_zone_pfn[ZONE_NORMAL] = end_iomem >> PAGE_SHIFT;
+	max_zone_pfn[ZONE_NORMAL] = high_physmem >> PAGE_SHIFT;
 	free_area_init(max_zone_pfn);
-
-#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA)
-	fixaddr_user_init();
-#endif
 }
 
 /*
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index af02b5f9911d..ae6ca373c261 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -105,19 +105,6 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out)
 		fd = physmem_fd;
 		*offset_out = phys;
 	}
-	else if (phys < __pa(end_iomem)) {
-		struct iomem_region *region = iomem_regions;
-
-		while (region != NULL) {
-			if ((phys >= region->phys) &&
-			    (phys < region->phys + region->size)) {
-				fd = region->fd;
-				*offset_out = phys - region->phys;
-				break;
-			}
-			region = region->next;
-		}
-	}
 
 	return fd;
 }
@@ -140,61 +127,3 @@ __uml_setup("mem=", uml_mem_setup,
 "    be more, and the excess, if it's ever used, will just be swapped out.\n"
 "        Example: mem=64M\n\n"
 );
-
-__uml_setup("iomem=", parse_iomem,
-"iomem=<name>,<file>\n"
-"    Configure <file> as an IO memory region named <name>.\n\n"
-);
-
-/*
- * This list is constructed in parse_iomem and addresses filled in
- * setup_iomem, both of which run during early boot.  Afterwards, it's
- * unchanged.
- */
-struct iomem_region *iomem_regions;
-
-/* Initialized in parse_iomem and unchanged thereafter */
-int iomem_size;
-
-unsigned long find_iomem(char *driver, unsigned long *len_out)
-{
-	struct iomem_region *region = iomem_regions;
-
-	while (region != NULL) {
-		if (!strcmp(region->driver, driver)) {
-			*len_out = region->size;
-			return region->virt;
-		}
-
-		region = region->next;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(find_iomem);
-
-static int setup_iomem(void)
-{
-	struct iomem_region *region = iomem_regions;
-	unsigned long iomem_start = high_physmem + PAGE_SIZE;
-	int err;
-
-	while (region != NULL) {
-		err = os_map_memory((void *) iomem_start, region->fd, 0,
-				    region->size, 1, 1, 0);
-		if (err)
-			printk(KERN_ERR "Mapping iomem region for driver '%s' "
-			       "failed, errno = %d\n", region->driver, -err);
-		else {
-			region->virt = iomem_start;
-			region->phys = __pa(region->virt);
-		}
-
-		iomem_start += region->size + PAGE_SIZE;
-		region = region->next;
-	}
-
-	return 0;
-}
-
-__initcall(setup_iomem);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 9c9c66dc45f0..63b38a3f73f7 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -43,7 +43,9 @@
  * cares about its entry, so it's OK if another processor is modifying its
  * entry.
  */
-struct task_struct *cpu_tasks[NR_CPUS];
+struct task_struct *cpu_tasks[NR_CPUS] = {
+	[0 ... NR_CPUS - 1] = &init_task,
+};
 EXPORT_SYMBOL(cpu_tasks);
 
 void free_stack(unsigned long stack, int order)
@@ -185,11 +187,7 @@ int copy_thread(struct task_struct * p, const struct kernel_clone_args *args)
 
 void initial_thread_cb(void (*proc)(void *), void *arg)
 {
-	int save_kmalloc_ok = kmalloc_ok;
-
-	kmalloc_ok = 0;
 	initial_thread_cb_skas(proc, arg);
-	kmalloc_ok = save_kmalloc_ok;
 }
 
 int arch_dup_task_struct(struct task_struct *dst,
@@ -220,11 +218,21 @@ void arch_cpu_idle(void)
 	um_idle_sleep();
 }
 
+void arch_cpu_idle_prepare(void)
+{
+	os_idle_prepare();
+}
+
 int __uml_cant_sleep(void) {
 	return in_atomic() || irqs_disabled() || in_interrupt();
 	/* Is in_interrupt() really needed? */
}
 
+int uml_need_resched(void)
+{
+	return need_resched();
+}
+
 extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end;
 
 void do_uml_exitcalls(void)
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index afe9a2f251ef..00957788591b 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -23,17 +23,36 @@ static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
 static spinlock_t mm_list_lock;
 static struct list_head mm_list;
 
+void enter_turnstile(struct mm_id *mm_id) __acquires(turnstile)
+{
+	struct mm_context *ctx = container_of(mm_id, struct mm_context, id);
+
+	mutex_lock(&ctx->turnstile);
+}
+
+void exit_turnstile(struct mm_id *mm_id) __releases(turnstile)
+{
+	struct mm_context *ctx = container_of(mm_id, struct mm_context, id);
+
+	mutex_unlock(&ctx->turnstile);
+}
+
 int init_new_context(struct task_struct *task, struct mm_struct *mm)
 {
 	struct mm_id *new_id = &mm->context.id;
 	unsigned long stack = 0;
 	int ret = -ENOMEM;
 
+	mutex_init(&mm->context.turnstile);
+	spin_lock_init(&mm->context.sync_tlb_lock);
+
 	stack = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
				 ilog2(STUB_DATA_PAGES));
 	if (stack == 0)
 		goto out;
 
 	new_id->stack = stack;
+	new_id->syscall_data_len = 0;
+	new_id->syscall_fd_num = 0;
 
 	scoped_guard(spinlock_irqsave, &mm_list_lock) {
 		/* Insert into list, used for lookups when the child dies */
@@ -73,6 +92,9 @@ void destroy_context(struct mm_struct *mm)
 		return;
 	}
 
+	scoped_guard(spinlock_irqsave, &mm_list_lock)
+		list_del(&mm->context.list);
+
 	if (mmu->id.pid > 0) {
 		os_kill_ptraced_process(mmu->id.pid, 1);
 		mmu->id.pid = -1;
@@ -82,10 +104,6 @@ void destroy_context(struct mm_struct *mm)
 		os_close_file(mmu->id.sock);
 
 	free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
-
-	guard(spinlock_irqsave)(&mm_list_lock);
-
-	list_del(&mm->context.list);
 }
 
 static irqreturn_t mm_sigchld_irq(int irq, void* dev)
@@ -110,12 +128,11 @@ static irqreturn_t mm_sigchld_irq(int irq, void* dev)
 		/* Marks the MM as dead */
 		mm_context->id.pid = -1;
 
-		/*
-		 * NOTE: If SMP is implemented, a futex_wake
-		 * needs to be added here.
-		 */
 		stub_data = (void *)mm_context->id.stack;
 		stub_data->futex = FUTEX_IN_KERN;
+#if IS_ENABLED(CONFIG_SMP)
+		os_futex_wake(&stub_data->futex);
+#endif
 
 		/*
 		 * NOTE: Currently executing syscalls by
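The os_futex_wake() added above addresses exactly the removed NOTE: with SMP, another CPU can be sleeping in the stub on stub_data->futex while this CPU marks the MM dead, so flipping the futex word to FUTEX_IN_KERN must be followed by a wake. os_futex_wait()/os_futex_wake() are new os-Linux helpers introduced elsewhere in this series; assuming they are thin wrappers around the host's futex(2) syscall (which has no glibc wrapper), the underlying pattern looks roughly like this (the futex_wait/futex_wake names are illustrative):

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <errno.h>
    #include <stdint.h>
    #include <stddef.h>

    /* Sleep while *uaddr still holds 'expected'; EAGAIN means the word
     * already changed, which callers treat as success. */
    static int futex_wait(uint32_t *uaddr, uint32_t expected)
    {
            if (syscall(SYS_futex, uaddr, FUTEX_WAIT, expected,
                        NULL, NULL, 0) == 0 || errno == EAGAIN)
                    return 0;
            return -errno;
    }

    /* Wake at most one thread blocked in FUTEX_WAIT on uaddr. */
    static int futex_wake(uint32_t *uaddr)
    {
            long n = syscall(SYS_futex, uaddr, FUTEX_WAKE, 1,
                             NULL, NULL, 0);
            return n >= 0 ? (int)n : -errno;
    }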
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index 5881b17eb987..4a7673b0261a 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -7,6 +7,7 @@
 #include <linux/sched/mm.h>
 #include <linux/sched/task_stack.h>
 #include <linux/sched/task.h>
+#include <linux/smp-internal.h>
 
 #include <asm/tlbflush.h>
 
@@ -26,12 +27,12 @@ static int __init start_kernel_proc(void *unused)
 	return 0;
 }
 
-static char cpu0_irqstack[THREAD_SIZE] __aligned(THREAD_SIZE);
+char cpu_irqstacks[NR_CPUS][THREAD_SIZE] __aligned(THREAD_SIZE);
 
 int __init start_uml(void)
 {
-	stack_protections((unsigned long) &cpu0_irqstack);
-	set_sigstack(cpu0_irqstack, THREAD_SIZE);
+	stack_protections((unsigned long) &cpu_irqstacks[0]);
+	set_sigstack(cpu_irqstacks[0], THREAD_SIZE);
 
 	init_new_thread_signals();
 
@@ -64,3 +65,15 @@ void current_mm_sync(void)
 
 	um_tlb_sync(current->mm);
 }
+
+static DEFINE_SPINLOCK(initial_jmpbuf_spinlock);
+
+void initial_jmpbuf_lock(void)
+{
+	spin_lock_irq(&initial_jmpbuf_spinlock);
+}
+
+void initial_jmpbuf_unlock(void)
+{
+	spin_unlock_irq(&initial_jmpbuf_spinlock);
+}
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
new file mode 100644
index 000000000000..f1e52b7348fb
--- /dev/null
+++ b/arch/um/kernel/smp.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Ant Group
+ * Author: Tiwei Bie <tiwei.btw@antgroup.com>
+ *
+ * Based on the previous implementation in TT mode
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ */
+
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+#include <linux/module.h>
+#include <linux/processor.h>
+#include <linux/threads.h>
+#include <linux/cpu.h>
+#include <linux/hardirq.h>
+#include <linux/smp.h>
+#include <linux/smp-internal.h>
+#include <init.h>
+#include <kern.h>
+#include <os.h>
+#include <smp.h>
+
+enum {
+	UML_IPI_RES = 0,
+	UML_IPI_CALL_SINGLE,
+	UML_IPI_CALL,
+	UML_IPI_STOP,
+};
+
+void arch_smp_send_reschedule(int cpu)
+{
+	os_send_ipi(cpu, UML_IPI_RES);
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+	os_send_ipi(cpu, UML_IPI_CALL_SINGLE);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	int cpu;
+
+	for_each_cpu(cpu, mask)
+		os_send_ipi(cpu, UML_IPI_CALL);
+}
+
+void smp_send_stop(void)
+{
+	int cpu, me = smp_processor_id();
+
+	for_each_online_cpu(cpu) {
+		if (cpu == me)
+			continue;
+		os_send_ipi(cpu, UML_IPI_STOP);
+	}
+}
+
+static void ipi_handler(int vector, struct uml_pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs);
+	int cpu = raw_smp_processor_id();
+
+	irq_enter();
+
+	if (current->mm)
+		os_alarm_process(current->mm->context.id.pid);
+
+	switch (vector) {
+	case UML_IPI_RES:
+		inc_irq_stat(irq_resched_count);
+		scheduler_ipi();
+		break;
+
+	case UML_IPI_CALL_SINGLE:
+		inc_irq_stat(irq_call_count);
+		generic_smp_call_function_single_interrupt();
+		break;
+
+	case UML_IPI_CALL:
+		inc_irq_stat(irq_call_count);
+		generic_smp_call_function_interrupt();
+		break;
+
+	case UML_IPI_STOP:
+		set_cpu_online(cpu, false);
+		while (1)
+			pause();
+		break;
+
+	default:
+		pr_err("CPU#%d received unknown IPI (vector=%d)!\n", cpu, vector);
+		break;
+	}
+
+	irq_exit();
+	set_irq_regs(old_regs);
+}
+
+void uml_ipi_handler(int vector)
+{
+	struct uml_pt_regs r = { .is_user = 0 };
+
+	preempt_disable();
+	ipi_handler(vector, &r);
+	preempt_enable();
+}
+
+/* AP states used only during CPU startup */
+enum {
+	UML_CPU_PAUSED = 0,
+	UML_CPU_RUNNING,
+};
+
+static int cpu_states[NR_CPUS];
+
+static int start_secondary(void *unused)
+{
+	int err, cpu = raw_smp_processor_id();
+
+	notify_cpu_starting(cpu);
+	set_cpu_online(cpu, true);
+
+	err = um_setup_timer();
+	if (err)
+		panic("CPU#%d failed to setup timer, err = %d", cpu, err);
+
+	local_irq_enable();
+
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+
+	return 0;
+}
+
+void uml_start_secondary(void *opaque)
+{
+	int cpu = raw_smp_processor_id();
+	struct mm_struct *mm = &init_mm;
+	struct task_struct *idle;
+
+	stack_protections((unsigned long) &cpu_irqstacks[cpu]);
+	set_sigstack(&cpu_irqstacks[cpu], THREAD_SIZE);
+
+	set_cpu_present(cpu, true);
+
+	os_futex_wait(&cpu_states[cpu], UML_CPU_PAUSED);
+
+	smp_rmb(); /* paired with smp_wmb() in __cpu_up() */
+
+	idle = cpu_tasks[cpu];
+	idle->thread_info.cpu = cpu;
+
+	mmgrab(mm);
+	idle->active_mm = mm;
+
+	idle->thread.request.thread.proc = start_secondary;
+	idle->thread.request.thread.arg = NULL;
+
+	new_thread(task_stack_page(idle), &idle->thread.switch_buf,
+		   new_thread_handler);
+	os_start_secondary(opaque, &idle->thread.switch_buf);
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	int err, cpu, me = smp_processor_id();
+	unsigned long deadline;
+
+	os_init_smp();
+
+	for_each_possible_cpu(cpu) {
+		if (cpu == me)
+			continue;
+
+		pr_debug("Booting processor %d...\n", cpu);
+
+		err = os_start_cpu_thread(cpu);
+		if (err) {
+			pr_crit("CPU#%d failed to start cpu thread, err = %d",
+				cpu, err);
+			continue;
+		}
+
+		deadline = jiffies + msecs_to_jiffies(1000);
+		spin_until_cond(cpu_present(cpu) ||
+				time_is_before_jiffies(deadline));
+
+		if (!cpu_present(cpu))
+			pr_crit("CPU#%d failed to boot\n", cpu);
+	}
+}
+
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+	cpu_tasks[cpu] = tidle;
+	smp_wmb(); /* paired with smp_rmb() in uml_start_secondary() */
+
+	cpu_states[cpu] = UML_CPU_RUNNING;
+	os_futex_wake(&cpu_states[cpu]);
+
+	spin_until_cond(cpu_online(cpu));
+
+	return 0;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+}
+
+/* Set in uml_ncpus_setup */
+int uml_ncpus = 1;
+
+void __init prefill_possible_map(void)
+{
+	int cpu;
+
+	for (cpu = 0; cpu < uml_ncpus; cpu++)
+		set_cpu_possible(cpu, true);
+	for (; cpu < NR_CPUS; cpu++)
+		set_cpu_possible(cpu, false);
+}
+
+static int __init uml_ncpus_setup(char *line, int *add)
+{
+	*add = 0;
+
+	if (kstrtoint(line, 10, &uml_ncpus)) {
+		os_warn("%s: Couldn't parse '%s'\n", __func__, line);
+		return -1;
+	}
+
+	uml_ncpus = clamp(uml_ncpus, 1, NR_CPUS);
+
+	return 0;
+}
+
+__uml_setup("ncpus=", uml_ncpus_setup,
+"ncpus=<# of desired CPUs>\n"
+"    This tells UML how many virtual processors to start. The maximum\n"
+"    number of supported virtual processors can be obtained by querying\n"
+"    the CONFIG_NR_CPUS option using --showconfig.\n\n"
+);
+
+EXPORT_SYMBOL(uml_curr_cpu);
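The CPU bring-up in smp.c is a publish/consume handshake: __cpu_up() stores the idle task into cpu_tasks[cpu], orders that store before the state change with smp_wmb(), then flips cpu_states[cpu] and futex-wakes the waiting AP thread; uml_start_secondary() sleeps until the state changes and pairs the barrier with smp_rmb() before reading cpu_tasks[cpu]. A condensed model of just the ordering, using C11 atomics in place of the kernel barriers and a spin loop in place of the futex sleep (all names here are illustrative, not the patch's API):

    #include <stdatomic.h>

    struct task;                      /* stand-in for struct task_struct */
    static struct task *cpu_task[8];  /* stand-in for cpu_tasks[] */
    static atomic_int cpu_state[8];   /* 0 = PAUSED, 1 = RUNNING */

    /* Boot CPU side: publish the payload, then release the flag. */
    static void boot_publish(int cpu, struct task *idle)
    {
            cpu_task[cpu] = idle;     /* plain store of the payload */
            atomic_store_explicit(&cpu_state[cpu], 1,
                                  memory_order_release); /* ~smp_wmb() */
            /* the real code then calls os_futex_wake() */
    }

    /* AP side: acquire the flag; the payload is then safely visible. */
    static struct task *secondary_consume(int cpu)
    {
            while (atomic_load_explicit(&cpu_state[cpu],
                                        memory_order_acquire) == 0)
                    ;                 /* ~futex wait, then smp_rmb() */
            return cpu_task[cpu];
    }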
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 17da0a870650..b344a36b44eb 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -625,9 +625,10 @@ void time_travel_sleep(void)
 	 * controller application.
 	 */
 	unsigned long long next = S64_MAX;
+	int cpu = raw_smp_processor_id();
 
 	if (time_travel_mode == TT_MODE_BASIC)
-		os_timer_disable();
+		os_timer_disable(cpu);
 
 	time_travel_update_time(next, true);
 
@@ -638,9 +639,9 @@ void time_travel_sleep(void)
 			 * This is somewhat wrong - we should get the first
 			 * one sooner like the os_timer_one_shot() below...
 			 */
-			os_timer_set_interval(time_travel_timer_interval);
+			os_timer_set_interval(cpu, time_travel_timer_interval);
 		} else {
-			os_timer_one_shot(time_travel_timer_event.time - next);
+			os_timer_one_shot(cpu, time_travel_timer_event.time - next);
 		}
 	}
 }
@@ -758,6 +759,8 @@ extern u64 time_travel_ext_req(u32 op, u64 time);
 #define time_travel_del_event(e) do { } while (0)
 #endif
 
+static struct clock_event_device timer_clockevent[NR_CPUS];
+
 void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
 	unsigned long flags;
@@ -780,12 +783,14 @@ void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 
 static int itimer_shutdown(struct clock_event_device *evt)
 {
+	int cpu = evt - &timer_clockevent[0];
+
 	if (time_travel_mode != TT_MODE_OFF)
 		time_travel_del_event(&time_travel_timer_event);
 
 	if (time_travel_mode != TT_MODE_INFCPU &&
 	    time_travel_mode != TT_MODE_EXTERNAL)
-		os_timer_disable();
+		os_timer_disable(cpu);
 
 	return 0;
 }
@@ -793,6 +798,7 @@ static int itimer_shutdown(struct clock_event_device *evt)
 static int itimer_set_periodic(struct clock_event_device *evt)
 {
 	unsigned long long interval = NSEC_PER_SEC / HZ;
+	int cpu = evt - &timer_clockevent[0];
 
 	if (time_travel_mode != TT_MODE_OFF) {
 		time_travel_del_event(&time_travel_timer_event);
@@ -805,7 +811,7 @@ static int itimer_set_periodic(struct clock_event_device *evt)
 
 	if (time_travel_mode != TT_MODE_INFCPU &&
 	    time_travel_mode != TT_MODE_EXTERNAL)
-		os_timer_set_interval(interval);
+		os_timer_set_interval(cpu, interval);
 
 	return 0;
 }
@@ -825,7 +831,7 @@ static int itimer_next_event(unsigned long delta,
 
 	if (time_travel_mode != TT_MODE_INFCPU &&
 	    time_travel_mode != TT_MODE_EXTERNAL)
-		return os_timer_one_shot(delta);
+		return os_timer_one_shot(raw_smp_processor_id(), delta);
 
 	return 0;
 }
@@ -835,10 +841,9 @@ static int itimer_one_shot(struct clock_event_device *evt)
 	return itimer_next_event(0, evt);
 }
 
-static struct clock_event_device timer_clockevent = {
+static struct clock_event_device _timer_clockevent = {
 	.name			= "posix-timer",
 	.rating			= 250,
-	.cpumask		= cpu_possible_mask,
 	.features		= CLOCK_EVT_FEAT_PERIODIC |
 				  CLOCK_EVT_FEAT_ONESHOT,
 	.set_state_shutdown	= itimer_shutdown,
@@ -856,6 +861,9 @@ static struct clock_event_device timer_clockevent = {
 
 static irqreturn_t um_timer(int irq, void *dev)
 {
+	int cpu = raw_smp_processor_id();
+	struct clock_event_device *evt = &timer_clockevent[cpu];
+
 	/*
 	 * Interrupt the (possibly) running userspace process, technically this
 	 * should only happen if userspace is currently executing.
@@ -867,7 +875,7 @@ static irqreturn_t um_timer(int irq, void *dev)
 	    get_current()->mm)
 		os_alarm_process(get_current()->mm->context.id.pid);
 
-	(*timer_clockevent.event_handler)(&timer_clockevent);
+	evt->event_handler(evt);
 
 	return IRQ_HANDLED;
 }
@@ -904,7 +912,24 @@ static struct clocksource timer_clocksource = {
 	.flags			= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static void __init um_timer_setup(void)
+int um_setup_timer(void)
+{
+	int cpu = raw_smp_processor_id();
+	struct clock_event_device *evt = &timer_clockevent[cpu];
+	int err;
+
+	err = os_timer_create();
+	if (err)
+		return err;
+
+	memcpy(evt, &_timer_clockevent, sizeof(*evt));
+	evt->cpumask = cpumask_of(cpu);
+	clockevents_register_device(evt);
+
+	return 0;
+}
+
+static void __init um_timer_init(void)
 {
 	int err;
 
@@ -913,8 +938,8 @@ static void __init um_timer_setup(void)
 		printk(KERN_ERR "register_timer : request_irq failed - "
 		       "errno = %d\n", -err);
 
-	err = os_timer_create();
-	if (err != 0) {
+	err = um_setup_timer();
+	if (err) {
 		printk(KERN_ERR "creation of timer failed - errno = %d\n", -err);
 		return;
 	}
@@ -924,7 +949,6 @@ static void __init um_timer_setup(void)
 		printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
 		return;
 	}
-	clockevents_register_device(&timer_clockevent);
 }
 
 void read_persistent_clock64(struct timespec64 *ts)
@@ -945,7 +969,7 @@ void read_persistent_clock64(struct timespec64 *ts)
 void __init time_init(void)
 {
 	timer_set_signal_handler();
-	late_time_init = um_timer_setup;
+	late_time_init = um_timer_init;
 }
 
 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
@@ -961,21 +985,21 @@ static int setup_time_travel(char *str)
 {
 	if (strcmp(str, "=inf-cpu") == 0) {
 		time_travel_mode = TT_MODE_INFCPU;
-		timer_clockevent.name = "time-travel-timer-infcpu";
+		_timer_clockevent.name = "time-travel-timer-infcpu";
 		timer_clocksource.name = "time-travel-clock";
 		return 1;
 	}
 
 	if (strncmp(str, "=ext:", 5) == 0) {
 		time_travel_mode = TT_MODE_EXTERNAL;
-		timer_clockevent.name = "time-travel-timer-external";
+		_timer_clockevent.name = "time-travel-timer-external";
 		timer_clocksource.name = "time-travel-clock-external";
 		return time_travel_connect_external(str + 5);
 	}
 
 	if (!*str) {
 		time_travel_mode = TT_MODE_BASIC;
-		timer_clockevent.name = "time-travel-timer";
+		_timer_clockevent.name = "time-travel-timer";
 		timer_clocksource.name = "time-travel-clock";
 		return 1;
 	}
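With the clockevent turned into a per-CPU array, callbacks that only receive the clock_event_device pointer (itimer_shutdown(), itimer_set_periodic()) recover their CPU number by pointer arithmetic: evt - &timer_clockevent[0] is the element's array index, which by construction is the CPU that registered it in um_setup_timer(). The idiom in isolation (illustrative stand-in types, not the patch's code):

    #include <stdio.h>

    struct clock_event { const char *name; };
    static struct clock_event evts[4];

    static int evt_index(const struct clock_event *evt)
    {
            /* pointer difference between elements of the same array
             * is measured in elements, not bytes */
            return (int)(evt - &evts[0]);
    }

    int main(void)
    {
            printf("%d\n", evt_index(&evts[2]));   /* prints 2 */
            return 0;
    }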
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index cf7e0d4407f2..39608cccf2c6 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -162,9 +162,11 @@ int um_tlb_sync(struct mm_struct *mm)
 {
 	pgd_t *pgd;
 	struct vm_ops ops;
-	unsigned long addr = mm->context.sync_tlb_range_from, next;
+	unsigned long addr, next;
 	int ret = 0;
 
+	guard(spinlock_irqsave)(&mm->context.sync_tlb_lock);
+
 	if (mm->context.sync_tlb_range_to == 0)
 		return 0;
 
@@ -177,6 +179,7 @@ int um_tlb_sync(struct mm_struct *mm)
 		ops.unmap = unmap;
 	}
 
+	addr = mm->context.sync_tlb_range_from;
 	pgd = pgd_offset(mm, addr);
 	do {
 		next = pgd_addr_end(addr, mm->context.sync_tlb_range_to);
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 5b80a3a89c20..177615820a4c 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -316,7 +316,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 	if (!is_user && regs)
 		current->thread.segv_regs = container_of(regs, struct pt_regs, regs);
 
-	if (!is_user && init_mm.context.sync_tlb_range_to) {
+	if (!is_user && address >= start_vm && address < end_vm) {
 		/*
 		 * Kernel has pending updates from set_ptes that were not
 		 * flushed yet. Syncing them should fix the pagefault (if not
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index cfbbbf8500c3..e2b24e1ecfa6 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -19,6 +19,7 @@
 #include <linux/kmsg_dump.h>
 #include <linux/suspend.h>
 #include <linux/random.h>
+#include <linux/smp-internal.h>
 
 #include <asm/processor.h>
 #include <asm/cpufeature.h>
@@ -71,6 +72,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 {
 	int i = 0;
 
+#if IS_ENABLED(CONFIG_SMP)
+	i = (uintptr_t) v - 1;
+	if (!cpu_online(i))
+		return 0;
+#endif
+
 	seq_printf(m, "processor\t: %d\n", i);
 	seq_printf(m, "vendor_id\t: User Mode Linux\n");
 	seq_printf(m, "model name\t: UML\n");
@@ -87,13 +94,14 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		   loops_per_jiffy/(500000/HZ),
 		   (loops_per_jiffy/(5000/HZ)) % 100);
 
-
 	return 0;
 }
 
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
-	return *pos < nr_cpu_ids ? &boot_cpu_data + *pos : NULL;
+	if (*pos < nr_cpu_ids)
+		return (void *)(uintptr_t)(*pos + 1);
+	return NULL;
 }
 
 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
@@ -239,8 +247,6 @@ static struct notifier_block panic_exit_notifier = {
 
 void uml_finishsetup(void)
 {
-	cpu_tasks[0] = &init_task;
-
 	atomic_notifier_chain_register(&panic_notifier_list,
 				       &panic_exit_notifier);
 
@@ -254,11 +260,7 @@ unsigned long stub_start;
 unsigned long task_size;
 EXPORT_SYMBOL(task_size);
 
-unsigned long host_task_size;
-
 unsigned long brk_start;
-unsigned long end_iomem;
-EXPORT_SYMBOL(end_iomem);
 
 #define MIN_VMALLOC (32 * 1024 * 1024)
 
@@ -298,16 +300,14 @@ static unsigned long __init get_top_address(char **envp)
 			top_addr = (unsigned long) envp[i];
 	}
 
-	top_addr &= ~(UM_KERN_PAGE_SIZE - 1);
-	top_addr += UM_KERN_PAGE_SIZE;
-
-	return top_addr;
+	return PAGE_ALIGN(top_addr + 1);
 }
 
 int __init linux_main(int argc, char **argv, char **envp)
 {
 	unsigned long avail, diff;
 	unsigned long virtmem_size, max_physmem;
+	unsigned long host_task_size;
 	unsigned long stack;
 	unsigned int i;
 	int add;
@@ -354,12 +354,11 @@ int __init linux_main(int argc, char **argv, char **envp)
 	 * so they actually get what they asked for. This should
 	 * add zero for non-exec shield users
 	 */
-
-	diff = UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end);
+	diff = PAGE_ALIGN(brk_start) - PAGE_ALIGN((unsigned long) &_end);
 	if (diff > 1024 * 1024) {
 		os_info("Adding %ld bytes to physical memory to account for "
 			"exec-shield gap\n", diff);
-		physmem_size += UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end);
+		physmem_size += diff;
 	}
 
 	uml_physmem = (unsigned long) __binary_start & PAGE_MASK;
@@ -369,10 +368,8 @@ int __init linux_main(int argc, char **argv, char **envp)
 
 	setup_machinename(init_utsname()->machine);
 
-	physmem_size = (physmem_size + PAGE_SIZE - 1) & PAGE_MASK;
-	iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK;
-
-	max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC;
+	physmem_size = PAGE_ALIGN(physmem_size);
+	max_physmem = TASK_SIZE - uml_physmem - MIN_VMALLOC;
 
 	if (physmem_size > max_physmem) {
 		physmem_size = max_physmem;
 		os_info("Physical memory size shrunk to %llu bytes\n",
@@ -380,7 +377,6 @@ int __init linux_main(int argc, char **argv, char **envp)
 	}
 
 	high_physmem = uml_physmem + physmem_size;
-	end_iomem = high_physmem + iomem_size;
 
 	start_vm = VMALLOC_START;
 
@@ -421,6 +417,7 @@ void __init setup_arch(char **cmdline_p)
 	strscpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
 	setup_hostinfo(host_info, sizeof host_info);
+	prefill_possible_map();
 
 	if (os_getrandom(rng_seed, sizeof(rng_seed), 0) == sizeof(rng_seed)) {
 		add_bootloader_randomness(rng_seed, sizeof(rng_seed));
@@ -455,6 +452,18 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 {
 }
 
+#if IS_ENABLED(CONFIG_SMP)
+void alternatives_smp_module_add(struct module *mod, char *name,
+				 void *locks, void *locks_end,
+				 void *text, void *text_end)
+{
+}
+
+void alternatives_smp_module_del(struct module *mod)
+{
+}
+#endif
+
 void *text_poke(void *addr, const void *opcode, size_t len)
 {
 	/*
