Diffstat (limited to 'arch/um/kernel')
-rw-r--r-- | arch/um/kernel/Makefile        |   1
-rw-r--r-- | arch/um/kernel/ioport.c        |  13
-rw-r--r-- | arch/um/kernel/irq.c           |   6
-rw-r--r-- | arch/um/kernel/skas/mmu.c      |  89
-rw-r--r-- | arch/um/kernel/skas/stub.c     | 130
-rw-r--r-- | arch/um/kernel/skas/stub_exe.c | 159
-rw-r--r-- | arch/um/kernel/time.c          |  13
-rw-r--r-- | arch/um/kernel/trap.c          | 156
-rw-r--r-- | arch/um/kernel/um_arch.c       |   2
9 files changed, 500 insertions, 69 deletions
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index 4df1cd0d2017..4669db2aa9be 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -25,7 +25,6 @@ obj-$(CONFIG_GPROF) += gprof_syms.o obj-$(CONFIG_OF) += dtb.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_STACKTRACE) += stacktrace.o -obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o USER_OBJS := config.o diff --git a/arch/um/kernel/ioport.c b/arch/um/kernel/ioport.c deleted file mode 100644 index 7220615b3beb..000000000000 --- a/arch/um/kernel/ioport.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2021 Intel Corporation - * Author: Johannes Berg <johannes@sipsolutions.net> - */ -#include <asm/iomap.h> -#include <asm-generic/pci_iomap.h> - -void __iomem *__pci_ioport_map(struct pci_dev *dev, unsigned long port, - unsigned int nr) -{ - return NULL; -} diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index abe8f30a521c..0dfaf96bb7da 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -690,3 +690,9 @@ void __init init_IRQ(void) /* Initialize EPOLL Loop */ os_setup_epoll(); } + +void sigchld_handler(int sig, struct siginfo *unused_si, + struct uml_pt_regs *regs, void *mc) +{ + do_IRQ(SIGCHLD_IRQ, regs); +} diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 0eb5a1d3ba70..849fafa4b54f 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -8,6 +8,7 @@ #include <linux/sched/signal.h> #include <linux/slab.h> +#include <shared/irq_kern.h> #include <asm/pgalloc.h> #include <asm/sections.h> #include <asm/mmu_context.h> @@ -19,6 +20,9 @@ /* Ensure the stub_data struct covers the allocated area */ static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE); +spinlock_t mm_list_lock; +struct list_head mm_list; + int init_new_context(struct task_struct *task, struct mm_struct *mm) { struct mm_id *new_id = &mm->context.id; @@ -31,14 +35,14 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) new_id->stack = stack; - block_signals_trace(); - new_id->pid = start_userspace(stack); - unblock_signals_trace(); + scoped_guard(spinlock_irqsave, &mm_list_lock) { + /* Insert into list, used for lookups when the child dies */ + list_add(&mm->context.list, &mm_list); + } - if (new_id->pid < 0) { - ret = new_id->pid; + ret = start_userspace(new_id); + if (ret < 0) goto out_free; - } /* Ensure the new MM is clean and nothing unwanted is mapped */ unmap(new_id, 0, STUB_START); @@ -60,13 +64,82 @@ void destroy_context(struct mm_struct *mm) * zero, resulting in a kill(0), which will result in the * whole UML suddenly dying. Also, cover negative and * 1 cases, since they shouldn't happen either. + * + * Negative cases happen if the child died unexpectedly. */ - if (mmu->id.pid < 2) { + if (mmu->id.pid >= 0 && mmu->id.pid < 2) { printk(KERN_ERR "corrupt mm_context - pid = %d\n", mmu->id.pid); return; } - os_kill_ptraced_process(mmu->id.pid, 1); + + if (mmu->id.pid > 0) { + os_kill_ptraced_process(mmu->id.pid, 1); + mmu->id.pid = -1; + } + + if (using_seccomp && mmu->id.sock) + os_close_file(mmu->id.sock); free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES)); + + guard(spinlock_irqsave)(&mm_list_lock); + + list_del(&mm->context.list); +} + +static irqreturn_t mm_sigchld_irq(int irq, void* dev) +{ + struct mm_context *mm_context; + pid_t pid; + + guard(spinlock)(&mm_list_lock); + + while ((pid = os_reap_child()) > 0) { + /* + * A child died, check if we have an MM with the PID. 
This is + * only relevant in SECCOMP mode (as ptrace will fail anyway). + * + * See wait_stub_done_seccomp for more details. + */ + list_for_each_entry(mm_context, &mm_list, list) { + if (mm_context->id.pid == pid) { + struct stub_data *stub_data; + printk("Unexpectedly lost MM child! Affected tasks will segfault."); + + /* Marks the MM as dead */ + mm_context->id.pid = -1; + + /* + * NOTE: If SMP is implemented, a futex_wake + * needs to be added here. + */ + stub_data = (void *)mm_context->id.stack; + stub_data->futex = FUTEX_IN_KERN; + + /* + * NOTE: Currently executing syscalls by + * affected tasks may finish normally. + */ + break; + } + } + } + + return IRQ_HANDLED; +} + +static int __init init_child_tracking(void) +{ + int err; + + spin_lock_init(&mm_list_lock); + INIT_LIST_HEAD(&mm_list); + + err = request_irq(SIGCHLD_IRQ, mm_sigchld_irq, 0, "SIGCHLD", NULL); + if (err < 0) + panic("Failed to register SIGCHLD IRQ: %d", err); + + return 0; } +early_initcall(init_child_tracking) diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c index 796fc266d3bb..67cab46a602c 100644 --- a/arch/um/kernel/skas/stub.c +++ b/arch/um/kernel/skas/stub.c @@ -5,21 +5,54 @@ #include <sysdep/stub.h> -static __always_inline int syscall_handler(struct stub_data *d) +#include <linux/futex.h> +#include <sys/socket.h> +#include <errno.h> + +/* + * Known security issues + * + * Userspace can jump to this address to execute *any* syscall that is + * permitted by the stub. As we will return afterwards, it can do + * whatever it likes, including: + * - Tricking the kernel into handing out the memory FD + * - Using this memory FD to read/write all physical memory + * - Running in parallel to the kernel processing a syscall + * (possibly creating data races?) + * - Blocking e.g. SIGALRM to avoid time based scheduling + * + * To avoid this, the permitted location for each syscall needs to be + * checked for in the SECCOMP filter (which is reasonably simple). Also, + * more care will need to go into considerations how the code might be + * tricked by using a prepared stack (or even modifying the stack from + * another thread in case SMP support is added). + * + * As for the SIGALRM, the best counter measure will be to check in the + * kernel that the process is reporting back the SIGALRM in a timely + * fashion. 
+ */ +static __always_inline int syscall_handler(int fd_map[STUB_MAX_FDS]) { + struct stub_data *d = get_stub_data(); int i; unsigned long res; + int fd; for (i = 0; i < d->syscall_data_len; i++) { struct stub_syscall *sc = &d->syscall_data[i]; switch (sc->syscall) { case STUB_SYSCALL_MMAP: + if (fd_map) + fd = fd_map[sc->mem.fd]; + else + fd = sc->mem.fd; + res = stub_syscall6(STUB_MMAP_NR, sc->mem.addr, sc->mem.length, sc->mem.prot, MAP_SHARED | MAP_FIXED, - sc->mem.fd, sc->mem.offset); + fd, sc->mem.offset); if (res != sc->mem.addr) { d->err = res; d->syscall_data_len = i; @@ -51,9 +84,98 @@ static __always_inline int syscall_handler(struct stub_data *d) void __section(".__syscall_stub") stub_syscall_handler(void) { + syscall_handler(NULL); + + trap_myself(); +} + +void __section(".__syscall_stub") +stub_signal_interrupt(int sig, siginfo_t *info, void *p) +{ struct stub_data *d = get_stub_data(); + char rcv_data; + union { + char data[CMSG_SPACE(sizeof(int) * STUB_MAX_FDS)]; + struct cmsghdr align; + } ctrl = {}; + struct iovec iov = { + .iov_base = &rcv_data, + .iov_len = 1, + }; + struct msghdr msghdr = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = &ctrl, + .msg_controllen = sizeof(ctrl), + }; + ucontext_t *uc = p; + struct cmsghdr *fd_msg; + int *fd_map; + int num_fds; + long res; - syscall_handler(d); + d->signal = sig; + d->si_offset = (unsigned long)info - (unsigned long)&d->sigstack[0]; + d->mctx_offset = (unsigned long)&uc->uc_mcontext - (unsigned long)&d->sigstack[0]; - trap_myself(); +restart_wait: + d->futex = FUTEX_IN_KERN; + do { + res = stub_syscall3(__NR_futex, (unsigned long)&d->futex, + FUTEX_WAKE, 1); + } while (res == -EINTR); + + do { + res = stub_syscall4(__NR_futex, (unsigned long)&d->futex, + FUTEX_WAIT, FUTEX_IN_KERN, 0); + } while (res == -EINTR || d->futex == FUTEX_IN_KERN); + + if (res < 0 && res != -EAGAIN) + stub_syscall1(__NR_exit_group, 1); + + if (d->syscall_data_len) { + /* Read passed FDs (if any) */ + do { + res = stub_syscall3(__NR_recvmsg, 0, (unsigned long)&msghdr, 0); + } while (res == -EINTR); + + /* We should never have a receive error (other than -EAGAIN) */ + if (res < 0 && res != -EAGAIN) + stub_syscall1(__NR_exit_group, 1); + + /* Receive the FDs */ + num_fds = 0; + fd_msg = msghdr.msg_control; + fd_map = (void *)&CMSG_DATA(fd_msg); + if (res == iov.iov_len && msghdr.msg_controllen > sizeof(struct cmsghdr)) + num_fds = (fd_msg->cmsg_len - CMSG_LEN(0)) / sizeof(int); + + /* Try running queued syscalls. */ + res = syscall_handler(fd_map); + + while (num_fds) + stub_syscall2(__NR_close, fd_map[--num_fds], 0); + } else { + res = 0; + } + + if (res < 0 || d->restart_wait) { + /* Report SIGSYS if we restart. */ + d->signal = SIGSYS; + d->restart_wait = 0; + + goto restart_wait; + } + + /* Restore arch dependent state that is not part of the mcontext */ + stub_seccomp_restore_state(&d->arch_data); + + /* Return so that the host modified mcontext is restored. 
*/ +} + +void __section(".__syscall_stub") +stub_signal_restorer(void) +{ + /* We must not have anything on the stack when doing rt_sigreturn */ + stub_syscall0(__NR_rt_sigreturn); } diff --git a/arch/um/kernel/skas/stub_exe.c b/arch/um/kernel/skas/stub_exe.c index 23c99b285e82..cbafaa684e66 100644 --- a/arch/um/kernel/skas/stub_exe.c +++ b/arch/um/kernel/skas/stub_exe.c @@ -1,8 +1,12 @@ #include <sys/ptrace.h> #include <sys/prctl.h> +#include <sys/fcntl.h> #include <asm/unistd.h> #include <sysdep/stub.h> #include <stub-data.h> +#include <linux/filter.h> +#include <linux/seccomp.h> +#include <generated/asm-offsets.h> void _start(void); @@ -25,8 +29,6 @@ noinline static void real_init(void) } sa = { /* Need to set SA_RESTORER (but the handler never returns) */ .sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000, - /* no need to mask any signals */ - .sa_mask = 0, }; /* set a nice name */ @@ -35,13 +37,20 @@ noinline static void real_init(void) /* Make sure this process dies if the kernel dies */ stub_syscall2(__NR_prctl, PR_SET_PDEATHSIG, SIGKILL); + /* Needed in SECCOMP mode (and safe to do anyway) */ + stub_syscall5(__NR_prctl, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + /* read information from STDIN and close it */ res = stub_syscall3(__NR_read, 0, (unsigned long)&init_data, sizeof(init_data)); if (res != sizeof(init_data)) stub_syscall1(__NR_exit, 10); - stub_syscall1(__NR_close, 0); + /* In SECCOMP mode, FD 0 is a socket and is later used for FD passing */ + if (!init_data.seccomp) + stub_syscall1(__NR_close, 0); + else + stub_syscall3(__NR_fcntl, 0, F_SETFL, O_NONBLOCK); /* map stub code + data */ res = stub_syscall6(STUB_MMAP_NR, @@ -59,22 +68,148 @@ noinline static void real_init(void) if (res != init_data.stub_start + UM_KERN_PAGE_SIZE) stub_syscall1(__NR_exit, 12); + /* In SECCOMP mode, we only need the signalling FD from now on */ + if (init_data.seccomp) { + res = stub_syscall3(__NR_close_range, 1, ~0U, 0); + if (res != 0) + stub_syscall1(__NR_exit, 13); + } + /* setup signal stack inside stub data */ stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE; stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0); - /* register SIGSEGV handler */ - sa.sa_handler_ = (void *) init_data.segv_handler; - res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, (unsigned long)&sa, 0, - sizeof(sa.sa_mask)); - if (res != 0) - stub_syscall1(__NR_exit, 13); + /* register signal handlers */ + sa.sa_handler_ = (void *) init_data.signal_handler; + sa.sa_restorer = (void *) init_data.signal_restorer; + if (!init_data.seccomp) { + /* In ptrace mode, the SIGSEGV handler never returns */ + sa.sa_mask = 0; + + res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 14); + } else { + /* SECCOMP mode uses rt_sigreturn, need to mask all signals */ + sa.sa_mask = ~0ULL; + + res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 15); + + res = stub_syscall4(__NR_rt_sigaction, SIGSYS, + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 16); + + res = stub_syscall4(__NR_rt_sigaction, SIGALRM, + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 17); + + res = stub_syscall4(__NR_rt_sigaction, SIGTRAP, + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 18); + + res = stub_syscall4(__NR_rt_sigaction, SIGILL, + (unsigned long)&sa, 0, 
sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 19); + + res = stub_syscall4(__NR_rt_sigaction, SIGFPE, + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 20); + } + + /* + * If in seccomp mode, install the SECCOMP filter and trigger a syscall. + * Otherwise set PTRACE_TRACEME and do a SIGSTOP. + */ + if (init_data.seccomp) { + struct sock_filter filter[] = { +#if __BITS_PER_LONG > 32 + /* [0] Load upper 32bit of instruction pointer from seccomp_data */ + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + (offsetof(struct seccomp_data, instruction_pointer) + 4)), + + /* [1] Jump forward 3 instructions if the upper address is not identical */ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) >> 32, 0, 3), +#endif + /* [2] Load lower 32bit of instruction pointer from seccomp_data */ + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + (offsetof(struct seccomp_data, instruction_pointer))), + + /* [3] Mask out lower bits */ + BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xfffff000), + + /* [4] Jump to [6] if the lower bits are not on the expected page */ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) & 0xfffff000, 1, 0), + + /* [5] Trap call, allow */ + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP), + + /* [6,7] Check architecture */ + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + offsetof(struct seccomp_data, arch)), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, + UM_SECCOMP_ARCH_NATIVE, 1, 0), + + /* [8] Kill (for architecture check) */ + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS), + + /* [9] Load syscall number */ + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + offsetof(struct seccomp_data, nr)), + + /* [10-16] Check against permitted syscalls */ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex, + 7, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_recvmsg, + 6, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_close, + 5, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STUB_MMAP_NR, + 4, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap, + 3, 0), +#ifdef __i386__ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_set_thread_area, + 2, 0), +#else + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_arch_prctl, + 2, 0), +#endif + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn, + 1, 0), + + /* [17] Not one of the permitted syscalls */ + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS), + + /* [18] Permitted call for the stub */ + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = sizeof(filter) / sizeof(filter[0]), + .filter = filter, + }; + + if (stub_syscall3(__NR_seccomp, SECCOMP_SET_MODE_FILTER, + SECCOMP_FILTER_FLAG_TSYNC, + (unsigned long)&prog) != 0) + stub_syscall1(__NR_exit, 21); - stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); + /* Fall through, the exit syscall will cause SIGSYS */ + } else { + stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); - stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP); + stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP); + } - stub_syscall1(__NR_exit, 14); + stub_syscall1(__NR_exit, 30); __builtin_unreachable(); } diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 1394568c0210..ae0fa2173778 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -856,11 +856,16 @@ static struct clock_event_device timer_clockevent = { static irqreturn_t um_timer(int irq, void *dev) { - if (get_current()->mm != NULL) - { - /* userspace - relay signal, results in correct userspace timers */ + /* + * Interrupt the (possibly) running userspace process, technically this + * 
should only happen if userspace is currently executing. + * With infinite CPU time-travel, we can only get here when userspace + * is not executing. Do not notify there and avoid spurious scheduling. + */ + if (time_travel_mode != TT_MODE_INFCPU && + time_travel_mode != TT_MODE_EXTERNAL && + get_current()->mm) os_alarm_process(get_current()->mm->context.id.pid); - } (*timer_clockevent.event_handler)(&timer_clockevent); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index ce073150dc20..5b80a3a89c20 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -16,7 +16,122 @@ #include <kern_util.h> #include <os.h> #include <skas.h> -#include <arch.h> + +/* + * NOTE: UML does not have exception tables. As such, this is almost a copy + * of the code in mm/memory.c, only adjusting the logic to simply check whether + * we are coming from the kernel instead of doing an additional lookup in the + * exception table. + * We can do this simplification because we never get here if the exception was + * fixable. + */ +static inline bool get_mmap_lock_carefully(struct mm_struct *mm, bool is_user) +{ + if (likely(mmap_read_trylock(mm))) + return true; + + if (!is_user) + return false; + + return !mmap_read_lock_killable(mm); +} + +static inline bool mmap_upgrade_trylock(struct mm_struct *mm) +{ + /* + * We don't have this operation yet. + * + * It should be easy enough to do: it's basically a + * atomic_long_try_cmpxchg_acquire() + * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but + * it also needs the proper lockdep magic etc. + */ + return false; +} + +static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, bool is_user) +{ + mmap_read_unlock(mm); + if (!is_user) + return false; + + return !mmap_write_lock_killable(mm); +} + +/* + * Helper for page fault handling. + * + * This is kind of equivalend to "mmap_read_lock()" followed + * by "find_extend_vma()", except it's a lot more careful about + * the locking (and will drop the lock on failure). + * + * For example, if we have a kernel bug that causes a page + * fault, we don't want to just use mmap_read_lock() to get + * the mm lock, because that would deadlock if the bug were + * to happen while we're holding the mm lock for writing. + * + * So this checks the exception tables on kernel faults in + * order to only do this all for instructions that are actually + * expected to fault. + * + * We can also actually take the mm lock for writing if we + * need to extend the vma, which helps the VM layer a lot. + */ +static struct vm_area_struct * +um_lock_mm_and_find_vma(struct mm_struct *mm, + unsigned long addr, bool is_user) +{ + struct vm_area_struct *vma; + + if (!get_mmap_lock_carefully(mm, is_user)) + return NULL; + + vma = find_vma(mm, addr); + if (likely(vma && (vma->vm_start <= addr))) + return vma; + + /* + * Well, dang. We might still be successful, but only + * if we can extend a vma to do so. + */ + if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) { + mmap_read_unlock(mm); + return NULL; + } + + /* + * We can try to upgrade the mmap lock atomically, + * in which case we can continue to use the vma + * we already looked up. + * + * Otherwise we'll have to drop the mmap lock and + * re-take it, and also look up the vma again, + * re-checking it. 
+ */ + if (!mmap_upgrade_trylock(mm)) { + if (!upgrade_mmap_lock_carefully(mm, is_user)) + return NULL; + + vma = find_vma(mm, addr); + if (!vma) + goto fail; + if (vma->vm_start <= addr) + goto success; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto fail; + } + + if (expand_stack_locked(vma, addr)) + goto fail; + +success: + mmap_write_downgrade(mm); + return vma; + +fail: + mmap_write_unlock(mm); + return NULL; +} /* * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by @@ -44,21 +159,10 @@ int handle_page_fault(unsigned long address, unsigned long ip, if (is_user) flags |= FAULT_FLAG_USER; retry: - mmap_read_lock(mm); - vma = find_vma(mm, address); - if (!vma) - goto out; - if (vma->vm_start <= address) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto out; - if (is_user && !ARCH_IS_STACKGROW(address)) - goto out; - vma = expand_stack(mm, address); + vma = um_lock_mm_and_find_vma(mm, address, is_user); if (!vma) goto out_nosemaphore; -good_area: *code_out = SEGV_ACCERR; if (is_write) { if (!(vma->vm_flags & VM_WRITE)) @@ -225,20 +329,20 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, panic("Failed to sync kernel TLBs: %d", err); goto out; } - else if (current->mm == NULL) { - if (current->pagefault_disabled) { - if (!mc) { - show_regs(container_of(regs, struct pt_regs, regs)); - panic("Segfault with pagefaults disabled but no mcontext"); - } - if (!current->thread.segv_continue) { - show_regs(container_of(regs, struct pt_regs, regs)); - panic("Segfault without recovery target"); - } - mc_set_rip(mc, current->thread.segv_continue); - current->thread.segv_continue = NULL; - goto out; + else if (current->pagefault_disabled) { + if (!mc) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Segfault with pagefaults disabled but no mcontext"); + } + if (!current->thread.segv_continue) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Segfault without recovery target"); } + mc_set_rip(mc, current->thread.segv_continue); + current->thread.segv_continue = NULL; + goto out; + } + else if (current->mm == NULL) { show_regs(container_of(regs, struct pt_regs, regs)); panic("Segfault with no mm"); } diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index d4b3b6742ec8..2f5ee045bc7a 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -477,7 +477,7 @@ void *text_poke_copy(void *addr, const void *opcode, size_t len) return text_poke(addr, opcode, len); } -void text_poke_sync(void) +void smp_text_poke_sync_each_cpu(void) { } |
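
Note on the FD-passing path: the stub_signal_interrupt() hunk in arch/um/kernel/skas/stub.c receives file descriptors for queued mmap requests over FD 0 with recvmsg() and an SCM_RIGHTS control message, sized for STUB_MAX_FDS descriptors. The sending side is not part of this diff; the sketch below shows roughly what such a sender looks like, using only the standard sendmsg()/CMSG macros. The function name, the socket argument, and the constant standing in for STUB_MAX_FDS are illustrative assumptions, not code from the patch.

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static int send_fds_to_stub(int sock, const int *fds, int num_fds)
{
	char dummy = 0;			/* the stub reads exactly one data byte */
	struct iovec iov = {
		.iov_base = &dummy,
		.iov_len = 1,
	};
	union {
		/* 4 stands in for STUB_MAX_FDS in this sketch */
		char buf[CMSG_SPACE(sizeof(int) * 4)];
		struct cmsghdr align;
	} ctrl = {};
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = ctrl.buf,
		.msg_controllen = CMSG_SPACE(sizeof(int) * num_fds),
	};
	struct cmsghdr *cmsg;

	if (num_fds < 1 || num_fds > 4)
		return -1;

	/* attach the descriptors as ancillary SCM_RIGHTS data */
	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int) * num_fds);
	memcpy(CMSG_DATA(cmsg), fds, sizeof(int) * num_fds);

	/* sock is the socketpair end whose peer is the stub's FD 0 */
	return sendmsg(sock, &msg, 0);
}

On the stub side, CMSG_DATA() of the received message becomes the fd_map that syscall_handler() uses to translate sc->mem.fd before issuing the mmap call; the descriptors are closed again once the queued syscalls have run.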