Diffstat (limited to 'arch/um/kernel/trap.c')
-rw-r--r-- | arch/um/kernel/trap.c | 156
1 file changed, 130 insertions, 26 deletions
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index ce073150dc20..5b80a3a89c20 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -16,7 +16,122 @@
 #include <kern_util.h>
 #include <os.h>
 #include <skas.h>
-#include <arch.h>
+
+/*
+ * NOTE: UML does not have exception tables. As such, this is almost a copy
+ * of the code in mm/memory.c, only adjusting the logic to simply check whether
+ * we are coming from the kernel instead of doing an additional lookup in the
+ * exception table.
+ * We can do this simplification because we never get here if the exception was
+ * fixable.
+ */
+static inline bool get_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
+{
+	if (likely(mmap_read_trylock(mm)))
+		return true;
+
+	if (!is_user)
+		return false;
+
+	return !mmap_read_lock_killable(mm);
+}
+
+static inline bool mmap_upgrade_trylock(struct mm_struct *mm)
+{
+	/*
+	 * We don't have this operation yet.
+	 *
+	 * It should be easy enough to do: it's basically a
+	 *    atomic_long_try_cmpxchg_acquire()
+	 * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but
+	 * it also needs the proper lockdep magic etc.
+	 */
+	return false;
+}
+
+static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
+{
+	mmap_read_unlock(mm);
+	if (!is_user)
+		return false;
+
+	return !mmap_write_lock_killable(mm);
+}
+
+/*
+ * Helper for page fault handling.
+ *
+ * This is kind of equivalent to "mmap_read_lock()" followed
+ * by "find_extend_vma()", except it's a lot more careful about
+ * the locking (and will drop the lock on failure).
+ *
+ * For example, if we have a kernel bug that causes a page
+ * fault, we don't want to just use mmap_read_lock() to get
+ * the mm lock, because that would deadlock if the bug were
+ * to happen while we're holding the mm lock for writing.
+ *
+ * So this checks the exception tables on kernel faults in
+ * order to only do this all for instructions that are actually
+ * expected to fault.
+ *
+ * We can also actually take the mm lock for writing if we
+ * need to extend the vma, which helps the VM layer a lot.
+ */
+static struct vm_area_struct *
+um_lock_mm_and_find_vma(struct mm_struct *mm,
+			unsigned long addr, bool is_user)
+{
+	struct vm_area_struct *vma;
+
+	if (!get_mmap_lock_carefully(mm, is_user))
+		return NULL;
+
+	vma = find_vma(mm, addr);
+	if (likely(vma && (vma->vm_start <= addr)))
+		return vma;
+
+	/*
+	 * Well, dang. We might still be successful, but only
+	 * if we can extend a vma to do so.
+	 */
+	if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
+		mmap_read_unlock(mm);
+		return NULL;
+	}
+
+	/*
+	 * We can try to upgrade the mmap lock atomically,
+	 * in which case we can continue to use the vma
+	 * we already looked up.
+	 *
+	 * Otherwise we'll have to drop the mmap lock and
+	 * re-take it, and also look up the vma again,
+	 * re-checking it.
+	 */
+	if (!mmap_upgrade_trylock(mm)) {
+		if (!upgrade_mmap_lock_carefully(mm, is_user))
+			return NULL;
+
+		vma = find_vma(mm, addr);
+		if (!vma)
+			goto fail;
+		if (vma->vm_start <= addr)
+			goto success;
+		if (!(vma->vm_flags & VM_GROWSDOWN))
+			goto fail;
+	}
+
+	if (expand_stack_locked(vma, addr))
+		goto fail;
+
+success:
+	mmap_write_downgrade(mm);
+	return vma;
+
+fail:
+	mmap_write_unlock(mm);
+	return NULL;
+}
 
 /*
  * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
@@ -44,21 +159,10 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 	if (is_user)
 		flags |= FAULT_FLAG_USER;
 retry:
-	mmap_read_lock(mm);
-	vma = find_vma(mm, address);
-	if (!vma)
-		goto out;
-	if (vma->vm_start <= address)
-		goto good_area;
-	if (!(vma->vm_flags & VM_GROWSDOWN))
-		goto out;
-	if (is_user && !ARCH_IS_STACKGROW(address))
-		goto out;
-	vma = expand_stack(mm, address);
+	vma = um_lock_mm_and_find_vma(mm, address, is_user);
 	if (!vma)
 		goto out_nosemaphore;
 
-good_area:
 	*code_out = SEGV_ACCERR;
 	if (is_write) {
 		if (!(vma->vm_flags & VM_WRITE))
@@ -225,20 +329,20 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 			panic("Failed to sync kernel TLBs: %d", err);
 		goto out;
 	}
-	else if (current->mm == NULL) {
-		if (current->pagefault_disabled) {
-			if (!mc) {
-				show_regs(container_of(regs, struct pt_regs, regs));
-				panic("Segfault with pagefaults disabled but no mcontext");
-			}
-			if (!current->thread.segv_continue) {
-				show_regs(container_of(regs, struct pt_regs, regs));
-				panic("Segfault without recovery target");
-			}
-			mc_set_rip(mc, current->thread.segv_continue);
-			current->thread.segv_continue = NULL;
-			goto out;
+	else if (current->pagefault_disabled) {
+		if (!mc) {
+			show_regs(container_of(regs, struct pt_regs, regs));
+			panic("Segfault with pagefaults disabled but no mcontext");
+		}
+		if (!current->thread.segv_continue) {
+			show_regs(container_of(regs, struct pt_regs, regs));
+			panic("Segfault without recovery target");
 		}
+		mc_set_rip(mc, current->thread.segv_continue);
+		current->thread.segv_continue = NULL;
+		goto out;
+	}
+	else if (current->mm == NULL) {
 		show_regs(container_of(regs, struct pt_regs, regs));
 		panic("Segfault with no mm");
 	}
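The heart of get_mmap_lock_carefully() above is a trylock-first pattern: take the mmap lock opportunistically, and fall back to a blocking (killable) read lock only for faults that came from user space, where sleeping is safe. Below is a minimal userspace sketch of that pattern, using a pthread_rwlock_t in place of the mmap lock; the names lock_carefully() and is_user are illustrative only and are not part of the patch.

/* Sketch of the trylock-first locking pattern, built on pthreads.
 * Compile with: cc -pthread careful_lock.c -o careful_lock
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t mm_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Try the cheap non-blocking path first; block only when the
 * caller is allowed to sleep (the user-fault case in the patch above). */
static bool lock_carefully(bool is_user)
{
	if (pthread_rwlock_tryrdlock(&mm_lock) == 0)
		return true;		/* fast path: lock was uncontended */

	if (!is_user)
		return false;		/* never block on a kernel fault */

	return pthread_rwlock_rdlock(&mm_lock) == 0;
}

int main(void)
{
	if (lock_carefully(true)) {
		puts("read lock held");
		pthread_rwlock_unlock(&mm_lock);
	}
	return 0;
}

The kernel variant differs in one detail: its blocking path uses mmap_read_lock_killable(), so a task stuck waiting for the lock can still be terminated by a fatal signal; plain pthread_rwlock_rdlock() has no such escape hatch.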