summaryrefslogtreecommitdiff
path: root/arch/um/kernel/trap.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/um/kernel/trap.c')
-rw-r--r--arch/um/kernel/trap.c156
1 files changed, 130 insertions, 26 deletions
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index ce073150dc20..5b80a3a89c20 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -16,7 +16,122 @@
#include <kern_util.h>
#include <os.h>
#include <skas.h>
-#include <arch.h>
+
+/*
+ * NOTE: UML does not have exception tables. As such, this is almost a copy
+ * of the code in mm/memory.c, only adjusting the logic to simply check whether
+ * we are coming from the kernel instead of doing an additional lookup in the
+ * exception table.
+ * We can do this simplification because we never get here if the exception was
+ * fixable.
+ */
+static inline bool get_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
+{
+ if (likely(mmap_read_trylock(mm)))
+ return true;
+
+ if (!is_user)
+ return false;
+
+ return !mmap_read_lock_killable(mm);
+}
+
+static inline bool mmap_upgrade_trylock(struct mm_struct *mm)
+{
+ /*
+ * We don't have this operation yet.
+ *
+ * It should be easy enough to do: it's basically a
+ * atomic_long_try_cmpxchg_acquire()
+ * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but
+ * it also needs the proper lockdep magic etc.
+ */
+ return false;
+}
+
+static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
+{
+ mmap_read_unlock(mm);
+ if (!is_user)
+ return false;
+
+ return !mmap_write_lock_killable(mm);
+}
+
+/*
+ * Helper for page fault handling.
+ *
+ * This is kind of equivalend to "mmap_read_lock()" followed
+ * by "find_extend_vma()", except it's a lot more careful about
+ * the locking (and will drop the lock on failure).
+ *
+ * For example, if we have a kernel bug that causes a page
+ * fault, we don't want to just use mmap_read_lock() to get
+ * the mm lock, because that would deadlock if the bug were
+ * to happen while we're holding the mm lock for writing.
+ *
+ * So this checks the exception tables on kernel faults in
+ * order to only do this all for instructions that are actually
+ * expected to fault.
+ *
+ * We can also actually take the mm lock for writing if we
+ * need to extend the vma, which helps the VM layer a lot.
+ */
+static struct vm_area_struct *
+um_lock_mm_and_find_vma(struct mm_struct *mm,
+ unsigned long addr, bool is_user)
+{
+ struct vm_area_struct *vma;
+
+ if (!get_mmap_lock_carefully(mm, is_user))
+ return NULL;
+
+ vma = find_vma(mm, addr);
+ if (likely(vma && (vma->vm_start <= addr)))
+ return vma;
+
+ /*
+ * Well, dang. We might still be successful, but only
+ * if we can extend a vma to do so.
+ */
+ if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
+ mmap_read_unlock(mm);
+ return NULL;
+ }
+
+ /*
+ * We can try to upgrade the mmap lock atomically,
+ * in which case we can continue to use the vma
+ * we already looked up.
+ *
+ * Otherwise we'll have to drop the mmap lock and
+ * re-take it, and also look up the vma again,
+ * re-checking it.
+ */
+ if (!mmap_upgrade_trylock(mm)) {
+ if (!upgrade_mmap_lock_carefully(mm, is_user))
+ return NULL;
+
+ vma = find_vma(mm, addr);
+ if (!vma)
+ goto fail;
+ if (vma->vm_start <= addr)
+ goto success;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto fail;
+ }
+
+ if (expand_stack_locked(vma, addr))
+ goto fail;
+
+success:
+ mmap_write_downgrade(mm);
+ return vma;
+
+fail:
+ mmap_write_unlock(mm);
+ return NULL;
+}
/*
* Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
@@ -44,21 +159,10 @@ int handle_page_fault(unsigned long address, unsigned long ip,
if (is_user)
flags |= FAULT_FLAG_USER;
retry:
- mmap_read_lock(mm);
- vma = find_vma(mm, address);
- if (!vma)
- goto out;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto out;
- if (is_user && !ARCH_IS_STACKGROW(address))
- goto out;
- vma = expand_stack(mm, address);
+ vma = um_lock_mm_and_find_vma(mm, address, is_user);
if (!vma)
goto out_nosemaphore;
-good_area:
*code_out = SEGV_ACCERR;
if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
@@ -225,20 +329,20 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
panic("Failed to sync kernel TLBs: %d", err);
goto out;
}
- else if (current->mm == NULL) {
- if (current->pagefault_disabled) {
- if (!mc) {
- show_regs(container_of(regs, struct pt_regs, regs));
- panic("Segfault with pagefaults disabled but no mcontext");
- }
- if (!current->thread.segv_continue) {
- show_regs(container_of(regs, struct pt_regs, regs));
- panic("Segfault without recovery target");
- }
- mc_set_rip(mc, current->thread.segv_continue);
- current->thread.segv_continue = NULL;
- goto out;
+ else if (current->pagefault_disabled) {
+ if (!mc) {
+ show_regs(container_of(regs, struct pt_regs, regs));
+ panic("Segfault with pagefaults disabled but no mcontext");
+ }
+ if (!current->thread.segv_continue) {
+ show_regs(container_of(regs, struct pt_regs, regs));
+ panic("Segfault without recovery target");
}
+ mc_set_rip(mc, current->thread.segv_continue);
+ current->thread.segv_continue = NULL;
+ goto out;
+ }
+ else if (current->mm == NULL) {
show_regs(container_of(regs, struct pt_regs, regs));
panic("Segfault with no mm");
}