diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2020-05-09 08:50:36 -0500 |
---|---|---|
committer | Eric W. Biederman <ebiederm@xmission.com> | 2020-05-09 08:50:36 -0500 |
commit | b213c2dcbcbc138d111f150e13317ea50002cab5 (patch) | |
tree | 8b1c0c2c346e39f9af285f29a46bc5abd4450f6d | |
parent | 6a8b55ed4056ea5559ebe4f6a4b247f627870d4c (diff) | |
parent | 2388777a0a5957a10b3d78677216530a9b3bd09f (diff) |
exec: Promised cleanups after introducing exec_update_mutex
In the patchset that introduced exec_update_mutex there were a few last
minute discoveries and fixes that left the code in a state that can
be very easily be improved.
During the merge window we discussed the first three of these patches
and I promised I would resend them.
What the first patch does is it makes the the calls in the binfmts:
flush_old_exec();
/* set the personality */
setup_new_exec();
install_exec_creds();
With no sleeps or anything in between.
At the conclusion of this set of changes the the calls in the binfmts
are:
begin_new_exec();
/* set the personality */
setup_new_exec();
The intent is to make the code easier to follow and easier to change.
Eric W. Biederman (7):
binfmt: Move install_exec_creds after setup_new_exec to match binfmt_elf
exec: Make unlocking exec_update_mutex explict
exec: Rename the flag called_exec_mmap point_of_no_return
exec: Merge install_exec_creds into setup_new_exec
exec: In setup_new_exec cache current in the local variable me
exec: Move most of setup_new_exec into flush_old_exec
exec: Rename flush_old_exec begin_new_exec
Documentation/trace/ftrace.rst | 2 +-
arch/x86/ia32/ia32_aout.c | 4 +-
fs/binfmt_aout.c | 3 +-
fs/binfmt_elf.c | 3 +-
fs/binfmt_elf_fdpic.c | 3 +-
fs/binfmt_flat.c | 4 +-
fs/exec.c | 162 ++++++++++++++++++++---------------------
include/linux/binfmts.h | 10 +--
kernel/events/core.c | 2 +-
9 files changed, 92 insertions(+), 101 deletions(-)
Link: https://lkml.kernel.org/r/87h7wujhmz.fsf@x220.int.ebiederm.org
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Greg Ungerer <gerg@linux-m68k.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
-rw-r--r-- | Documentation/trace/ftrace.rst | 2 | ||||
-rw-r--r-- | arch/x86/ia32/ia32_aout.c | 4 | ||||
-rw-r--r-- | fs/binfmt_aout.c | 3 | ||||
-rw-r--r-- | fs/binfmt_elf.c | 3 | ||||
-rw-r--r-- | fs/binfmt_elf_fdpic.c | 3 | ||||
-rw-r--r-- | fs/binfmt_flat.c | 4 | ||||
-rw-r--r-- | fs/exec.c | 162 | ||||
-rw-r--r-- | include/linux/binfmts.h | 10 | ||||
-rw-r--r-- | kernel/events/core.c | 2 |
9 files changed, 92 insertions, 101 deletions
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index 3b5614b1d1a5..430a16283103 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -1524,7 +1524,7 @@ display-graph option:: => remove_vma => exit_mmap => mmput - => flush_old_exec + => begin_new_exec => load_elf_binary => search_binary_handler => __do_execve_file.isra.32 diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 9bb71abd66bd..385d3d172ee1 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -131,7 +131,7 @@ static int load_aout_binary(struct linux_binprm *bprm) return -ENOMEM; /* Flush all traces of the currently running executable */ - retval = flush_old_exec(bprm); + retval = begin_new_exec(bprm); if (retval) return retval; @@ -156,8 +156,6 @@ static int load_aout_binary(struct linux_binprm *bprm) if (retval < 0) return retval; - install_exec_creds(bprm); - if (N_MAGIC(ex) == OMAGIC) { unsigned long text_addr, map_size; diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 8e8346a81723..3e84e9bb9084 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -151,7 +151,7 @@ static int load_aout_binary(struct linux_binprm * bprm) return -ENOMEM; /* Flush all traces of the currently running executable */ - retval = flush_old_exec(bprm); + retval = begin_new_exec(bprm); if (retval) return retval; @@ -174,7 +174,6 @@ static int load_aout_binary(struct linux_binprm * bprm) if (retval < 0) return retval; - install_exec_creds(bprm); if (N_MAGIC(ex) == OMAGIC) { unsigned long text_addr, map_size; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 13f25e241ac4..396d5c2e6b5e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -844,7 +844,7 @@ out_free_interp: goto out_free_dentry; /* Flush all traces of the currently running executable */ - retval = flush_old_exec(bprm); + retval = begin_new_exec(bprm); if (retval) goto out_free_dentry; @@ -858,7 +858,6 @@ out_free_interp: current->flags |= PF_RANDOMIZE; setup_new_exec(bprm); - install_exec_creds(bprm); /* Do this so that we can load the interpreter, if need be. We will change some of these later */ diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 240f66663543..896e3ca9bf85 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -338,7 +338,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) interp_params.flags |= ELF_FDPIC_FLAG_CONSTDISP; /* flush all traces of the currently running executable */ - retval = flush_old_exec(bprm); + retval = begin_new_exec(bprm); if (retval) goto error; @@ -434,7 +434,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) current->mm->start_stack = current->mm->start_brk + stack_size; #endif - install_exec_creds(bprm); if (create_elf_fdpic_tables(bprm, current->mm, &exec_params, &interp_params) < 0) goto error; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 831a2b25ba79..9b82bc111d0a 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -534,7 +534,7 @@ static int load_flat_file(struct linux_binprm *bprm, /* Flush all traces of the currently running executable */ if (id == 0) { - ret = flush_old_exec(bprm); + ret = begin_new_exec(bprm); if (ret) goto err; @@ -963,8 +963,6 @@ static int load_flat_binary(struct linux_binprm *bprm) } } - install_exec_creds(bprm); - set_binfmt(&flat_format); #ifdef CONFIG_MMU diff --git a/fs/exec.c b/fs/exec.c index 06b4c550af5d..3cc40048cc65 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1298,7 +1298,7 @@ void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec) * signal (via de_thread() or coredump), or will have SEGV raised * (after exec_mmap()) by search_binary_handlers (see below). */ -int flush_old_exec(struct linux_binprm * bprm) +int begin_new_exec(struct linux_binprm * bprm) { struct task_struct *me = current; int retval; @@ -1326,12 +1326,12 @@ int flush_old_exec(struct linux_binprm * bprm) goto out; /* - * After setting bprm->called_exec_mmap (to mark that current is - * using the prepared mm now), we have nothing left of the original - * process. If anything from here on returns an error, the check - * in search_binary_handler() will SEGV current. + * With the new mm installed it is completely impossible to + * fail and return to the original process. If anything from + * here on returns an error, the check in + * search_binary_handler() will SEGV current. */ - bprm->called_exec_mmap = 1; + bprm->point_of_no_return = true; bprm->mm = NULL; #ifdef CONFIG_POSIX_TIMERS @@ -1344,7 +1344,7 @@ int flush_old_exec(struct linux_binprm * bprm) */ retval = unshare_sighand(me); if (retval) - goto out; + goto out_unlock; set_fs(USER_DS); me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | @@ -1359,36 +1359,7 @@ int flush_old_exec(struct linux_binprm * bprm) * undergoing exec(2). */ do_close_on_exec(me->files); - return 0; - -out: - return retval; -} -EXPORT_SYMBOL(flush_old_exec); -void would_dump(struct linux_binprm *bprm, struct file *file) -{ - struct inode *inode = file_inode(file); - if (inode_permission(inode, MAY_READ) < 0) { - struct user_namespace *old, *user_ns; - bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; - - /* Ensure mm->user_ns contains the executable */ - user_ns = old = bprm->mm->user_ns; - while ((user_ns != &init_user_ns) && - !privileged_wrt_inode_uidgid(user_ns, inode)) - user_ns = user_ns->parent; - - if (old != user_ns) { - bprm->mm->user_ns = get_user_ns(user_ns); - put_user_ns(old); - } - } -} -EXPORT_SYMBOL(would_dump); - -void setup_new_exec(struct linux_binprm * bprm) -{ /* * Once here, prepare_binrpm() will not be called any more, so * the final state of setuid/setgid/fscaps can be merged into the @@ -1398,7 +1369,7 @@ void setup_new_exec(struct linux_binprm * bprm) if (bprm->secureexec) { /* Make sure parent cannot signal privileged process. */ - current->pdeath_signal = 0; + me->pdeath_signal = 0; /* * For secureexec, reset the stack limit to sane default to @@ -1411,9 +1382,7 @@ void setup_new_exec(struct linux_binprm * bprm) bprm->rlim_stack.rlim_cur = _STK_LIM; } - arch_pick_mmap_layout(current->mm, &bprm->rlim_stack); - - current->sas_ss_sp = current->sas_ss_size = 0; + me->sas_ss_sp = me->sas_ss_size = 0; /* * Figure out dumpability. Note that this checking only of current @@ -1427,20 +1396,82 @@ void setup_new_exec(struct linux_binprm * bprm) else set_dumpable(current->mm, SUID_DUMP_USER); - arch_setup_new_exec(); perf_event_exec(); - __set_task_comm(current, kbasename(bprm->filename), true); + __set_task_comm(me, kbasename(bprm->filename), true); + + /* An exec changes our domain. We are no longer part of the thread + group */ + WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1); + flush_signal_handlers(me, 0); + + /* + * install the new credentials for this executable + */ + security_bprm_committing_creds(bprm); + + commit_creds(bprm->cred); + bprm->cred = NULL; + + /* + * Disable monitoring for regular users + * when executing setuid binaries. Must + * wait until new credentials are committed + * by commit_creds() above + */ + if (get_dumpable(me->mm) != SUID_DUMP_USER) + perf_event_exit_task(me); + /* + * cred_guard_mutex must be held at least to this point to prevent + * ptrace_attach() from altering our determination of the task's + * credentials; any time after this it may be unlocked. + */ + security_bprm_committed_creds(bprm); + return 0; + +out_unlock: + mutex_unlock(&me->signal->exec_update_mutex); +out: + return retval; +} +EXPORT_SYMBOL(begin_new_exec); + +void would_dump(struct linux_binprm *bprm, struct file *file) +{ + struct inode *inode = file_inode(file); + if (inode_permission(inode, MAY_READ) < 0) { + struct user_namespace *old, *user_ns; + bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; + + /* Ensure mm->user_ns contains the executable */ + user_ns = old = bprm->mm->user_ns; + while ((user_ns != &init_user_ns) && + !privileged_wrt_inode_uidgid(user_ns, inode)) + user_ns = user_ns->parent; + + if (old != user_ns) { + bprm->mm->user_ns = get_user_ns(user_ns); + put_user_ns(old); + } + } +} +EXPORT_SYMBOL(would_dump); + +void setup_new_exec(struct linux_binprm * bprm) +{ + /* Setup things that can depend upon the personality */ + struct task_struct *me = current; + + arch_pick_mmap_layout(me->mm, &bprm->rlim_stack); + + arch_setup_new_exec(); /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on * some architectures like powerpc */ - current->mm->task_size = TASK_SIZE; - - /* An exec changes our domain. We are no longer part of the thread - group */ - WRITE_ONCE(current->self_exec_id, current->self_exec_id + 1); - flush_signal_handlers(current, 0); + me->mm->task_size = TASK_SIZE; + mutex_unlock(&me->signal->exec_update_mutex); + mutex_unlock(&me->signal->cred_guard_mutex); } EXPORT_SYMBOL(setup_new_exec); @@ -1456,7 +1487,7 @@ EXPORT_SYMBOL(finalize_exec); /* * Prepare credentials and lock ->cred_guard_mutex. - * install_exec_creds() commits the new creds and drops the lock. + * setup_new_exec() commits the new creds and drops the lock. * Or, if exec fails before, free_bprm() should release ->cred and * and unlock. */ @@ -1477,8 +1508,6 @@ static void free_bprm(struct linux_binprm *bprm) { free_arg_pages(bprm); if (bprm->cred) { - if (bprm->called_exec_mmap) - mutex_unlock(¤t->signal->exec_update_mutex); mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } @@ -1505,35 +1534,6 @@ int bprm_change_interp(const char *interp, struct linux_binprm *bprm) EXPORT_SYMBOL(bprm_change_interp); /* - * install the new credentials for this executable - */ -void install_exec_creds(struct linux_binprm *bprm) -{ - security_bprm_committing_creds(bprm); - - commit_creds(bprm->cred); - bprm->cred = NULL; - - /* - * Disable monitoring for regular users - * when executing setuid binaries. Must - * wait until new credentials are committed - * by commit_creds() above - */ - if (get_dumpable(current->mm) != SUID_DUMP_USER) - perf_event_exit_task(current); - /* - * cred_guard_mutex must be held at least to this point to prevent - * ptrace_attach() from altering our determination of the task's - * credentials; any time after this it may be unlocked. - */ - security_bprm_committed_creds(bprm); - mutex_unlock(¤t->signal->exec_update_mutex); - mutex_unlock(¤t->signal->cred_guard_mutex); -} -EXPORT_SYMBOL(install_exec_creds); - -/* * determine how safe it is to execute the proposed program * - the caller must hold ->cred_guard_mutex to protect against * PTRACE_ATTACH or seccomp thread-sync @@ -1720,7 +1720,7 @@ int search_binary_handler(struct linux_binprm *bprm) read_lock(&binfmt_lock); put_binfmt(fmt); - if (retval < 0 && bprm->called_exec_mmap) { + if (retval < 0 && bprm->point_of_no_return) { /* we got to flush_old_exec() and failed after it */ read_unlock(&binfmt_lock); force_sigsegv(SIGSEGV); diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index a345d9fed3d8..1b48e2154766 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -46,11 +46,10 @@ struct linux_binprm { */ secureexec:1, /* - * Set by flush_old_exec, when exec_mmap has been called. - * This is past the point of no return, when the - * exec_update_mutex has been taken. + * Set when errors can no longer be returned to the + * original userspace. */ - called_exec_mmap:1; + point_of_no_return:1; #ifdef __alpha__ unsigned int taso:1; #endif @@ -126,7 +125,7 @@ extern void unregister_binfmt(struct linux_binfmt *); extern int prepare_binprm(struct linux_binprm *); extern int __must_check remove_arg_zero(struct linux_binprm *); extern int search_binary_handler(struct linux_binprm *); -extern int flush_old_exec(struct linux_binprm * bprm); +extern int begin_new_exec(struct linux_binprm * bprm); extern void setup_new_exec(struct linux_binprm * bprm); extern void finalize_exec(struct linux_binprm *bprm); extern void would_dump(struct linux_binprm *, struct file *); @@ -146,7 +145,6 @@ extern int transfer_args_to_stack(struct linux_binprm *bprm, extern int bprm_change_interp(const char *interp, struct linux_binprm *bprm); extern int copy_strings_kernel(int argc, const char *const *argv, struct linux_binprm *bprm); -extern void install_exec_creds(struct linux_binprm *bprm); extern void set_binfmt(struct linux_binfmt *new); extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t); diff --git a/kernel/events/core.c b/kernel/events/core.c index 633b4ae72ed5..169449b5e56b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -12217,7 +12217,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) * When a child task exits, feed back event values to parent events. * * Can be called with exec_update_mutex held when called from - * install_exec_creds(). + * setup_new_exec(). */ void perf_event_exit_task(struct task_struct *child) { |