diff options
Diffstat (limited to 'fs/coredump.c')
-rw-r--r-- | fs/coredump.c | 461 |
1 files changed, 366 insertions, 95 deletions
diff --git a/fs/coredump.c b/fs/coredump.c index c33c177a701b..f217ebf2b3b6 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -43,6 +43,14 @@ #include <linux/timekeeping.h> #include <linux/sysctl.h> #include <linux/elf.h> +#include <linux/pidfs.h> +#include <linux/net.h> +#include <linux/socket.h> +#include <net/af_unix.h> +#include <net/net_namespace.h> +#include <net/sock.h> +#include <uapi/linux/pidfd.h> +#include <uapi/linux/un.h> #include <linux/uaccess.h> #include <asm/mmu_context.h> @@ -60,6 +68,12 @@ static void free_vma_snapshot(struct coredump_params *cprm); #define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024) /* Define a reasonable max cap */ #define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024) +/* + * File descriptor number for the pidfd for the thread-group leader of + * the coredumping task installed into the usermode helper's file + * descriptor table. + */ +#define COREDUMP_PIDFD_NUMBER 3 static int core_uses_pid; static unsigned int core_pipe_limit; @@ -68,9 +82,16 @@ static char core_pattern[CORENAME_MAX_SIZE] = "core"; static int core_name_size = CORENAME_MAX_SIZE; unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT; +enum coredump_type_t { + COREDUMP_FILE = 1, + COREDUMP_PIPE = 2, + COREDUMP_SOCK = 3, +}; + struct core_name { char *corename; int used, size; + enum coredump_type_t core_type; }; static int expand_corename(struct core_name *cn, int size) @@ -210,18 +231,24 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm, { const struct cred *cred = current_cred(); const char *pat_ptr = core_pattern; - int ispipe = (*pat_ptr == '|'); bool was_space = false; int pid_in_pattern = 0; int err = 0; cn->used = 0; cn->corename = NULL; + if (*pat_ptr == '|') + cn->core_type = COREDUMP_PIPE; + else if (*pat_ptr == '@') + cn->core_type = COREDUMP_SOCK; + else + cn->core_type = COREDUMP_FILE; if (expand_corename(cn, core_name_size)) return -ENOMEM; cn->corename[0] = '\0'; - if (ispipe) { + switch (cn->core_type) { + 
case COREDUMP_PIPE: { int argvs = sizeof(core_pattern) / 2; (*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL); if (!(*argv)) @@ -230,6 +257,45 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm, ++pat_ptr; if (!(*pat_ptr)) return -ENOMEM; + break; + } + case COREDUMP_SOCK: { + /* skip the @ */ + pat_ptr++; + if (!(*pat_ptr)) + return -ENOMEM; + + err = cn_printf(cn, "%s", pat_ptr); + if (err) + return err; + + /* Require absolute paths. */ + if (cn->corename[0] != '/') + return -EINVAL; + + /* + * Ensure we can use spaces to indicate additional + * parameters in the future. + */ + if (strchr(cn->corename, ' ')) { + coredump_report_failure("Coredump socket may not %s contain spaces", cn->corename); + return -EINVAL; + } + + /* + * Currently no need to parse any other options. + * Relevant information can be retrieved from the peer + * pidfd retrievable via SO_PEERPIDFD by the receiver or + * via /proc/<pid>, using the SO_PEERPIDFD to guard + * against pid recycling when opening /proc/<pid>. + */ + return 0; + } + case COREDUMP_FILE: + break; + default: + WARN_ON_ONCE(true); + return -EINVAL; } /* Repeat as long as we have more pattern to process and more output @@ -239,7 +305,7 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm, * Split on spaces before doing template expansion so that * %e and %E don't get split if they have spaces in them */ - if (ispipe) { + if (cn->core_type == COREDUMP_PIPE) { if (isspace(*pat_ptr)) { if (cn->used != 0) was_space = true; @@ -339,6 +405,27 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm, case 'C': err = cn_printf(cn, "%d", cprm->cpu); break; + /* pidfd number */ + case 'F': { + /* + * Installing a pidfd only makes sense if + * we actually spawn a usermode helper. + */ + if (cn->core_type != COREDUMP_PIPE) + break; + + /* + * Note that we'll install a pidfd for the + * thread-group leader. 
We know that task + linkage hasn't been removed yet and even if + * this @current isn't the actual thread-group + * leader we know that the thread-group leader + * cannot be reaped until @current has exited. + */ + cprm->pid = task_tgid(current); + err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER); + break; + } default: break; } @@ -355,12 +442,10 @@ out: * If core_pattern does not include a %p (as is the default) * and core_uses_pid is set, then .%pid will be appended to * the filename. Do not do this for piped commands. */ - if (!ispipe && !pid_in_pattern && core_uses_pid) { - err = cn_printf(cn, ".%d", task_tgid_vnr(current)); - if (err) - return err; - } - return ispipe; + if (cn->core_type == COREDUMP_FILE && !pid_in_pattern && core_uses_pid) + return cn_printf(cn, ".%d", task_tgid_vnr(current)); + + return 0; } static int zap_process(struct signal_struct *signal, int exit_code) @@ -493,7 +578,7 @@ static void wait_for_dump_helpers(struct file *file) } /* - * umh_pipe_setup + * umh_coredump_setup * helper function to customize the process used * to collect the core in userspace. Specifically * it sets up a pipe and installs it as fd 0 (stdin) @@ -503,11 +588,34 @@ static void wait_for_dump_helpers(struct file *file) * is a special value that we use to trap recursive * core dumps */ -static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) +static int umh_coredump_setup(struct subprocess_info *info, struct cred *new) { struct file *files[2]; struct coredump_params *cp = (struct coredump_params *)info->data; - int err = create_pipe_files(files, 0); + int err; + + if (cp->pid) { + struct file *pidfs_file __free(fput) = NULL; + + pidfs_file = pidfs_alloc_file(cp->pid, 0); + if (IS_ERR(pidfs_file)) + return PTR_ERR(pidfs_file); + + pidfs_coredump(cp); + + /* + * Usermode helpers are children of either + * system_unbound_wq or of kthreadd. So we know that + * we're starting off with a clean file descriptor + * table. 
So we should always be able to use + * COREDUMP_PIDFD_NUMBER as our file descriptor value. + */ + err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0); + if (err < 0) + return err; + } + + err = create_pipe_files(files, 0); if (err) return err; @@ -515,10 +623,13 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) err = replace_fd(0, files[0], 0); fput(files[0]); + if (err < 0) + return err; + /* and disallow core files too */ current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; - return err; + return 0; } void do_coredump(const kernel_siginfo_t *siginfo) @@ -530,7 +641,6 @@ void do_coredump(const kernel_siginfo_t *siginfo) const struct cred *old_cred; struct cred *cred; int retval = 0; - int ispipe; size_t *argv = NULL; int argc = 0; /* require nonrelative corefile path and be extra careful */ @@ -579,70 +689,14 @@ void do_coredump(const kernel_siginfo_t *siginfo) old_cred = override_creds(cred); - ispipe = format_corename(&cn, &cprm, &argv, &argc); - - if (ispipe) { - int argi; - int dump_count; - char **helper_argv; - struct subprocess_info *sub_info; - - if (ispipe < 0) { - coredump_report_failure("format_corename failed, aborting core"); - goto fail_unlock; - } - - if (cprm.limit == 1) { - /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. - * - * Normally core limits are irrelevant to pipes, since - * we're not writing to the file system, but we use - * cprm.limit of 1 here as a special value, this is a - * consistent way to catch recursive crashes. - * We can still crash if the core_pattern binary sets - * RLIM_CORE = !1, but it runs as root, and can do - * lots of stupid things. - * - * Note that we use task_tgid_vnr here to grab the pid - * of the process group leader. That way we get the - * right pid if a thread in a multi-threaded - * core_pattern process dies. 
- */ - coredump_report_failure("RLIMIT_CORE is set to 1, aborting core"); - goto fail_unlock; - } - cprm.limit = RLIM_INFINITY; - - dump_count = atomic_inc_return(&core_dump_count); - if (core_pipe_limit && (core_pipe_limit < dump_count)) { - coredump_report_failure("over core_pipe_limit, skipping core dump"); - goto fail_dropcount; - } - - helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), - GFP_KERNEL); - if (!helper_argv) { - coredump_report_failure("%s failed to allocate memory", __func__); - goto fail_dropcount; - } - for (argi = 0; argi < argc; argi++) - helper_argv[argi] = cn.corename + argv[argi]; - helper_argv[argi] = NULL; - - retval = -ENOMEM; - sub_info = call_usermodehelper_setup(helper_argv[0], - helper_argv, NULL, GFP_KERNEL, - umh_pipe_setup, NULL, &cprm); - if (sub_info) - retval = call_usermodehelper_exec(sub_info, - UMH_WAIT_EXEC); + retval = format_corename(&cn, &cprm, &argv, &argc); + if (retval < 0) { + coredump_report_failure("format_corename failed, aborting core"); + goto fail_unlock; + } - kfree(helper_argv); - if (retval) { - coredump_report_failure("|%s pipe failed", cn.corename); - goto close_fail; - } - } else { + switch (cn.core_type) { + case COREDUMP_FILE: { struct mnt_idmap *idmap; struct inode *inode; int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW | @@ -736,6 +790,143 @@ void do_coredump(const kernel_siginfo_t *siginfo) if (do_truncate(idmap, cprm.file->f_path.dentry, 0, 0, cprm.file)) goto close_fail; + break; + } + case COREDUMP_PIPE: { + int argi; + int dump_count; + char **helper_argv; + struct subprocess_info *sub_info; + + if (cprm.limit == 1) { + /* See umh_coredump_setup() which sets RLIMIT_CORE = 1. + * + * Normally core limits are irrelevant to pipes, since + * we're not writing to the file system, but we use + * cprm.limit of 1 here as a special value, this is a + * consistent way to catch recursive crashes. 
+ * We can still crash if the core_pattern binary sets + * RLIM_CORE = !1, but it runs as root, and can do + * lots of stupid things. + * + * Note that we use task_tgid_vnr here to grab the pid + * of the process group leader. That way we get the + * right pid if a thread in a multi-threaded + * core_pattern process dies. + */ + coredump_report_failure("RLIMIT_CORE is set to 1, aborting core"); + goto fail_unlock; + } + cprm.limit = RLIM_INFINITY; + + dump_count = atomic_inc_return(&core_dump_count); + if (core_pipe_limit && (core_pipe_limit < dump_count)) { + coredump_report_failure("over core_pipe_limit, skipping core dump"); + goto fail_dropcount; + } + + helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), + GFP_KERNEL); + if (!helper_argv) { + coredump_report_failure("%s failed to allocate memory", __func__); + goto fail_dropcount; + } + for (argi = 0; argi < argc; argi++) + helper_argv[argi] = cn.corename + argv[argi]; + helper_argv[argi] = NULL; + + retval = -ENOMEM; + sub_info = call_usermodehelper_setup(helper_argv[0], + helper_argv, NULL, GFP_KERNEL, + umh_coredump_setup, NULL, &cprm); + if (sub_info) + retval = call_usermodehelper_exec(sub_info, + UMH_WAIT_EXEC); + + kfree(helper_argv); + if (retval) { + coredump_report_failure("|%s pipe failed", cn.corename); + goto close_fail; + } + break; + } + case COREDUMP_SOCK: { +#ifdef CONFIG_UNIX + struct file *file __free(fput) = NULL; + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + ssize_t addr_len; + struct socket *socket; + + addr_len = strscpy(addr.sun_path, cn.corename); + if (addr_len < 0) + goto close_fail; + addr_len += offsetof(struct sockaddr_un, sun_path) + 1; + + /* + * It is possible that the userspace process which is + * supposed to handle the coredump and is listening on + * the AF_UNIX socket coredumps. Userspace should just + * mark itself non dumpable. 
+ */ + + retval = sock_create_kern(&init_net, AF_UNIX, SOCK_STREAM, 0, &socket); + if (retval < 0) + goto close_fail; + + file = sock_alloc_file(socket, 0, NULL); + if (IS_ERR(file)) + goto close_fail; + + /* + * Set the thread-group leader pid which is used for the + * peer credentials during connect() below. Then + * immediately register it in pidfs... + */ + cprm.pid = task_tgid(current); + retval = pidfs_register_pid(cprm.pid); + if (retval) + goto close_fail; + + /* + * ... and set the coredump information so userspace + * has it available after connect()... + */ + pidfs_coredump(&cprm); + + retval = kernel_connect(socket, (struct sockaddr *)(&addr), + addr_len, O_NONBLOCK | SOCK_COREDUMP); + + /* + * ... Make sure to only put our reference after connect() took + * its own reference keeping the pidfs entry alive ... + */ + pidfs_put_pid(cprm.pid); + + if (retval) { + if (retval == -EAGAIN) + coredump_report_failure("Coredump socket %s receive queue full", addr.sun_path); + else + coredump_report_failure("Coredump socket connection %s failed %d", addr.sun_path, retval); + goto close_fail; + } + + /* ... and validate that @sk_peer_pid matches @cprm.pid. */ + if (WARN_ON_ONCE(unix_peer(socket->sk)->sk_peer_pid != cprm.pid)) + goto close_fail; + + cprm.limit = RLIM_INFINITY; + cprm.file = no_free_ptr(file); +#else + coredump_report_failure("Core dump socket support %s disabled", cn.corename); + goto close_fail; +#endif + break; + } + default: + WARN_ON_ONCE(true); + goto close_fail; } /* get us an unshared descriptor table; almost always a no-op */ @@ -770,13 +961,49 @@ void do_coredump(const kernel_siginfo_t *siginfo) file_end_write(cprm.file); free_vma_snapshot(&cprm); } - if (ispipe && core_pipe_limit) - wait_for_dump_helpers(cprm.file); + +#ifdef CONFIG_UNIX + /* Let userspace know we're done processing the coredump. 
*/ + if (sock_from_file(cprm.file)) + kernel_sock_shutdown(sock_from_file(cprm.file), SHUT_WR); +#endif + + /* + * When core_pipe_limit is set we wait for the coredump server + * or usermodehelper to finish before exiting so it can e.g., + * inspect /proc/<pid>. + */ + if (core_pipe_limit) { + switch (cn.core_type) { + case COREDUMP_PIPE: + wait_for_dump_helpers(cprm.file); + break; +#ifdef CONFIG_UNIX + case COREDUMP_SOCK: { + ssize_t n; + + /* + * We use a simple read to wait for the coredump + * processing to finish. Either the socket is + * closed or we get sent unexpected data. In + * both cases, we're done. + */ + n = __kernel_read(cprm.file, &(char){ 0 }, 1, NULL); + if (n != 0) + coredump_report_failure("Unexpected data on coredump socket"); + break; + } +#endif + default: + break; + } + } + close_fail: if (cprm.file) filp_close(cprm.file, NULL); fail_dropcount: - if (ispipe) + if (cn.core_type == COREDUMP_PIPE) atomic_dec(&core_dump_count); fail_unlock: kfree(argv); @@ -799,10 +1026,9 @@ static int __dump_emit(struct coredump_params *cprm, const void *addr, int nr) struct file *file = cprm->file; loff_t pos = file->f_pos; ssize_t n; + if (cprm->written + nr > cprm->limit) return 0; - - if (dump_interrupted()) return 0; n = __kernel_write(file, addr, nr, &pos); @@ -819,20 +1045,21 @@ static int __dump_skip(struct coredump_params *cprm, size_t nr) { static char zeroes[PAGE_SIZE]; struct file *file = cprm->file; + if (file->f_mode & FMODE_LSEEK) { - if (dump_interrupted() || - vfs_llseek(file, nr, SEEK_CUR) < 0) + if (dump_interrupted() || vfs_llseek(file, nr, SEEK_CUR) < 0) return 0; cprm->pos += nr; return 1; - } else { - while (nr > PAGE_SIZE) { - if (!__dump_emit(cprm, zeroes, PAGE_SIZE)) - return 0; - nr -= PAGE_SIZE; - } - return __dump_emit(cprm, zeroes, nr); } + + while (nr > PAGE_SIZE) { + if (!__dump_emit(cprm, zeroes, PAGE_SIZE)) + return 0; + nr -= PAGE_SIZE; + } + + return __dump_emit(cprm, zeroes, nr); } int dump_emit(struct coredump_params 
*cprm, const void *addr, int nr) @@ -1001,7 +1228,7 @@ EXPORT_SYMBOL(dump_align); void validate_coredump_safety(void) { if (suid_dumpable == SUID_DUMP_ROOT && - core_pattern[0] != '/' && core_pattern[0] != '|') { + core_pattern[0] != '/' && core_pattern[0] != '|' && core_pattern[0] != '@') { coredump_report_failure("Unsafe core_pattern used with fs.suid_dumpable=2: " "pipe handler or fully qualified core dump path required. " @@ -1009,18 +1236,55 @@ void validate_coredump_safety(void) } } +static inline bool check_coredump_socket(void) +{ + if (core_pattern[0] != '@') + return true; + + /* + * Coredump socket must be located in the initial mount + * namespace. Don't give the impression that anything else is + * supported right now. + */ + if (current->nsproxy->mnt_ns != init_task.nsproxy->mnt_ns) + return false; + + /* Must be an absolute path. */ + if (*(core_pattern + 1) != '/') + return false; + + return true; +} + static int proc_dostring_coredump(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - int error = proc_dostring(table, write, buffer, lenp, ppos); + int error; + ssize_t retval; + char old_core_pattern[CORENAME_MAX_SIZE]; + + retval = strscpy(old_core_pattern, core_pattern, CORENAME_MAX_SIZE); + + error = proc_dostring(table, write, buffer, lenp, ppos); + if (error) + return error; + if (!check_coredump_socket()) { + strscpy(core_pattern, old_core_pattern, retval + 1); + return -EINVAL; + } - if (!error) - validate_coredump_safety(); + validate_coredump_safety(); return error; } static const unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT; static const unsigned int core_file_note_size_max = CORE_FILE_NOTE_SIZE_MAX; +static char core_modes[] = { + "file\npipe" +#ifdef CONFIG_UNIX + "\nsocket" +#endif +}; static const struct ctl_table coredump_sysctls[] = { { @@ -1064,6 +1328,13 @@ static const struct ctl_table coredump_sysctls[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + { + .procname = 
"core_modes", + .data = core_modes, + .maxlen = sizeof(core_modes) - 1, + .mode = 0444, + .proc_handler = proc_dostring, + }, }; static int __init init_fs_coredump_sysctls(void) |