summaryrefslogtreecommitdiff
path: root/fs/coredump.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/coredump.c')
-rw-r--r--fs/coredump.c461
1 files changed, 366 insertions, 95 deletions
diff --git a/fs/coredump.c b/fs/coredump.c
index c33c177a701b..f217ebf2b3b6 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -43,6 +43,14 @@
#include <linux/timekeeping.h>
#include <linux/sysctl.h>
#include <linux/elf.h>
+#include <linux/pidfs.h>
+#include <linux/net.h>
+#include <linux/socket.h>
+#include <net/af_unix.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <uapi/linux/pidfd.h>
+#include <uapi/linux/un.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -60,6 +68,12 @@ static void free_vma_snapshot(struct coredump_params *cprm);
#define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024)
/* Define a reasonable max cap */
#define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024)
+/*
+ * File descriptor number for the pidfd for the thread-group leader of
+ * the coredumping task installed into the usermode helper's file
+ * descriptor table.
+ */
+#define COREDUMP_PIDFD_NUMBER 3
static int core_uses_pid;
static unsigned int core_pipe_limit;
@@ -68,9 +82,16 @@ static char core_pattern[CORENAME_MAX_SIZE] = "core";
static int core_name_size = CORENAME_MAX_SIZE;
unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT;
+enum coredump_type_t {
+ COREDUMP_FILE = 1,
+ COREDUMP_PIPE = 2,
+ COREDUMP_SOCK = 3,
+};
+
struct core_name {
char *corename;
int used, size;
+ enum coredump_type_t core_type;
};
static int expand_corename(struct core_name *cn, int size)
@@ -210,18 +231,24 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
{
const struct cred *cred = current_cred();
const char *pat_ptr = core_pattern;
- int ispipe = (*pat_ptr == '|');
bool was_space = false;
int pid_in_pattern = 0;
int err = 0;
cn->used = 0;
cn->corename = NULL;
+ if (*pat_ptr == '|')
+ cn->core_type = COREDUMP_PIPE;
+ else if (*pat_ptr == '@')
+ cn->core_type = COREDUMP_SOCK;
+ else
+ cn->core_type = COREDUMP_FILE;
if (expand_corename(cn, core_name_size))
return -ENOMEM;
cn->corename[0] = '\0';
- if (ispipe) {
+ switch (cn->core_type) {
+ case COREDUMP_PIPE: {
int argvs = sizeof(core_pattern) / 2;
(*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL);
if (!(*argv))
@@ -230,6 +257,45 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
++pat_ptr;
if (!(*pat_ptr))
return -ENOMEM;
+ break;
+ }
+ case COREDUMP_SOCK: {
+ /* skip the @ */
+ pat_ptr++;
+ if (!(*pat_ptr))
+ return -ENOMEM;
+
+ err = cn_printf(cn, "%s", pat_ptr);
+ if (err)
+ return err;
+
+ /* Require absolute paths. */
+ if (cn->corename[0] != '/')
+ return -EINVAL;
+
+ /*
+ * Ensure we can uses spaces to indicate additional
+ * parameters in the future.
+ */
+ if (strchr(cn->corename, ' ')) {
+ coredump_report_failure("Coredump socket may not %s contain spaces", cn->corename);
+ return -EINVAL;
+ }
+
+ /*
+ * Currently no need to parse any other options.
+ * Relevant information can be retrieved from the peer
+ * pidfd retrievable via SO_PEERPIDFD by the receiver or
+ * via /proc/<pid>, using the SO_PEERPIDFD to guard
+ * against pid recycling when opening /proc/<pid>.
+ */
+ return 0;
+ }
+ case COREDUMP_FILE:
+ break;
+ default:
+ WARN_ON_ONCE(true);
+ return -EINVAL;
}
/* Repeat as long as we have more pattern to process and more output
@@ -239,7 +305,7 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
* Split on spaces before doing template expansion so that
* %e and %E don't get split if they have spaces in them
*/
- if (ispipe) {
+ if (cn->core_type == COREDUMP_PIPE) {
if (isspace(*pat_ptr)) {
if (cn->used != 0)
was_space = true;
@@ -339,6 +405,27 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
case 'C':
err = cn_printf(cn, "%d", cprm->cpu);
break;
+ /* pidfd number */
+ case 'F': {
+ /*
+ * Installing a pidfd only makes sense if
+ * we actually spawn a usermode helper.
+ */
+ if (cn->core_type != COREDUMP_PIPE)
+ break;
+
+ /*
+ * Note that we'll install a pidfd for the
+ * thread-group leader. We know that task
+ * linkage hasn't been removed yet and even if
+ * this @current isn't the actual thread-group
+ * leader we know that the thread-group leader
+ * cannot be reaped until @current has exited.
+ */
+ cprm->pid = task_tgid(current);
+ err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER);
+ break;
+ }
default:
break;
}
@@ -355,12 +442,10 @@ out:
* If core_pattern does not include a %p (as is the default)
* and core_uses_pid is set, then .%pid will be appended to
* the filename. Do not do this for piped commands. */
- if (!ispipe && !pid_in_pattern && core_uses_pid) {
- err = cn_printf(cn, ".%d", task_tgid_vnr(current));
- if (err)
- return err;
- }
- return ispipe;
+ if (cn->core_type == COREDUMP_FILE && !pid_in_pattern && core_uses_pid)
+ return cn_printf(cn, ".%d", task_tgid_vnr(current));
+
+ return 0;
}
static int zap_process(struct signal_struct *signal, int exit_code)
@@ -493,7 +578,7 @@ static void wait_for_dump_helpers(struct file *file)
}
/*
- * umh_pipe_setup
+ * umh_coredump_setup
* helper function to customize the process used
* to collect the core in userspace. Specifically
* it sets up a pipe and installs it as fd 0 (stdin)
@@ -503,11 +588,34 @@ static void wait_for_dump_helpers(struct file *file)
* is a special value that we use to trap recursive
* core dumps
*/
-static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
+static int umh_coredump_setup(struct subprocess_info *info, struct cred *new)
{
struct file *files[2];
struct coredump_params *cp = (struct coredump_params *)info->data;
- int err = create_pipe_files(files, 0);
+ int err;
+
+ if (cp->pid) {
+ struct file *pidfs_file __free(fput) = NULL;
+
+ pidfs_file = pidfs_alloc_file(cp->pid, 0);
+ if (IS_ERR(pidfs_file))
+ return PTR_ERR(pidfs_file);
+
+ pidfs_coredump(cp);
+
+ /*
+ * Usermode helpers are childen of either
+ * system_unbound_wq or of kthreadd. So we know that
+ * we're starting off with a clean file descriptor
+ * table. So we should always be able to use
+ * COREDUMP_PIDFD_NUMBER as our file descriptor value.
+ */
+ err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0);
+ if (err < 0)
+ return err;
+ }
+
+ err = create_pipe_files(files, 0);
if (err)
return err;
@@ -515,10 +623,13 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
err = replace_fd(0, files[0], 0);
fput(files[0]);
+ if (err < 0)
+ return err;
+
/* and disallow core files too */
current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
- return err;
+ return 0;
}
void do_coredump(const kernel_siginfo_t *siginfo)
@@ -530,7 +641,6 @@ void do_coredump(const kernel_siginfo_t *siginfo)
const struct cred *old_cred;
struct cred *cred;
int retval = 0;
- int ispipe;
size_t *argv = NULL;
int argc = 0;
/* require nonrelative corefile path and be extra careful */
@@ -579,70 +689,14 @@ void do_coredump(const kernel_siginfo_t *siginfo)
old_cred = override_creds(cred);
- ispipe = format_corename(&cn, &cprm, &argv, &argc);
-
- if (ispipe) {
- int argi;
- int dump_count;
- char **helper_argv;
- struct subprocess_info *sub_info;
-
- if (ispipe < 0) {
- coredump_report_failure("format_corename failed, aborting core");
- goto fail_unlock;
- }
-
- if (cprm.limit == 1) {
- /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
- *
- * Normally core limits are irrelevant to pipes, since
- * we're not writing to the file system, but we use
- * cprm.limit of 1 here as a special value, this is a
- * consistent way to catch recursive crashes.
- * We can still crash if the core_pattern binary sets
- * RLIM_CORE = !1, but it runs as root, and can do
- * lots of stupid things.
- *
- * Note that we use task_tgid_vnr here to grab the pid
- * of the process group leader. That way we get the
- * right pid if a thread in a multi-threaded
- * core_pattern process dies.
- */
- coredump_report_failure("RLIMIT_CORE is set to 1, aborting core");
- goto fail_unlock;
- }
- cprm.limit = RLIM_INFINITY;
-
- dump_count = atomic_inc_return(&core_dump_count);
- if (core_pipe_limit && (core_pipe_limit < dump_count)) {
- coredump_report_failure("over core_pipe_limit, skipping core dump");
- goto fail_dropcount;
- }
-
- helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv),
- GFP_KERNEL);
- if (!helper_argv) {
- coredump_report_failure("%s failed to allocate memory", __func__);
- goto fail_dropcount;
- }
- for (argi = 0; argi < argc; argi++)
- helper_argv[argi] = cn.corename + argv[argi];
- helper_argv[argi] = NULL;
-
- retval = -ENOMEM;
- sub_info = call_usermodehelper_setup(helper_argv[0],
- helper_argv, NULL, GFP_KERNEL,
- umh_pipe_setup, NULL, &cprm);
- if (sub_info)
- retval = call_usermodehelper_exec(sub_info,
- UMH_WAIT_EXEC);
+ retval = format_corename(&cn, &cprm, &argv, &argc);
+ if (retval < 0) {
+ coredump_report_failure("format_corename failed, aborting core");
+ goto fail_unlock;
+ }
- kfree(helper_argv);
- if (retval) {
- coredump_report_failure("|%s pipe failed", cn.corename);
- goto close_fail;
- }
- } else {
+ switch (cn.core_type) {
+ case COREDUMP_FILE: {
struct mnt_idmap *idmap;
struct inode *inode;
int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW |
@@ -736,6 +790,143 @@ void do_coredump(const kernel_siginfo_t *siginfo)
if (do_truncate(idmap, cprm.file->f_path.dentry,
0, 0, cprm.file))
goto close_fail;
+ break;
+ }
+ case COREDUMP_PIPE: {
+ int argi;
+ int dump_count;
+ char **helper_argv;
+ struct subprocess_info *sub_info;
+
+ if (cprm.limit == 1) {
+ /* See umh_coredump_setup() which sets RLIMIT_CORE = 1.
+ *
+ * Normally core limits are irrelevant to pipes, since
+ * we're not writing to the file system, but we use
+ * cprm.limit of 1 here as a special value, this is a
+ * consistent way to catch recursive crashes.
+ * We can still crash if the core_pattern binary sets
+ * RLIM_CORE = !1, but it runs as root, and can do
+ * lots of stupid things.
+ *
+ * Note that we use task_tgid_vnr here to grab the pid
+ * of the process group leader. That way we get the
+ * right pid if a thread in a multi-threaded
+ * core_pattern process dies.
+ */
+ coredump_report_failure("RLIMIT_CORE is set to 1, aborting core");
+ goto fail_unlock;
+ }
+ cprm.limit = RLIM_INFINITY;
+
+ dump_count = atomic_inc_return(&core_dump_count);
+ if (core_pipe_limit && (core_pipe_limit < dump_count)) {
+ coredump_report_failure("over core_pipe_limit, skipping core dump");
+ goto fail_dropcount;
+ }
+
+ helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv),
+ GFP_KERNEL);
+ if (!helper_argv) {
+ coredump_report_failure("%s failed to allocate memory", __func__);
+ goto fail_dropcount;
+ }
+ for (argi = 0; argi < argc; argi++)
+ helper_argv[argi] = cn.corename + argv[argi];
+ helper_argv[argi] = NULL;
+
+ retval = -ENOMEM;
+ sub_info = call_usermodehelper_setup(helper_argv[0],
+ helper_argv, NULL, GFP_KERNEL,
+ umh_coredump_setup, NULL, &cprm);
+ if (sub_info)
+ retval = call_usermodehelper_exec(sub_info,
+ UMH_WAIT_EXEC);
+
+ kfree(helper_argv);
+ if (retval) {
+ coredump_report_failure("|%s pipe failed", cn.corename);
+ goto close_fail;
+ }
+ break;
+ }
+ case COREDUMP_SOCK: {
+#ifdef CONFIG_UNIX
+ struct file *file __free(fput) = NULL;
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ };
+ ssize_t addr_len;
+ struct socket *socket;
+
+ addr_len = strscpy(addr.sun_path, cn.corename);
+ if (addr_len < 0)
+ goto close_fail;
+ addr_len += offsetof(struct sockaddr_un, sun_path) + 1;
+
+ /*
+ * It is possible that the userspace process which is
+ * supposed to handle the coredump and is listening on
+ * the AF_UNIX socket coredumps. Userspace should just
+ * mark itself non dumpable.
+ */
+
+ retval = sock_create_kern(&init_net, AF_UNIX, SOCK_STREAM, 0, &socket);
+ if (retval < 0)
+ goto close_fail;
+
+ file = sock_alloc_file(socket, 0, NULL);
+ if (IS_ERR(file))
+ goto close_fail;
+
+ /*
+ * Set the thread-group leader pid which is used for the
+ * peer credentials during connect() below. Then
+ * immediately register it in pidfs...
+ */
+ cprm.pid = task_tgid(current);
+ retval = pidfs_register_pid(cprm.pid);
+ if (retval)
+ goto close_fail;
+
+ /*
+ * ... and set the coredump information so userspace
+ * has it available after connect()...
+ */
+ pidfs_coredump(&cprm);
+
+ retval = kernel_connect(socket, (struct sockaddr *)(&addr),
+ addr_len, O_NONBLOCK | SOCK_COREDUMP);
+
+ /*
+ * ... Make sure to only put our reference after connect() took
+ * its own reference keeping the pidfs entry alive ...
+ */
+ pidfs_put_pid(cprm.pid);
+
+ if (retval) {
+ if (retval == -EAGAIN)
+ coredump_report_failure("Coredump socket %s receive queue full", addr.sun_path);
+ else
+ coredump_report_failure("Coredump socket connection %s failed %d", addr.sun_path, retval);
+ goto close_fail;
+ }
+
+ /* ... and validate that @sk_peer_pid matches @cprm.pid. */
+ if (WARN_ON_ONCE(unix_peer(socket->sk)->sk_peer_pid != cprm.pid))
+ goto close_fail;
+
+ cprm.limit = RLIM_INFINITY;
+ cprm.file = no_free_ptr(file);
+#else
+ coredump_report_failure("Core dump socket support %s disabled", cn.corename);
+ goto close_fail;
+#endif
+ break;
+ }
+ default:
+ WARN_ON_ONCE(true);
+ goto close_fail;
}
/* get us an unshared descriptor table; almost always a no-op */
@@ -770,13 +961,49 @@ void do_coredump(const kernel_siginfo_t *siginfo)
file_end_write(cprm.file);
free_vma_snapshot(&cprm);
}
- if (ispipe && core_pipe_limit)
- wait_for_dump_helpers(cprm.file);
+
+#ifdef CONFIG_UNIX
+ /* Let userspace know we're done processing the coredump. */
+ if (sock_from_file(cprm.file))
+ kernel_sock_shutdown(sock_from_file(cprm.file), SHUT_WR);
+#endif
+
+ /*
+ * When core_pipe_limit is set we wait for the coredump server
+ * or usermodehelper to finish before exiting so it can e.g.,
+ * inspect /proc/<pid>.
+ */
+ if (core_pipe_limit) {
+ switch (cn.core_type) {
+ case COREDUMP_PIPE:
+ wait_for_dump_helpers(cprm.file);
+ break;
+#ifdef CONFIG_UNIX
+ case COREDUMP_SOCK: {
+ ssize_t n;
+
+ /*
+ * We use a simple read to wait for the coredump
+ * processing to finish. Either the socket is
+ * closed or we get sent unexpected data. In
+ * both cases, we're done.
+ */
+ n = __kernel_read(cprm.file, &(char){ 0 }, 1, NULL);
+ if (n != 0)
+ coredump_report_failure("Unexpected data on coredump socket");
+ break;
+ }
+#endif
+ default:
+ break;
+ }
+ }
+
close_fail:
if (cprm.file)
filp_close(cprm.file, NULL);
fail_dropcount:
- if (ispipe)
+ if (cn.core_type == COREDUMP_PIPE)
atomic_dec(&core_dump_count);
fail_unlock:
kfree(argv);
@@ -799,10 +1026,9 @@ static int __dump_emit(struct coredump_params *cprm, const void *addr, int nr)
struct file *file = cprm->file;
loff_t pos = file->f_pos;
ssize_t n;
+
if (cprm->written + nr > cprm->limit)
return 0;
-
-
if (dump_interrupted())
return 0;
n = __kernel_write(file, addr, nr, &pos);
@@ -819,20 +1045,21 @@ static int __dump_skip(struct coredump_params *cprm, size_t nr)
{
static char zeroes[PAGE_SIZE];
struct file *file = cprm->file;
+
if (file->f_mode & FMODE_LSEEK) {
- if (dump_interrupted() ||
- vfs_llseek(file, nr, SEEK_CUR) < 0)
+ if (dump_interrupted() || vfs_llseek(file, nr, SEEK_CUR) < 0)
return 0;
cprm->pos += nr;
return 1;
- } else {
- while (nr > PAGE_SIZE) {
- if (!__dump_emit(cprm, zeroes, PAGE_SIZE))
- return 0;
- nr -= PAGE_SIZE;
- }
- return __dump_emit(cprm, zeroes, nr);
}
+
+ while (nr > PAGE_SIZE) {
+ if (!__dump_emit(cprm, zeroes, PAGE_SIZE))
+ return 0;
+ nr -= PAGE_SIZE;
+ }
+
+ return __dump_emit(cprm, zeroes, nr);
}
int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
@@ -1001,7 +1228,7 @@ EXPORT_SYMBOL(dump_align);
void validate_coredump_safety(void)
{
if (suid_dumpable == SUID_DUMP_ROOT &&
- core_pattern[0] != '/' && core_pattern[0] != '|') {
+ core_pattern[0] != '/' && core_pattern[0] != '|' && core_pattern[0] != '@') {
coredump_report_failure("Unsafe core_pattern used with fs.suid_dumpable=2: "
"pipe handler or fully qualified core dump path required. "
@@ -1009,18 +1236,55 @@ void validate_coredump_safety(void)
}
}
+static inline bool check_coredump_socket(void)
+{
+ if (core_pattern[0] != '@')
+ return true;
+
+ /*
+ * Coredump socket must be located in the initial mount
+ * namespace. Don't give the impression that anything else is
+ * supported right now.
+ */
+ if (current->nsproxy->mnt_ns != init_task.nsproxy->mnt_ns)
+ return false;
+
+ /* Must be an absolute path. */
+ if (*(core_pattern + 1) != '/')
+ return false;
+
+ return true;
+}
+
static int proc_dostring_coredump(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- int error = proc_dostring(table, write, buffer, lenp, ppos);
+ int error;
+ ssize_t retval;
+ char old_core_pattern[CORENAME_MAX_SIZE];
+
+ retval = strscpy(old_core_pattern, core_pattern, CORENAME_MAX_SIZE);
+
+ error = proc_dostring(table, write, buffer, lenp, ppos);
+ if (error)
+ return error;
+ if (!check_coredump_socket()) {
+ strscpy(core_pattern, old_core_pattern, retval + 1);
+ return -EINVAL;
+ }
- if (!error)
- validate_coredump_safety();
+ validate_coredump_safety();
return error;
}
static const unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT;
static const unsigned int core_file_note_size_max = CORE_FILE_NOTE_SIZE_MAX;
+static char core_modes[] = {
+ "file\npipe"
+#ifdef CONFIG_UNIX
+ "\nsocket"
+#endif
+};
static const struct ctl_table coredump_sysctls[] = {
{
@@ -1064,6 +1328,13 @@ static const struct ctl_table coredump_sysctls[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
+ {
+ .procname = "core_modes",
+ .data = core_modes,
+ .maxlen = sizeof(core_modes) - 1,
+ .mode = 0444,
+ .proc_handler = proc_dostring,
+ },
};
static int __init init_fs_coredump_sysctls(void)