diff options
Diffstat (limited to 'fs/pidfs.c')
-rw-r--r-- | fs/pidfs.c | 165 |
1 files changed, 131 insertions, 34 deletions
diff --git a/fs/pidfs.c b/fs/pidfs.c index d64a4cbeb0da..c1f0a067be40 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -20,6 +20,7 @@ #include <linux/time_namespace.h> #include <linux/utsname.h> #include <net/net_namespace.h> +#include <linux/coredump.h> #include "internal.h" #include "mount.h" @@ -33,6 +34,7 @@ static struct kmem_cache *pidfs_cachep __ro_after_init; struct pidfs_exit_info { __u64 cgroupid; __s32 exit_code; + __u32 coredump_mask; }; struct pidfs_inode { @@ -240,6 +242,22 @@ static inline bool pid_in_current_pidns(const struct pid *pid) return false; } +static __u32 pidfs_coredump_mask(unsigned long mm_flags) +{ + switch (__get_dumpable(mm_flags)) { + case SUID_DUMP_USER: + return PIDFD_COREDUMP_USER; + case SUID_DUMP_ROOT: + return PIDFD_COREDUMP_ROOT; + case SUID_DUMP_DISABLE: + return PIDFD_COREDUMP_SKIP; + default: + WARN_ON_ONCE(true); + } + + return 0; +} + static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) { struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg; @@ -280,6 +298,11 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) } } + if (mask & PIDFD_INFO_COREDUMP) { + kinfo.mask |= PIDFD_INFO_COREDUMP; + kinfo.coredump_mask = READ_ONCE(pidfs_i(inode)->__pei.coredump_mask); + } + task = get_pid_task(pid, PIDTYPE_PID); if (!task) { /* @@ -296,6 +319,13 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) if (!c) return -ESRCH; + if (!(kinfo.mask & PIDFD_INFO_COREDUMP)) { + task_lock(task); + if (task->mm) + kinfo.coredump_mask = pidfs_coredump_mask(task->mm->flags); + task_unlock(task); + } + /* Unconditionally return identifiers and credentials, the rest only on request */ user_ns = current_user_ns(); @@ -559,6 +589,31 @@ void pidfs_exit(struct task_struct *tsk) } } +#ifdef CONFIG_COREDUMP +void pidfs_coredump(const struct coredump_params *cprm) +{ + struct pid *pid = cprm->pid; + struct pidfs_exit_info *exit_info; + struct dentry *dentry; + struct inode *inode; + __u32 coredump_mask = 0; + + dentry = pid->stashed; + if (WARN_ON_ONCE(!dentry)) + return; + + inode = d_inode(dentry); + exit_info = &pidfs_i(inode)->__pei; + /* Note how we were coredumped. */ + coredump_mask = pidfs_coredump_mask(cprm->mm_flags); + /* Note that we actually did coredump. */ + coredump_mask |= PIDFD_COREDUMPED; + /* If coredumping is set to skip we should never end up here. */ + VFS_WARN_ON_ONCE(coredump_mask & PIDFD_COREDUMP_SKIP); + smp_store_release(&exit_info->coredump_mask, coredump_mask); +} +#endif + static struct vfsmount *pidfs_mnt __ro_after_init; /* @@ -569,36 +624,14 @@ static struct vfsmount *pidfs_mnt __ro_after_init; static int pidfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { - return -EOPNOTSUPP; + return anon_inode_setattr(idmap, dentry, attr); } - -/* - * User space expects pidfs inodes to have no file type in st_mode. - * - * In particular, 'lsof' has this legacy logic: - * - * type = s->st_mode & S_IFMT; - * switch (type) { - * ... - * case 0: - * if (!strcmp(p, "anon_inode")) - * Lf->ntype = Ntype = N_ANON_INODE; - * - * to detect our old anon_inode logic. - * - * Rather than mess with our internal sane inode data, just fix it - * up here in getattr() by masking off the format bits. - */ static int pidfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(path->dentry); - - generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); - stat->mode &= ~S_IFMT; - return 0; + return anon_inode_getattr(idmap, path, stat, request_mask, query_flags); } static const struct inode_operations pidfs_inode_operations = { @@ -768,7 +801,7 @@ static inline bool pidfs_pid_valid(struct pid *pid, const struct path *path, { enum pid_type type; - if (flags & PIDFD_CLONE) + if (flags & PIDFD_STALE) return true; /* @@ -777,10 +810,14 @@ static inline bool pidfs_pid_valid(struct pid *pid, const struct path *path, * pidfd has been allocated perform another check that the pid * is still alive. If it is exit information is available even * if the task gets reaped before the pidfd is returned to - * userspace. The only exception is PIDFD_CLONE where no task - * linkage has been established for @pid yet and the kernel is - * in the middle of process creation so there's nothing for - * pidfs to miss. + * userspace. The only exception are indicated by PIDFD_STALE: + * + * (1) The kernel is in the middle of task creation and thus no + * task linkage has been established yet. + * (2) The caller knows @pid has been registered in pidfs at a + * time when the task was still alive. + * + * In both cases exit information will have been reported. */ if (flags & PIDFD_THREAD) type = PIDTYPE_PID; @@ -826,7 +863,7 @@ static int pidfs_init_inode(struct inode *inode, void *data) const struct pid *pid = data; inode->i_private = data; - inode->i_flags |= S_PRIVATE; + inode->i_flags |= S_PRIVATE | S_ANON_INODE; inode->i_mode |= S_IRWXU; inode->i_op = &pidfs_inode_operations; inode->i_fop = &pidfs_file_operations; @@ -874,11 +911,11 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) int ret; /* - * Ensure that PIDFD_CLONE can be passed as a flag without + * Ensure that PIDFD_STALE can be passed as a flag without * overloading other uapi pidfd flags. */ - BUILD_BUG_ON(PIDFD_CLONE == PIDFD_THREAD); - BUILD_BUG_ON(PIDFD_CLONE == PIDFD_NONBLOCK); + BUILD_BUG_ON(PIDFD_STALE == PIDFD_THREAD); + BUILD_BUG_ON(PIDFD_STALE == PIDFD_NONBLOCK); ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path); if (ret < 0) @@ -887,7 +924,8 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) if (!pidfs_pid_valid(pid, &path, flags)) return ERR_PTR(-ESRCH); - flags &= ~PIDFD_CLONE; + flags &= ~PIDFD_STALE; + flags |= O_RDWR; pidfd_file = dentry_open(&path, flags, current_cred()); /* Raise PIDFD_THREAD explicitly as do_dentry_open() strips it. */ if (!IS_ERR(pidfd_file)) @@ -896,6 +934,65 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) return pidfd_file; } +/** + * pidfs_register_pid - register a struct pid in pidfs + * @pid: pid to pin + * + * Register a struct pid in pidfs. Needs to be paired with + * pidfs_put_pid() to not risk leaking the pidfs dentry and inode. + * + * Return: On success zero, on error a negative error code is returned. + */ +int pidfs_register_pid(struct pid *pid) +{ + struct path path __free(path_put) = {}; + int ret; + + might_sleep(); + + if (!pid) + return 0; + + ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path); + if (unlikely(ret)) + return ret; + /* Keep the dentry and only put the reference to the mount. */ + path.dentry = NULL; + return 0; +} + +/** + * pidfs_get_pid - pin a struct pid through pidfs + * @pid: pid to pin + * + * Similar to pidfs_register_pid() but only valid if the caller knows + * there's a reference to the @pid through a dentry already that can't + * go away. + */ +void pidfs_get_pid(struct pid *pid) +{ + if (!pid) + return; + WARN_ON_ONCE(!stashed_dentry_get(&pid->stashed)); +} + +/** + * pidfs_put_pid - drop a pidfs reference + * @pid: pid to drop + * + * Drop a reference to @pid via pidfs. This is only safe if the + * reference has been taken via pidfs_get_pid(). + */ +void pidfs_put_pid(struct pid *pid) +{ + might_sleep(); + + if (!pid) + return; + VFS_WARN_ON_ONCE(!pid->stashed); + dput(pid->stashed); +} + static void pidfs_inode_init_once(void *data) { struct pidfs_inode *pi = data; |