diff options
author | Luca Boccassi <luca.boccassi@gmail.com> | 2024-10-10 16:52:32 +0100 |
---|---|---|
committer | Christian Brauner <brauner@kernel.org> | 2024-10-24 13:54:51 +0200 |
commit | cdda1f26e74bac732eca537a69f19f6a37b641be (patch) | |
tree | 07fcfc8122c1a1ef147f40f70d58fa68232f5200 /fs/pidfs.c | |
parent | 8e929cb546ee42c9a61d24fae60605e9e3192354 (diff) |
pidfd: add ioctl to retrieve pid info
A common pattern when using pid fds is having to get information
about the process, which currently requires /proc being mounted,
resolving the fd to a pid, and then doing manual string parsing of
/proc/N/status and friends. This needs to be reimplemented over
and over in all userspace projects (e.g.: I have reimplemented
resolving in systemd, dbus, dbus-daemon, polkit so far), and
requires additional care in checking that the fd is still valid
after having parsed the data, to avoid races.
Having a programmatic API that can be used directly removes all
these requirements, including having /proc mounted.
As discussed at LPC24, add an ioctl with an extensible struct
so that more parameters can be added later if needed. Start with
returning pid/tgid/ppid and creds unconditionally, and cgroupid
optionally.
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Link: https://lore.kernel.org/r/20241010155401.2268522-1-luca.boccassi@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'fs/pidfs.c')
-rw-r--r-- | fs/pidfs.c | 86 |
1 files changed, 83 insertions, 3 deletions
diff --git a/fs/pidfs.c b/fs/pidfs.c
index 80675b6bf884..618abb1fa1b8 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -2,6 +2,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/cgroup.h>
 #include <linux/magic.h>
 #include <linux/mount.h>
 #include <linux/pid.h>
@@ -114,6 +115,81 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
 	return poll_flags;
 }
 
+static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long arg)
+{
+	struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg;
+	size_t usize = _IOC_SIZE(cmd);
+	struct pidfd_info kinfo = {};
+	struct user_namespace *user_ns;
+	const struct cred *c;
+	__u64 mask;
+#ifdef CONFIG_CGROUPS
+	struct cgroup *cgrp;
+#endif
+
+	if (!uinfo)
+		return -EINVAL;
+	if (usize < PIDFD_INFO_SIZE_VER0)
+		return -EINVAL; /* First version, no smaller struct possible */
+
+	if (copy_from_user(&mask, &uinfo->mask, sizeof(mask)))
+		return -EFAULT;
+
+	c = get_task_cred(task);
+	if (!c)
+		return -ESRCH;
+
+	/* Unconditionally return identifiers and credentials, the rest only on request */
+
+	user_ns = current_user_ns();
+	kinfo.ruid = from_kuid_munged(user_ns, c->uid);
+	kinfo.rgid = from_kgid_munged(user_ns, c->gid);
+	kinfo.euid = from_kuid_munged(user_ns, c->euid);
+	kinfo.egid = from_kgid_munged(user_ns, c->egid);
+	kinfo.suid = from_kuid_munged(user_ns, c->suid);
+	kinfo.sgid = from_kgid_munged(user_ns, c->sgid);
+	kinfo.fsuid = from_kuid_munged(user_ns, c->fsuid);
+	kinfo.fsgid = from_kgid_munged(user_ns, c->fsgid);
+	kinfo.mask |= PIDFD_INFO_CREDS;
+	put_cred(c);
+
+#ifdef CONFIG_CGROUPS
+	rcu_read_lock();
+	cgrp = task_dfl_cgroup(task);
+	kinfo.cgroupid = cgroup_id(cgrp);
+	kinfo.mask |= PIDFD_INFO_CGROUPID;
+	rcu_read_unlock();
+#endif
+
+	/*
+	 * Copy pid/tgid last, to reduce the chances the information might be
+	 * stale. Note that it is not possible to ensure it will be valid as the
+	 * task might return as soon as the copy_to_user finishes, but that's ok
+	 * and userspace expects that might happen and can act accordingly, so
+	 * this is just best-effort. What we can do however is checking that all
+	 * the fields are set correctly, or return ESRCH to avoid providing
+	 * incomplete information. */
+
+	kinfo.ppid = task_ppid_nr_ns(task, NULL);
+	kinfo.tgid = task_tgid_vnr(task);
+	kinfo.pid = task_pid_vnr(task);
+	kinfo.mask |= PIDFD_INFO_PID;
+
+	if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1))
+		return -ESRCH;
+
+	/*
+	 * If userspace and the kernel have the same struct size it can just
+	 * be copied. If userspace provides an older struct, only the bits that
+	 * userspace knows about will be copied. If userspace provides a new
+	 * struct, only the bits that the kernel knows about will be copied.
+	 */
+	if (copy_to_user(uinfo, &kinfo, min(usize, sizeof(kinfo))))
+		return -EFAULT;
+
+	return 0;
+}
+
 static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct task_struct *task __free(put_task) = NULL;
@@ -122,13 +198,17 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	struct ns_common *ns_common = NULL;
 	struct pid_namespace *pid_ns;
 
-	if (arg)
-		return -EINVAL;
-
 	task = get_pid_task(pid, PIDTYPE_PID);
 	if (!task)
 		return -ESRCH;
 
+	/* Extensible IOCTL that does not open namespace FDs, take a shortcut */
+	if (_IOC_NR(cmd) == _IOC_NR(PIDFD_GET_INFO))
+		return pidfd_info(task, cmd, arg);
+
+	if (arg)
+		return -EINVAL;
+
 	scoped_guard(task_lock, task) {
 		nsp = task->nsproxy;
 		if (nsp)