From b27401a30ee466c4476b035487be0580767ba1fb Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Sat, 14 Jan 2023 12:35:02 +0300 Subject: unix: Improve locking scheme in unix_show_fdinfo() After switching to TCP_ESTABLISHED or TCP_LISTEN sk_state, alive SOCK_STREAM and SOCK_SEQPACKET sockets can't change it anymore (since commit 3ff8bff704f4 "unix: Fix race in SOCK_SEQPACKET's unix_dgram_sendmsg()"). Thus, we do not need to take lock here. Signed-off-by: Kirill Tkhai Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/unix/af_unix.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f0c2293f1d3b..009616fa0256 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -807,23 +807,23 @@ static int unix_count_nr_fds(struct sock *sk) static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) { struct sock *sk = sock->sk; + unsigned char s_state; struct unix_sock *u; - int nr_fds; + int nr_fds = 0; if (sk) { + s_state = READ_ONCE(sk->sk_state); u = unix_sk(sk); - if (sock->type == SOCK_DGRAM) { - nr_fds = atomic_read(&u->scm_stat.nr_fds); - goto out_print; - } - unix_state_lock(sk); - if (sk->sk_state != TCP_LISTEN) + /* SOCK_STREAM and SOCK_SEQPACKET sockets never change their + * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN. + * SOCK_DGRAM is ordinary. So, no lock is needed. + */ + if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED) nr_fds = atomic_read(&u->scm_stat.nr_fds); - else + else if (s_state == TCP_LISTEN) nr_fds = unix_count_nr_fds(sk); - unix_state_unlock(sk); -out_print: + seq_printf(m, "scm_fds: %u\n", nr_fds); } } -- cgit From abf08576afe31506b812c8c1be9714f78613f300 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:10 +0100 Subject: fs: port vfs_*() helpers to struct mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- net/unix/af_unix.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f0c2293f1d3b..81ff98298996 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1190,7 +1190,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); struct net *net = sock_net(sk); - struct user_namespace *ns; // barf... + struct mnt_idmap *idmap; struct unix_address *addr; struct dentry *dentry; struct path parent; @@ -1217,10 +1217,10 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, /* * All right, let's create it. */ - ns = mnt_user_ns(parent.mnt); + idmap = mnt_idmap(parent.mnt); err = security_path_mknod(&parent, dentry, mode, 0); if (!err) - err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0); + err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0); if (err) goto out_path; err = mutex_lock_interruptible(&u->bindlock); @@ -1245,7 +1245,7 @@ out_unlock: err = -EINVAL; out_unlink: /* failed after successful mknod? unlink what we'd created... */ - vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL); + vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL); out_path: done_path_create(&parent, dentry); out: -- cgit From 509f15b9c551b750d8f634d404805c3860e9ea17 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 25 Jan 2023 23:14:21 -0800 Subject: net: add missing includes of linux/splice.h Number of files depend on linux/splice.h getting included by linux/skbuff.h which soon will no longer be the case. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/unix/af_unix.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 009616fa0256..0be25e712c28 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -112,6 +112,7 @@ #include #include #include +#include #include #include #include -- cgit From 2aab4b96900272885bc157f8b236abf1cdc02e08 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Mar 2023 16:45:30 +0000 Subject: af_unix: fix struct pid leaks in OOB support syzbot reported struct pid leak [1]. Issue is that queue_oob() calls maybe_add_creds() which potentially holds a reference on a pid. But skb->destructor is not set (either directly or by calling unix_scm_to_skb()) This means that subsequent kfree_skb() or consume_skb() would leak this reference. In this fix, I chose to fully support scm even for the OOB message. [1] BUG: memory leak unreferenced object 0xffff8881053e7f80 (size 128): comm "syz-executor242", pid 5066, jiffies 4294946079 (age 13.220s) hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] alloc_pid+0x6a/0x560 kernel/pid.c:180 [] copy_process+0x169f/0x26c0 kernel/fork.c:2285 [] kernel_clone+0xf7/0x610 kernel/fork.c:2684 [] __do_sys_clone+0x7c/0xb0 kernel/fork.c:2825 [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 [] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 314001f0bf92 ("af_unix: Add OOB support") Reported-by: syzbot+7699d9e5635c10253a27@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Cc: Rao Shoaib Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230307164530.771896-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 347122c3575e..0b0f18ecce44 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2105,7 +2105,8 @@ out: #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) #if IS_ENABLED(CONFIG_AF_UNIX_OOB) -static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) +static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other, + struct scm_cookie *scm, bool fds_sent) { struct unix_sock *ousk = unix_sk(other); struct sk_buff *skb; @@ -2116,6 +2117,11 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other if (!skb) return err; + err = unix_scm_to_skb(scm, skb, !fds_sent); + if (err < 0) { + kfree_skb(skb); + return err; + } skb_put(skb, 1); err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); @@ -2243,7 +2249,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (msg->msg_flags & MSG_OOB) { - err = queue_oob(sock, msg, other); + err = queue_oob(sock, msg, other, &scm, fds_sent); if (err) goto out_err; sent++; -- cgit