From 709894c52c1cafa36fe2047ba5a0b83bdf398133 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Wed, 9 Apr 2025 14:50:58 +0200 Subject: af_unix: Remove unix_unhash() Dummy unix_unhash() was introduced for sockmap in commit 94531cfcbe79 ("af_unix: Add unix_stream_proto for sockmap"), but there's no need to implement it anymore. ->unhash() is only called conditionally: in unix_shutdown() since commit d359902d5c35 ("af_unix: Fix NULL pointer bug in unix_shutdown"), and in BPF proto's sock_map_unhash() since commit 5b4a79ba65a1 ("bpf, sockmap: Don't let sock_map_{close,destroy,unhash} call itself"). Remove it. Signed-off-by: Michal Luczaj Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250409-cleanup-drop-unix-unhash-v1-1-1659e5b8ee84@rbox.co Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f78a2492826f..2ab20821d6bb 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -950,13 +950,6 @@ static void unix_close(struct sock *sk, long timeout) */ } -static void unix_unhash(struct sock *sk) -{ - /* Nothing to do here, unix socket does not need a ->unhash(). - * This is merely for sockmap. - */ -} - static bool unix_bpf_bypass_getsockopt(int level, int optname) { if (level == SOL_SOCKET) { @@ -987,7 +980,6 @@ struct proto unix_stream_proto = { .owner = THIS_MODULE, .obj_size = sizeof(struct unix_sock), .close = unix_close, - .unhash = unix_unhash, .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = unix_stream_bpf_update_proto, -- cgit From 350d4546295949c8a29e345b340e57772813037a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 May 2025 13:57:52 -0700 Subject: af_unix: Factorise test_bit() for SOCK_PASSCRED and SOCK_PASSPIDFD. Currently, the same checks for SOCK_PASSCRED and SOCK_PASSPIDFD are scattered across many places. Let's centralise the bit tests to make the following changes cleaner. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/unix/af_unix.c | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2ab20821d6bb..464e183ffdb8 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -765,6 +765,14 @@ static void copy_peercred(struct sock *sk, struct sock *peersk) spin_unlock(&sk->sk_peer_lock); } +static bool unix_may_passcred(const struct sock *sk) +{ + struct socket *sock = sk->sk_socket; + + return test_bit(SOCK_PASSCRED, &sock->flags) || + test_bit(SOCK_PASSPIDFD, &sock->flags); +} + static int unix_listen(struct socket *sock, int backlog) { int err; @@ -1411,9 +1419,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if (err) goto out; - if ((test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags)) && - !READ_ONCE(unix_sk(sk)->addr)) { + if (unix_may_passcred(sk) && !READ_ONCE(unix_sk(sk)->addr)) { err = unix_autobind(sk); if (err) goto out; @@ -1531,9 +1537,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, if (err) goto out; - if ((test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags)) && - !READ_ONCE(u->addr)) { + if (unix_may_passcred(sk) && !READ_ONCE(u->addr)) { err = unix_autobind(sk); if (err) goto out; @@ -1877,16 +1881,6 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen return err; } -static bool unix_passcred_enabled(const struct socket *sock, - const struct sock *other) -{ - return test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags) || - !other->sk_socket || - test_bit(SOCK_PASSCRED, &other->sk_socket->flags) || - test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags); -} - /* * Some apps rely on write() giving SCM_CREDENTIALS * We include credentials if source or destination socket @@ -1897,7 +1891,9 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, { if (UNIXCB(skb).pid) return; - if (unix_passcred_enabled(sock, other)) { + + if (unix_may_passcred(sock->sk) || + !other->sk_socket || unix_may_passcred(other)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } @@ -1974,9 +1970,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out; } - if ((test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags)) && - !READ_ONCE(u->addr)) { + if (unix_may_passcred(sk) && !READ_ONCE(u->addr)) { err = unix_autobind(sk); if (err) goto out; @@ -2846,8 +2840,7 @@ unlock: /* Never glue messages from different writers */ if (!unix_skb_scm_eq(skb, &scm)) break; - } else if (test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags)) { + } else if (unix_may_passcred(sk)) { /* Copy credentials */ scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); unix_set_secdata(&scm, skb); -- cgit From 3041bbbeb41b807d2e24d7d78d9cc1387d95898a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 May 2025 13:57:53 -0700 Subject: af_unix: Don't pass struct socket to maybe_add_creds(). We will move SOCK_PASS{CRED,PIDFD,SEC} from struct socket.flags to struct sock for better handling with SOCK_PASSRIGHTS. Then, we don't need to access struct socket in maybe_add_creds(). Let's pass struct sock to maybe_add_creds() and its caller queue_oob(). While at it, we append the unix_ prefix and fix double spaces around the pid assignment. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/unix/af_unix.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 464e183ffdb8..a39497fd6e98 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1869,7 +1869,7 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen { int err = 0; - UNIXCB(skb).pid = get_pid(scm->pid); + UNIXCB(skb).pid = get_pid(scm->pid); UNIXCB(skb).uid = scm->creds.uid; UNIXCB(skb).gid = scm->creds.gid; UNIXCB(skb).fp = NULL; @@ -1886,15 +1886,15 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen * We include credentials if source or destination socket * asserted SOCK_PASSCRED. */ -static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, - const struct sock *other) +static void unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk, + const struct sock *other) { if (UNIXCB(skb).pid) return; - if (unix_may_passcred(sock->sk) || + if (unix_may_passcred(sk) || !other->sk_socket || unix_may_passcred(other)) { - UNIXCB(skb).pid = get_pid(task_tgid(current)); + UNIXCB(skb).pid = get_pid(task_tgid(current)); current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } } @@ -2133,7 +2133,8 @@ restart_locked: if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); - maybe_add_creds(skb, sock, other); + + unix_maybe_add_creds(skb, sk, other); scm_stat_add(other, skb); skb_queue_tail(&other->sk_receive_queue, skb); unix_state_unlock(other); @@ -2161,14 +2162,14 @@ out: #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) #if IS_ENABLED(CONFIG_AF_UNIX_OOB) -static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other, +static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other, struct scm_cookie *scm, bool fds_sent) { struct unix_sock *ousk = unix_sk(other); struct sk_buff *skb; int err; - skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); + skb = sock_alloc_send_skb(sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) return err; @@ -2192,7 +2193,7 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other goto out; } - maybe_add_creds(skb, sock, other); + unix_maybe_add_creds(skb, sk, other); scm_stat_add(other, skb); spin_lock(&other->sk_receive_queue.lock); @@ -2308,7 +2309,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, (other->sk_shutdown & RCV_SHUTDOWN)) goto out_pipe_unlock; - maybe_add_creds(skb, sock, other); + unix_maybe_add_creds(skb, sk, other); scm_stat_add(other, skb); skb_queue_tail(&other->sk_receive_queue, skb); unix_state_unlock(other); @@ -2318,7 +2319,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (msg->msg_flags & MSG_OOB) { - err = queue_oob(sock, msg, other, &scm, fds_sent); + err = queue_oob(sk, msg, other, &scm, fds_sent); if (err) goto out_err; sent++; -- cgit From 0e81cfd971dc4833c699dcd8924e54a5021bc4e8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 May 2025 13:57:57 -0700 Subject: af_unix: Move SOCK_PASS{CRED,PIDFD,SEC} to struct sock. As explained in the next patch, SO_PASSRIGHTS would have a problem if we assigned a corresponding bit to socket->flags, so it must be managed in struct sock. Mixing socket->flags and sk->sk_flags for similar options will look confusing, and sk->sk_flags does not have enough space on 32bit system. Also, as mentioned in commit 16e572626961 ("af_unix: dont send SCM_CREDENTIALS by default"), SOCK_PASSCRED and SOCK_PASSPID handling is known to be slow, and managing the flags in struct socket cannot avoid that for embryo sockets. Let's move SOCK_PASS{CRED,PIDFD,SEC} to struct sock. While at it, other SOCK_XXX flags in net.h are grouped as enum. Note that assign_bit() was atomic, so the writer side is moved down after lock_sock() in setsockopt(), but the bit is only read once in sendmsg() and recvmsg(), so lock_sock() is not needed there. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/unix/af_unix.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a39497fd6e98..27ebda4cd9b9 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -767,10 +767,7 @@ static void copy_peercred(struct sock *sk, struct sock *peersk) static bool unix_may_passcred(const struct sock *sk) { - struct socket *sock = sk->sk_socket; - - return test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags); + return sk->sk_scm_credentials || sk->sk_scm_pidfd; } static int unix_listen(struct socket *sock, int backlog) @@ -1713,17 +1710,6 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) return 0; } -static void unix_sock_inherit_flags(const struct socket *old, - struct socket *new) -{ - if (test_bit(SOCK_PASSCRED, &old->flags)) - set_bit(SOCK_PASSCRED, &new->flags); - if (test_bit(SOCK_PASSPIDFD, &old->flags)) - set_bit(SOCK_PASSPIDFD, &new->flags); - if (test_bit(SOCK_PASSSEC, &old->flags)) - set_bit(SOCK_PASSSEC, &new->flags); -} - static int unix_accept(struct socket *sock, struct socket *newsock, struct proto_accept_arg *arg) { @@ -1760,7 +1746,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, unix_state_lock(tsk); unix_update_edges(unix_sk(tsk)); newsock->state = SS_CONNECTED; - unix_sock_inherit_flags(sock, newsock); + tsk->sk_scm_recv_flags = READ_ONCE(sk->sk_scm_recv_flags); sock_graft(tsk, newsock); unix_state_unlock(tsk); return 0; -- cgit From 3f84d577b79d2fce8221244f2509734940609ca6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 May 2025 13:57:58 -0700 Subject: af_unix: Inherit sk_flags at connect(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For SOCK_STREAM embryo sockets, the SO_PASS{CRED,PIDFD,SEC} options are inherited from the parent listen()ing socket. Currently, this inheritance happens at accept(), because these attributes were stored in sk->sk_socket->flags and the struct socket is not allocated until accept(). This leads to unintentional behaviour. When a peer sends data to an embryo socket in the accept() queue, unix_maybe_add_creds() embeds credentials into the skb, even if neither the peer nor the listener has enabled these options. If the option is enabled, the embryo socket receives the ancillary data after accept(). If not, the data is silently discarded. This conservative approach works for SO_PASS{CRED,PIDFD,SEC}, but would not for SO_PASSRIGHTS; once an SCM_RIGHTS with a hung file descriptor was sent, it'd be game over. To avoid this, we will need to preserve SOCK_PASSRIGHTS even on embryo sockets. Commit aed6ecef55d7 ("af_unix: Save listener for embryo socket.") made it possible to access the parent's flags in sendmsg() via unix_sk(other)->listener->sk->sk_socket->flags, but this introduces an unnecessary condition that is irrelevant for most sockets, accept()ed sockets and clients. Therefore, we moved SOCK_PASSXXX into struct sock. Let’s inherit sk->sk_scm_recv_flags at connect() to avoid receiving SCM_RIGHTS on embryo sockets created from a parent with SO_PASSRIGHTS=0. Note that the parent socket is locked in connect() so we don't need READ_ONCE() for sk_scm_recv_flags. Now, we can remove !other->sk_socket check in unix_maybe_add_creds() to avoid slow SOCK_PASS{CRED,PIDFD} handling for embryo sockets created from a parent with SO_PASS{CRED,PIDFD}=0. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/unix/af_unix.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 27ebda4cd9b9..900bad88fbd2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1626,10 +1626,12 @@ restart: /* The way is open! Fastly set all the necessary fields... */ sock_hold(sk); - unix_peer(newsk) = sk; - newsk->sk_state = TCP_ESTABLISHED; - newsk->sk_type = sk->sk_type; + unix_peer(newsk) = sk; + newsk->sk_state = TCP_ESTABLISHED; + newsk->sk_type = sk->sk_type; + newsk->sk_scm_recv_flags = other->sk_scm_recv_flags; init_peercred(newsk); + newu = unix_sk(newsk); newu->listener = other; RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); @@ -1746,7 +1748,6 @@ static int unix_accept(struct socket *sock, struct socket *newsock, unix_state_lock(tsk); unix_update_edges(unix_sk(tsk)); newsock->state = SS_CONNECTED; - tsk->sk_scm_recv_flags = READ_ONCE(sk->sk_scm_recv_flags); sock_graft(tsk, newsock); unix_state_unlock(tsk); return 0; @@ -1878,8 +1879,7 @@ static void unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk, if (UNIXCB(skb).pid) return; - if (unix_may_passcred(sk) || - !other->sk_socket || unix_may_passcred(other)) { + if (unix_may_passcred(sk) || unix_may_passcred(other)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } -- cgit From 77cbe1a6d8730a07f99f9263c2d5f2304cf5e830 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 May 2025 13:57:59 -0700 Subject: af_unix: Introduce SO_PASSRIGHTS. As long as recvmsg() or recvmmsg() is used with cmsg, it is not possible to avoid receiving file descriptors via SCM_RIGHTS. This behaviour has occasionally been flagged as problematic, as it can be (ab)used to trigger DoS during close(), for example, by passing a FUSE-controlled fd or a hung NFS fd. For instance, as noted on the uAPI Group page [0], an untrusted peer could send a file descriptor pointing to a hung NFS mount and then close it. Once the receiver calls recvmsg() with msg_control, the descriptor is automatically installed, and then the responsibility for the final close() now falls on the receiver, which may result in blocking the process for a long time. Regarding this, systemd calls cmsg_close_all() [1] after each recvmsg() to close() unwanted file descriptors sent via SCM_RIGHTS. However, this cannot work around the issue at all, because the final fput() may still occur on the receiver's side once sendmsg() with SCM_RIGHTS succeeds. Also, even filtering by LSM at recvmsg() does not work for the same reason. Thus, we need a better way to refuse SCM_RIGHTS at sendmsg(). Let's introduce SO_PASSRIGHTS to disable SCM_RIGHTS. Note that this option is enabled by default for backward compatibility. Link: https://uapi-group.org/kernel-features/#disabling-reception-of-scm_rights-for-af_unix-sockets #[0] Link: https://github.com/systemd/systemd/blob/v257.5/src/basic/fd-util.c#L612-L628 #[1] Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/unix/af_unix.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 900bad88fbd2..bd507f74e35e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1015,6 +1015,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sock_init_data(sock, sk); + sk->sk_scm_rights = 1; sk->sk_hash = unix_unbound_hash(sk); sk->sk_allocation = GFP_KERNEL_ACCOUNT; sk->sk_write_space = unix_write_space; @@ -2073,6 +2074,11 @@ restart_locked: goto out_unlock; } + if (UNIXCB(skb).fp && !other->sk_scm_rights) { + err = -EPERM; + goto out_unlock; + } + if (sk->sk_type != SOCK_SEQPACKET) { err = security_unix_may_send(sk->sk_socket, other->sk_socket); if (err) @@ -2174,9 +2180,13 @@ static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other, if (sock_flag(other, SOCK_DEAD) || (other->sk_shutdown & RCV_SHUTDOWN)) { - unix_state_unlock(other); err = -EPIPE; - goto out; + goto out_unlock; + } + + if (UNIXCB(skb).fp && !other->sk_scm_rights) { + err = -EPERM; + goto out_unlock; } unix_maybe_add_creds(skb, sk, other); @@ -2192,6 +2202,8 @@ static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other, other->sk_data_ready(other); return 0; +out_unlock: + unix_state_unlock(other); out: consume_skb(skb); return err; @@ -2295,6 +2307,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, (other->sk_shutdown & RCV_SHUTDOWN)) goto out_pipe_unlock; + if (UNIXCB(skb).fp && !other->sk_scm_rights) { + unix_state_unlock(other); + err = -EPERM; + goto out_free; + } + unix_maybe_add_creds(skb, sk, other); scm_stat_add(other, skb); skb_queue_tail(&other->sk_receive_queue, skb); -- cgit