diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2024-07-15 14:03:44 -0700 | 
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2024-07-15 14:03:44 -0700 | 
| commit | a23e1966932464e1c5226cb9ac4ce1d5fc10ba22 (patch) | |
| tree | bf5f1b57faa01ca31656bfc48c7d6b6f0bc39189 /net/unix/af_unix.c | |
| parent | 7c7b1be19b228b450c2945ec379d7fc6bfef9852 (diff) | |
| parent | f3efefb6fdcce604413135bd8d4c5568e53a1f13 (diff) | |
Merge branch 'next' into for-linus
Prepare input updates for 6.11 merge window.
Diffstat (limited to 'net/unix/af_unix.c')
| -rw-r--r-- | net/unix/af_unix.c | 193 | 
1 file changed, 129 insertions(+), 64 deletions(-)
| diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 86930a8ed012..9a6ad5974dff 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -116,8 +116,7 @@  #include <linux/freezer.h>  #include <linux/file.h>  #include <linux/btf_ids.h> - -#include "scm.h" +#include <linux/bpf-cgroup.h>  static atomic_long_t unix_nr_socks;  static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2]; @@ -212,8 +211,6 @@ static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)  }  #endif /* CONFIG_SECURITY_NETWORK */ -#define unix_peer(sk) (unix_sk(sk)->peer) -  static inline int unix_our_peer(struct sock *sk, struct sock *osk)  {  	return unix_peer(osk) == sk; @@ -680,7 +677,7 @@ static void unix_release_sock(struct sock *sk, int embrion)  	 *	  What the above comment does talk about? --ANK(980817)  	 */ -	if (unix_tot_inflight) +	if (READ_ONCE(unix_tot_inflight))  		unix_gc();		/* Garbage collect fds */  } @@ -783,19 +780,6 @@ static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);  static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,  				  int); -static int unix_set_peek_off(struct sock *sk, int val) -{ -	struct unix_sock *u = unix_sk(sk); - -	if (mutex_lock_interruptible(&u->iolock)) -		return -EINTR; - -	WRITE_ONCE(sk->sk_peek_off, val); -	mutex_unlock(&u->iolock); - -	return 0; -} -  #ifdef CONFIG_PROC_FS  static int unix_count_nr_fds(struct sock *sk)  { @@ -863,7 +847,7 @@ static const struct proto_ops unix_stream_ops = {  	.read_skb =	unix_stream_read_skb,  	.mmap =		sock_no_mmap,  	.splice_read =	unix_stream_splice_read, -	.set_peek_off =	unix_set_peek_off, +	.set_peek_off =	sk_set_peek_off,  	.show_fdinfo =	unix_show_fdinfo,  }; @@ -887,7 +871,7 @@ static const struct proto_ops unix_dgram_ops = {  	.read_skb =	unix_read_skb,  	.recvmsg =	unix_dgram_recvmsg,  	.mmap =		sock_no_mmap, -	.set_peek_off =	unix_set_peek_off, +	.set_peek_off =	sk_set_peek_off,  	.show_fdinfo =	unix_show_fdinfo,  }; @@ 
-910,7 +894,7 @@ static const struct proto_ops unix_seqpacket_ops = {  	.sendmsg =	unix_seqpacket_sendmsg,  	.recvmsg =	unix_seqpacket_recvmsg,  	.mmap =		sock_no_mmap, -	.set_peek_off =	unix_set_peek_off, +	.set_peek_off =	sk_set_peek_off,  	.show_fdinfo =	unix_show_fdinfo,  }; @@ -994,11 +978,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,  	sk->sk_write_space	= unix_write_space;  	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;  	sk->sk_destruct		= unix_sock_destructor; -	u	  = unix_sk(sk); +	u = unix_sk(sk); +	u->inflight = 0;  	u->path.dentry = NULL;  	u->path.mnt = NULL;  	spin_lock_init(&u->lock); -	atomic_long_set(&u->inflight, 0);  	INIT_LIST_HEAD(&u->link);  	mutex_init(&u->iolock); /* single task reading lock */  	mutex_init(&u->bindlock); /* single task binding lock */ @@ -1345,13 +1329,11 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)  		unix_state_lock(sk1);  		return;  	} -	if (sk1 < sk2) { -		unix_state_lock(sk1); -		unix_state_lock_nested(sk2); -	} else { -		unix_state_lock(sk2); -		unix_state_lock_nested(sk1); -	} +	if (sk1 > sk2) +		swap(sk1, sk2); + +	unix_state_lock(sk1); +	unix_state_lock_nested(sk2, U_LOCK_SECOND);  }  static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) @@ -1381,6 +1363,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,  		if (err)  			goto out; +		err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen); +		if (err) +			goto out; +  		if ((test_bit(SOCK_PASSCRED, &sock->flags) ||  		     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&  		    !unix_sk(sk)->addr) { @@ -1490,6 +1476,10 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,  	if (err)  		goto out; +	err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len); +	if (err) +		goto out; +  	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||  	     test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {  		err = 
unix_autobind(sk); @@ -1584,7 +1574,7 @@ restart:  		goto out_unlock;  	} -	unix_state_lock_nested(sk); +	unix_state_lock_nested(sk, U_LOCK_SECOND);  	if (sk->sk_state != st) {  		unix_state_unlock(sk); @@ -1770,12 +1760,65 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)  	} else {  		err = addr->len;  		memcpy(sunaddr, addr->name, addr->len); + +		if (peer) +			BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err, +					       CGROUP_UNIX_GETPEERNAME); +		else +			BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err, +					       CGROUP_UNIX_GETSOCKNAME);  	}  	sock_put(sk);  out:  	return err;  } +/* The "user->unix_inflight" variable is protected by the garbage + * collection lock, and we just read it locklessly here. If you go + * over the limit, there might be a tiny race in actually noticing + * it across threads. Tough. + */ +static inline bool too_many_unix_fds(struct task_struct *p) +{ +	struct user_struct *user = current_user(); + +	if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE))) +		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); +	return false; +} + +static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) +{ +	int i; + +	if (too_many_unix_fds(current)) +		return -ETOOMANYREFS; + +	/* Need to duplicate file references for the sake of garbage +	 * collection.  Otherwise a socket in the fps might become a +	 * candidate for GC while the skb is not yet queued. 
+	 */ +	UNIXCB(skb).fp = scm_fp_dup(scm->fp); +	if (!UNIXCB(skb).fp) +		return -ENOMEM; + +	for (i = scm->fp->count - 1; i >= 0; i--) +		unix_inflight(scm->fp->user, scm->fp->fp[i]); + +	return 0; +} + +static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) +{ +	int i; + +	scm->fp = UNIXCB(skb).fp; +	UNIXCB(skb).fp = NULL; + +	for (i = scm->fp->count - 1; i >= 0; i--) +		unix_notinflight(scm->fp->user, scm->fp->fp[i]); +} +  static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)  {  	scm->fp = scm_fp_dup(UNIXCB(skb).fp); @@ -1823,6 +1866,21 @@ static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)  	spin_unlock(&unix_gc_lock);  } +static void unix_destruct_scm(struct sk_buff *skb) +{ +	struct scm_cookie scm; + +	memset(&scm, 0, sizeof(scm)); +	scm.pid  = UNIXCB(skb).pid; +	if (UNIXCB(skb).fp) +		unix_detach_fds(&scm, skb); + +	/* Alas, it calls VFS */ +	/* So fscking what? fput() had been SMP-safe since the last Summer */ +	scm_destroy(&scm); +	sock_wfree(skb); +} +  static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)  {  	int err = 0; @@ -1909,11 +1967,12 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,  	long timeo;  	int err; -	wait_for_unix_gc();  	err = scm_send(sock, msg, &scm, false);  	if (err < 0)  		return err; +	wait_for_unix_gc(scm.fp); +  	err = -EOPNOTSUPP;  	if (msg->msg_flags&MSG_OOB)  		goto out; @@ -1922,6 +1981,13 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,  		err = unix_validate_addr(sunaddr, msg->msg_namelen);  		if (err)  			goto out; + +		err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, +							    msg->msg_name, +							    &msg->msg_namelen, +							    NULL); +		if (err) +			goto out;  	} else {  		sunaddr = NULL;  		err = -ENOTCONN; @@ -2178,11 +2244,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,  	bool fds_sent = false;  	int data_len; -	wait_for_unix_gc();  	err = 
scm_send(sock, msg, &scm, false);  	if (err < 0)  		return err; +	wait_for_unix_gc(scm.fp); +  	err = -EOPNOTSUPP;  	if (msg->msg_flags & MSG_OOB) {  #if IS_ENABLED(CONFIG_AF_UNIX_OOB) @@ -2390,9 +2457,14 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,  						EPOLLOUT | EPOLLWRNORM |  						EPOLLWRBAND); -	if (msg->msg_name) +	if (msg->msg_name) {  		unix_copy_addr(msg, skb->sk); +		BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, +						      msg->msg_name, +						      &msg->msg_namelen); +	} +  	if (size > skb->len - skip)  		size = skb->len - skip;  	else if (size < skb->len - skip) @@ -2553,15 +2625,16 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)  	if (!(state->flags & MSG_PEEK))  		WRITE_ONCE(u->oob_skb, NULL); - +	else +		skb_get(oob_skb);  	unix_state_unlock(sk);  	chunk = state->recv_actor(oob_skb, 0, chunk, state); -	if (!(state->flags & MSG_PEEK)) { +	if (!(state->flags & MSG_PEEK))  		UNIXCB(oob_skb).consumed += 1; -		kfree_skb(oob_skb); -	} + +	consume_skb(oob_skb);  	mutex_unlock(&u->iolock); @@ -2590,9 +2663,13 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,  					WRITE_ONCE(u->oob_skb, NULL);  					consume_skb(skb);  				} -			} else if (!(flags & MSG_PEEK)) { +			} else if (flags & MSG_PEEK) { +				skb = NULL; +			} else {  				skb_unlink(skb, &sk->sk_receive_queue); -				consume_skb(skb); +				WRITE_ONCE(u->oob_skb, NULL); +				if (!WARN_ON_ONCE(skb_unref(skb))) +					kfree_skb(skb);  				skb = skb_peek(&sk->sk_receive_queue);  			}  		} @@ -2666,18 +2743,16 @@ redo:  		last = skb = skb_peek(&sk->sk_receive_queue);  		last_len = last ? 
last->len : 0; +again:  #if IS_ENABLED(CONFIG_AF_UNIX_OOB)  		if (skb) {  			skb = manage_oob(skb, sk, flags, copied); -			if (!skb) { +			if (!skb && copied) {  				unix_state_unlock(sk); -				if (copied) -					break; -				goto redo; +				break;  			}  		}  #endif -again:  		if (skb == NULL) {  			if (copied >= target)  				goto unlock; @@ -2744,6 +2819,11 @@ unlock:  			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,  					 state->msg->msg_name);  			unix_copy_addr(state->msg, skb->sk); + +			BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, +							      state->msg->msg_name, +							      &state->msg->msg_namelen); +  			sunaddr = NULL;  		} @@ -3311,7 +3391,7 @@ static const struct seq_operations unix_seq_ops = {  	.show   = unix_seq_show,  }; -#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) +#ifdef CONFIG_BPF_SYSCALL  struct bpf_unix_iter_state {  	struct seq_net_private p;  	unsigned int cur_sk; @@ -3573,7 +3653,7 @@ static struct pernet_operations unix_net_ops = {  	.exit = unix_net_exit,  }; -#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)  DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,  		     struct unix_sock *unix_sk, uid_t uid) @@ -3673,7 +3753,7 @@ static int __init af_unix_init(void)  	register_pernet_subsys(&unix_net_ops);  	unix_bpf_build_proto(); -#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)  	bpf_iter_register();  #endif @@ -3681,20 +3761,5 @@ out:  	return rc;  } -static void __exit af_unix_exit(void) -{ -	sock_unregister(PF_UNIX); -	proto_unregister(&unix_dgram_proto); -	proto_unregister(&unix_stream_proto); -	unregister_pernet_subsys(&unix_net_ops); -} - -/* Earlier than device_initcall() so that other drivers invoking -   request_module() don't end up in a loop when modprobe tries -   to use a UNIX socket. 
But later than subsys_initcall() because -   we depend on stuff initialised there */ +/* Later than subsys_initcall() because we depend on stuff initialised there */  fs_initcall(af_unix_init); -module_exit(af_unix_exit); - -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NETPROTO(PF_UNIX); | 
