Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--	net/ipv4/tcp_ipv4.c	191
1 file changed, 171 insertions, 20 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ad6435ba6d72..5084333b5ab6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -76,6 +76,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/inetdevice.h>
+#include <linux/btf_ids.h>
 
 #include <crypto/hash.h>
 #include <linux/scatterlist.h>
@@ -1111,9 +1112,21 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 
 	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index);
 	if (key) {
-		/* Pre-existing entry - just update that one. */
-		memcpy(key->key, newkey, newkeylen);
-		key->keylen = newkeylen;
+		/* Pre-existing entry - just update that one.
+		 * Note that the key might be used concurrently.
+		 * data_race() is telling kcsan that we do not care of
+		 * key mismatches, since changing MD5 key on live flows
+		 * can lead to packet drops.
+		 */
+		data_race(memcpy(key->key, newkey, newkeylen));
+
+		/* Pairs with READ_ONCE() in tcp_md5_hash_key().
+		 * Also note that a reader could catch new key->keylen value
+		 * but old key->key[], this is the reason we use __GFP_ZERO
+		 * at sock_kmalloc() time below these lines.
+		 */
+		WRITE_ONCE(key->keylen, newkeylen);
+
 		return 0;
 	}
 
@@ -1129,7 +1142,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 		rcu_assign_pointer(tp->md5sig_info, md5sig);
 	}
 
-	key = sock_kmalloc(sk, sizeof(*key), gfp);
+	key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO);
 	if (!key)
 		return -ENOMEM;
 	if (!tcp_alloc_md5sig_pool()) {
@@ -1182,7 +1195,7 @@ static void tcp_clear_md5_list(struct sock *sk)
 }
 
 static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
-				 char __user *optval, int optlen)
+				 sockptr_t optval, int optlen)
 {
 	struct tcp_md5sig cmd;
 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
@@ -1193,7 +1206,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
 	if (optlen < sizeof(cmd))
 		return -EINVAL;
 
-	if (copy_from_user(&cmd, optval, sizeof(cmd)))
+	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
 		return -EFAULT;
 
 	if (sin->sin_family != AF_INET)
@@ -2122,10 +2135,6 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
 	.getsockopt	   = ip_getsockopt,
 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
 	.sockaddr_len	   = sizeof(struct sockaddr_in),
-#ifdef CONFIG_COMPAT
-	.compat_setsockopt = compat_ip_setsockopt,
-	.compat_getsockopt = compat_ip_getsockopt,
-#endif
 	.mtu_reduced	   = tcp_v4_mtu_reduced,
 };
 EXPORT_SYMBOL(ipv4_specific);
@@ -2211,13 +2220,18 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
  */
 static void *listening_get_next(struct seq_file *seq, void *cur)
 {
-	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+	struct tcp_seq_afinfo *afinfo;
 	struct tcp_iter_state *st = seq->private;
 	struct net *net = seq_file_net(seq);
 	struct inet_listen_hashbucket *ilb;
 	struct hlist_nulls_node *node;
 	struct sock *sk = cur;
 
+	if (st->bpf_seq_afinfo)
+		afinfo = st->bpf_seq_afinfo;
+	else
+		afinfo = PDE_DATA(file_inode(seq->file));
+
 	if (!sk) {
 get_head:
 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
@@ -2235,7 +2249,8 @@ get_sk:
 	sk_nulls_for_each_from(sk, node) {
 		if (!net_eq(sock_net(sk), net))
 			continue;
-		if (sk->sk_family == afinfo->family)
+		if (afinfo->family == AF_UNSPEC ||
+		    sk->sk_family == afinfo->family)
 			return sk;
 	}
 	spin_unlock(&ilb->lock);
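For context, the new comments in tcp_md5_do_add() reference a lockless reader, tcp_md5_hash_key() in net/ipv4/tcp.c. A sketch of that reader as updated alongside this change (the exact body in a given tree may differ):

int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
		     const struct tcp_md5sig_key *key)
{
	/* Paired with WRITE_ONCE() in tcp_md5_do_add(): a reader may observe
	 * the new keylen together with the old key[], but because the key was
	 * allocated with __GFP_ZERO the extra bytes are zero, never garbage.
	 */
	u8 keylen = READ_ONCE(key->keylen);
	struct scatterlist sg;

	sg_init_one(&sg, key->key, keylen);
	ahash_request_set_crypt(hp->md5_req, &sg, NULL, keylen);

	/* data_race(): tcp_md5_do_add() may rewrite key->key under us; a
	 * mismatched hash only drops packets while the key is being changed.
	 */
	return data_race(crypto_ahash_update(hp->md5_req));
}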
@@ -2272,11 +2287,16 @@ static inline bool empty_bucket(const struct tcp_iter_state *st)
  */
 static void *established_get_first(struct seq_file *seq)
 {
-	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+	struct tcp_seq_afinfo *afinfo;
 	struct tcp_iter_state *st = seq->private;
 	struct net *net = seq_file_net(seq);
 	void *rc = NULL;
 
+	if (st->bpf_seq_afinfo)
+		afinfo = st->bpf_seq_afinfo;
+	else
+		afinfo = PDE_DATA(file_inode(seq->file));
+
 	st->offset = 0;
 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
 		struct sock *sk;
@@ -2289,7 +2309,8 @@ static void *established_get_first(struct seq_file *seq)
 
 		spin_lock_bh(lock);
 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
-			if (sk->sk_family != afinfo->family ||
+			if ((afinfo->family != AF_UNSPEC &&
+			     sk->sk_family != afinfo->family) ||
 			    !net_eq(sock_net(sk), net)) {
 				continue;
 			}
@@ -2304,19 +2325,25 @@ out:
 
 static void *established_get_next(struct seq_file *seq, void *cur)
 {
-	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+	struct tcp_seq_afinfo *afinfo;
 	struct sock *sk = cur;
 	struct hlist_nulls_node *node;
 	struct tcp_iter_state *st = seq->private;
 	struct net *net = seq_file_net(seq);
 
+	if (st->bpf_seq_afinfo)
+		afinfo = st->bpf_seq_afinfo;
+	else
+		afinfo = PDE_DATA(file_inode(seq->file));
+
 	++st->num;
 	++st->offset;
 
 	sk = sk_nulls_next(sk);
 
 	sk_nulls_for_each_from(sk, node) {
-		if (sk->sk_family == afinfo->family &&
+		if ((afinfo->family == AF_UNSPEC ||
+		     sk->sk_family == afinfo->family) &&
 		    net_eq(sock_net(sk), net))
 			return sk;
 	}
@@ -2595,6 +2622,74 @@ out:
 	return 0;
 }
 
+#ifdef CONFIG_BPF_SYSCALL
+struct bpf_iter__tcp {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct sock_common *, sk_common);
+	uid_t uid __aligned(8);
+};
+
+static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
+			     struct sock_common *sk_common, uid_t uid)
+{
+	struct bpf_iter__tcp ctx;
+
+	meta->seq_num--;  /* skip SEQ_START_TOKEN */
+	ctx.meta = meta;
+	ctx.sk_common = sk_common;
+	ctx.uid = uid;
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+	struct sock *sk = v;
+	uid_t uid;
+
+	if (v == SEQ_START_TOKEN)
+		return 0;
+
+	if (sk->sk_state == TCP_TIME_WAIT) {
+		uid = 0;
+	} else if (sk->sk_state == TCP_NEW_SYN_RECV) {
+		const struct request_sock *req = v;
+
+		uid = from_kuid_munged(seq_user_ns(seq),
+				       sock_i_uid(req->rsk_listener));
+	} else {
+		uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
+	}
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, false);
+	return tcp_prog_seq_show(prog, &meta, v, uid);
+}
+
+static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	if (!v) {
+		meta.seq = seq;
+		prog = bpf_iter_get_info(&meta, true);
+		if (prog)
+			(void)tcp_prog_seq_show(prog, &meta, v, 0);
+	}
+
+	tcp_seq_stop(seq, v);
+}
+
+static const struct seq_operations bpf_iter_tcp_seq_ops = {
+	.show		= bpf_iter_tcp_seq_show,
+	.start		= tcp_seq_start,
+	.next		= tcp_seq_next,
+	.stop		= bpf_iter_tcp_seq_stop,
+};
+#endif
+
 static const struct seq_operations tcp4_seq_ops = {
 	.show		= tcp4_seq_show,
 	.start		= tcp_seq_start,
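The seq_ops above hand each socket to a BPF program whose context is struct bpf_iter__tcp. A minimal sketch of such a program (program name and output format are illustrative, not part of this patch; assumes a generated vmlinux.h plus libbpf's bpf_helpers.h/bpf_tracing.h):

// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

SEC("iter/tcp")
int dump_tcp(struct bpf_iter__tcp *ctx)
{
	struct sock_common *skc = ctx->sk_common;

	/* sk_common is PTR_TO_BTF_ID_OR_NULL: NULL once iteration ends. */
	if (!skc)
		return 0;

	/* Covers listening, established, request and timewait sockets of
	 * all families, since the in-kernel walker matches AF_UNSPEC.
	 */
	BPF_SEQ_PRINTF(ctx->meta->seq, "family=%d uid=%u\n",
		       skc->skc_family, ctx->uid);
	return 0;
}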
@@ -2675,10 +2770,6 @@ struct proto tcp_prot = {
 	.rsk_prot		= &tcp_request_sock_ops,
 	.h.hashinfo		= &tcp_hashinfo,
 	.no_autobind		= true,
-#ifdef CONFIG_COMPAT
-	.compat_setsockopt	= compat_tcp_setsockopt,
-	.compat_getsockopt	= compat_tcp_getsockopt,
-#endif
 	.diag_destroy		= tcp_abort,
 };
 EXPORT_SYMBOL(tcp_prot);
@@ -2826,8 +2917,68 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
        .exit_batch = tcp_sk_exit_batch,
 };
 
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta,
+		     struct sock_common *sk_common, uid_t uid)
+
+static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux)
+{
+	struct tcp_iter_state *st = priv_data;
+	struct tcp_seq_afinfo *afinfo;
+	int ret;
+
+	afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
+	if (!afinfo)
+		return -ENOMEM;
+
+	afinfo->family = AF_UNSPEC;
+	st->bpf_seq_afinfo = afinfo;
+	ret = bpf_iter_init_seq_net(priv_data, aux);
+	if (ret)
+		kfree(afinfo);
+	return ret;
+}
+
+static void bpf_iter_fini_tcp(void *priv_data)
+{
+	struct tcp_iter_state *st = priv_data;
+
+	kfree(st->bpf_seq_afinfo);
+	bpf_iter_fini_seq_net(priv_data);
+}
+
+static const struct bpf_iter_seq_info tcp_seq_info = {
+	.seq_ops		= &bpf_iter_tcp_seq_ops,
+	.init_seq_private	= bpf_iter_init_tcp,
+	.fini_seq_private	= bpf_iter_fini_tcp,
+	.seq_priv_size		= sizeof(struct tcp_iter_state),
+};
+
+static struct bpf_iter_reg tcp_reg_info = {
+	.target			= "tcp",
+	.ctx_arg_info_size	= 1,
+	.ctx_arg_info		= {
+		{ offsetof(struct bpf_iter__tcp, sk_common),
+		  PTR_TO_BTF_ID_OR_NULL },
+	},
+	.seq_info		= &tcp_seq_info,
+};
+
+static void __init bpf_iter_register(void)
+{
+	tcp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON];
+	if (bpf_iter_reg_target(&tcp_reg_info))
+		pr_warn("Warning: could not register bpf iterator tcp\n");
+}
+
+#endif
+
 void __init tcp_v4_init(void)
 {
 	if (register_pernet_subsys(&tcp_sk_ops))
 		panic("Failed to create the TCP control socket.\n");
+
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+	bpf_iter_register();
+#endif
 }
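Once bpf_iter_register() has run, the "tcp" target can be driven from userspace: each read of an iterator fd triggers a walk of the TCP hash tables through bpf_iter_tcp_seq_ops. A hedged libbpf sketch (object file and program names are hypothetical; assumes a libbpf with bpf_program__attach_iter() and bpf_iter_create(), i.e. the same era as this patch):

#include <stdio.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

int main(void)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	struct bpf_link *link;
	char buf[4096];
	ssize_t n;
	int fd;

	obj = bpf_object__open_file("tcp_iter.bpf.o", NULL);	/* hypothetical object */
	if (libbpf_get_error(obj) || bpf_object__load(obj))
		return 1;

	prog = bpf_object__find_program_by_name(obj, "dump_tcp");
	if (!prog)
		return 1;

	link = bpf_program__attach_iter(prog, NULL);
	if (libbpf_get_error(link))
		return 1;

	/* Each bpf_iter_create() gives one iteration pass; the program's
	 * BPF_SEQ_PRINTF output comes back through this fd.
	 */
	fd = bpf_iter_create(bpf_link__fd(link));
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);

	close(fd);
	bpf_link__destroy(link);
	bpf_object__close(obj);
	return 0;
}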
