diff options
29 files changed, 213 insertions, 70 deletions
| diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 755702eefd9c..c5195524d1ef 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -19,7 +19,7 @@  #define SO_BROADCAST	0x0020  #define SO_LINGER	0x0080  #define SO_OOBINLINE	0x0100 -/* To add :#define SO_REUSEPORT 0x0200 */ +#define SO_REUSEPORT	0x0200  #define SO_TYPE		0x1008  #define SO_ERROR	0x1007 diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index f3f38a0e2ef9..51c6401582ea 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -22,7 +22,7 @@  #define SO_PRIORITY	12  #define SO_LINGER	13  #define SO_BSDCOMPAT	14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT	15  #define SO_PASSCRED	16  #define SO_PEERCRED	17  #define SO_RCVLOWAT	18 diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h index 406b5838defd..50692b738c75 100644 --- a/arch/cris/include/uapi/asm/socket.h +++ b/arch/cris/include/uapi/asm/socket.h @@ -24,7 +24,7 @@  #define SO_PRIORITY	12  #define SO_LINGER	13  #define SO_BSDCOMPAT	14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT	15  #define SO_PASSCRED	16  #define SO_PEERCRED	17  #define SO_RCVLOWAT	18 diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index d8e1132a1ab6..595391f0f98c 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -22,7 +22,7 @@  #define SO_PRIORITY	12  #define SO_LINGER	13  #define SO_BSDCOMPAT	14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT	15  #define SO_PASSCRED	16  #define SO_PEERCRED	17  #define SO_RCVLOWAT	18 diff --git a/arch/h8300/include/uapi/asm/socket.h b/arch/h8300/include/uapi/asm/socket.h index c8b87a828206..43e32621da7d 100644 --- a/arch/h8300/include/uapi/asm/socket.h +++ b/arch/h8300/include/uapi/asm/socket.h @@ -22,7 +22,7 @@  #define SO_PRIORITY	12  #define SO_LINGER	13  #define SO_BSDCOMPAT	14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT	15  #define SO_PASSCRED	16  #define SO_PEERCRED	17  #define SO_RCVLOWAT	18 diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index f390896c3104..c567adc8bea5 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -31,7 +31,7 @@  #define SO_PRIORITY	12  #define SO_LINGER	13  #define SO_BSDCOMPAT	14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT	15  #define SO_PASSCRED	16  #define SO_PEERCRED	17  #define SO_RCVLOWAT	18 diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 6a895155e7a3..519afa2755db 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -22,7 +22,7 @@  #define SO_PRIORITY	12  #define SO_LINGER	13  #define SO_BSDCOMPAT	14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT	15  #define SO_PASSCRED	16  #define SO_PEERCRED	17  #define SO_RCVLOWAT	18 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 9d11a7713923..7e2723637b35 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -28,8 +28,7 @@  #define SO_LINGER	0x0080	/* Block on close of a reliable  				   socket to transmit pending data.  */  #define SO_OOBINLINE 0x0100	/* Receive out-of-band data in-band.  */ -#if 0 -To add: #define SO_REUSEPORT 0x0200	/* Allow local address and port reuse.  */ +#define SO_REUSEPORT 0x0200	/* Allow local address and port reuse.  */  #endif  #define SO_TYPE		0x1008	/* Compatible name for SO_STYLE.  */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index ab702c40b30e..5c7c7c988544 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -22,7 +22,7 @@  #define SO_PRIORITY	12  #define SO_LINGER	13  #define SO_BSDCOMPAT	14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT	15  #define SO_PASSCRED	16  #define SO_PEERCRED	17  #define SO_RCVLOWAT	18 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index da2c8d3c209e..526e4b9aece0 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -13,7 +13,7 @@  #define SO_BROADCAST	0x0020  #define SO_LINGER	0x0080  #define SO_OOBINLINE	0x0100 -/* To add :#define SO_REUSEPORT 0x0200 */ +#define SO_REUSEPORT	0x0200  #define SO_SNDBUF	0x1001  #define SO_RCVBUF	0x1002  #define SO_SNDBUFFORCE	0x100a diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index e6ca31816cc9..a26dcaece509 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -29,7 +29,7 @@  #define SO_PRIORITY	12  #define SO_LINGER	13  #define SO_BSDCOMPAT	14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT	15  #define SO_RCVLOWAT	16  #define SO_SNDLOWAT	17  #define SO_RCVTIMEO	18 diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 9ce60b68f070..f99eea7fff0f 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -28,7 +28,7 @@  #define SO_PRIORITY	12  #define SO_LINGER	13  #define SO_BSDCOMPAT	14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT	15  #define SO_PASSCRED	16  #define SO_PEERCRED	17  #define SO_RCVLOWAT	18 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index fbbba57547d1..cbbad74b2e06 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -15,7 +15,7 @@  #define SO_PEERCRED	0x0040  #define SO_LINGER	0x0080  #define SO_OOBINLINE	0x0100 -/* To add :#define SO_REUSEPORT 0x0200 */ +#define SO_REUSEPORT	0x0200  #define SO_BSDCOMPAT    0x0400  #define SO_RCVLOWAT     0x0800  #define SO_SNDLOWAT     0x1000 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index dbf316487b51..35905cb6e419 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -32,7 +32,7 @@  #define SO_PRIORITY	12  #define SO_LINGER	13  #define SO_BSDCOMPAT	14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT	15  #define SO_PASSCRED	16  #define SO_PEERCRED	17  #define SO_RCVLOWAT	18 diff --git a/include/linux/random.h b/include/linux/random.h index d9846088c2c5..347ce553a306 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -74,4 +74,10 @@ static inline int arch_get_random_int(unsigned int *v)  }  #endif +/* Pseudo random number generator from numerical recipes. */ +static inline u32 next_pseudo_random32(u32 seed) +{ +	return seed * 1664525 + 1013904223; +} +  #endif /* _LINUX_RANDOM_H */ diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 9e34c877a770..7ca75cbbf75e 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -71,6 +71,8 @@ extern struct sock *__inet6_lookup_established(struct net *net,  extern struct sock *inet6_lookup_listener(struct net *net,  					  struct inet_hashinfo *hashinfo, +					  const struct in6_addr *saddr, +					  const __be16 sport,  					  const struct in6_addr *daddr,  					  const unsigned short hnum,  					  const int dif); @@ -88,7 +90,8 @@ static inline struct sock *__inet6_lookup(struct net *net,  	if (sk)  		return sk; -	return inet6_lookup_listener(net, hashinfo, daddr, hnum, dif); +	return inet6_lookup_listener(net, hashinfo, saddr, sport, +				     daddr, hnum, dif);  }  static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 67a8fa098e3a..7b2ae9d37076 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -81,7 +81,9 @@ struct inet_bind_bucket {  	struct net		*ib_net;  #endif  	unsigned short		port; -	signed short		fastreuse; +	signed char		fastreuse; +	signed char		fastreuseport; +	kuid_t			fastuid;  	int			num_owners;  	struct hlist_node	node;  	struct hlist_head	owners; @@ -257,15 +259,19 @@ extern void inet_unhash(struct sock *sk);  extern struct sock *__inet_lookup_listener(struct net *net,  					   struct inet_hashinfo *hashinfo, +					   const __be32 saddr, +					   const __be16 sport,  					   const __be32 daddr,  					   const unsigned short hnum,  					   const int dif);  static inline struct sock *inet_lookup_listener(struct net *net,  		struct inet_hashinfo *hashinfo, +		__be32 saddr, __be16 sport,  		__be32 daddr, __be16 dport, int dif)  { -	return __inet_lookup_listener(net, hashinfo, daddr, ntohs(dport), dif); +	return __inet_lookup_listener(net, hashinfo, saddr, sport, +				      daddr, ntohs(dport), dif);  }  /* Socket demux engine toys. */ @@ -358,7 +364,8 @@ static inline struct sock *__inet_lookup(struct net *net,  	struct sock *sk = __inet_lookup_established(net, hashinfo,  				saddr, sport, daddr, hnum, dif); -	return sk ? : __inet_lookup_listener(net, hashinfo, daddr, hnum, dif); +	return sk ? : __inet_lookup_listener(net, hashinfo, saddr, sport, +					     daddr, hnum, dif);  }  static inline struct sock *inet_lookup(struct net *net, diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h index 75ca9291cf2c..36d9379d4c4b 100644 --- a/include/net/netfilter/nf_tproxy_core.h +++ b/include/net/netfilter/nf_tproxy_core.h @@ -82,6 +82,7 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,  			break;  		case NFT_LOOKUP_LISTENER:  			sk = inet_lookup_listener(net, &tcp_hashinfo, +						    saddr, sport,  						    daddr, dport,  						    in->ifindex); @@ -151,6 +152,7 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,  			break;  		case NFT_LOOKUP_LISTENER:  			sk = inet6_lookup_listener(net, &tcp_hashinfo, +						   saddr, sport,  						   daddr, ntohs(dport),  						   in->ifindex); diff --git a/include/net/sock.h b/include/net/sock.h index 5a34e2f03657..581dc6bd7dc6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -140,6 +140,7 @@ typedef __u64 __bitwise __addrpair;   *	@skc_family: network address family   *	@skc_state: Connection state   *	@skc_reuse: %SO_REUSEADDR setting + *	@skc_reuseport: %SO_REUSEPORT setting   *	@skc_bound_dev_if: bound device index if != 0   *	@skc_bind_node: bind hash linkage for various protocol lookup tables   *	@skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol @@ -179,7 +180,8 @@ struct sock_common {  	unsigned short		skc_family;  	volatile unsigned char	skc_state; -	unsigned char		skc_reuse; +	unsigned char		skc_reuse:4; +	unsigned char		skc_reuseport:4;  	int			skc_bound_dev_if;  	union {  		struct hlist_node	skc_bind_node; @@ -297,6 +299,7 @@ struct sock {  #define sk_family		__sk_common.skc_family  #define sk_state		__sk_common.skc_state  #define sk_reuse		__sk_common.skc_reuse +#define sk_reuseport		__sk_common.skc_reuseport  #define sk_bound_dev_if		__sk_common.skc_bound_dev_if  #define sk_bind_node		__sk_common.skc_bind_node  #define sk_prot			__sk_common.skc_prot diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 3f6a99201410..4ef3acbba5da 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -22,8 +22,7 @@  #define SO_PRIORITY	12  #define SO_LINGER	13  #define SO_BSDCOMPAT	14 -/* To add :#define SO_REUSEPORT 15 */ - +#define SO_REUSEPORT	15  #ifndef SO_PASSCRED /* powerpc only differs in these */  #define SO_PASSCRED	16  #define SO_PEERCRED	17 diff --git a/net/core/sock.c b/net/core/sock.c index 8258fb741e9a..235fb89e8973 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -665,6 +665,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname,  	case SO_REUSEADDR:  		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);  		break; +	case SO_REUSEPORT: +		sk->sk_reuseport = valbool; +		break;  	case SO_TYPE:  	case SO_PROTOCOL:  	case SO_DOMAIN: @@ -972,6 +975,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,  		v.val = sk->sk_reuse;  		break; +	case SO_REUSEPORT: +		v.val = sk->sk_reuseport; +		break; +  	case SO_KEEPALIVE:  		v.val = sock_flag(sk, SOCK_KEEPOPEN);  		break; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d0670f00d524..8bb623d357ad 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -59,6 +59,8 @@ int inet_csk_bind_conflict(const struct sock *sk,  	struct sock *sk2;  	struct hlist_node *node;  	int reuse = sk->sk_reuse; +	int reuseport = sk->sk_reuseport; +	kuid_t uid = sock_i_uid((struct sock *)sk);  	/*  	 * Unlike other sk lookup places we do not check @@ -73,8 +75,11 @@ int inet_csk_bind_conflict(const struct sock *sk,  		    (!sk->sk_bound_dev_if ||  		     !sk2->sk_bound_dev_if ||  		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { -			if (!reuse || !sk2->sk_reuse || -			    sk2->sk_state == TCP_LISTEN) { +			if ((!reuse || !sk2->sk_reuse || +			    sk2->sk_state == TCP_LISTEN) && +			    (!reuseport || !sk2->sk_reuseport || +			    (sk2->sk_state != TCP_TIME_WAIT && +			     !uid_eq(uid, sock_i_uid(sk2))))) {  				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);  				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||  				    sk2_rcv_saddr == sk_rcv_saddr(sk)) @@ -106,6 +111,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)  	int ret, attempts = 5;  	struct net *net = sock_net(sk);  	int smallest_size = -1, smallest_rover; +	kuid_t uid = sock_i_uid(sk);  	local_bh_disable();  	if (!snum) { @@ -125,9 +131,12 @@ again:  			spin_lock(&head->lock);  			inet_bind_bucket_for_each(tb, node, &head->chain)  				if (net_eq(ib_net(tb), net) && tb->port == rover) { -					if (tb->fastreuse > 0 && -					    sk->sk_reuse && -					    sk->sk_state != TCP_LISTEN && +					if (((tb->fastreuse > 0 && +					      sk->sk_reuse && +					      sk->sk_state != TCP_LISTEN) || +					     (tb->fastreuseport > 0 && +					      sk->sk_reuseport && +					      uid_eq(tb->fastuid, uid))) &&  					    (tb->num_owners < smallest_size || smallest_size == -1)) {  						smallest_size = tb->num_owners;  						smallest_rover = rover; @@ -185,14 +194,17 @@ tb_found:  		if (sk->sk_reuse == SK_FORCE_REUSE)  			goto success; -		if (tb->fastreuse > 0 && -		    sk->sk_reuse && sk->sk_state != TCP_LISTEN && +		if (((tb->fastreuse > 0 && +		      sk->sk_reuse && sk->sk_state != TCP_LISTEN) || +		     (tb->fastreuseport > 0 && +		      sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&  		    smallest_size == -1) {  			goto success;  		} else {  			ret = 1;  			if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { -				if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && +				if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) || +				     (sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&  				    smallest_size != -1 && --attempts >= 0) {  					spin_unlock(&head->lock);  					goto again; @@ -212,9 +224,23 @@ tb_not_found:  			tb->fastreuse = 1;  		else  			tb->fastreuse = 0; -	} else if (tb->fastreuse && -		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) -		tb->fastreuse = 0; +		if (sk->sk_reuseport) { +			tb->fastreuseport = 1; +			tb->fastuid = uid; +		} else { +			tb->fastreuseport = 0; +			tb->fastuid = 0; +		} +	} else { +		if (tb->fastreuse && +		    (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) +			tb->fastreuse = 0; +		if (tb->fastreuseport && +		    (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))) { +			tb->fastreuseport = 0; +			tb->fastuid = 0; +		} +	}  success:  	if (!inet_csk(sk)->icsk_bind_hash)  		inet_bind_hash(sk, tb, snum); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fa3ae8148710..0ce0595d9861 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -39,6 +39,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,  		write_pnet(&tb->ib_net, hold_net(net));  		tb->port      = snum;  		tb->fastreuse = 0; +		tb->fastreuseport = 0;  		tb->num_owners = 0;  		INIT_HLIST_HEAD(&tb->owners);  		hlist_add_head(&tb->node, &head->chain); @@ -151,16 +152,16 @@ static inline int compute_score(struct sock *sk, struct net *net,  	if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&  			!ipv6_only_sock(sk)) {  		__be32 rcv_saddr = inet->inet_rcv_saddr; -		score = sk->sk_family == PF_INET ? 1 : 0; +		score = sk->sk_family == PF_INET ? 2 : 1;  		if (rcv_saddr) {  			if (rcv_saddr != daddr)  				return -1; -			score += 2; +			score += 4;  		}  		if (sk->sk_bound_dev_if) {  			if (sk->sk_bound_dev_if != dif)  				return -1; -			score += 2; +			score += 4;  		}  	}  	return score; @@ -176,6 +177,7 @@ static inline int compute_score(struct sock *sk, struct net *net,  struct sock *__inet_lookup_listener(struct net *net,  				    struct inet_hashinfo *hashinfo, +				    const __be32 saddr, __be16 sport,  				    const __be32 daddr, const unsigned short hnum,  				    const int dif)  { @@ -183,17 +185,29 @@ struct sock *__inet_lookup_listener(struct net *net,  	struct hlist_nulls_node *node;  	unsigned int hash = inet_lhashfn(net, hnum);  	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; -	int score, hiscore; +	int score, hiscore, matches = 0, reuseport = 0; +	u32 phash = 0;  	rcu_read_lock();  begin:  	result = NULL; -	hiscore = -1; +	hiscore = 0;  	sk_nulls_for_each_rcu(sk, node, &ilb->head) {  		score = compute_score(sk, net, hnum, daddr, dif);  		if (score > hiscore) {  			result = sk;  			hiscore = score; +			reuseport = sk->sk_reuseport; +			if (reuseport) { +				phash = inet_ehashfn(net, daddr, hnum, +						     saddr, sport); +				matches = 1; +			} +		} else if (score == hiscore && reuseport) { +			matches++; +			if (((u64)phash * matches) >> 32 == 0) +				result = sk; +			phash = next_pseudo_random32(phash);  		}  	}  	/* @@ -501,7 +515,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,  			inet_bind_bucket_for_each(tb, node, &head->chain) {  				if (net_eq(ib_net(tb), net) &&  				    tb->port == port) { -					if (tb->fastreuse >= 0) +					if (tb->fastreuse >= 0 || +					    tb->fastreuseport >= 0)  						goto next_port;  					WARN_ON(hlist_empty(&tb->owners));  					if (!check_established(death_row, sk, @@ -518,6 +533,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,  				break;  			}  			tb->fastreuse = -1; +			tb->fastreuseport = -1;  			goto ok;  		next_port: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c6ce9ca98d23..bbbdcc5c1973 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -657,7 +657,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)  		 * no RST generated if md5 hash doesn't match.  		 */  		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev), -					     &tcp_hashinfo, ip_hdr(skb)->daddr, +					     &tcp_hashinfo, ip_hdr(skb)->saddr, +					     th->source, ip_hdr(skb)->daddr,  					     ntohs(th->source), inet_iif(skb));  		/* don't send rst if it can't find key */  		if (!sk1) @@ -2074,6 +2075,7 @@ do_time_wait:  	case TCP_TW_SYN: {  		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),  							&tcp_hashinfo, +							iph->saddr, th->source,  							iph->daddr, th->dest,  							inet_iif(skb));  		if (sk2) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf6158f1f46b..e0610e4b5158 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -139,6 +139,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,  {  	struct sock *sk2;  	struct hlist_nulls_node *node; +	kuid_t uid = sock_i_uid(sk);  	sk_nulls_for_each(sk2, node, &hslot->head)  		if (net_eq(sock_net(sk2), net) && @@ -147,6 +148,8 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,  		    (!sk2->sk_reuse || !sk->sk_reuse) &&  		    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||  		     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && +		    (!sk2->sk_reuseport || !sk->sk_reuseport || +		      !uid_eq(uid, sock_i_uid(sk2))) &&  		    (*saddr_comp)(sk, sk2)) {  			if (bitmap)  				__set_bit(udp_sk(sk2)->udp_port_hash >> log, @@ -169,6 +172,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,  {  	struct sock *sk2;  	struct hlist_nulls_node *node; +	kuid_t uid = sock_i_uid(sk);  	int res = 0;  	spin_lock(&hslot2->lock); @@ -179,6 +183,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,  		    (!sk2->sk_reuse || !sk->sk_reuse) &&  		    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||  		     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && +		    (!sk2->sk_reuseport || !sk->sk_reuseport || +		      !uid_eq(uid, sock_i_uid(sk2))) &&  		    (*saddr_comp)(sk, sk2)) {  			res = 1;  			break; @@ -337,26 +343,26 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,  			!ipv6_only_sock(sk)) {  		struct inet_sock *inet = inet_sk(sk); -		score = (sk->sk_family == PF_INET ? 1 : 0); +		score = (sk->sk_family == PF_INET ? 2 : 1);  		if (inet->inet_rcv_saddr) {  			if (inet->inet_rcv_saddr != daddr)  				return -1; -			score += 2; +			score += 4;  		}  		if (inet->inet_daddr) {  			if (inet->inet_daddr != saddr)  				return -1; -			score += 2; +			score += 4;  		}  		if (inet->inet_dport) {  			if (inet->inet_dport != sport)  				return -1; -			score += 2; +			score += 4;  		}  		if (sk->sk_bound_dev_if) {  			if (sk->sk_bound_dev_if != dif)  				return -1; -			score += 2; +			score += 4;  		}  	}  	return score; @@ -365,7 +371,6 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,  /*   * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num)   */ -#define SCORE2_MAX (1 + 2 + 2 + 2)  static inline int compute_score2(struct sock *sk, struct net *net,  				 __be32 saddr, __be16 sport,  				 __be32 daddr, unsigned int hnum, int dif) @@ -380,21 +385,21 @@ static inline int compute_score2(struct sock *sk, struct net *net,  		if (inet->inet_num != hnum)  			return -1; -		score = (sk->sk_family == PF_INET ? 1 : 0); +		score = (sk->sk_family == PF_INET ? 2 : 1);  		if (inet->inet_daddr) {  			if (inet->inet_daddr != saddr)  				return -1; -			score += 2; +			score += 4;  		}  		if (inet->inet_dport) {  			if (inet->inet_dport != sport)  				return -1; -			score += 2; +			score += 4;  		}  		if (sk->sk_bound_dev_if) {  			if (sk->sk_bound_dev_if != dif)  				return -1; -			score += 2; +			score += 4;  		}  	}  	return score; @@ -409,19 +414,29 @@ static struct sock *udp4_lib_lookup2(struct net *net,  {  	struct sock *sk, *result;  	struct hlist_nulls_node *node; -	int score, badness; +	int score, badness, matches = 0, reuseport = 0; +	u32 hash = 0;  begin:  	result = NULL; -	badness = -1; +	badness = 0;  	udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {  		score = compute_score2(sk, net, saddr, sport,  				      daddr, hnum, dif);  		if (score > badness) {  			result = sk;  			badness = score; -			if (score == SCORE2_MAX) -				goto exact_match; +			reuseport = sk->sk_reuseport; +			if (reuseport) { +				hash = inet_ehashfn(net, daddr, hnum, +						    saddr, htons(sport)); +				matches = 1; +			} +		} else if (score == badness && reuseport) { +			matches++; +			if (((u64)hash * matches) >> 32 == 0) +				result = sk; +			hash = next_pseudo_random32(hash);  		}  	}  	/* @@ -431,9 +446,7 @@ begin:  	 */  	if (get_nulls_value(node) != slot2)  		goto begin; -  	if (result) { -exact_match:  		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))  			result = NULL;  		else if (unlikely(compute_score2(result, net, saddr, sport, @@ -457,7 +470,8 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,  	unsigned short hnum = ntohs(dport);  	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);  	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; -	int score, badness; +	int score, badness, matches = 0, reuseport = 0; +	u32 hash = 0;  	rcu_read_lock();  	if (hslot->count > 10) { @@ -486,13 +500,24 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,  	}  begin:  	result = NULL; -	badness = -1; +	badness = 0;  	sk_nulls_for_each_rcu(sk, node, &hslot->head) {  		score = compute_score(sk, net, saddr, hnum, sport,  				      daddr, dport, dif);  		if (score > badness) {  			result = sk;  			badness = score; +			reuseport = sk->sk_reuseport; +			if (reuseport) { +				hash = inet_ehashfn(net, daddr, hnum, +						    saddr, htons(sport)); +				matches = 1; +			} +		} else if (score == badness && reuseport) { +			matches++; +			if (((u64)hash * matches) >> 32 == 0) +				result = sk; +			hash = next_pseudo_random32(hash);  		}  	}  	/* diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 30647857a375..e4297a393678 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -32,6 +32,9 @@ int inet6_csk_bind_conflict(const struct sock *sk,  {  	const struct sock *sk2;  	const struct hlist_node *node; +	int reuse = sk->sk_reuse; +	int reuseport = sk->sk_reuseport; +	int uid = sock_i_uid((struct sock *)sk);  	/* We must walk the whole port owner list in this case. -DaveM */  	/* @@ -42,11 +45,17 @@ int inet6_csk_bind_conflict(const struct sock *sk,  		if (sk != sk2 &&  		    (!sk->sk_bound_dev_if ||  		     !sk2->sk_bound_dev_if || -		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && -		    (!sk->sk_reuse || !sk2->sk_reuse || -		     sk2->sk_state == TCP_LISTEN) && -		     ipv6_rcv_saddr_equal(sk, sk2)) -			break; +		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { +			if ((!reuse || !sk2->sk_reuse || +			     sk2->sk_state == TCP_LISTEN) && +			    (!reuseport || !sk2->sk_reuseport || +			     (sk2->sk_state != TCP_TIME_WAIT && +			      !uid_eq(uid, +				      sock_i_uid((struct sock *)sk2))))) { +				if (ipv6_rcv_saddr_equal(sk, sk2)) +					break; +			} +		}  	}  	return node != NULL; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index dea17fd28e50..32b4a1675d82 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -158,25 +158,38 @@ static inline int compute_score(struct sock *sk, struct net *net,  }  struct sock *inet6_lookup_listener(struct net *net, -		struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, +		struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, +		const __be16 sport, const struct in6_addr *daddr,  		const unsigned short hnum, const int dif)  {  	struct sock *sk;  	const struct hlist_nulls_node *node;  	struct sock *result; -	int score, hiscore; +	int score, hiscore, matches = 0, reuseport = 0; +	u32 phash = 0;  	unsigned int hash = inet_lhashfn(net, hnum);  	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];  	rcu_read_lock();  begin:  	result = NULL; -	hiscore = -1; +	hiscore = 0;  	sk_nulls_for_each(sk, node, &ilb->head) {  		score = compute_score(sk, net, hnum, daddr, dif);  		if (score > hiscore) {  			hiscore = score;  			result = sk; +			reuseport = sk->sk_reuseport; +			if (reuseport) { +				phash = inet6_ehashfn(net, daddr, hnum, +						      saddr, sport); +				matches = 1; +			} +		} else if (score == hiscore && reuseport) { +			matches++; +			if (((u64)phash * matches) >> 32 == 0) +				result = sk; +			phash = next_pseudo_random32(phash);  		}  	}  	/* diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3701c3c6e2eb..06087e58738a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -834,7 +834,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)  		 * no RST generated if md5 hash doesn't match.  		 */  		sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev), -					   &tcp_hashinfo, &ipv6h->daddr, +					   &tcp_hashinfo, &ipv6h->saddr, +					   th->source, &ipv6h->daddr,  					   ntohs(th->source), inet6_iif(skb));  		if (!sk1)  			return; @@ -1598,6 +1599,7 @@ do_time_wait:  		struct sock *sk2;  		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, +					    &ipv6_hdr(skb)->saddr, th->source,  					    &ipv6_hdr(skb)->daddr,  					    ntohs(th->dest), inet6_iif(skb));  		if (sk2 != NULL) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1afb635d9b57..cb5bf497c09c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -45,6 +45,7 @@  #include <net/tcp_states.h>  #include <net/ip6_checksum.h>  #include <net/xfrm.h> +#include <net/inet6_hashtables.h>  #include <linux/proc_fs.h>  #include <linux/seq_file.h> @@ -203,7 +204,8 @@ static struct sock *udp6_lib_lookup2(struct net *net,  {  	struct sock *sk, *result;  	struct hlist_nulls_node *node; -	int score, badness; +	int score, badness, matches = 0, reuseport = 0; +	u32 hash = 0;  begin:  	result = NULL; @@ -214,8 +216,18 @@ begin:  		if (score > badness) {  			result = sk;  			badness = score; -			if (score == SCORE2_MAX) +			reuseport = sk->sk_reuseport; +			if (reuseport) { +				hash = inet6_ehashfn(net, daddr, hnum, +						     saddr, sport); +				matches = 1; +			} else if (score == SCORE2_MAX)  				goto exact_match; +		} else if (score == badness && reuseport) { +			matches++; +			if (((u64)hash * matches) >> 32 == 0) +				result = sk; +			hash = next_pseudo_random32(hash);  		}  	}  	/* @@ -249,7 +261,8 @@ struct sock *__udp6_lib_lookup(struct net *net,  	unsigned short hnum = ntohs(dport);  	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);  	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; -	int score, badness; +	int score, badness, matches = 0, reuseport = 0; +	u32 hash = 0;  	rcu_read_lock();  	if (hslot->count > 10) { @@ -284,6 +297,17 @@ begin:  		if (score > badness) {  			result = sk;  			badness = score; +			reuseport = sk->sk_reuseport; +			if (reuseport) { +				hash = inet6_ehashfn(net, daddr, hnum, +						     saddr, sport); +				matches = 1; +			} +		} else if (score == badness && reuseport) { +			matches++; +			if (((u64)hash * matches) >> 32 == 0) +				result = sk; +			hash = next_pseudo_random32(hash);  		}  	}  	/* | 
