| | | |
|---|---|---|
| author | Thomas Zimmermann <tzimmermann@suse.de> | 2021-12-17 11:33:33 +0100 |
| committer | Thomas Zimmermann <tzimmermann@suse.de> | 2021-12-17 11:33:33 +0100 |
| commit | 1758047057dbe329be712a31b79db7151b5871f8 (patch) | |
| tree | 00203eb55328f2feda70b3d37c964287b364796f /net/ipv4/tcp.c | |
| parent | bcae3af286f49bf4f6cda03f165fbe530f4a6bed (diff) | |
| parent | 1c405ca11bf563de1725e5ecfb4a74ee289d2ee9 (diff) | |
Merge drm/drm-next into drm-misc-next-fixes
Backmerging to bring drm-misc-next-fixes up to the latest state for
the current release cycle.
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Diffstat (limited to 'net/ipv4/tcp.c')
| Mode | Path | Lines changed |
|---|---|---|
| -rw-r--r-- | net/ipv4/tcp.c | 163 |
1 file changed, 82 insertions, 81 deletions
```diff
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e8b48df73c85..bbb3d39c69af 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -260,7 +260,6 @@
 #include <linux/random.h>
 #include <linux/memblock.h>
 #include <linux/highmem.h>
-#include <linux/swap.h>
 #include <linux/cache.h>
 #include <linux/err.h>
 #include <linux/time.h>
@@ -287,8 +286,8 @@ enum {
 	TCP_CMSG_TS = 2
 };
 
-struct percpu_counter tcp_orphan_count;
-EXPORT_SYMBOL_GPL(tcp_orphan_count);
+DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
+EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
 
 long sysctl_tcp_mem[3] __read_mostly;
 EXPORT_SYMBOL(sysctl_tcp_mem);
@@ -325,11 +324,6 @@ struct tcp_splice_state {
 unsigned long tcp_memory_pressure __read_mostly;
 EXPORT_SYMBOL_GPL(tcp_memory_pressure);
 
-DEFINE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
-EXPORT_SYMBOL(tcp_rx_skb_cache_key);
-
-DEFINE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
-
 void tcp_enter_memory_pressure(struct sock *sk)
 {
 	unsigned long val;
@@ -486,10 +480,7 @@ static bool tcp_stream_is_readable(struct sock *sk, int target)
 {
 	if (tcp_epollin_ready(sk, target))
 		return true;
-
-	if (sk->sk_prot->stream_memory_read)
-		return sk->sk_prot->stream_memory_read(sk);
-	return false;
+	return sk_is_readable(sk);
 }
 
 /*
@@ -647,7 +638,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 }
 EXPORT_SYMBOL(tcp_ioctl);
 
-static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
+void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
 {
 	TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
 	tp->pushed_seq = tp->write_seq;
@@ -658,15 +649,13 @@ static inline bool forced_push(const struct tcp_sock *tp)
 	return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
 }
 
-static void skb_entail(struct sock *sk, struct sk_buff *skb)
+void tcp_skb_entail(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
-	skb->csum    = 0;
 	tcb->seq     = tcb->end_seq = tp->write_seq;
 	tcb->tcp_flags = TCPHDR_ACK;
-	tcb->sacked  = 0;
 	__skb_header_release(skb);
 	tcp_add_write_queue_tail(sk, skb);
 	sk_wmem_queued_add(sk, skb->truesize);
@@ -861,33 +850,19 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 }
 EXPORT_SYMBOL(tcp_splice_read);
 
-struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
-				    bool force_schedule)
+struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
+				     bool force_schedule)
 {
 	struct sk_buff *skb;
 
-	if (likely(!size)) {
-		skb = sk->sk_tx_skb_cache;
-		if (skb) {
-			skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
-			sk->sk_tx_skb_cache = NULL;
-			pskb_trim(skb, 0);
-			INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
-			skb_shinfo(skb)->tx_flags = 0;
-			memset(TCP_SKB_CB(skb), 0, sizeof(struct tcp_skb_cb));
-			return skb;
-		}
-	}
-	/* The TCP header must be at least 32-bit aligned.  */
-	size = ALIGN(size, 4);
-
 	if (unlikely(tcp_under_memory_pressure(sk)))
 		sk_mem_reclaim_partial(sk);
 
-	skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
+	skb = alloc_skb_fclone(size + MAX_TCP_HEADER, gfp);
 	if (likely(skb)) {
 		bool mem_scheduled;
 
+		skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
 		if (force_schedule) {
 			mem_scheduled = true;
 			sk_forced_mem_schedule(sk, skb->truesize);
@@ -895,12 +870,8 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
 			mem_scheduled = sk_wmem_schedule(sk, skb->truesize);
 		}
 		if (likely(mem_scheduled)) {
-			skb_reserve(skb, sk->sk_prot->max_header);
-			/*
-			 * Make sure that we have exactly size bytes
-			 * available to the caller, no more, no less.
-			 */
-			skb->reserved_tailroom = skb->end - skb->tail - size;
+			skb_reserve(skb, MAX_TCP_HEADER);
+			skb->ip_summed = CHECKSUM_PARTIAL;
 			INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
 			return skb;
 		}
@@ -953,18 +924,20 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
  * importantly be able to generate EPOLLOUT for Edge Trigger epoll()
  * users.
  */
-void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb)
+void tcp_remove_empty_skb(struct sock *sk)
 {
-	if (skb && !skb->len) {
+	struct sk_buff *skb = tcp_write_queue_tail(sk);
+
+	if (skb && TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
 		tcp_unlink_write_queue(skb, sk);
 		if (tcp_write_queue_empty(sk))
 			tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
-		sk_wmem_free_skb(sk, skb);
+		tcp_wmem_free_skb(sk, skb);
 	}
 }
 
-struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
-			       struct page *page, int offset, size_t *size)
+static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
+				      struct page *page, int offset, size_t *size)
 {
 	struct sk_buff *skb = tcp_write_queue_tail(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -977,15 +950,15 @@ new_segment:
 		if (!sk_stream_memory_free(sk))
 			return NULL;
 
-		skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
-					  tcp_rtx_and_write_queues_empty(sk));
+		skb = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation,
+					   tcp_rtx_and_write_queues_empty(sk));
 		if (!skb)
 			return NULL;
 
 #ifdef CONFIG_TLS_DEVICE
 		skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
 #endif
-		skb_entail(sk, skb);
+		tcp_skb_entail(sk, skb);
 		copy = size_goal;
 	}
 
@@ -1016,7 +989,6 @@ new_segment:
 	skb->truesize += copy;
 	sk_wmem_queued_add(sk, copy);
 	sk_mem_charge(sk, copy);
-	skb->ip_summed = CHECKSUM_PARTIAL;
 	WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
 	TCP_SKB_CB(skb)->end_seq += copy;
 	tcp_skb_pcount_set(skb, 0);
@@ -1107,7 +1079,7 @@ out:
 	return copied;
 
 do_error:
-	tcp_remove_empty_skb(sk, tcp_write_queue_tail(sk));
+	tcp_remove_empty_skb(sk);
 	if (copied)
 		goto out;
 out_err:
@@ -1306,15 +1278,14 @@ new_segment:
 					goto restart;
 			}
 			first_skb = tcp_rtx_and_write_queues_empty(sk);
-			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
-						  first_skb);
+			skb = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation,
+						   first_skb);
 			if (!skb)
 				goto wait_for_space;
 
 			process_backlog++;
-			skb->ip_summed = CHECKSUM_PARTIAL;
 
-			skb_entail(sk, skb);
+			tcp_skb_entail(sk, skb);
 			copy = size_goal;
 
 			/* All packets are restored as if they have
@@ -1329,14 +1300,7 @@ new_segment:
 		if (copy > msg_data_left(msg))
 			copy = msg_data_left(msg);
 
-		/* Where to copy to? */
-		if (skb_availroom(skb) > 0 && !zc) {
-			/* We have some space in skb head. Superb! */
-			copy = min_t(int, copy, skb_availroom(skb));
-			err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
-			if (err)
-				goto do_fault;
-		} else if (!zc) {
+		if (!zc) {
 			bool merge = true;
 			int i = skb_shinfo(skb)->nr_frags;
 			struct page_frag *pfrag = sk_page_frag(sk);
@@ -1355,6 +1319,15 @@ new_segment:
 
 			copy = min_t(int, copy, pfrag->size - pfrag->offset);
 
+			/* skb changing from pure zc to mixed, must charge zc */
+			if (unlikely(skb_zcopy_pure(skb))) {
+				if (!sk_wmem_schedule(sk, skb->data_len))
+					goto wait_for_space;
+
+				sk_mem_charge(sk, skb->data_len);
+				skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
+			}
+
 			if (!sk_wmem_schedule(sk, copy))
 				goto wait_for_space;
 
@@ -1375,8 +1348,16 @@ new_segment:
 			}
 			pfrag->offset += copy;
 		} else {
-			if (!sk_wmem_schedule(sk, copy))
-				goto wait_for_space;
+			/* First append to a fragless skb builds initial
+			 * pure zerocopy skb
+			 */
+			if (!skb->len)
+				skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY;
+
+			if (!skb_zcopy_pure(skb)) {
+				if (!sk_wmem_schedule(sk, copy))
+					goto wait_for_space;
+			}
 
 			err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
 			if (err == -EMSGSIZE || err == -EEXIST) {
@@ -1435,9 +1416,7 @@ out_nopush:
 	return copied + copied_syn;
 
 do_error:
-	skb = tcp_write_queue_tail(sk);
-do_fault:
-	tcp_remove_empty_skb(sk, skb);
+	tcp_remove_empty_skb(sk);
 
 	if (copied + copied_syn)
 		goto out;
@@ -1779,6 +1758,9 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb,
 {
 	skb_frag_t *frag;
 
+	if (unlikely(offset_skb >= skb->len))
+		return NULL;
+
 	offset_skb -= skb_headlen(skb);
 	if ((int)offset_skb < 0 || skb_has_frag_list(skb))
 		return NULL;
@@ -2690,11 +2672,36 @@ void tcp_shutdown(struct sock *sk, int how)
 }
 EXPORT_SYMBOL(tcp_shutdown);
 
+int tcp_orphan_count_sum(void)
+{
+	int i, total = 0;
+
+	for_each_possible_cpu(i)
+		total += per_cpu(tcp_orphan_count, i);
+
+	return max(total, 0);
+}
+
+static int tcp_orphan_cache;
+static struct timer_list tcp_orphan_timer;
+#define TCP_ORPHAN_TIMER_PERIOD msecs_to_jiffies(100)
+
+static void tcp_orphan_update(struct timer_list *unused)
+{
+	WRITE_ONCE(tcp_orphan_cache, tcp_orphan_count_sum());
+	mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD);
+}
+
+static bool tcp_too_many_orphans(int shift)
+{
+	return READ_ONCE(tcp_orphan_cache) << shift > sysctl_tcp_max_orphans;
+}
+
 bool tcp_check_oom(struct sock *sk, int shift)
 {
 	bool too_many_orphans, out_of_socket_memory;
 
-	too_many_orphans = tcp_too_many_orphans(sk, shift);
+	too_many_orphans = tcp_too_many_orphans(shift);
 	out_of_socket_memory = tcp_out_of_memory(sk);
 
 	if (too_many_orphans)
@@ -2803,7 +2810,7 @@ adjudge_to_death:
 	/* remove backlog if any, without releasing ownership. */
 	__release_sock(sk);
 
-	percpu_counter_inc(sk->sk_prot->orphan_count);
+	this_cpu_inc(tcp_orphan_count);
 
 	/* Have we already been destroyed by a softirq or backlog? */
 	if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
@@ -2906,7 +2913,7 @@ static void tcp_rtx_queue_purge(struct sock *sk)
 		 * list_del(&skb->tcp_tsorted_anchor)
 		 */
 		tcp_rtx_queue_unlink(skb, sk);
-		sk_wmem_free_skb(sk, skb);
+		tcp_wmem_free_skb(sk, skb);
 	}
 }
 
@@ -2917,14 +2924,9 @@ void tcp_write_queue_purge(struct sock *sk)
 	tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
 	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
 		tcp_skb_tsorted_anchor_cleanup(skb);
-		sk_wmem_free_skb(sk, skb);
+		tcp_wmem_free_skb(sk, skb);
 	}
 	tcp_rtx_queue_purge(sk);
-	skb = sk->sk_tx_skb_cache;
-	if (skb) {
-		__kfree_skb(skb);
-		sk->sk_tx_skb_cache = NULL;
-	}
 	INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
 	sk_mem_reclaim(sk);
 	tcp_clear_all_retrans_hints(tcp_sk(sk));
@@ -2961,10 +2963,6 @@ int tcp_disconnect(struct sock *sk, int flags)
 
 	tcp_clear_xmit_timers(sk);
 	__skb_queue_purge(&sk->sk_receive_queue);
-	if (sk->sk_rx_skb_cache) {
-		__kfree_skb(sk->sk_rx_skb_cache);
-		sk->sk_rx_skb_cache = NULL;
-	}
 	WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
 	tp->urg_data = 0;
 	tcp_write_queue_purge(sk);
@@ -4505,7 +4503,10 @@ void __init tcp_init(void)
 		     sizeof_field(struct sk_buff, cb));
 
 	percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
-	percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
+
+	timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE);
+	mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD);
+
 	inet_hashinfo_init(&tcp_hashinfo);
 	inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash",
 			    thash_entries, 21,  /* one slot per 2 MB*/
```
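One notable change in this diff is that `tcp_orphan_count` moves from a `percpu_counter` to bare per-CPU counters: writers use `this_cpu_inc()`, `tcp_orphan_count_sum()` folds the slots together, a deferrable 100 ms timer refreshes a cached total, and `tcp_too_many_orphans()` reads only that cache. Below is a minimal user-space sketch of the same pattern, assuming threads stand in for CPUs; the names `orphan_slot`, `orphan_cache` and `orphan_sum` are invented for the example and it is not kernel code.

```c
/*
 * Illustrative user-space sketch (not kernel code) of the counting pattern
 * in the diff above: writers bump a cheap per-slot counter, a periodic task
 * folds the slots into a cached total, and readers consult only the cache,
 * trading a little staleness for no shared counter on the hot path.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

#define NSLOTS 4                       /* stand-in for the number of CPUs */

static atomic_int orphan_slot[NSLOTS]; /* per-slot counts, may go negative */
static atomic_int orphan_cache;        /* periodically refreshed total */

/* Analogue of tcp_orphan_count_sum(): sum all slots, clamp at zero. */
static int orphan_sum(void)
{
	int total = 0;

	for (int i = 0; i < NSLOTS; i++)
		total += atomic_load(&orphan_slot[i]);
	return total > 0 ? total : 0;
}

/* Analogue of the deferrable timer: refresh the cache every 100 ms. */
static void *orphan_update(void *arg)
{
	(void)arg;
	for (int tick = 0; tick < 5; tick++) {
		atomic_store(&orphan_cache, orphan_sum());
		usleep(100 * 1000);
	}
	return NULL;
}

int main(void)
{
	pthread_t timer;

	pthread_create(&timer, NULL, orphan_update, NULL);

	/* Hot path: increment on one "CPU", decrement possibly on another. */
	atomic_fetch_add(&orphan_slot[0], 1);
	atomic_fetch_sub(&orphan_slot[1], 1);
	atomic_fetch_add(&orphan_slot[2], 1);

	pthread_join(timer, NULL);
	printf("cached orphan count: %d\n", atomic_load(&orphan_cache));
	return 0;
}
```

The trade-off mirrors the patch: increments and decrements stay cheap and uncoordinated across slots, while readers tolerate a total that can be up to one refresh period stale and is clamped at zero, since a socket may be counted up on one CPU and down on another.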
