From 0f1e4d06591d0a7907c71f7b6d1c79f8a4de8098 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:19 -0700 Subject: tcp: Fix data-races around sysctl_tcp_workaround_signed_windows. While reading sysctl_tcp_workaround_signed_windows, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers. Fixes: 15d99e02baba ("[TCP]: sysctl to allow TCP window > 32767 sans wscale") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c38e07b50639..88f7d51e6691 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -230,7 +230,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, * which we interpret as a sign the remote TCP is not * misinterpreting the window field as a signed quantity. */ - if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)) (*rcv_wnd) = min(space, MAX_TCP_WINDOW); else (*rcv_wnd) = min_t(u32, space, U16_MAX); @@ -285,7 +285,7 @@ static u16 tcp_select_window(struct sock *sk) * scaled window. */ if (!tp->rx_opt.rcv_wscale && - sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows) + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)) new_win = min(new_win, MAX_TCP_WINDOW); else new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); -- cgit From 9fb90193fbd66b4c5409ef729fd081861f8b6351 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:20 -0700 Subject: tcp: Fix a data-race around sysctl_tcp_limit_output_bytes. While reading sysctl_tcp_limit_output_bytes, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 46d3ceabd8d9 ("tcp: TCP Small Queues") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 88f7d51e6691..80c9bed73337 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2507,7 +2507,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift)); if (sk->sk_pacing_status == SK_PACING_NONE) limit = min_t(unsigned long, limit, - sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes)); limit <<= factor; if (static_branch_unlikely(&tcp_tx_delay_enabled) && -- cgit From e0bb4ab9dfddd872622239f49fb2bd403b70853b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:22 -0700 Subject: tcp: Fix a data-race around sysctl_tcp_min_tso_segs. While reading sysctl_tcp_min_tso_segs, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 95bd09eb2750 ("tcp: TSO packets automatic sizing") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 80c9bed73337..6e29cf391a64 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1995,7 +1995,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) min_tso = ca_ops->min_tso_segs ? ca_ops->min_tso_segs(sk) : - sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs; + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); return min_t(u32, tso_segs, sk->sk_gso_max_segs); -- cgit From 2455e61b85e9c99af38cd889a7101f1d48b33cb4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:23 -0700 Subject: tcp: Fix a data-race around sysctl_tcp_tso_rtt_log. While reading sysctl_tcp_tso_rtt_log, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 65466904b015 ("tcp: adjust TSO packet sizes based on min_rtt") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6e29cf391a64..cf6713c9567e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1976,7 +1976,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift); - r = tcp_min_rtt(tcp_sk(sk)) >> sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log; + r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log); if (r < BITS_PER_TYPE(sk->sk_gso_max_size)) bytes += sk->sk_gso_max_size >> r; -- cgit From 4d8f24eeedc58d5f87b650ddda73c16e8ba56559 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 21 Jul 2022 20:44:04 +0000 Subject: Revert "tcp: change pingpong threshold to 3" This reverts commit 4a41f453bedfd5e9cd040bad509d9da49feb3e2c. This to-be-reverted commit was meant to apply a stricter rule for the stack to enter pingpong mode. However, the condition used to check for interactive session "before(tp->lsndtime, icsk->icsk_ack.lrcvtime)" is jiffy based and might be too coarse, which delays the stack entering pingpong mode. We revert this patch so that we no longer use the above condition to determine interactive session, and also reduce pingpong threshold to 1. Fixes: 4a41f453bedf ("tcp: change pingpong threshold to 3") Reported-by: LemmyHuang Suggested-by: Neal Cardwell Signed-off-by: Wei Wang Acked-by: Neal Cardwell Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20220721204404.388396-1-weiwan@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_output.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index cf6713c9567e..2efe41c84ee8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -167,16 +167,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp, if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); - /* If this is the first data packet sent in response to the - * previous received data, - * and it is a reply for ato after last received packet, - * increase pingpong count. - */ - if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) && - (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) - inet_csk_inc_pingpong_cnt(sk); - tp->lsndtime = now; + + /* If it is a reply for ato after last received + * packet, enter pingpong mode. + */ + if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) + inet_csk_enter_pingpong_mode(sk); } /* Account for an ACK we sent. */ -- cgit From 02739545951ad4c1215160db7fbf9b7a918d3c0b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 22 Jul 2022 11:22:00 -0700 Subject: net: Fix data-races around sysctl_[rw]mem(_offset)?. While reading these sysctl variables, they can be changed concurrently. Thus, we need to add READ_ONCE() to their readers. - .sysctl_rmem - .sysctl_rwmem - .sysctl_rmem_offset - .sysctl_wmem_offset - sysctl_tcp_rmem[1, 2] - sysctl_tcp_wmem[1, 2] - sysctl_decnet_rmem[1] - sysctl_decnet_wmem[1] - sysctl_tipc_rmem[1] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2efe41c84ee8..4c376b6d8764 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -238,7 +238,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, *rcv_wscale = 0; if (wscale_ok) { /* Set window scaling on max possible window */ - space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); + space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); space = max_t(u32, space, sysctl_rmem_max); space = min_t(u32, space, *window_clamp); *rcv_wscale = clamp_t(int, ilog2(space) - 15, -- cgit