summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
authorIlpo Järvinen <ij@kernel.org>2025-03-05 23:38:51 +0100
committerDavid S. Miller <davem@davemloft.net>2025-03-17 13:56:17 +0000
commit4618e195f9251947a5f15f7e5533bcc3a19b93ef (patch)
treeeed671d0ce82612d2ad7d3d39238859e1b36d0f8 /net/ipv6
parentd722762c4eaa918d3d2f78b41cca3f480a616d65 (diff)
tcp: add new TCP_TW_ACK_OOW state and allow ECN bits in TOS
ECN bits in TOS are always cleared when sending in ACKs in TW. Clearing them is problematic for TCP flows that used Accurate ECN because ECN bits decide which service queue the packet is placed into (L4S vs Classic). Effectively, TW ACKs are always downgraded from L4S to Classic queue which might impact, e.g., delay the ACK will experience on the path compared with the other packets of the flow. Change the TW ACK sending code to differentiate: - In tcp_v4_send_reset(), commit ba9e04a7ddf4f ("ip: fix tos reflection in ack and reset packets") cleans ECN bits for TW reset and this is not affected. - In tcp_v4_timewait_ack(), ECN bits for all TW ACKs are cleaned. But now only ECN bits of ACKs for oow data or paws_reject are cleaned, and ECN bits of other ACKs will not be cleaned. - In tcp_v4_reqsk_send_ack(), commit 66b13d99d96a1 ("ipv4: tcp: fix TOS value in ACK messages sent from TIME_WAIT") did not clean ECN bits of ACKs for oow data or paws_reject. But now the ECN bits rae cleaned for these ACKs. Signed-off-by: Ilpo Järvinen <ij@kernel.org> Signed-off-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/tcp_ipv6.c24
1 files changed, 17 insertions, 7 deletions
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index a2fcc317a88e..e182ee0a2330 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -999,7 +999,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
if (!IS_ERR(dst)) {
skb_dst_set(buff, dst);
ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
- tclass & ~INET_ECN_MASK, priority);
+ tclass, priority);
TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
if (rst)
TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
@@ -1135,7 +1135,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
trace_tcp_send_reset(sk, skb, reason);
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
- ipv6_get_dsfield(ipv6h), label, priority, txhash,
+ ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK,
+ label, priority, txhash,
&key);
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
@@ -1155,11 +1156,16 @@ static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
tclass, label, priority, txhash, key);
}
-static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
+static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb,
+ enum tcp_tw_status tw_status)
{
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+ u8 tclass = tw->tw_tclass;
struct tcp_key key = {};
+
+ if (tw_status == TCP_TW_ACK_OOW)
+ tclass &= ~INET_ECN_MASK;
#ifdef CONFIG_TCP_AO
struct tcp_ao_info *ao_info;
@@ -1203,7 +1209,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_tw_tsval(tcptw),
READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
- &key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel),
+ &key, tclass, cpu_to_be32(tw->tw_flowlabel),
tw->tw_priority, tw->tw_txhash);
#ifdef CONFIG_TCP_AO
@@ -1280,7 +1286,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
tcp_rsk_tsval(tcp_rsk(req)),
req->ts_recent, sk->sk_bound_dev_if,
- &key, ipv6_get_dsfield(ipv6_hdr(skb)), 0,
+ &key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK,
+ 0,
READ_ONCE(sk->sk_priority),
READ_ONCE(tcp_rsk(req)->txhash));
if (tcp_key_is_ao(&key))
@@ -1742,6 +1749,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
struct net *net = dev_net_rcu(skb->dev);
enum skb_drop_reason drop_reason;
+ enum tcp_tw_status tw_status;
int sdif = inet6_sdif(skb);
int dif = inet6_iif(skb);
const struct tcphdr *th;
@@ -1962,7 +1970,8 @@ do_time_wait:
goto csum_error;
}
- switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
+ tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn);
+ switch (tw_status) {
case TCP_TW_SYN:
{
struct sock *sk2;
@@ -1987,7 +1996,8 @@ do_time_wait:
/* to ACK */
fallthrough;
case TCP_TW_ACK:
- tcp_v6_timewait_ack(sk, skb);
+ case TCP_TW_ACK_OOW:
+ tcp_v6_timewait_ack(sk, skb, tw_status);
break;
case TCP_TW_RST:
tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);