diff options
Diffstat (limited to 'net/ipv6/tcp_ipv6.c')
| -rw-r--r-- | net/ipv6/tcp_ipv6.c | 161 |
1 files changed, 109 insertions, 52 deletions
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 44d431849d39..81f51335e326 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * TCP over IPv6 * Linux INET6 implementation @@ -16,11 +17,6 @@ * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind * a single port at the same time. * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/bottom_half.h> @@ -43,6 +39,7 @@ #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/random.h> +#include <linux/indirect_call_wrapper.h> #include <net/tcp.h> #include <net/ndisc.h> @@ -90,6 +87,18 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, } #endif +/* Helper returning the inet6 address from a given tcp socket. + * It can be used in TCP stack instead of inet6_sk(sk). + * This avoids a dereference and allow compiler optimizations. + * It is a specialized version of inet6_sk_generic(). + */ +static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk) +{ + unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo); + + return (struct ipv6_pinfo *)(((u8 *)sk) + offset); +} + static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -99,7 +108,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; - inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); + tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); } } @@ -138,7 +147,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; struct ipv6_txoptions *opt; @@ -162,7 +171,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (!flowlabel) + if (IS_ERR(flowlabel)) return -EINVAL; fl6_sock_release(flowlabel); } @@ -206,7 +215,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; - tp->write_seq = 0; + WRITE_ONCE(tp->write_seq, 0); } sk->sk_v6_daddr = usin->sin6_addr; @@ -302,10 +311,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (likely(!tp->repair)) { if (!tp->write_seq) - tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32, - sk->sk_v6_daddr.s6_addr32, - inet->inet_sport, - inet->inet_dport); + WRITE_ONCE(tp->write_seq, + secure_tcpv6_seq(np->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, + inet->inet_sport, + inet->inet_dport)); tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk), np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32); @@ -390,14 +400,14 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state == TCP_CLOSE) goto out; - if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) { + if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) { __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); goto out; } tp = tcp_sk(sk); /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ - fastopen = tp->fastopen_rsk; + fastopen = rcu_dereference(tp->fastopen_rsk); snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && !between(seq, snd_una, tp->snd_nxt)) { @@ -405,7 +415,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - np = inet6_sk(sk); + np = tcp_inet6_sk(sk); if (type == NDISC_REDIRECT) { if (!sock_owned_by_user(sk)) { @@ -478,7 +488,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, enum tcp_synack_type synack_type) { struct inet_request_sock *ireq = inet_rsk(req); - struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; struct flowi6 *fl6 = &fl->u.ip6; struct sk_buff *skb; @@ -503,7 +513,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); - err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass); + err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass, + sk->sk_priority); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -737,7 +748,7 @@ static void tcp_v6_init_req(struct request_sock *req, { bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); struct inet_request_sock *ireq = inet_rsk(req); - const struct ipv6_pinfo *np = inet6_sk(sk_listener); + const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; @@ -794,7 +805,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int rst, - u8 tclass, __be32 label) + u8 tclass, __be32 label, u32 priority) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -874,9 +885,17 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_oif = oif; } - if (sk) - mark = (sk->sk_state == TCP_TIME_WAIT) ? - inet_twsk(sk)->tw_mark : sk->sk_mark; + if (sk) { + if (sk->sk_state == TCP_TIME_WAIT) { + mark = inet_twsk(sk)->tw_mark; + /* autoflowlabel relies on buff->hash */ + skb_set_hash(buff, inet_twsk(sk)->tw_txhash, + PKT_HASH_TYPE_L4); + } else { + mark = sk->sk_mark; + } + buff->tstamp = tcp_transmit_time(sk); + } fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; @@ -890,7 +909,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(buff, dst); - ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass); + ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass, + priority); TCP_INC_STATS(net, TCP_MIB_OUTSEGS); if (rst) TCP_INC_STATS(net, TCP_MIB_OUTRSTS); @@ -903,15 +923,18 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); + struct ipv6hdr *ipv6h = ipv6_hdr(skb); u32 seq = 0, ack_seq = 0; struct tcp_md5sig_key *key = NULL; #ifdef CONFIG_TCP_MD5SIG const __u8 *hash_location = NULL; - struct ipv6hdr *ipv6h = ipv6_hdr(skb); unsigned char newhash[16]; int genhash; struct sock *sk1 = NULL; #endif + __be32 label = 0; + u32 priority = 0; + struct net *net; int oif = 0; if (th->rst) @@ -923,6 +946,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) if (!sk && !ipv6_unicast_destination(skb)) return; + net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); #ifdef CONFIG_TCP_MD5SIG rcu_read_lock(); hash_location = tcp_parse_md5sig_option(th); @@ -936,7 +960,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) * Incoming packet is checked with md5 hash with finding key, * no RST generated if md5 hash doesn't match. */ - sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev), + sk1 = inet6_lookup_listener(net, &tcp_hashinfo, NULL, 0, &ipv6h->saddr, th->source, &ipv6h->daddr, @@ -964,11 +988,25 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) if (sk) { oif = sk->sk_bound_dev_if; - if (sk_fullsock(sk)) + if (sk_fullsock(sk)) { + const struct ipv6_pinfo *np = tcp_inet6_sk(sk); + trace_tcp_send_reset(sk, skb); + if (np->repflow) + label = ip6_flowlabel(ipv6h); + priority = sk->sk_priority; + } + if (sk->sk_state == TCP_TIME_WAIT) { + label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); + priority = inet_twsk(sk)->tw_priority; + } + } else { + if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) + label = ip6_flowlabel(ipv6h); } - tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); + tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, + label, priority); #ifdef CONFIG_TCP_MD5SIG out: @@ -979,10 +1017,10 @@ out: static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, u8 tclass, - __be32 label) + __be32 label, u32 priority) { tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, - tclass, label); + tclass, label, priority); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -994,7 +1032,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp_raw() + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), - tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel)); + tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority); inet_twsk_put(tw); } @@ -1017,7 +1055,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr), - 0, 0); + 0, 0, sk->sk_priority); } @@ -1032,6 +1070,21 @@ static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) return sk; } +u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, + struct tcphdr *th, u32 *cookie) +{ + u16 mss = 0; +#ifdef CONFIG_SYN_COOKIES + mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, + &tcp_request_sock_ipv6_ops, sk, th); + if (mss) { + *cookie = __cookie_v6_init_sequence(iph, th, &mss); + tcp_synq_overflow(sk); + } +#endif + return mss; +} + static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { if (skb->protocol == htons(ETH_P_IP)) @@ -1066,9 +1119,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * { struct inet_request_sock *ireq; struct ipv6_pinfo *newnp; - const struct ipv6_pinfo *np = inet6_sk(sk); + const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; - struct tcp6_sock *newtcp6sk; struct inet_sock *newinet; struct tcp_sock *newtp; struct sock *newsk; @@ -1088,11 +1140,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * if (!newsk) return NULL; - newtcp6sk = (struct tcp6_sock *)newsk; - inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; + inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); newinet = inet_sk(newsk); - newnp = inet6_sk(newsk); + newnp = tcp_inet6_sk(newsk); newtp = tcp_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); @@ -1156,12 +1207,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * ip6_dst_store(newsk, dst, NULL, NULL); inet6_sk_rx_dst_set(newsk, skb); - newtcp6sk = (struct tcp6_sock *)newsk; - inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; + inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); newtp = tcp_sk(newsk); newinet = inet_sk(newsk); - newnp = inet6_sk(newsk); + newnp = tcp_inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); @@ -1276,9 +1326,9 @@ out: */ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) { - struct ipv6_pinfo *np = inet6_sk(sk); - struct tcp_sock *tp; + struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct sk_buff *opt_skb = NULL; + struct tcp_sock *tp; /* Imagine: socket is IPv6. IPv4 packet arrives, goes to IPv4 receive handler and backlogged. @@ -1426,8 +1476,9 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, skb->tstamp || skb_hwtstamps(skb)->hwtstamp; } -static int tcp_v6_rcv(struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) { + struct sk_buff *skb_to_free; int sdif = inet6_sdif(skb); const struct tcphdr *th; const struct ipv6hdr *hdr; @@ -1524,7 +1575,7 @@ process: return 0; } } - if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) { + if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) { __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); goto discard_and_relse; } @@ -1554,12 +1605,17 @@ process: tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { + skb_to_free = sk->sk_rx_skb_cache; + sk->sk_rx_skb_cache = NULL; ret = tcp_v6_do_rcv(sk, skb); - } else if (tcp_add_backlog(sk, skb)) { - goto discard_and_relse; + } else { + if (tcp_add_backlog(sk, skb)) + goto discard_and_relse; + skb_to_free = NULL; } bh_unlock_sock(sk); - + if (skb_to_free) + __kfree_skb(skb_to_free); put_and_return: if (refcounted) sock_put(sk); @@ -1639,7 +1695,7 @@ do_time_wait: goto discard_it; } -static void tcp_v6_early_demux(struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) { const struct ipv6hdr *hdr; const struct tcphdr *th; @@ -1669,7 +1725,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); if (dst) - dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); + dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie); if (dst && inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); @@ -1835,12 +1891,13 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) state = inet_sk_state_load(sp); if (state == TCP_LISTEN) - rx_queue = sp->sk_ack_backlog; + rx_queue = READ_ONCE(sp->sk_ack_backlog); else /* Because we don't lock the socket, * we might find a transient negative value. */ - rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); + rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - + READ_ONCE(tp->copied_seq), 0); seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " @@ -1851,7 +1908,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, state, - tp->write_seq - tp->snd_una, + READ_ONCE(tp->write_seq) - tp->snd_una, rx_queue, timer_active, jiffies_delta_to_clock_t(timer_expires - jiffies), |
