diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2022-05-12 16:52:21 -0700 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2022-05-12 16:52:22 -0700 |
| commit | b67fd3d9d94223b424674f45eeadeff58b4b03ef (patch) | |
| tree | 7d2bdd36a14880cafe9595bf3317696342ea33b9 /net/ipv4/tcp_ipv4.c | |
| parent | 0c1822d9072538cf8e10dc0ab08842700e717d8e (diff) | |
| parent | ec8cb4f617a23700d37018d249e3b05149d44a38 (diff) | |
Merge branch 'net-inet-retire-port-only-listening_hash'
Martin KaFai Lau says:
====================
net: inet: Retire port only listening_hash
This series is to retire the port only listening_hash.
The listen sk is currently stored in two hash tables,
listening_hash (hashed by port) and lhash2 (hashed by port and address).
After commit 0ee58dad5b06 ("net: tcp6: prefer listeners bound to an address")
and commit d9fbc7f6431f ("net: tcp: prefer listeners bound to an address"),
the TCP-SYN lookup fast path does not use listening_hash.
The commit 05c0b35709c5 ("tcp: seq_file: Replace listening_hash with lhash2")
also moved the seq_file (/proc/net/tcp) iteration usage from
listening_hash to lhash2.
There are still a few listening_hash usages left.
One of them is inet_reuseport_add_sock() which uses the listening_hash
to search a listen sk during the listen() system call. This turns
out to be very slow on use cases that listen on many different
VIPs at a popular port (e.g. 443). [ On top of the slowness in
adding to the tail in the IPv6 case ]. A latter patch has a
selftest to demonstrate this case.
This series takes this chance to move all remaining listening_hash
usages to lhash2 and then retire listening_hash.
====================
Link: https://lore.kernel.org/r/20220512000546.188616-1-kafai@fb.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
| -rw-r--r-- | net/ipv4/tcp_ipv4.c | 21 |
1 files changed, 9 insertions, 12 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 918816ec5dd4..218ad871c0e4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2283,16 +2283,15 @@ static void *listening_get_first(struct seq_file *seq) st->offset = 0; for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) { struct inet_listen_hashbucket *ilb2; - struct inet_connection_sock *icsk; + struct hlist_nulls_node *node; struct sock *sk; ilb2 = &tcp_hashinfo.lhash2[st->bucket]; - if (hlist_empty(&ilb2->head)) + if (hlist_nulls_empty(&ilb2->nulls_head)) continue; spin_lock(&ilb2->lock); - inet_lhash2_for_each_icsk(icsk, &ilb2->head) { - sk = (struct sock *)icsk; + sk_nulls_for_each(sk, node, &ilb2->nulls_head) { if (seq_sk_match(seq, sk)) return sk; } @@ -2311,15 +2310,14 @@ static void *listening_get_next(struct seq_file *seq, void *cur) { struct tcp_iter_state *st = seq->private; struct inet_listen_hashbucket *ilb2; - struct inet_connection_sock *icsk; + struct hlist_nulls_node *node; struct sock *sk = cur; ++st->num; ++st->offset; - icsk = inet_csk(sk); - inet_lhash2_for_each_icsk_continue(icsk) { - sk = (struct sock *)icsk; + sk = sk_nulls_next(sk); + sk_nulls_for_each_from(sk, node) { if (seq_sk_match(seq, sk)) return sk; } @@ -2728,16 +2726,15 @@ static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq, { struct bpf_tcp_iter_state *iter = seq->private; struct tcp_iter_state *st = &iter->state; - struct inet_connection_sock *icsk; + struct hlist_nulls_node *node; unsigned int expected = 1; struct sock *sk; sock_hold(start_sk); iter->batch[iter->end_sk++] = start_sk; - icsk = inet_csk(start_sk); - inet_lhash2_for_each_icsk_continue(icsk) { - sk = (struct sock *)icsk; + sk = sk_nulls_next(start_sk); + sk_nulls_for_each_from(sk, node) { if (seq_sk_match(seq, sk)) { if (iter->end_sk < iter->max_sk) { sock_hold(sk); |
