summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_ipv4.c
diff options
context:
space:
mode:
author Jakub Kicinski <kuba@kernel.org> 2022-05-12 16:52:21 -0700
committer Jakub Kicinski <kuba@kernel.org> 2022-05-12 16:52:22 -0700
commit b67fd3d9d94223b424674f45eeadeff58b4b03ef (patch)
tree 7d2bdd36a14880cafe9595bf3317696342ea33b9 /net/ipv4/tcp_ipv4.c
parent 0c1822d9072538cf8e10dc0ab08842700e717d8e (diff)
parent ec8cb4f617a23700d37018d249e3b05149d44a38 (diff)
Merge branch 'net-inet-retire-port-only-listening_hash'
Martin KaFai Lau says: ==================== net: inet: Retire port only listening_hash This series is to retire the port only listening_hash. The listen sk is currently stored in two hash tables, listening_hash (hashed by port) and lhash2 (hashed by port and address). After commit 0ee58dad5b06 ("net: tcp6: prefer listeners bound to an address") and commit d9fbc7f6431f ("net: tcp: prefer listeners bound to an address"), the TCP-SYN lookup fast path does not use listening_hash. The commit 05c0b35709c5 ("tcp: seq_file: Replace listening_hash with lhash2") also moved the seq_file (/proc/net/tcp) iteration usage from listening_hash to lhash2. There are still a few listening_hash usages left. One of them is inet_reuseport_add_sock() which uses the listening_hash to search a listen sk during the listen() system call. This turns out to be very slow on use cases that listen on many different VIPs at a popular port (e.g. 443). [ On top of the slowness in adding to the tail in the IPv6 case ]. A later patch has a selftest to demonstrate this case. This series takes this chance to move all remaining listening_hash usages to lhash2 and then retire listening_hash. ==================== Link: https://lore.kernel.org/r/20220512000546.188616-1-kafai@fb.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r-- net/ipv4/tcp_ipv4.c 21
1 files changed, 9 insertions, 12 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 918816ec5dd4..218ad871c0e4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2283,16 +2283,15 @@ static void *listening_get_first(struct seq_file *seq)
st->offset = 0;
for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) {
struct inet_listen_hashbucket *ilb2;
- struct inet_connection_sock *icsk;
+ struct hlist_nulls_node *node;
struct sock *sk;
ilb2 = &tcp_hashinfo.lhash2[st->bucket];
- if (hlist_empty(&ilb2->head))
+ if (hlist_nulls_empty(&ilb2->nulls_head))
continue;
spin_lock(&ilb2->lock);
- inet_lhash2_for_each_icsk(icsk, &ilb2->head) {
- sk = (struct sock *)icsk;
+ sk_nulls_for_each(sk, node, &ilb2->nulls_head) {
if (seq_sk_match(seq, sk))
return sk;
}
@@ -2311,15 +2310,14 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
{
struct tcp_iter_state *st = seq->private;
struct inet_listen_hashbucket *ilb2;
- struct inet_connection_sock *icsk;
+ struct hlist_nulls_node *node;
struct sock *sk = cur;
++st->num;
++st->offset;
- icsk = inet_csk(sk);
- inet_lhash2_for_each_icsk_continue(icsk) {
- sk = (struct sock *)icsk;
+ sk = sk_nulls_next(sk);
+ sk_nulls_for_each_from(sk, node) {
if (seq_sk_match(seq, sk))
return sk;
}
@@ -2728,16 +2726,15 @@ static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
{
struct bpf_tcp_iter_state *iter = seq->private;
struct tcp_iter_state *st = &iter->state;
- struct inet_connection_sock *icsk;
+ struct hlist_nulls_node *node;
unsigned int expected = 1;
struct sock *sk;
sock_hold(start_sk);
iter->batch[iter->end_sk++] = start_sk;
- icsk = inet_csk(start_sk);
- inet_lhash2_for_each_icsk_continue(icsk) {
- sk = (struct sock *)icsk;
+ sk = sk_nulls_next(start_sk);
+ sk_nulls_for_each_from(sk, node) {
if (seq_sk_match(seq, sk)) {
if (iter->end_sk < iter->max_sk) {
sock_hold(sk);