From 6493517eaea9b052e081e557f7c8bb06cc6b1852 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 13 Mar 2015 00:04:51 -0500 Subject: tcp_metrics: panic when tcp_metrics_init fails. There is not a practical way to cleanup during boot so just panic if there is a problem initializing tcp_metrics. That will at least give us a clear place to start debugging if something does go wrong. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index e5f41bd5ec1b..4206b14d956d 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -1175,16 +1175,10 @@ void __init tcp_metrics_init(void) ret = register_pernet_subsys(&tcp_net_metrics_ops); if (ret < 0) - goto cleanup; + panic("Could not allocate the tcp_metrics hash table\n"); + ret = genl_register_family_with_ops(&tcp_metrics_nl_family, tcp_metrics_nl_ops); if (ret < 0) - goto cleanup_subsys; - return; - -cleanup_subsys: - unregister_pernet_subsys(&tcp_net_metrics_ops); - -cleanup: - return; + panic("Could not register tcp_metrics generic netlink\n"); } -- cgit From 3e5da62d0bcbfa86332f66cca0e3983e70557fac Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 13 Mar 2015 00:05:24 -0500 Subject: tcp_metrics: Mix the network namespace into the hash function. In preparation for using one hash table for all network namespaces mix the network namespace into the hash value. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 4206b14d956d..fbb42f44501e 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -252,6 +252,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, } net = dev_net(dst->dev); + hash ^= net_hash_mix(net); hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; @@ -299,6 +300,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock return NULL; net = twsk_net(tw); + hash ^= net_hash_mix(net); hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; @@ -347,6 +349,7 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, return NULL; net = dev_net(dst->dev); + hash ^= net_hash_mix(net); hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); tm = __tcp_get_metrics(&saddr, &daddr, net, hash); @@ -994,6 +997,7 @@ static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) if (!reply) goto nla_put_failure; + hash ^= net_hash_mix(net); hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); ret = -ESRCH; rcu_read_lock(); @@ -1070,6 +1074,7 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) if (ret < 0) src = false; + hash ^= net_hash_mix(net); hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); hb = net->ipv4.tcp_metrics_hash + hash; pp = &hb->chain; -- cgit From 849e8a0ca8d5d286510ab30b5f67b91aa6965ef6 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 13 Mar 2015 00:05:52 -0500 Subject: tcp_metrics: Add a field tcpm_net and verify it matches on lookup In preparation for using one tcp metrics hash table for all network namespaces add a field tcpm_net to struct tcp_metrics_block, and verify that field on all hash table lookups. Make the field tcpm_net of type possible_net_t so it takes no space when network namespaces are disabled. Further add a function tm_net to read that field so we can be efficient when network namespaces are disabled and concise the rest of the time. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index fbb42f44501e..461c3d2e1ca4 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -40,6 +40,7 @@ struct tcp_fastopen_metrics { struct tcp_metrics_block { struct tcp_metrics_block __rcu *tcpm_next; + possible_net_t tcpm_net; struct inetpeer_addr tcpm_saddr; struct inetpeer_addr tcpm_daddr; unsigned long tcpm_stamp; @@ -52,6 +53,11 @@ struct tcp_metrics_block { struct rcu_head rcu_head; }; +static inline struct net *tm_net(struct tcp_metrics_block *tm) +{ + return read_pnet(&tm->tcpm_net); +} + static bool tcp_metric_locked(struct tcp_metrics_block *tm, enum tcp_metric_index idx) { @@ -183,6 +189,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, if (!tm) goto out_unlock; } + write_pnet(&tm->tcpm_net, net); tm->tcpm_saddr = *saddr; tm->tcpm_daddr = *daddr; @@ -217,7 +224,8 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *s for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { if (addr_same(&tm->tcpm_saddr, saddr) && - addr_same(&tm->tcpm_daddr, daddr)) + addr_same(&tm->tcpm_daddr, daddr) && + net_eq(tm_net(tm), net)) break; depth++; } @@ -258,7 +266,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { if (addr_same(&tm->tcpm_saddr, &saddr) && - addr_same(&tm->tcpm_daddr, &daddr)) + addr_same(&tm->tcpm_daddr, &daddr) && + net_eq(tm_net(tm), net)) break; } tcpm_check_stamp(tm, dst); @@ -306,7 +315,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { if (addr_same(&tm->tcpm_saddr, &saddr) && - addr_same(&tm->tcpm_daddr, &daddr)) + addr_same(&tm->tcpm_daddr, &daddr) && + net_eq(tm_net(tm), net)) break; } return tm; @@ -912,6 +922,8 @@ static int tcp_metrics_nl_dump(struct sk_buff *skb, rcu_read_lock(); for (col = 0, tm = rcu_dereference(hb->chain); tm; tm = rcu_dereference(tm->tcpm_next), col++) { + if (!net_eq(tm_net(tm), net)) + continue; if (col < s_col) continue; if (tcp_metrics_dump_info(skb, cb, tm) < 0) { @@ -1004,7 +1016,8 @@ static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { if (addr_same(&tm->tcpm_daddr, &daddr) && - (!src || addr_same(&tm->tcpm_saddr, &saddr))) { + (!src || addr_same(&tm->tcpm_saddr, &saddr)) && + net_eq(tm_net(tm), net)) { ret = tcp_metrics_fill_info(msg, tm); break; } @@ -1081,7 +1094,8 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) spin_lock_bh(&tcp_metrics_lock); for (tm = deref_locked_genl(*pp); tm; tm = deref_locked_genl(*pp)) { if (addr_same(&tm->tcpm_daddr, &daddr) && - (!src || addr_same(&tm->tcpm_saddr, &saddr))) { + (!src || addr_same(&tm->tcpm_saddr, &saddr)) && + net_eq(tm_net(tm), net)) { *pp = tm->tcpm_next; kfree_rcu(tm, rcu_head); found = true; -- cgit From 8a4bff714fc088729abdd479acbfe934ddf16f7e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 13 Mar 2015 00:06:43 -0500 Subject: tcp_metrics: Remove the unused return code from tcp_metrics_flush_all tcp_metrics_flush_all always returns 0. Remove the unnecessary return code. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 461c3d2e1ca4..0d07e14f2ca5 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -1043,7 +1043,7 @@ out_free: #define deref_genl(p) rcu_dereference_protected(p, lockdep_genl_is_held()) -static int tcp_metrics_flush_all(struct net *net) +static void tcp_metrics_flush_all(struct net *net) { unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash; @@ -1064,7 +1064,6 @@ static int tcp_metrics_flush_all(struct net *net) tm = next; } } - return 0; } static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) @@ -1081,8 +1080,10 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) ret = parse_nl_addr(info, &daddr, &hash, 1); if (ret < 0) return ret; - if (ret > 0) - return tcp_metrics_flush_all(net); + if (ret > 0) { + tcp_metrics_flush_all(net); + return 0; + } ret = parse_nl_saddr(info, &saddr); if (ret < 0) src = false; -- cgit From 04f721c671656f93de888b1d176ba30b7336cca3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 13 Mar 2015 00:07:10 -0500 Subject: tcp_metrics: Rewrite tcp_metrics_flush_all Rewrite tcp_metrics_flush_all so that it can cope with entries from different network namespaces on it's hash chain. This is based on the logic in tcp_metrics_nl_cmd_del for deleting a selection of entries from a tcp metrics hash chain. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 0d07e14f2ca5..baccb070427d 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -1051,18 +1051,19 @@ static void tcp_metrics_flush_all(struct net *net) unsigned int row; for (row = 0; row < max_rows; row++, hb++) { + struct tcp_metrics_block __rcu **pp; spin_lock_bh(&tcp_metrics_lock); - tm = deref_locked_genl(hb->chain); - if (tm) - hb->chain = NULL; - spin_unlock_bh(&tcp_metrics_lock); - while (tm) { - struct tcp_metrics_block *next; - - next = deref_genl(tm->tcpm_next); - kfree_rcu(tm, rcu_head); - tm = next; + pp = &hb->chain; + for (tm = deref_locked_genl(*pp); tm; + tm = deref_locked_genl(*pp)) { + if (net_eq(tm_net(tm), net)) { + *pp = tm->tcpm_next; + kfree_rcu(tm, rcu_head); + } else { + pp = &tm->tcpm_next; + } } + spin_unlock_bh(&tcp_metrics_lock); } } -- cgit From 098a697b497e3154a1a583c1d34c67568acaadcc Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 13 Mar 2015 00:07:44 -0500 Subject: tcp_metrics: Use a single hash table for all network namespaces. Now that all of the operations are safe on a single hash table accross network namespaces, allocate a single global hash table and update the code to use it. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 66 +++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 36 deletions(-) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index baccb070427d..366728cbee4a 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -97,6 +97,9 @@ struct tcpm_hash_bucket { struct tcp_metrics_block __rcu *chain; }; +static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly; +static unsigned int tcp_metrics_hash_log __read_mostly; + static DEFINE_SPINLOCK(tcp_metrics_lock); static void tcpm_suck_dst(struct tcp_metrics_block *tm, @@ -177,7 +180,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, if (unlikely(reclaim)) { struct tcp_metrics_block *oldest; - oldest = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); + oldest = rcu_dereference(tcp_metrics_hash[hash].chain); for (tm = rcu_dereference(oldest->tcpm_next); tm; tm = rcu_dereference(tm->tcpm_next)) { if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp)) @@ -196,8 +199,8 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, tcpm_suck_dst(tm, dst, true); if (likely(!reclaim)) { - tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain; - rcu_assign_pointer(net->ipv4.tcp_metrics_hash[hash].chain, tm); + tm->tcpm_next = tcp_metrics_hash[hash].chain; + rcu_assign_pointer(tcp_metrics_hash[hash].chain, tm); } out_unlock: @@ -221,7 +224,7 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *s struct tcp_metrics_block *tm; int depth = 0; - for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { if (addr_same(&tm->tcpm_saddr, saddr) && addr_same(&tm->tcpm_daddr, daddr) && @@ -261,9 +264,9 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, net = dev_net(dst->dev); hash ^= net_hash_mix(net); - hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + hash = hash_32(hash, tcp_metrics_hash_log); - for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { if (addr_same(&tm->tcpm_saddr, &saddr) && addr_same(&tm->tcpm_daddr, &daddr) && @@ -310,9 +313,9 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock net = twsk_net(tw); hash ^= net_hash_mix(net); - hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + hash = hash_32(hash, tcp_metrics_hash_log); - for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { if (addr_same(&tm->tcpm_saddr, &saddr) && addr_same(&tm->tcpm_daddr, &daddr) && @@ -360,7 +363,7 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, net = dev_net(dst->dev); hash ^= net_hash_mix(net); - hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + hash = hash_32(hash, tcp_metrics_hash_log); tm = __tcp_get_metrics(&saddr, &daddr, net, hash); if (tm == TCP_METRICS_RECLAIM_PTR) @@ -911,13 +914,13 @@ static int tcp_metrics_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; + unsigned int max_rows = 1U << tcp_metrics_hash_log; unsigned int row, s_row = cb->args[0]; int s_col = cb->args[1], col = s_col; for (row = s_row; row < max_rows; row++, s_col = 0) { struct tcp_metrics_block *tm; - struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash + row; + struct tcpm_hash_bucket *hb = tcp_metrics_hash + row; rcu_read_lock(); for (col = 0, tm = rcu_dereference(hb->chain); tm; @@ -1010,10 +1013,10 @@ static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) goto nla_put_failure; hash ^= net_hash_mix(net); - hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + hash = hash_32(hash, tcp_metrics_hash_log); ret = -ESRCH; rcu_read_lock(); - for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { if (addr_same(&tm->tcpm_daddr, &daddr) && (!src || addr_same(&tm->tcpm_saddr, &saddr)) && @@ -1045,8 +1048,8 @@ out_free: static void tcp_metrics_flush_all(struct net *net) { - unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; - struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash; + unsigned int max_rows = 1U << tcp_metrics_hash_log; + struct tcpm_hash_bucket *hb = tcp_metrics_hash; struct tcp_metrics_block *tm; unsigned int row; @@ -1090,8 +1093,8 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) src = false; hash ^= net_hash_mix(net); - hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); - hb = net->ipv4.tcp_metrics_hash + hash; + hash = hash_32(hash, tcp_metrics_hash_log); + hb = tcp_metrics_hash + hash; pp = &hb->chain; spin_lock_bh(&tcp_metrics_lock); for (tm = deref_locked_genl(*pp); tm; tm = deref_locked_genl(*pp)) { @@ -1147,6 +1150,9 @@ static int __net_init tcp_net_metrics_init(struct net *net) size_t size; unsigned int slots; + if (!net_eq(net, &init_net)) + return 0; + slots = tcpmhash_entries; if (!slots) { if (totalram_pages >= 128 * 1024) @@ -1155,14 +1161,14 @@ static int __net_init tcp_net_metrics_init(struct net *net) slots = 8 * 1024; } - net->ipv4.tcp_metrics_hash_log = order_base_2(slots); - size = sizeof(struct tcpm_hash_bucket) << net->ipv4.tcp_metrics_hash_log; + tcp_metrics_hash_log = order_base_2(slots); + size = sizeof(struct tcpm_hash_bucket) << tcp_metrics_hash_log; - net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); - if (!net->ipv4.tcp_metrics_hash) - net->ipv4.tcp_metrics_hash = vzalloc(size); + tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); + if (!tcp_metrics_hash) + tcp_metrics_hash = vzalloc(size); - if (!net->ipv4.tcp_metrics_hash) + if (!tcp_metrics_hash) return -ENOMEM; return 0; @@ -1170,19 +1176,7 @@ static int __net_init tcp_net_metrics_init(struct net *net) static void __net_exit tcp_net_metrics_exit(struct net *net) { - unsigned int i; - - for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log) ; i++) { - struct tcp_metrics_block *tm, *next; - - tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1); - while (tm) { - next = rcu_dereference_protected(tm->tcpm_next, 1); - kfree(tm); - tm = next; - } - } - kvfree(net->ipv4.tcp_metrics_hash); + tcp_metrics_flush_all(net); } static __net_initdata struct pernet_operations tcp_net_metrics_ops = { -- cgit From 9f1ab18672bee992b6169bbfa2b5ae86b42e88a8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Mar 2015 07:14:34 -0700 Subject: tcp_metrics: fix wrong lockdep annotations Changes in tcp_metric hash table are protected by tcp_metrics_lock only, not by genl_mutex While we are at it use deref_locked() instead of rcu_dereference() in tcp_new() to avoid unnecessary barrier, as we hold tcp_metrics_lock as well. Reported-by: Andrew Vagin Signed-off-by: Eric Dumazet Fixes: 098a697b497e ("tcp_metrics: Use a single hash table for all network namespaces.") Reviewed-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 366728cbee4a..5bef3513af77 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -152,6 +152,9 @@ static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst #define TCP_METRICS_RECLAIM_DEPTH 5 #define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL +#define deref_locked(p) \ + rcu_dereference_protected(p, lockdep_is_held(&tcp_metrics_lock)) + static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, struct inetpeer_addr *saddr, struct inetpeer_addr *daddr, @@ -180,9 +183,9 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, if (unlikely(reclaim)) { struct tcp_metrics_block *oldest; - oldest = rcu_dereference(tcp_metrics_hash[hash].chain); - for (tm = rcu_dereference(oldest->tcpm_next); tm; - tm = rcu_dereference(tm->tcpm_next)) { + oldest = deref_locked(tcp_metrics_hash[hash].chain); + for (tm = deref_locked(oldest->tcpm_next); tm; + tm = deref_locked(tm->tcpm_next)) { if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp)) oldest = tm; } @@ -1040,12 +1043,6 @@ out_free: return ret; } -#define deref_locked_genl(p) \ - rcu_dereference_protected(p, lockdep_genl_is_held() && \ - lockdep_is_held(&tcp_metrics_lock)) - -#define deref_genl(p) rcu_dereference_protected(p, lockdep_genl_is_held()) - static void tcp_metrics_flush_all(struct net *net) { unsigned int max_rows = 1U << tcp_metrics_hash_log; @@ -1057,8 +1054,7 @@ static void tcp_metrics_flush_all(struct net *net) struct tcp_metrics_block __rcu **pp; spin_lock_bh(&tcp_metrics_lock); pp = &hb->chain; - for (tm = deref_locked_genl(*pp); tm; - tm = deref_locked_genl(*pp)) { + for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { if (net_eq(tm_net(tm), net)) { *pp = tm->tcpm_next; kfree_rcu(tm, rcu_head); @@ -1097,7 +1093,7 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) hb = tcp_metrics_hash + hash; pp = &hb->chain; spin_lock_bh(&tcp_metrics_lock); - for (tm = deref_locked_genl(*pp); tm; tm = deref_locked_genl(*pp)) { + for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { if (addr_same(&tm->tcpm_daddr, &daddr) && (!src || addr_same(&tm->tcpm_saddr, &saddr)) && net_eq(tm_net(tm), net)) { -- cgit From 8f55db48608b109ad8c7ff4b946ad39b3189a540 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Sun, 29 Mar 2015 16:59:23 +0200 Subject: tcp: simplify inetpeer_addr_base use In many places, the a6 field is typecasted to struct in6_addr. As the fields are in union anyway, just add in6_addr type to the union and get rid of the typecasting. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 5bef3513af77..f62c2c68ced0 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -80,17 +80,11 @@ static void tcp_metric_set(struct tcp_metrics_block *tm, static bool addr_same(const struct inetpeer_addr *a, const struct inetpeer_addr *b) { - const struct in6_addr *a6, *b6; - if (a->family != b->family) return false; if (a->family == AF_INET) return a->addr.a4 == b->addr.a4; - - a6 = (const struct in6_addr *) &a->addr.a6[0]; - b6 = (const struct in6_addr *) &b->addr.a6[0]; - - return ipv6_addr_equal(a6, b6); + return ipv6_addr_equal(&a->addr.in6, &b->addr.in6); } struct tcpm_hash_bucket { @@ -256,8 +250,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - *(struct in6_addr *)saddr.addr.a6 = inet_rsk(req)->ir_v6_loc_addr; - *(struct in6_addr *)daddr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr; + saddr.addr.in6 = inet_rsk(req)->ir_v6_loc_addr; + daddr.addr.in6 = inet_rsk(req)->ir_v6_rmt_addr; hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr); break; #endif @@ -304,9 +298,9 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock hash = (__force unsigned int) daddr.addr.a4; } else { saddr.family = AF_INET6; - *(struct in6_addr *)saddr.addr.a6 = tw->tw_v6_rcv_saddr; + saddr.addr.in6 = tw->tw_v6_rcv_saddr; daddr.family = AF_INET6; - *(struct in6_addr *)daddr.addr.a6 = tw->tw_v6_daddr; + daddr.addr.in6 = tw->tw_v6_daddr; hash = ipv6_addr_hash(&tw->tw_v6_daddr); } } @@ -354,9 +348,9 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, hash = (__force unsigned int) daddr.addr.a4; } else { saddr.family = AF_INET6; - *(struct in6_addr *)saddr.addr.a6 = sk->sk_v6_rcv_saddr; + saddr.addr.in6 = sk->sk_v6_rcv_saddr; daddr.family = AF_INET6; - *(struct in6_addr *)daddr.addr.a6 = sk->sk_v6_daddr; + daddr.addr.in6 = sk->sk_v6_daddr; hash = ipv6_addr_hash(&sk->sk_v6_daddr); } } @@ -966,7 +960,7 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, addr->family = AF_INET6; memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6)); if (hash) - *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6); + *hash = ipv6_addr_hash(&addr->addr.in6); return 0; } return optional ? 1 : -EAFNOSUPPORT; -- cgit From 930345ea630405aa6e6f42efcb149c3f360a6b67 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Sun, 29 Mar 2015 16:59:25 +0200 Subject: netlink: implement nla_put_in_addr and nla_put_in6_addr IP addresses are often stored in netlink attributes. Add generic functions to do that. For nla_put_in_addr, it would be nicer to pass struct in_addr but this is not used universally throughout the kernel, in way too many places __be32 is used to store IPv4 address. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index f62c2c68ced0..32e36ea6bc0f 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -786,19 +786,19 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, switch (tm->tcpm_daddr.family) { case AF_INET: - if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4, - tm->tcpm_daddr.addr.a4) < 0) + if (nla_put_in_addr(msg, TCP_METRICS_ATTR_ADDR_IPV4, + tm->tcpm_daddr.addr.a4) < 0) goto nla_put_failure; - if (nla_put_be32(msg, TCP_METRICS_ATTR_SADDR_IPV4, - tm->tcpm_saddr.addr.a4) < 0) + if (nla_put_in_addr(msg, TCP_METRICS_ATTR_SADDR_IPV4, + tm->tcpm_saddr.addr.a4) < 0) goto nla_put_failure; break; case AF_INET6: - if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16, - tm->tcpm_daddr.addr.a6) < 0) + if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_ADDR_IPV6, + &tm->tcpm_daddr.addr.in6) < 0) goto nla_put_failure; - if (nla_put(msg, TCP_METRICS_ATTR_SADDR_IPV6, 16, - tm->tcpm_saddr.addr.a6) < 0) + if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_SADDR_IPV6, + &tm->tcpm_saddr.addr.in6) < 0) goto nla_put_failure; break; default: -- cgit From 67b61f6c130a05b2cd4c3dfded49a751ff42c534 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Sun, 29 Mar 2015 16:59:26 +0200 Subject: netlink: implement nla_get_in_addr and nla_get_in6_addr Those are counterparts to nla_put_in_addr and nla_put_in6_addr. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 32e36ea6bc0f..71ec14c87579 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -948,7 +948,7 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, a = info->attrs[v4]; if (a) { addr->family = AF_INET; - addr->addr.a4 = nla_get_be32(a); + addr->addr.a4 = nla_get_in_addr(a); if (hash) *hash = (__force unsigned int) addr->addr.a4; return 0; @@ -958,7 +958,7 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, if (nla_len(a) != sizeof(struct in6_addr)) return -EINVAL; addr->family = AF_INET6; - memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6)); + addr->addr.in6 = nla_get_in6_addr(a); if (hash) *hash = ipv6_addr_hash(&addr->addr.in6); return 0; -- cgit From 51456b2914a34d16b1255b7c55d5cbf6a681d306 Mon Sep 17 00:00:00 2001 From: Ian Morris Date: Fri, 3 Apr 2015 09:17:26 +0100 Subject: ipv4: coding style: comparison for equality with NULL The ipv4 code uses a mixture of coding styles. In some instances check for NULL pointer is done as x == NULL and sometimes as !x. !x is preferred according to checkpatch and this patch makes the code consistent by adopting the latter form. No changes detected by objdiff. Signed-off-by: Ian Morris Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 71ec14c87579..78ecc4a01712 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -505,7 +505,7 @@ void tcp_init_metrics(struct sock *sk) struct tcp_metrics_block *tm; u32 val, crtt = 0; /* cached RTT scaled by 8 */ - if (dst == NULL) + if (!dst) goto reset; dst_confirm(dst); -- cgit From 2646c831c00c5d22aa72b79d24069c1b412cda7c Mon Sep 17 00:00:00 2001 From: Daniel Lee Date: Mon, 6 Apr 2015 14:37:27 -0700 Subject: tcp: RFC7413 option support for Fast Open client Fast Open has been using an experimental option with a magic number (RFC6994). This patch makes the client by default use the RFC7413 option (34) to get and send Fast Open cookies. This patch makes the client solicit cookies from a given server first with the RFC7413 option. If that fails to elicit a cookie, then it tries the RFC6994 experimental option. If that also fails, it uses the RFC7413 option on all subsequent connect attempts. If the server returns a Fast Open cookie then the client caches the form of the option that successfully elicited a cookie, and uses that form on later connects when it presents that cookie. The idea is to gradually obsolete the use of experimental options as the servers and clients upgrade, while keeping the interoperability meanwhile. Signed-off-by: Daniel Lee Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_metrics.c') diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 78ecc4a01712..a51d63a43e33 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -28,7 +28,8 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *s struct tcp_fastopen_metrics { u16 mss; - u16 syn_loss:10; /* Recurring Fast Open SYN losses */ + u16 syn_loss:10, /* Recurring Fast Open SYN losses */ + try_exp:2; /* Request w/ exp. option (once) */ unsigned long last_syn_loss; /* Last Fast Open SYN loss */ struct tcp_fastopen_cookie cookie; }; @@ -131,6 +132,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, if (fastopen_clear) { tm->tcpm_fastopen.mss = 0; tm->tcpm_fastopen.syn_loss = 0; + tm->tcpm_fastopen.try_exp = 0; + tm->tcpm_fastopen.cookie.exp = false; tm->tcpm_fastopen.cookie.len = 0; } } @@ -713,6 +716,8 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, if (tfom->mss) *mss = tfom->mss; *cookie = tfom->cookie; + if (cookie->len <= 0 && tfom->try_exp == 1) + cookie->exp = true; *syn_loss = tfom->syn_loss; *last_syn_loss = *syn_loss ? tfom->last_syn_loss : 0; } while (read_seqretry(&fastopen_seqlock, seq)); @@ -721,7 +726,8 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, } void tcp_fastopen_cache_set(struct sock *sk, u16 mss, - struct tcp_fastopen_cookie *cookie, bool syn_lost) + struct tcp_fastopen_cookie *cookie, bool syn_lost, + u16 try_exp) { struct dst_entry *dst = __sk_dst_get(sk); struct tcp_metrics_block *tm; @@ -738,6 +744,9 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, tfom->mss = mss; if (cookie && cookie->len > 0) tfom->cookie = *cookie; + else if (try_exp > tfom->try_exp && + tfom->cookie.len <= 0 && !tfom->cookie.exp) + tfom->try_exp = try_exp; if (syn_lost) { ++tfom->syn_loss; tfom->last_syn_loss = jiffies; -- cgit