diff options
author | Steffen Klassert <steffen.klassert@secunet.com> | 2024-08-27 07:01:20 +0200 |
---|---|---|
committer | Steffen Klassert <steffen.klassert@secunet.com> | 2024-08-27 07:01:20 +0200 |
commit | 5ce90c842ef57bceb515503347306174e231575f (patch) | |
tree | c9b6d360628828a71c8c8a034ca5d96b6e10359e | |
parent | 54f2f78d6b9f1f90e91aaf5dbb34a6198f65fdfd (diff) | |
parent | a54ad727f74559f7c3dfcfd2a63d0ce7683a82e8 (diff) |
Merge branch 'xfrm: speed up policy insertions'
Florian Westphal says:
====================
Policy insertions do not scale well, due to both a lienar list walk
to find the insertion spot and another list walk to set the 'pos' value
(a tie-breaker to detect which policy is older when there is ambiguity
as to which one should be matched).
First patch gets rid of the second list walk on insert.
Rest of the patches get rid of the insertion walk.
This list walk was only needed because when I moved the policy db
implementation to rbtree I retained the old insertion method for the
sake of XFRM_MIGRATE.
Switching that to tree-based lookup avoids the need for the full
list search.
After this, insertion of a policy is largely independent of the number
of pre-existing policies as long as they do not share the same source/
destination networks.
Note that this is compile tested only as I did not find any
tests for XFRM_MIGRATE.
====================
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
-rw-r--r-- | include/net/xfrm.h | 1 | ||||
-rw-r--r-- | net/xfrm/xfrm_policy.c | 201 | ||||
-rw-r--r-- | tools/testing/selftests/net/Makefile | 2 | ||||
-rwxr-xr-x | tools/testing/selftests/net/xfrm_policy_add_speed.sh | 83 |
4 files changed, 175 insertions, 112 deletions
diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f7244ac4fa08..1fa2da22a49e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -555,7 +555,6 @@ struct xfrm_policy { u16 family; struct xfrm_sec_ctx *security; struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; - struct hlist_node bydst_inexact_list; struct rcu_head rcu; struct xfrm_dev_offload xdo; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c56c61b0c12e..b79ac453ea37 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -196,8 +196,6 @@ xfrm_policy_inexact_lookup_rcu(struct net *net, static struct xfrm_policy * xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy, bool excl); -static void xfrm_policy_insert_inexact_list(struct hlist_head *chain, - struct xfrm_policy *policy); static bool xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand, @@ -410,7 +408,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) if (policy) { write_pnet(&policy->xp_net, net); INIT_LIST_HEAD(&policy->walk.all); - INIT_HLIST_NODE(&policy->bydst_inexact_list); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); @@ -1228,26 +1225,31 @@ xfrm_policy_inexact_insert(struct xfrm_policy *policy, u8 dir, int excl) return ERR_PTR(-EEXIST); } - chain = &net->xfrm.policy_inexact[dir]; - xfrm_policy_insert_inexact_list(chain, policy); - if (delpol) __xfrm_policy_inexact_prune_bin(bin, false); return delpol; } +static bool xfrm_policy_is_dead_or_sk(const struct xfrm_policy *policy) +{ + int dir; + + if (policy->walk.dead) + return true; + + dir = xfrm_policy_id2dir(policy->index); + return dir >= XFRM_POLICY_MAX; +} + static void xfrm_hash_rebuild(struct work_struct *work) { struct net *net = container_of(work, struct net, xfrm.policy_hthresh.work); - unsigned int hmask; struct xfrm_policy *pol; struct xfrm_policy *policy; struct hlist_head *chain; - struct hlist_head *odst; struct hlist_node *newpos; - int i; int dir; unsigned seq; u8 lbits4, rbits4, lbits6, rbits6; @@ -1311,23 +1313,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) goto out_unlock; } - /* reset the bydst and inexact table in all directions */ for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - struct hlist_node *n; - - hlist_for_each_entry_safe(policy, n, - &net->xfrm.policy_inexact[dir], - bydst_inexact_list) { - hlist_del_rcu(&policy->bydst); - hlist_del_init(&policy->bydst_inexact_list); - } - - hmask = net->xfrm.policy_bydst[dir].hmask; - odst = net->xfrm.policy_bydst[dir].table; - for (i = hmask; i >= 0; i--) { - hlist_for_each_entry_safe(policy, n, odst + i, bydst) - hlist_del_rcu(&policy->bydst); - } if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) { /* dir out => dst = remote, src = local */ net->xfrm.policy_bydst[dir].dbits4 = rbits4; @@ -1352,6 +1338,9 @@ static void xfrm_hash_rebuild(struct work_struct *work) /* skip socket policies */ continue; } + + hlist_del_rcu(&policy->bydst); + newpos = NULL; chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); @@ -1519,42 +1508,6 @@ static const struct rhashtable_params xfrm_pol_inexact_params = { .automatic_shrinking = true, }; -static void xfrm_policy_insert_inexact_list(struct hlist_head *chain, - struct xfrm_policy *policy) -{ - struct xfrm_policy *pol, *delpol = NULL; - struct hlist_node *newpos = NULL; - int i = 0; - - hlist_for_each_entry(pol, chain, bydst_inexact_list) { - if (pol->type == policy->type && - pol->if_id == policy->if_id && - !selector_cmp(&pol->selector, &policy->selector) && - xfrm_policy_mark_match(&policy->mark, pol) && - xfrm_sec_ctx_match(pol->security, policy->security) && - !WARN_ON(delpol)) { - delpol = pol; - if (policy->priority > pol->priority) - continue; - } else if (policy->priority >= pol->priority) { - newpos = &pol->bydst_inexact_list; - continue; - } - if (delpol) - break; - } - - if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET) - hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos); - else - hlist_add_head_rcu(&policy->bydst_inexact_list, chain); - - hlist_for_each_entry(pol, chain, bydst_inexact_list) { - pol->pos = i; - i++; - } -} - static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy, bool excl) @@ -2294,10 +2247,52 @@ out: return pol; } +static u32 xfrm_gen_pos_slow(struct net *net) +{ + struct xfrm_policy *policy; + u32 i = 0; + + /* oldest entry is last in list */ + list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { + if (!xfrm_policy_is_dead_or_sk(policy)) + policy->pos = ++i; + } + + return i; +} + +static u32 xfrm_gen_pos(struct net *net) +{ + const struct xfrm_policy *policy; + u32 i = 0; + + /* most recently added policy is at the head of the list */ + list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) { + if (xfrm_policy_is_dead_or_sk(policy)) + continue; + + if (policy->pos == UINT_MAX) + return xfrm_gen_pos_slow(net); + + i = policy->pos + 1; + break; + } + + return i; +} + static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { struct net *net = xp_net(pol); + switch (dir) { + case XFRM_POLICY_IN: + case XFRM_POLICY_FWD: + case XFRM_POLICY_OUT: + pol->pos = xfrm_gen_pos(net); + break; + } + list_add(&pol->walk.all, &net->xfrm.policy_all); net->xfrm.policy_count[dir]++; xfrm_pol_hold(pol); @@ -2314,7 +2309,6 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, /* Socket policies are not hashed. */ if (!hlist_unhashed(&pol->bydst)) { hlist_del_rcu(&pol->bydst); - hlist_del_init(&pol->bydst_inexact_list); hlist_del(&pol->byidx); } @@ -4437,63 +4431,50 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete); #endif #ifdef CONFIG_XFRM_MIGRATE -static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, - const struct xfrm_selector *sel_tgt) -{ - if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { - if (sel_tgt->family == sel_cmp->family && - xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, - sel_cmp->family) && - xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, - sel_cmp->family) && - sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && - sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { - return true; - } - } else { - if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) { - return true; - } - } - return false; -} - static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, u8 dir, u8 type, struct net *net, u32 if_id) { struct xfrm_policy *pol, *ret = NULL; - struct hlist_head *chain; - u32 priority = ~0U; + struct flowi fl; - spin_lock_bh(&net->xfrm.xfrm_policy_lock); - chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); - hlist_for_each_entry(pol, chain, bydst) { - if ((if_id == 0 || pol->if_id == if_id) && - xfrm_migrate_selector_match(sel, &pol->selector) && - pol->type == type) { - ret = pol; - priority = ret->priority; - break; - } - } - chain = &net->xfrm.policy_inexact[dir]; - hlist_for_each_entry(pol, chain, bydst_inexact_list) { - if ((pol->priority >= priority) && ret) - break; + memset(&fl, 0, sizeof(fl)); - if ((if_id == 0 || pol->if_id == if_id) && - xfrm_migrate_selector_match(sel, &pol->selector) && - pol->type == type) { - ret = pol; + fl.flowi_proto = sel->proto; + + switch (sel->family) { + case AF_INET: + fl.u.ip4.saddr = sel->saddr.a4; + fl.u.ip4.daddr = sel->daddr.a4; + if (sel->proto == IPSEC_ULPROTO_ANY) break; - } + fl.u.flowi4_oif = sel->ifindex; + fl.u.ip4.fl4_sport = sel->sport; + fl.u.ip4.fl4_dport = sel->dport; + break; + case AF_INET6: + fl.u.ip6.saddr = sel->saddr.in6; + fl.u.ip6.daddr = sel->daddr.in6; + if (sel->proto == IPSEC_ULPROTO_ANY) + break; + fl.u.flowi6_oif = sel->ifindex; + fl.u.ip6.fl4_sport = sel->sport; + fl.u.ip6.fl4_dport = sel->dport; + break; + default: + return ERR_PTR(-EAFNOSUPPORT); } - xfrm_pol_hold(ret); + rcu_read_lock(); - spin_unlock_bh(&net->xfrm.xfrm_policy_lock); + pol = xfrm_policy_lookup_bytype(net, type, &fl, sel->family, dir, if_id); + if (IS_ERR_OR_NULL(pol)) + goto out_unlock; - return ret; + if (!xfrm_pol_hold_rcu(ret)) + pol = NULL; +out_unlock: + rcu_read_unlock(); + return pol; } static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t) @@ -4630,9 +4611,9 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, /* Stage 1 - find policy */ pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id); - if (!pol) { + if (IS_ERR_OR_NULL(pol)) { NL_SET_ERR_MSG(extack, "Target policy not found"); - err = -ENOENT; + err = IS_ERR(pol) ? PTR_ERR(pol) : -ENOENT; goto out; } diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 8eaffd7a641c..e127a80ff713 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -56,7 +56,7 @@ TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh TEST_PROGS += netns-sysctl.sh -TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh +TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite diff --git a/tools/testing/selftests/net/xfrm_policy_add_speed.sh b/tools/testing/selftests/net/xfrm_policy_add_speed.sh new file mode 100755 index 000000000000..2fab29d3cb91 --- /dev/null +++ b/tools/testing/selftests/net/xfrm_policy_add_speed.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +source lib.sh + +timeout=4m +ret=0 +tmp=$(mktemp) +cleanup() { + cleanup_all_ns + rm -f "$tmp" +} + +trap cleanup EXIT + +maxpolicies=100000 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && maxpolicies=10000 + +do_dummies4() { + local dir="$1" + local max="$2" + + local policies + local pfx + pfx=30 + policies=0 + + ip netns exec "$ns" ip xfrm policy flush + + for i in $(seq 1 100);do + local s + local d + for j in $(seq 1 255);do + s=$((i+0)) + d=$((i+100)) + + for a in $(seq 1 8 255); do + policies=$((policies+1)) + [ "$policies" -gt "$max" ] && return + echo xfrm policy add src 10.$s.$j.0/30 dst 10.$d.$j.$a/$pfx dir $dir action block + done + for a in $(seq 1 8 255); do + policies=$((policies+1)) + [ "$policies" -gt "$max" ] && return + echo xfrm policy add src 10.$s.$j.$a/30 dst 10.$d.$j.0/$pfx dir $dir action block + done + done + done +} + +setup_ns ns + +do_bench() +{ + local max="$1" + + start=$(date +%s%3N) + do_dummies4 "out" "$max" > "$tmp" + if ! timeout "$timeout" ip netns exec "$ns" ip -batch "$tmp";then + echo "WARNING: policy insertion cancelled after $timeout" + ret=1 + fi + stop=$(date +%s%3N) + + result=$((stop-start)) + + policies=$(wc -l < "$tmp") + printf "Inserted %-06s policies in $result ms\n" $policies + + have=$(ip netns exec "$ns" ip xfrm policy show | grep "action block" | wc -l) + if [ "$have" -ne "$policies" ]; then + echo "WARNING: mismatch, have $have policies, expected $policies" + ret=1 + fi +} + +p=100 +while [ $p -le "$maxpolicies" ]; do + do_bench "$p" + p="${p}0" +done + +exit $ret |