summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--net/core/lwtunnel.c65
-rw-r--r--net/ipv6/ioam6_iptunnel.c8
-rw-r--r--tools/testing/selftests/net/Makefile1
-rw-r--r--tools/testing/selftests/net/config2
-rwxr-xr-xtools/testing/selftests/net/lwt_dst_cache_ref_loop.sh246
5 files changed, 306 insertions, 16 deletions
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 711cd3b4347a..4417a18b3e95 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -23,6 +23,8 @@
#include <net/ip6_fib.h>
#include <net/rtnh.h>
+#include "dev.h"
+
DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);
@@ -325,13 +327,23 @@ EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap);
int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
const struct lwtunnel_encap_ops *ops;
struct lwtunnel_state *lwtstate;
- int ret = -EINVAL;
+ struct dst_entry *dst;
+ int ret;
+
+ if (dev_xmit_recursion()) {
+ net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
+ __func__);
+ ret = -ENETDOWN;
+ goto drop;
+ }
- if (!dst)
+ dst = skb_dst(skb);
+ if (!dst) {
+ ret = -EINVAL;
goto drop;
+ }
lwtstate = dst->lwtstate;
if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
@@ -341,8 +353,11 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
ret = -EOPNOTSUPP;
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
- if (likely(ops && ops->output))
+ if (likely(ops && ops->output)) {
+ dev_xmit_recursion_inc();
ret = ops->output(net, sk, skb);
+ dev_xmit_recursion_dec();
+ }
rcu_read_unlock();
if (ret == -EOPNOTSUPP)
@@ -359,13 +374,23 @@ EXPORT_SYMBOL_GPL(lwtunnel_output);
int lwtunnel_xmit(struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
const struct lwtunnel_encap_ops *ops;
struct lwtunnel_state *lwtstate;
- int ret = -EINVAL;
+ struct dst_entry *dst;
+ int ret;
+
+ if (dev_xmit_recursion()) {
+ net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
+ __func__);
+ ret = -ENETDOWN;
+ goto drop;
+ }
- if (!dst)
+ dst = skb_dst(skb);
+ if (!dst) {
+ ret = -EINVAL;
goto drop;
+ }
lwtstate = dst->lwtstate;
@@ -376,8 +401,11 @@ int lwtunnel_xmit(struct sk_buff *skb)
ret = -EOPNOTSUPP;
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
- if (likely(ops && ops->xmit))
+ if (likely(ops && ops->xmit)) {
+ dev_xmit_recursion_inc();
ret = ops->xmit(skb);
+ dev_xmit_recursion_dec();
+ }
rcu_read_unlock();
if (ret == -EOPNOTSUPP)
@@ -394,13 +422,23 @@ EXPORT_SYMBOL_GPL(lwtunnel_xmit);
int lwtunnel_input(struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
const struct lwtunnel_encap_ops *ops;
struct lwtunnel_state *lwtstate;
- int ret = -EINVAL;
+ struct dst_entry *dst;
+ int ret;
- if (!dst)
+ if (dev_xmit_recursion()) {
+ net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
+ __func__);
+ ret = -ENETDOWN;
goto drop;
+ }
+
+ dst = skb_dst(skb);
+ if (!dst) {
+ ret = -EINVAL;
+ goto drop;
+ }
lwtstate = dst->lwtstate;
if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
@@ -410,8 +448,11 @@ int lwtunnel_input(struct sk_buff *skb)
ret = -EOPNOTSUPP;
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
- if (likely(ops && ops->input))
+ if (likely(ops && ops->input)) {
+ dev_xmit_recursion_inc();
ret = ops->input(skb);
+ dev_xmit_recursion_dec();
+ }
rcu_read_unlock();
if (ret == -EOPNOTSUPP)
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index 2c383c12a431..09065187378e 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -337,7 +337,6 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL;
- struct in6_addr orig_daddr;
struct ioam6_lwt *ilwt;
int err = -EINVAL;
u32 pkt_cnt;
@@ -352,8 +351,6 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (pkt_cnt % ilwt->freq.n >= ilwt->freq.k)
goto out;
- orig_daddr = ipv6_hdr(skb)->daddr;
-
local_bh_disable();
cache_dst = dst_cache_get(&ilwt->cache);
local_bh_enable();
@@ -422,7 +419,10 @@ do_encap:
goto drop;
}
- if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
+ /* avoid lwtunnel_output() reentry loop when destination is the same
+ * after transformation (e.g., with the inline mode)
+ */
+ if (dst->lwtstate != cache_dst->lwtstate) {
skb_dst_drop(skb);
skb_dst_set(skb, cache_dst);
return dst_output(net, sk, skb);
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 5916f3b81c39..843ab747645d 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -101,6 +101,7 @@ TEST_PROGS += vlan_bridge_binding.sh
TEST_PROGS += bpf_offload.py
TEST_PROGS += ipv6_route_update_soft_lockup.sh
TEST_PROGS += busy_poll_test.sh
+TEST_PROGS += lwt_dst_cache_ref_loop.sh
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller netlink-dumps
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 5b9baf708950..61e5116987f3 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -107,3 +107,5 @@ CONFIG_XFRM_INTERFACE=m
CONFIG_XFRM_USER=m
CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IPV6_ILA=m
+CONFIG_IPV6_RPL_LWTUNNEL=y
diff --git a/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh
new file mode 100755
index 000000000000..881eb399798f
--- /dev/null
+++ b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh
@@ -0,0 +1,246 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Author: Justin Iurman <justin.iurman@uliege.be>
+#
+# WARNING
+# -------
+# This is just a dummy script that triggers encap cases with possible dst cache
+# reference loops in affected lwt users (see list below). Some cases are
+# pathological configurations for simplicity, others are valid. Overall, we
+# don't want this issue to happen, no matter what. In order to catch any
+# reference loops, kmemleak MUST be used. The results alone are always blindly
+# successful, don't rely on them. Note that the following tests may crash the
+# kernel if the fix to prevent lwtunnel_{input|output|xmit}() reentry loops is
+# not present.
+#
+# Affected lwt users so far (please update accordingly if needed):
+# - ila_lwt (output only)
+# - ioam6_iptunnel (output only)
+# - rpl_iptunnel (both input and output)
+# - seg6_iptunnel (both input and output)
+
+source lib.sh
+
+check_compatibility()
+{
+ setup_ns tmp_node &>/dev/null
+ if [ $? != 0 ]; then
+ echo "SKIP: Cannot create netns."
+ exit $ksft_skip
+ fi
+
+ ip link add name veth0 netns $tmp_node type veth \
+ peer name veth1 netns $tmp_node &>/dev/null
+ local ret=$?
+
+ ip -netns $tmp_node link set veth0 up &>/dev/null
+ ret=$((ret + $?))
+
+ ip -netns $tmp_node link set veth1 up &>/dev/null
+ ret=$((ret + $?))
+
+ if [ $ret != 0 ]; then
+ echo "SKIP: Cannot configure links."
+ cleanup_ns $tmp_node
+ exit $ksft_skip
+ fi
+
+ lsmod 2>/dev/null | grep -q "ila"
+ ila_lsmod=$?
+ [ $ila_lsmod != 0 ] && modprobe ila &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:1::/64 \
+ encap ila 1:2:3:4 csum-mode no-action ident-type luid \
+ hook-type output \
+ dev veth0 &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:2::/64 \
+ encap ioam6 trace prealloc type 0x800000 ns 0 size 4 \
+ dev veth0 &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:3::/64 \
+ encap rpl segs 2001:db8:3::1 dev veth0 &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:4::/64 \
+ encap seg6 mode inline segs 2001:db8:4::1 dev veth0 &>/dev/null
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ila"
+ skip_ila=$?
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ioam6"
+ skip_ioam6=$?
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap rpl"
+ skip_rpl=$?
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap seg6"
+ skip_seg6=$?
+
+ cleanup_ns $tmp_node
+}
+
+setup()
+{
+ setup_ns alpha beta gamma &>/dev/null
+
+ ip link add name veth-alpha netns $alpha type veth \
+ peer name veth-betaL netns $beta &>/dev/null
+
+ ip link add name veth-betaR netns $beta type veth \
+ peer name veth-gamma netns $gamma &>/dev/null
+
+ ip -netns $alpha link set veth-alpha name veth0 &>/dev/null
+ ip -netns $beta link set veth-betaL name veth0 &>/dev/null
+ ip -netns $beta link set veth-betaR name veth1 &>/dev/null
+ ip -netns $gamma link set veth-gamma name veth0 &>/dev/null
+
+ ip -netns $alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null
+ ip -netns $alpha link set veth0 up &>/dev/null
+ ip -netns $alpha link set lo up &>/dev/null
+ ip -netns $alpha route add 2001:db8:2::/64 \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ ip -netns $beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null
+ ip -netns $beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null
+ ip -netns $beta link set veth0 up &>/dev/null
+ ip -netns $beta link set veth1 up &>/dev/null
+ ip -netns $beta link set lo up &>/dev/null
+ ip -netns $beta route del 2001:db8:2::/64
+ ip -netns $beta route add 2001:db8:2::/64 dev veth1
+ ip netns exec $beta \
+ sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null
+
+ ip -netns $gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null
+ ip -netns $gamma link set veth0 up &>/dev/null
+ ip -netns $gamma link set lo up &>/dev/null
+ ip -netns $gamma route add 2001:db8:1::/64 \
+ via 2001:db8:2::1 dev veth0 &>/dev/null
+
+ sleep 1
+
+ ip netns exec $alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null
+ if [ $? != 0 ]; then
+ echo "SKIP: Setup failed."
+ exit $ksft_skip
+ fi
+
+ sleep 1
+}
+
+cleanup()
+{
+ cleanup_ns $alpha $beta $gamma
+ [ $ila_lsmod != 0 ] && modprobe -r ila &>/dev/null
+}
+
+run_ila()
+{
+ if [ $skip_ila != 0 ]; then
+ echo "SKIP: ila (output)"
+ return
+ fi
+
+ ip -netns $beta route del 2001:db8:2::/64
+ ip -netns $beta route add 2001:db8:2:0:0:0:0:2/128 \
+ encap ila 2001:db8:2:0 csum-mode no-action ident-type luid \
+ hook-type output \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: ila (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+
+ ip -netns $beta route del 2001:db8:2:0:0:0:0:2/128
+ ip -netns $beta route add 2001:db8:2::/64 dev veth1
+ sleep 1
+}
+
+run_ioam6()
+{
+ if [ $skip_ioam6 != 0 ]; then
+ echo "SKIP: ioam6 (output)"
+ return
+ fi
+
+ ip -netns $beta route change 2001:db8:2::/64 \
+ encap ioam6 trace prealloc type 0x800000 ns 1 size 4 \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: ioam6 (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+}
+
+run_rpl()
+{
+ if [ $skip_rpl != 0 ]; then
+ echo "SKIP: rpl (input)"
+ echo "SKIP: rpl (output)"
+ return
+ fi
+
+ ip -netns $beta route change 2001:db8:2::/64 \
+ encap rpl segs 2001:db8:2::2 \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: rpl (input)"
+ ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+
+ echo "TEST: rpl (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+}
+
+run_seg6()
+{
+ if [ $skip_seg6 != 0 ]; then
+ echo "SKIP: seg6 (input)"
+ echo "SKIP: seg6 (output)"
+ return
+ fi
+
+ ip -netns $beta route change 2001:db8:2::/64 \
+ encap seg6 mode inline segs 2001:db8:2::2 \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: seg6 (input)"
+ ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+
+ echo "TEST: seg6 (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+}
+
+run()
+{
+ run_ila
+ run_ioam6
+ run_rpl
+ run_seg6
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+ echo "SKIP: Need root privileges."
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool."
+ exit $ksft_skip
+fi
+
+check_compatibility
+
+trap cleanup EXIT
+
+setup
+run
+
+exit $ksft_pass