diff options
| author | Paolo Abeni <pabeni@redhat.com> | 2025-03-20 11:25:55 +0100 |
|---|---|---|
| committer | Paolo Abeni <pabeni@redhat.com> | 2025-03-20 11:25:56 +0100 |
| commit | f31b6fbfe8c7f1da319b5a51a08ed730bf7e7fc2 (patch) | |
| tree | 601a74870b5582f60ed6cb23a7980fa33facb172 /net | |
| parent | 47a9b5e52abd2b717dfc8b9460589f89936d93cf (diff) | |
| parent | 3ed61b8938c66680e13a1d1929afb9b145c26a86 (diff) | |
Merge branch 'net-fix-lwtunnel-reentry-loops'
Justin Iurman says:
====================
net: fix lwtunnel reentry loops
When the destination is the same after the transformation, we enter a
lwtunnel loop. This is true for most of lwt users: ioam6, rpl, seg6,
seg6_local, ila_lwt, and lwt_bpf. It can happen in their input() and
output() handlers respectively, where either dst_input() or dst_output()
is called at the end. It can also happen in xmit() handlers.
Here is an example for rpl_input():
dump_stack_lvl+0x60/0x80
rpl_input+0x9d/0x320
lwtunnel_input+0x64/0xa0
lwtunnel_input+0x64/0xa0
lwtunnel_input+0x64/0xa0
lwtunnel_input+0x64/0xa0
lwtunnel_input+0x64/0xa0
[...]
lwtunnel_input+0x64/0xa0
lwtunnel_input+0x64/0xa0
lwtunnel_input+0x64/0xa0
lwtunnel_input+0x64/0xa0
lwtunnel_input+0x64/0xa0
ip6_sublist_rcv_finish+0x85/0x90
ip6_sublist_rcv+0x236/0x2f0
... until rpl_do_srh() fails, which means skb_cow_head() failed.
This series provides a fix at the core level of lwtunnel to catch such
loops when they're not caught by the respective lwtunnel users, and
handle the loop case in ioam6 which is one of the users. This series
also comes with a new selftest to detect some dst cache reference loops
in lwtunnel users.
====================
Link: https://patch.msgid.link/20250314120048.12569-1-justin.iurman@uliege.be
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Diffstat (limited to 'net')
| -rw-r--r-- | net/core/lwtunnel.c | 65 | ||||
| -rw-r--r-- | net/ipv6/ioam6_iptunnel.c | 8 |
2 files changed, 57 insertions, 16 deletions
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 711cd3b4347a..4417a18b3e95 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -23,6 +23,8 @@ #include <net/ip6_fib.h> #include <net/rtnh.h> +#include "dev.h" + DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled); @@ -325,13 +327,23 @@ EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap); int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); const struct lwtunnel_encap_ops *ops; struct lwtunnel_state *lwtstate; - int ret = -EINVAL; + struct dst_entry *dst; + int ret; + + if (dev_xmit_recursion()) { + net_crit_ratelimited("%s(): recursion limit reached on datapath\n", + __func__); + ret = -ENETDOWN; + goto drop; + } - if (!dst) + dst = skb_dst(skb); + if (!dst) { + ret = -EINVAL; goto drop; + } lwtstate = dst->lwtstate; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || @@ -341,8 +353,11 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); - if (likely(ops && ops->output)) + if (likely(ops && ops->output)) { + dev_xmit_recursion_inc(); ret = ops->output(net, sk, skb); + dev_xmit_recursion_dec(); + } rcu_read_unlock(); if (ret == -EOPNOTSUPP) @@ -359,13 +374,23 @@ EXPORT_SYMBOL_GPL(lwtunnel_output); int lwtunnel_xmit(struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); const struct lwtunnel_encap_ops *ops; struct lwtunnel_state *lwtstate; - int ret = -EINVAL; + struct dst_entry *dst; + int ret; + + if (dev_xmit_recursion()) { + net_crit_ratelimited("%s(): recursion limit reached on datapath\n", + __func__); + ret = -ENETDOWN; + goto drop; + } - if (!dst) + dst = skb_dst(skb); + if (!dst) { + ret = -EINVAL; goto drop; + } lwtstate = dst->lwtstate; @@ -376,8 +401,11 @@ int lwtunnel_xmit(struct sk_buff *skb) ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); - if (likely(ops && ops->xmit)) + if (likely(ops && ops->xmit)) { + dev_xmit_recursion_inc(); ret = ops->xmit(skb); + dev_xmit_recursion_dec(); + } rcu_read_unlock(); if (ret == -EOPNOTSUPP) @@ -394,13 +422,23 @@ EXPORT_SYMBOL_GPL(lwtunnel_xmit); int lwtunnel_input(struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); const struct lwtunnel_encap_ops *ops; struct lwtunnel_state *lwtstate; - int ret = -EINVAL; + struct dst_entry *dst; + int ret; - if (!dst) + if (dev_xmit_recursion()) { + net_crit_ratelimited("%s(): recursion limit reached on datapath\n", + __func__); + ret = -ENETDOWN; goto drop; + } + + dst = skb_dst(skb); + if (!dst) { + ret = -EINVAL; + goto drop; + } lwtstate = dst->lwtstate; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || @@ -410,8 +448,11 @@ int lwtunnel_input(struct sk_buff *skb) ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); - if (likely(ops && ops->input)) + if (likely(ops && ops->input)) { + dev_xmit_recursion_inc(); ret = ops->input(skb); + dev_xmit_recursion_dec(); + } rcu_read_unlock(); if (ret == -EOPNOTSUPP) diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 2c383c12a431..09065187378e 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -337,7 +337,6 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL; - struct in6_addr orig_daddr; struct ioam6_lwt *ilwt; int err = -EINVAL; u32 pkt_cnt; @@ -352,8 +351,6 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (pkt_cnt % ilwt->freq.n >= ilwt->freq.k) goto out; - orig_daddr = ipv6_hdr(skb)->daddr; - local_bh_disable(); cache_dst = dst_cache_get(&ilwt->cache); local_bh_enable(); @@ -422,7 +419,10 @@ do_encap: goto drop; } - if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) { + /* avoid lwtunnel_output() reentry loop when destination is the same + * after transformation (e.g., with the inline mode) + */ + if (dst->lwtstate != cache_dst->lwtstate) { skb_dst_drop(skb); skb_dst_set(skb, cache_dst); return dst_output(net, sk, skb); |
