summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Abeni <pabeni@redhat.com>2025-07-24 12:30:39 +0200
committerPaolo Abeni <pabeni@redhat.com>2025-07-24 12:30:40 +0200
commit291d5dc80eca1fc67a0fa4c861d13c101345501a (patch)
tree2ac41b87f8865b379b92a6f0c1b1174bb70cf8bd
parent89fd905dab912a5712149ec5c06d251ceaedf743 (diff)
parent28712d6ed32028b0f2e0defe6681411496971ca3 (diff)
Merge tag 'ipsec-2025-07-23' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec
Steffen Klassert says: ==================== pull request (net): ipsec 2025-07-23 1) Premption fixes for xfrm_state_find. From Sabrina Dubroca. 2) Initialize offload path also for SW IPsec GRO. This fixes a performance regression on SW IPsec offload. From Leon Romanovsky. 3) Fix IPsec UDP GRO for IKE packets. From Tobias Brunner, 4) Fix transport header setting for IPcomp after decompressing. From Fernando Fernandez Mancera. 5) Fix use-after-free when xfrmi_changelink tries to change collect_md for a xfrm interface. From Eyal Birger . 6) Delete the special IPcomp x->tunnel state along with the state x to avoid refcount problems. From Sabrina Dubroca. Please pull or let me know if there are problems. * tag 'ipsec-2025-07-23' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec: Revert "xfrm: destroy xfrm_state synchronously on net exit path" xfrm: delete x->tunnel as we delete x xfrm: interface: fix use-after-free after changing collect_md xfrm interface xfrm: ipcomp: adjust transport header after decompressing xfrm: Set transport header to fix UDP GRO handling xfrm: always initialize offload path xfrm: state: use a consistent pcpu_id in xfrm_state_find xfrm: state: initialize state_ptrs earlier in xfrm_state_find ==================== Link: https://patch.msgid.link/20250723075417.3432644-1-steffen.klassert@secunet.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-rw-r--r--include/net/xfrm.h15
-rw-r--r--net/ipv4/ipcomp.c2
-rw-r--r--net/ipv4/xfrm4_input.c3
-rw-r--r--net/ipv6/ipcomp6.c2
-rw-r--r--net/ipv6/xfrm6_input.c3
-rw-r--r--net/ipv6/xfrm6_tunnel.c2
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/xfrm/xfrm_device.c1
-rw-r--r--net/xfrm/xfrm_interface_core.c7
-rw-r--r--net/xfrm/xfrm_ipcomp.c3
-rw-r--r--net/xfrm/xfrm_state.c69
-rw-r--r--net/xfrm/xfrm_user.c3
12 files changed, 46 insertions, 66 deletions
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index a21e276dbe44..f3014e4f54fc 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -441,7 +441,6 @@ int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo);
int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo);
void xfrm_flush_gc(void);
-void xfrm_state_delete_tunnel(struct xfrm_state *x);
struct xfrm_type {
struct module *owner;
@@ -474,7 +473,7 @@ struct xfrm_type_offload {
int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family);
void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);
-void xfrm_set_type_offload(struct xfrm_state *x);
+void xfrm_set_type_offload(struct xfrm_state *x, bool try_load);
static inline void xfrm_unset_type_offload(struct xfrm_state *x)
{
if (!x->type_offload)
@@ -916,7 +915,7 @@ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols)
xfrm_pol_put(pols[i]);
}
-void __xfrm_state_destroy(struct xfrm_state *, bool);
+void __xfrm_state_destroy(struct xfrm_state *);
static inline void __xfrm_state_put(struct xfrm_state *x)
{
@@ -926,13 +925,7 @@ static inline void __xfrm_state_put(struct xfrm_state *x)
static inline void xfrm_state_put(struct xfrm_state *x)
{
if (refcount_dec_and_test(&x->refcnt))
- __xfrm_state_destroy(x, false);
-}
-
-static inline void xfrm_state_put_sync(struct xfrm_state *x)
-{
- if (refcount_dec_and_test(&x->refcnt))
- __xfrm_state_destroy(x, true);
+ __xfrm_state_destroy(x);
}
static inline void xfrm_state_hold(struct xfrm_state *x)
@@ -1770,7 +1763,7 @@ struct xfrmk_spdinfo {
struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
int xfrm_state_delete(struct xfrm_state *x);
-int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync);
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid);
int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
int xfrm_dev_policy_flush(struct net *net, struct net_device *dev,
bool task_valid);
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 5a4fb2539b08..9a45aed508d1 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -54,6 +54,7 @@ static int ipcomp4_err(struct sk_buff *skb, u32 info)
}
/* We always hold one tunnel user reference to indicate a tunnel */
+static struct lock_class_key xfrm_state_lock_key;
static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
{
struct net *net = xs_net(x);
@@ -62,6 +63,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
t = xfrm_state_alloc(net);
if (!t)
goto out;
+ lockdep_set_class(&t->lock, &xfrm_state_lock_key);
t->id.proto = IPPROTO_IPIP;
t->id.spi = x->props.saddr.a4;
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 0d31a8c108d4..f28cfd88eaf5 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -202,6 +202,9 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0)
goto out;
+ /* set the transport header to ESP */
+ skb_set_transport_header(skb, offset);
+
NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 72d4858dec18..8607569de34f 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -71,6 +71,7 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
}
+static struct lock_class_key xfrm_state_lock_key;
static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
{
struct net *net = xs_net(x);
@@ -79,6 +80,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
t = xfrm_state_alloc(net);
if (!t)
goto out;
+ lockdep_set_class(&t->lock, &xfrm_state_lock_key);
t->id.proto = IPPROTO_IPV6;
t->id.spi = xfrm6_tunnel_alloc_spi(net, (xfrm_address_t *)&x->props.saddr);
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 841c81abaaf4..9005fc156a20 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -202,6 +202,9 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0)
goto out;
+ /* set the transport header to ESP */
+ skb_set_transport_header(skb, offset);
+
NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index bf140ef781c1..5120a763da0d 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -334,8 +334,8 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
unsigned int i;
+ xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
xfrm_flush_gc();
- xfrm_state_flush(net, 0, false, true);
for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
diff --git a/net/key/af_key.c b/net/key/af_key.c
index efc2a91f4c48..b5d761700776 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1766,7 +1766,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m
if (proto == 0)
return -EINVAL;
- err = xfrm_state_flush(net, proto, true, false);
+ err = xfrm_state_flush(net, proto, true);
err2 = unicast_flush_resp(sk, hdr);
if (err || err2) {
if (err == -ESRCH) /* empty table - go quietly */
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 81fd486b5e56..d2819baea414 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -305,7 +305,6 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
return -EINVAL;
}
- xfrm_set_type_offload(x);
if (!x->type_offload) {
NL_SET_ERR_MSG(extack, "Type doesn't support offload");
dev_put(dev);
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index cb1e12740c87..330a05286a56 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -875,7 +875,7 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
return -EINVAL;
}
- if (p.collect_md) {
+ if (p.collect_md || xi->p.collect_md) {
NL_SET_ERR_MSG(extack, "collect_md can't be changed");
return -EINVAL;
}
@@ -886,11 +886,6 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
} else {
if (xi->dev != dev)
return -EEXIST;
- if (xi->p.collect_md) {
- NL_SET_ERR_MSG(extack,
- "device can't be changed to collect_md");
- return -EINVAL;
- }
}
return xfrmi_update(xi, &p);
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 907c3ccb440d..43fdc6ed8dd1 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -97,7 +97,7 @@ static int ipcomp_input_done2(struct sk_buff *skb, int err)
struct ip_comp_hdr *ipch = ip_comp_hdr(skb);
const int plen = skb->len;
- skb_reset_transport_header(skb);
+ skb->transport_header = skb->network_header + sizeof(*ipch);
return ipcomp_post_acomp(skb, err, 0) ?:
skb->len < (plen + sizeof(ip_comp_hdr)) ? -EINVAL :
@@ -313,7 +313,6 @@ void ipcomp_destroy(struct xfrm_state *x)
struct ipcomp_data *ipcd = x->data;
if (!ipcd)
return;
- xfrm_state_delete_tunnel(x);
ipcomp_free_data(ipcd);
kfree(ipcd);
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 77cc418ad69e..97ff756191ba 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -424,11 +424,10 @@ void xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
}
EXPORT_SYMBOL(xfrm_unregister_type_offload);
-void xfrm_set_type_offload(struct xfrm_state *x)
+void xfrm_set_type_offload(struct xfrm_state *x, bool try_load)
{
const struct xfrm_type_offload *type = NULL;
struct xfrm_state_afinfo *afinfo;
- bool try_load = true;
retry:
afinfo = xfrm_state_get_afinfo(x->props.family);
@@ -593,7 +592,7 @@ void xfrm_state_free(struct xfrm_state *x)
}
EXPORT_SYMBOL(xfrm_state_free);
-static void ___xfrm_state_destroy(struct xfrm_state *x)
+static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
if (x->mode_cbs && x->mode_cbs->destroy_state)
x->mode_cbs->destroy_state(x);
@@ -607,6 +606,7 @@ static void ___xfrm_state_destroy(struct xfrm_state *x)
kfree(x->coaddr);
kfree(x->replay_esn);
kfree(x->preplay_esn);
+ xfrm_unset_type_offload(x);
if (x->type) {
x->type->destructor(x);
xfrm_put_type(x->type);
@@ -631,7 +631,7 @@ static void xfrm_state_gc_task(struct work_struct *work)
synchronize_rcu();
hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
- ___xfrm_state_destroy(x);
+ xfrm_state_gc_destroy(x);
}
static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
@@ -780,8 +780,6 @@ void xfrm_dev_state_free(struct xfrm_state *x)
struct xfrm_dev_offload *xso = &x->xso;
struct net_device *dev = READ_ONCE(xso->dev);
- xfrm_unset_type_offload(x);
-
if (dev && dev->xfrmdev_ops) {
spin_lock_bh(&xfrm_state_dev_gc_lock);
if (!hlist_unhashed(&x->dev_gclist))
@@ -797,22 +795,18 @@ void xfrm_dev_state_free(struct xfrm_state *x)
}
#endif
-void __xfrm_state_destroy(struct xfrm_state *x, bool sync)
+void __xfrm_state_destroy(struct xfrm_state *x)
{
WARN_ON(x->km.state != XFRM_STATE_DEAD);
- if (sync) {
- synchronize_rcu();
- ___xfrm_state_destroy(x);
- } else {
- spin_lock_bh(&xfrm_state_gc_lock);
- hlist_add_head(&x->gclist, &xfrm_state_gc_list);
- spin_unlock_bh(&xfrm_state_gc_lock);
- schedule_work(&xfrm_state_gc_work);
- }
+ spin_lock_bh(&xfrm_state_gc_lock);
+ hlist_add_head(&x->gclist, &xfrm_state_gc_list);
+ spin_unlock_bh(&xfrm_state_gc_lock);
+ schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
+static void xfrm_state_delete_tunnel(struct xfrm_state *x);
int __xfrm_state_delete(struct xfrm_state *x)
{
struct net *net = xs_net(x);
@@ -840,6 +834,8 @@ int __xfrm_state_delete(struct xfrm_state *x)
xfrm_dev_state_delete(x);
+ xfrm_state_delete_tunnel(x);
+
/* All xfrm_state objects are created by xfrm_state_alloc.
* The xfrm_state_alloc call gives a reference, and that
* is what we are dropping here.
@@ -921,7 +917,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool
}
#endif
-int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync)
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
{
int i, err = 0, cnt = 0;
@@ -943,10 +939,7 @@ restart:
err = xfrm_state_delete(x);
xfrm_audit_state_delete(x, err ? 0 : 1,
task_valid);
- if (sync)
- xfrm_state_put_sync(x);
- else
- xfrm_state_put(x);
+ xfrm_state_put(x);
if (!err)
cnt++;
@@ -1307,14 +1300,8 @@ static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
const struct flowi *fl, unsigned short family,
struct xfrm_state **best, int *acq_in_progress,
- int *error)
+ int *error, unsigned int pcpu_id)
{
- /* We need the cpu id just as a lookup key,
- * we don't require it to be stable.
- */
- unsigned int pcpu_id = get_cpu();
- put_cpu();
-
/* Resolution logic:
* 1. There is a valid state with matching selector. Done.
* 2. Valid state with inappropriate selector. Skip.
@@ -1381,14 +1368,15 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
/* We need the cpu id just as a lookup key,
* we don't require it to be stable.
*/
- pcpu_id = get_cpu();
- put_cpu();
+ pcpu_id = raw_smp_processor_id();
to_put = NULL;
sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
rcu_read_lock();
+ xfrm_hash_ptrs_get(net, &state_ptrs);
+
hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
if (x->props.family == encap_family &&
x->props.reqid == tmpl->reqid &&
@@ -1400,7 +1388,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
xfrm_state_look_at(pol, x, fl, encap_family,
- &best, &acquire_in_progress, &error);
+ &best, &acquire_in_progress, &error, pcpu_id);
}
if (best)
@@ -1417,7 +1405,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
xfrm_state_look_at(pol, x, fl, family,
- &best, &acquire_in_progress, &error);
+ &best, &acquire_in_progress, &error, pcpu_id);
}
cached:
@@ -1429,8 +1417,6 @@ cached:
else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */
WARN_ON(1);
- xfrm_hash_ptrs_get(net, &state_ptrs);
-
h = __xfrm_dst_hash(daddr, saddr, tmpl->reqid, encap_family, state_ptrs.hmask);
hlist_for_each_entry_rcu(x, state_ptrs.bydst + h, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
@@ -1460,7 +1446,7 @@ cached:
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
xfrm_state_look_at(pol, x, fl, family,
- &best, &acquire_in_progress, &error);
+ &best, &acquire_in_progress, &error, pcpu_id);
}
if (best || acquire_in_progress)
goto found;
@@ -1495,7 +1481,7 @@ cached:
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
xfrm_state_look_at(pol, x, fl, family,
- &best, &acquire_in_progress, &error);
+ &best, &acquire_in_progress, &error, pcpu_id);
}
found:
@@ -3077,20 +3063,17 @@ void xfrm_flush_gc(void)
}
EXPORT_SYMBOL(xfrm_flush_gc);
-/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
-void xfrm_state_delete_tunnel(struct xfrm_state *x)
+static void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
if (x->tunnel) {
struct xfrm_state *t = x->tunnel;
- if (atomic_read(&t->tunnel_users) == 2)
+ if (atomic_dec_return(&t->tunnel_users) == 1)
xfrm_state_delete(t);
- atomic_dec(&t->tunnel_users);
- xfrm_state_put_sync(t);
+ xfrm_state_put(t);
x->tunnel = NULL;
}
}
-EXPORT_SYMBOL(xfrm_state_delete_tunnel);
u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
@@ -3295,8 +3278,8 @@ void xfrm_state_fini(struct net *net)
unsigned int sz;
flush_work(&net->xfrm.state_hash_work);
+ xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
flush_work(&xfrm_state_gc_work);
- xfrm_state_flush(net, 0, false, true);
WARN_ON(!list_empty(&net->xfrm.state_all));
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 59f258daf830..684239018bec 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -977,6 +977,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
/* override default values from above */
xfrm_update_ae_params(x, attrs, 0);
+ xfrm_set_type_offload(x, attrs[XFRMA_OFFLOAD_DEV]);
/* configure the hardware if offload is requested */
if (attrs[XFRMA_OFFLOAD_DEV]) {
err = xfrm_dev_state_add(net, x,
@@ -2634,7 +2635,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
struct xfrm_usersa_flush *p = nlmsg_data(nlh);
int err;
- err = xfrm_state_flush(net, p->proto, true, false);
+ err = xfrm_state_flush(net, p->proto, true);
if (err) {
if (err == -ESRCH) /* empty table */
return 0;