summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_dev.c2
-rw-r--r--net/core/dev_ioctl.c8
-rw-r--r--net/core/timestamping.c5
-rw-r--r--net/dsa/Kconfig6
-rw-r--r--net/dsa/Makefile1
-rw-r--r--net/dsa/tag_8021q.c84
-rw-r--r--net/dsa/tag_8021q.h7
-rw-r--r--net/dsa/tag_ocelot_8021q.c2
-rw-r--r--net/dsa/tag_sja1105.c72
-rw-r--r--net/dsa/tag_vsc73xx_8021q.c68
-rw-r--r--net/dsa/user.c2
-rw-r--r--net/ethtool/common.c11
-rw-r--r--net/ethtool/common.h2
-rw-r--r--net/ethtool/ioctl.c14
-rw-r--r--net/ethtool/tsinfo.c6
-rw-r--r--net/ipv4/esp4.c8
-rw-r--r--net/ipv4/esp4_offload.c17
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/xfrm6_policy.c7
-rw-r--r--net/sched/sch_taprio.c2
-rw-r--r--net/xfrm/Makefile3
-rw-r--r--net/xfrm/xfrm_compat.c6
-rw-r--r--net/xfrm/xfrm_device.c6
-rw-r--r--net/xfrm/xfrm_input.c3
-rw-r--r--net/xfrm/xfrm_nat_keepalive.c292
-rw-r--r--net/xfrm/xfrm_policy.c13
-rw-r--r--net/xfrm/xfrm_state.c17
-rw-r--r--net/xfrm/xfrm_user.c15
28 files changed, 575 insertions, 105 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 3efba4f857ac..217be32426b5 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -677,7 +677,7 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev,
}
static int vlan_ethtool_get_ts_info(struct net_device *dev,
- struct ethtool_ts_info *info)
+ struct kernel_ethtool_ts_info *info)
{
const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
return ethtool_get_ts_info_by_layer(vlan->real_dev, info);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index b9719ed3c3fd..8592c052c0f4 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -259,9 +259,7 @@ static int dev_eth_ioctl(struct net_device *dev,
* @dev: Network device
* @cfg: Timestamping configuration structure
*
- * Helper for enforcing a common policy that phylib timestamping, if available,
- * should take precedence in front of hardware timestamping provided by the
- * netdev.
+ * Helper for calling the default hardware provider timestamping.
*
* Note: phy_mii_ioctl() only handles SIOCSHWTSTAMP (not SIOCGHWTSTAMP), and
* there only exists a phydev->mii_ts->hwtstamp() method. So this will return
@@ -271,7 +269,7 @@ static int dev_eth_ioctl(struct net_device *dev,
static int dev_get_hwtstamp_phylib(struct net_device *dev,
struct kernel_hwtstamp_config *cfg)
{
- if (phy_has_hwtstamp(dev->phydev))
+ if (phy_is_default_hwtstamp(dev->phydev))
return phy_hwtstamp_get(dev->phydev, cfg);
return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg);
@@ -327,7 +325,7 @@ int dev_set_hwtstamp_phylib(struct net_device *dev,
struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
- bool phy_ts = phy_has_hwtstamp(dev->phydev);
+ bool phy_ts = phy_is_default_hwtstamp(dev->phydev);
struct kernel_hwtstamp_config old_cfg = {};
bool changed = false;
int err;
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 04840697fe79..3717fb152ecc 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -25,7 +25,8 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
struct sk_buff *clone;
unsigned int type;
- if (!skb->sk)
+ if (!skb->sk || !skb->dev ||
+ !phy_is_default_hwtstamp(skb->dev->phydev))
return;
type = classify(skb);
@@ -47,7 +48,7 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
struct mii_timestamper *mii_ts;
unsigned int type;
- if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->mii_ts)
+ if (!skb->dev || !phy_is_default_hwtstamp(skb->dev->phydev))
return false;
if (skb_headroom(skb) < ETH_HLEN)
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 8d5bf869eb14..2dfe9063613f 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -166,6 +166,12 @@ config NET_DSA_TAG_TRAILER
Say Y or M if you want to enable support for tagging frames at
with a trailed. e.g. Marvell 88E6060.
+config NET_DSA_TAG_VSC73XX_8021Q
+ tristate "Tag driver for Microchip/Vitesse VSC73xx family of switches, using VLAN"
+ help
+ Say Y or M if you want to enable support for tagging frames with a
+ custom VLAN-based header.
+
config NET_DSA_TAG_XRS700X
tristate "Tag driver for XRS700x switches"
help
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 8a1894a42552..555c07cfeb71 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o
obj-$(CONFIG_NET_DSA_TAG_RZN1_A5PSW) += tag_rzn1_a5psw.o
obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o
obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
+obj-$(CONFIG_NET_DSA_TAG_VSC73XX_8021Q) += tag_vsc73xx_8021q.o
obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o
# for tracing framework to find trace.h
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 71b26ae6db39..3ee53e28ec2e 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -286,7 +286,8 @@ int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
* be used for VLAN-unaware bridging.
*/
int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port,
- struct dsa_bridge bridge)
+ struct dsa_bridge bridge, bool *tx_fwd_offload,
+ struct netlink_ext_ack *extack)
{
struct dsa_port *dp = dsa_to_port(ds, port);
u16 standalone_vid, bridge_vid;
@@ -304,6 +305,8 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port,
dsa_port_tag_8021q_vlan_del(dp, standalone_vid, false);
+ *tx_fwd_offload = true;
+
return 0;
}
EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_join);
@@ -468,8 +471,8 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
}
EXPORT_SYMBOL_GPL(dsa_8021q_xmit);
-struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit,
- int vbid)
+static struct net_device *
+dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit, int vbid)
{
struct dsa_port *cpu_dp = conduit->dsa_ptr;
struct dsa_switch_tree *dst = cpu_dp->dst;
@@ -495,30 +498,91 @@ struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit,
return NULL;
}
-EXPORT_SYMBOL_GPL(dsa_tag_8021q_find_port_by_vbid);
+struct net_device *dsa_tag_8021q_find_user(struct net_device *conduit,
+ int source_port, int switch_id,
+ int vid, int vbid)
+{
+ /* Always prefer precise source port information, if available */
+ if (source_port != -1 && switch_id != -1)
+ return dsa_conduit_find_user(conduit, switch_id, source_port);
+ else if (vbid >= 1)
+ return dsa_tag_8021q_find_port_by_vbid(conduit, vbid);
+
+ return dsa_find_designated_bridge_port_by_vid(conduit, vid);
+}
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_find_user);
+
+/**
+ * dsa_8021q_rcv - Decode source information from tag_8021q header
+ * @skb: RX socket buffer
+ * @source_port: pointer to storage for precise source port information.
+ * If this is known already from outside tag_8021q, the pre-initialized
+ * value is preserved. If not known, pass -1.
+ * @switch_id: similar to source_port.
+ * @vbid: pointer to storage for imprecise bridge ID. Must be pre-initialized
+ * with -1. If a positive value is returned, the source_port and switch_id
+ * are invalid.
+ * @vid: pointer to storage for original VID, in case tag_8021q decoding failed.
+ *
+ * If the packet has a tag_8021q header, decode it and set @source_port,
+ * @switch_id and @vbid, and strip the header. Otherwise set @vid and keep the
+ * header in the hwaccel area of the packet.
+ */
void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
- int *vbid)
+ int *vbid, int *vid)
{
- u16 vid, tci;
+ int tmp_source_port, tmp_switch_id, tmp_vbid;
+ __be16 vlan_proto;
+ u16 tmp_vid, tci;
if (skb_vlan_tag_present(skb)) {
+ vlan_proto = skb->vlan_proto;
tci = skb_vlan_tag_get(skb);
__vlan_hwaccel_clear_tag(skb);
} else {
+ struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
+
+ vlan_proto = hdr->h_vlan_proto;
skb_push_rcsum(skb, ETH_HLEN);
__skb_vlan_pop(skb, &tci);
skb_pull_rcsum(skb, ETH_HLEN);
}
- vid = tci & VLAN_VID_MASK;
+ tmp_vid = tci & VLAN_VID_MASK;
+ if (!vid_is_dsa_8021q(tmp_vid)) {
+ /* Not a tag_8021q frame, so return the VID to the
+ * caller for further processing, and put the tag back
+ */
+ if (vid)
+ *vid = tmp_vid;
+
+ __vlan_hwaccel_put_tag(skb, vlan_proto, tci);
+
+ return;
+ }
- *source_port = dsa_8021q_rx_source_port(vid);
- *switch_id = dsa_8021q_rx_switch_id(vid);
+ tmp_source_port = dsa_8021q_rx_source_port(tmp_vid);
+ tmp_switch_id = dsa_8021q_rx_switch_id(tmp_vid);
+ tmp_vbid = dsa_tag_8021q_rx_vbid(tmp_vid);
+
+ /* Precise source port information is unknown when receiving from a
+ * VLAN-unaware bridging domain, and tmp_source_port and tmp_switch_id
+ * are zeroes in this case.
+ *
+ * Preserve the source information from hardware-specific mechanisms,
+ * if available. This allows us to not overwrite a valid source port
+ * and switch ID with less precise values.
+ */
+ if (tmp_vbid == 0 && *source_port == -1)
+ *source_port = tmp_source_port;
+ if (tmp_vbid == 0 && *switch_id == -1)
+ *switch_id = tmp_switch_id;
if (vbid)
- *vbid = dsa_tag_8021q_rx_vbid(vid);
+ *vbid = tmp_vbid;
skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ return;
}
EXPORT_SYMBOL_GPL(dsa_8021q_rcv);
diff --git a/net/dsa/tag_8021q.h b/net/dsa/tag_8021q.h
index 41f7167ac520..27b8906f99ec 100644
--- a/net/dsa/tag_8021q.h
+++ b/net/dsa/tag_8021q.h
@@ -14,10 +14,11 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
u16 tpid, u16 tci);
void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
- int *vbid);
+ int *vbid, int *vid);
-struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit,
- int vbid);
+struct net_device *dsa_tag_8021q_find_user(struct net_device *conduit,
+ int source_port, int switch_id,
+ int vid, int vbid);
int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
struct dsa_notifier_tag_8021q_vlan_info *info);
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index b059381310fe..8e8b1bef6af6 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -81,7 +81,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
{
int src_port, switch_id;
- dsa_8021q_rcv(skb, &src_port, &switch_id, NULL);
+ dsa_8021q_rcv(skb, &src_port, &switch_id, NULL, NULL);
skb->dev = dsa_conduit_find_user(netdev, switch_id, src_port);
if (!skb->dev)
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 1aba1d05c27a..3e902af7eea6 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -472,37 +472,14 @@ static bool sja1110_skb_has_inband_control_extension(const struct sk_buff *skb)
return ntohs(eth_hdr(skb)->h_proto) == ETH_P_SJA1110;
}
-/* If the VLAN in the packet is a tag_8021q one, set @source_port and
- * @switch_id and strip the header. Otherwise set @vid and keep it in the
- * packet.
- */
-static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port,
- int *switch_id, int *vbid, u16 *vid)
-{
- struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
- u16 vlan_tci;
-
- if (skb_vlan_tag_present(skb))
- vlan_tci = skb_vlan_tag_get(skb);
- else
- vlan_tci = ntohs(hdr->h_vlan_TCI);
-
- if (vid_is_dsa_8021q(vlan_tci & VLAN_VID_MASK))
- return dsa_8021q_rcv(skb, source_port, switch_id, vbid);
-
- /* Try our best with imprecise RX */
- *vid = vlan_tci & VLAN_VID_MASK;
-}
-
static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
struct net_device *netdev)
{
- int source_port = -1, switch_id = -1, vbid = -1;
+ int source_port = -1, switch_id = -1, vbid = -1, vid = -1;
struct sja1105_meta meta = {0};
struct ethhdr *hdr;
bool is_link_local;
bool is_meta;
- u16 vid;
hdr = eth_hdr(skb);
is_link_local = sja1105_is_link_local(skb);
@@ -524,37 +501,16 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
/* Normal data plane traffic and link-local frames are tagged with
* a tag_8021q VLAN which we have to strip
*/
- if (sja1105_skb_has_tag_8021q(skb)) {
- int tmp_source_port = -1, tmp_switch_id = -1;
-
- sja1105_vlan_rcv(skb, &tmp_source_port, &tmp_switch_id, &vbid,
- &vid);
- /* Preserve the source information from the INCL_SRCPT option,
- * if available. This allows us to not overwrite a valid source
- * port and switch ID with zeroes when receiving link-local
- * frames from a VLAN-unaware bridged port (non-zero vbid) or a
- * VLAN-aware bridged port (non-zero vid). Furthermore, the
- * tag_8021q source port information is only of trust when the
- * vbid is 0 (precise port). Otherwise, tmp_source_port and
- * tmp_switch_id will be zeroes.
- */
- if (vbid == 0 && source_port == -1)
- source_port = tmp_source_port;
- if (vbid == 0 && switch_id == -1)
- switch_id = tmp_switch_id;
- } else if (source_port == -1 && switch_id == -1) {
+ if (sja1105_skb_has_tag_8021q(skb))
+ dsa_8021q_rcv(skb, &source_port, &switch_id, &vbid, &vid);
+ else if (source_port == -1 && switch_id == -1)
/* Packets with no source information have no chance of
* getting accepted, drop them straight away.
*/
return NULL;
- }
- if (source_port != -1 && switch_id != -1)
- skb->dev = dsa_conduit_find_user(netdev, switch_id, source_port);
- else if (vbid >= 1)
- skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid);
- else
- skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid);
+ skb->dev = dsa_tag_8021q_find_user(netdev, source_port, switch_id,
+ vid, vbid);
if (!skb->dev) {
netdev_warn(netdev, "Couldn't decode source port\n");
return NULL;
@@ -677,9 +633,8 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
struct net_device *netdev)
{
- int source_port = -1, switch_id = -1, vbid = -1;
+ int source_port = -1, switch_id = -1, vbid = -1, vid = -1;
bool host_only = false;
- u16 vid = 0;
if (sja1110_skb_has_inband_control_extension(skb)) {
skb = sja1110_rcv_inband_control_extension(skb, &source_port,
@@ -691,14 +646,11 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
/* Packets with in-band control extensions might still have RX VLANs */
if (likely(sja1105_skb_has_tag_8021q(skb)))
- sja1105_vlan_rcv(skb, &source_port, &switch_id, &vbid, &vid);
-
- if (vbid >= 1)
- skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid);
- else if (source_port == -1 || switch_id == -1)
- skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid);
- else
- skb->dev = dsa_conduit_find_user(netdev, switch_id, source_port);
+ dsa_8021q_rcv(skb, &source_port, &switch_id, &vbid, &vid);
+
+ skb->dev = dsa_tag_8021q_find_user(netdev, source_port, switch_id,
+ vid, vbid);
+
if (!skb->dev) {
netdev_warn(netdev, "Couldn't decode source port\n");
return NULL;
diff --git a/net/dsa/tag_vsc73xx_8021q.c b/net/dsa/tag_vsc73xx_8021q.c
new file mode 100644
index 000000000000..af121a9aff7f
--- /dev/null
+++ b/net/dsa/tag_vsc73xx_8021q.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright (C) 2024 Pawel Dembicki <paweldembicki@gmail.com>
+ */
+#include <linux/dsa/8021q.h>
+
+#include "tag.h"
+#include "tag_8021q.h"
+
+#define VSC73XX_8021Q_NAME "vsc73xx-8021q"
+
+static struct sk_buff *
+vsc73xx_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct dsa_port *dp = dsa_user_to_port(netdev);
+ u16 queue_mapping = skb_get_queue_mapping(skb);
+ u16 tx_vid = dsa_tag_8021q_standalone_vid(dp);
+ u8 pcp;
+
+ if (skb->offload_fwd_mark) {
+ unsigned int bridge_num = dsa_port_bridge_num_get(dp);
+ struct net_device *br = dsa_port_bridge_dev_get(dp);
+
+ if (br_vlan_enabled(br))
+ return skb;
+
+ tx_vid = dsa_tag_8021q_bridge_vid(bridge_num);
+ }
+
+ pcp = netdev_txq_to_tc(netdev, queue_mapping);
+
+ return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q,
+ ((pcp << VLAN_PRIO_SHIFT) | tx_vid));
+}
+
+static struct sk_buff *
+vsc73xx_rcv(struct sk_buff *skb, struct net_device *netdev)
+{
+ int src_port = -1, switch_id = -1, vbid = -1, vid = -1;
+
+ dsa_8021q_rcv(skb, &src_port, &switch_id, &vbid, &vid);
+
+ skb->dev = dsa_tag_8021q_find_user(netdev, src_port, switch_id,
+ vid, vbid);
+ if (!skb->dev) {
+ dev_warn_ratelimited(&netdev->dev,
+ "Couldn't decode source port\n");
+ return NULL;
+ }
+
+ dsa_default_offload_fwd_mark(skb);
+
+ return skb;
+}
+
+static const struct dsa_device_ops vsc73xx_8021q_netdev_ops = {
+ .name = VSC73XX_8021Q_NAME,
+ .proto = DSA_TAG_PROTO_VSC73XX_8021Q,
+ .xmit = vsc73xx_xmit,
+ .rcv = vsc73xx_rcv,
+ .needed_headroom = VLAN_HLEN,
+ .promisc_on_conduit = true,
+};
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("DSA tag driver for VSC73XX family of switches, using VLAN");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_VSC73XX_8021Q, VSC73XX_8021Q_NAME);
+
+module_dsa_tag_driver(vsc73xx_8021q_netdev_ops);
diff --git a/net/dsa/user.c b/net/dsa/user.c
index e8f56a40b614..f5adfa1d978a 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -1729,7 +1729,7 @@ static int dsa_user_set_rxnfc(struct net_device *dev,
}
static int dsa_user_get_ts_info(struct net_device *dev,
- struct ethtool_ts_info *ts)
+ struct kernel_ethtool_ts_info *ts)
{
struct dsa_user_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->dp->ds;
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 67d06cd002a5..07032babd1b6 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -211,6 +211,7 @@ const char link_mode_names[][ETH_GSTRING_LEN] = {
__DEFINE_LINK_MODE_NAME(10, T1S, Full),
__DEFINE_LINK_MODE_NAME(10, T1S, Half),
__DEFINE_LINK_MODE_NAME(10, T1S_P2MP, Half),
+ __DEFINE_LINK_MODE_NAME(10, T1BRR, Full),
};
static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -251,6 +252,7 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
#define __LINK_MODE_LANES_T1S_P2MP 1
#define __LINK_MODE_LANES_VR8 8
#define __LINK_MODE_LANES_DR8_2 8
+#define __LINK_MODE_LANES_T1BRR 1
#define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \
[ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \
@@ -374,6 +376,7 @@ const struct link_mode_info link_mode_params[] = {
__DEFINE_LINK_MODE_PARAMS(10, T1S, Full),
__DEFINE_LINK_MODE_PARAMS(10, T1S, Half),
__DEFINE_LINK_MODE_PARAMS(10, T1S_P2MP, Half),
+ __DEFINE_LINK_MODE_PARAMS(10, T1BRR, Full),
};
static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -658,7 +661,7 @@ int ethtool_check_ops(const struct ethtool_ops *ops)
return 0;
}
-int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
+int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info)
{
const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
@@ -666,7 +669,7 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
memset(info, 0, sizeof(*info));
info->cmd = ETHTOOL_GET_TS_INFO;
- if (phy_has_tsinfo(phydev))
+ if (phy_is_default_hwtstamp(phydev) && phy_has_tsinfo(phydev))
return phy_ts_info(phydev, info);
if (ops->get_ts_info)
return ops->get_ts_info(dev, info);
@@ -680,7 +683,7 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index)
{
- struct ethtool_ts_info info = { };
+ struct kernel_ethtool_ts_info info = { };
int num = 0;
if (!__ethtool_get_ts_info(dev, &info))
@@ -690,7 +693,7 @@ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index)
}
EXPORT_SYMBOL(ethtool_get_phc_vclocks);
-int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info)
+int ethtool_get_ts_info_by_layer(struct net_device *dev, struct kernel_ethtool_ts_info *info)
{
return __ethtool_get_ts_info(dev, info);
}
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
index b55705a9ad5a..863806fcf01a 100644
--- a/net/ethtool/common.h
+++ b/net/ethtool/common.h
@@ -44,7 +44,7 @@ bool convert_legacy_settings_to_link_ksettings(
const struct ethtool_cmd *legacy_settings);
u32 ethtool_get_max_rxfh_channel(struct net_device *dev);
int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max);
-int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info);
+int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info);
extern const struct ethtool_phy_ops *ethtool_phy_ops;
extern const struct ethtool_pse_ops *ethtool_pse_ops;
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 5ff128eacc9b..983fee76f5cf 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -65,7 +65,8 @@ u32 ethtool_op_get_link(struct net_device *dev)
}
EXPORT_SYMBOL(ethtool_op_get_link);
-int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
+int ethtool_op_get_ts_info(struct net_device *dev,
+ struct kernel_ethtool_ts_info *info)
{
info->so_timestamping =
SOF_TIMESTAMPING_TX_SOFTWARE |
@@ -2594,13 +2595,20 @@ out:
static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr)
{
- struct ethtool_ts_info info;
+ struct kernel_ethtool_ts_info kernel_info;
+ struct ethtool_ts_info info = {};
int err;
- err = __ethtool_get_ts_info(dev, &info);
+ err = __ethtool_get_ts_info(dev, &kernel_info);
if (err)
return err;
+ info.cmd = kernel_info.cmd;
+ info.so_timestamping = kernel_info.so_timestamping;
+ info.phc_index = kernel_info.phc_index;
+ info.tx_types = kernel_info.tx_types;
+ info.rx_filters = kernel_info.rx_filters;
+
if (copy_to_user(useraddr, &info, sizeof(info)))
return -EFAULT;
diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c
index 57d496287e52..03d12d6f79ca 100644
--- a/net/ethtool/tsinfo.c
+++ b/net/ethtool/tsinfo.c
@@ -12,7 +12,7 @@ struct tsinfo_req_info {
struct tsinfo_reply_data {
struct ethnl_reply_data base;
- struct ethtool_ts_info ts_info;
+ struct kernel_ethtool_ts_info ts_info;
struct ethtool_ts_stats stats;
};
@@ -55,7 +55,7 @@ static int tsinfo_reply_size(const struct ethnl_req_info *req_base,
{
const struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base);
bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
- const struct ethtool_ts_info *ts_info = &data->ts_info;
+ const struct kernel_ethtool_ts_info *ts_info = &data->ts_info;
int len = 0;
int ret;
@@ -136,7 +136,7 @@ static int tsinfo_fill_reply(struct sk_buff *skb,
{
const struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base);
bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
- const struct ethtool_ts_info *ts_info = &data->ts_info;
+ const struct kernel_ethtool_ts_info *ts_info = &data->ts_info;
int ret;
if (ts_info->so_timestamping) {
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 3968d3f98e08..73981595f062 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -349,6 +349,7 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
{
struct udphdr *uh;
unsigned int len;
+ struct xfrm_offload *xo = xfrm_offload(skb);
len = skb->len + esp->tailen - skb_transport_offset(skb);
if (len + sizeof(struct iphdr) > IP_MAX_MTU)
@@ -360,7 +361,12 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
uh->len = htons(len);
uh->check = 0;
- *skb_mac_header(skb) = IPPROTO_UDP;
+ /* For IPv4 ESP with UDP encapsulation, if xo is not null, the skb is in the crypto offload
+ * data path, which means that esp_output_udp_encap is called outside of the XFRM stack.
+ * In this case, the mac header doesn't point to the IPv4 protocol field, so don't set it.
+ */
+ if (!xo || encap_type != UDP_ENCAP_ESPINUDP)
+ *skb_mac_header(skb) = IPPROTO_UDP;
return (struct ip_esp_hdr *)(uh + 1);
}
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index b3271957ad9a..a37d18858c72 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -264,6 +264,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
struct esp_info esp;
bool hw_offload = true;
__u32 seq;
+ int encap_type = 0;
esp.inplace = true;
@@ -296,8 +297,10 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
esp.esph = ip_esp_hdr(skb);
+ if (x->encap)
+ encap_type = x->encap->encap_type;
- if (!hw_offload || !skb_is_gso(skb)) {
+ if (!hw_offload || !skb_is_gso(skb) || (hw_offload && encap_type == UDP_ENCAP_ESPINUDP)) {
esp.nfrags = esp_output_head(x, skb, &esp);
if (esp.nfrags < 0)
return esp.nfrags;
@@ -324,6 +327,18 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32));
+ if (hw_offload && encap_type == UDP_ENCAP_ESPINUDP) {
+ /* In the XFRM stack, the encapsulation protocol is set to iphdr->protocol by
+ * setting *skb_mac_header(skb) (see esp_output_udp_encap()) where skb->mac_header
+ * points to iphdr->protocol (see xfrm4_tunnel_encap_add()).
+ * However, in esp_xmit(), skb->mac_header doesn't point to iphdr->protocol.
+ * Therefore, the protocol field needs to be corrected.
+ */
+ ip_hdr(skb)->protocol = IPPROTO_UDP;
+
+ esph->seq_no = htonl(seq);
+ }
+
ip_hdr(skb)->tot_len = htons(skb->len);
ip_send_check(ip_hdr(skb));
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e03fb9a1dbeb..90d2c7e3f5e9 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1060,6 +1060,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
.nd_tbl = &nd_tbl,
.ipv6_fragment = ip6_fragment,
.ipv6_dev_find = ipv6_dev_find,
+ .ip6_xmit = ip6_xmit,
};
static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 2f1ea5f999a2..b1d81c4270ab 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -290,8 +290,14 @@ int __init xfrm6_init(void)
ret = register_pernet_subsys(&xfrm6_net_ops);
if (ret)
goto out_protocol;
+
+ ret = xfrm_nat_keepalive_init(AF_INET6);
+ if (ret)
+ goto out_nat_keepalive;
out:
return ret;
+out_nat_keepalive:
+ unregister_pernet_subsys(&xfrm6_net_ops);
out_protocol:
xfrm6_protocol_fini();
out_state:
@@ -303,6 +309,7 @@ out_policy:
void xfrm6_fini(void)
{
+ xfrm_nat_keepalive_fini(AF_INET6);
unregister_pernet_subsys(&xfrm6_net_ops);
xfrm6_protocol_fini();
xfrm6_policy_fini();
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index b284a06b5a75..cc2df9f8c14a 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1610,7 +1610,7 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
const struct ethtool_ops *ops = dev->ethtool_ops;
- struct ethtool_ts_info info = {
+ struct kernel_ethtool_ts_info info = {
.cmd = ETHTOOL_GET_TS_INFO,
.phc_index = -1,
};
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 547cec77ba03..512e0b2f8514 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -13,7 +13,8 @@ endif
obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
xfrm_input.o xfrm_output.o \
- xfrm_sysctl.o xfrm_replay.o xfrm_device.o
+ xfrm_sysctl.o xfrm_replay.o xfrm_device.o \
+ xfrm_nat_keepalive.o
obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
obj-$(CONFIG_XFRM_USER) += xfrm_user.o
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index 703d4172c7d7..91357ccaf4af 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -131,6 +131,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
[XFRMA_IF_ID] = { .type = NLA_U32 },
[XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
[XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
+ [XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 },
};
static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb,
@@ -280,9 +281,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src)
case XFRMA_IF_ID:
case XFRMA_MTIMER_THRESH:
case XFRMA_SA_DIR:
+ case XFRMA_NAT_KEEPALIVE_INTERVAL:
return xfrm_nla_cpy(dst, src, nla_len(src));
default:
- BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_DIR);
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL);
pr_warn_once("unsupported nla_type %d\n", src->nla_type);
return -EOPNOTSUPP;
}
@@ -437,7 +439,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
int err;
if (type > XFRMA_MAX) {
- BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_DIR);
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL);
NL_SET_ERR_MSG(extack, "Bad attribute");
return -EOPNOTSUPP;
}
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 2455a76a1cff..9a44d363ba62 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -261,9 +261,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET;
- /* We don't yet support UDP encapsulation and TFC padding. */
- if ((!is_packet_offload && x->encap) || x->tfcpad) {
- NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded");
+ /* We don't yet support TFC padding. */
+ if (x->tfcpad) {
+ NL_SET_ERR_MSG(extack, "TFC padding can't be offloaded");
return -EINVAL;
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index d2ea18dcb0cb..7cee9c0a2cdc 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -471,7 +471,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
struct xfrm_offload *xo = xfrm_offload(skb);
struct sec_path *sp;
- if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) {
+ if (encap_type < 0 || (xo && (xo->flags & XFRM_GRO || encap_type == 0 ||
+ encap_type == UDP_ENCAP_ESPINUDP))) {
x = xfrm_input_state(skb);
if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) {
diff --git a/net/xfrm/xfrm_nat_keepalive.c b/net/xfrm/xfrm_nat_keepalive.c
new file mode 100644
index 000000000000..82f0a301683f
--- /dev/null
+++ b/net/xfrm/xfrm_nat_keepalive.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xfrm_nat_keepalive.c
+ *
+ * (c) 2024 Eyal Birger <eyal.birger@gmail.com>
+ */
+
+#include <net/inet_common.h>
+#include <net/ip6_checksum.h>
+#include <net/xfrm.h>
+
+static DEFINE_PER_CPU(struct sock *, nat_keepalive_sk_ipv4);
+#if IS_ENABLED(CONFIG_IPV6)
+static DEFINE_PER_CPU(struct sock *, nat_keepalive_sk_ipv6);
+#endif
+
+struct nat_keepalive {
+ struct net *net;
+ u16 family;
+ xfrm_address_t saddr;
+ xfrm_address_t daddr;
+ __be16 encap_sport;
+ __be16 encap_dport;
+ __u32 smark;
+};
+
+static void nat_keepalive_init(struct nat_keepalive *ka, struct xfrm_state *x)
+{
+ ka->net = xs_net(x);
+ ka->family = x->props.family;
+ ka->saddr = x->props.saddr;
+ ka->daddr = x->id.daddr;
+ ka->encap_sport = x->encap->encap_sport;
+ ka->encap_dport = x->encap->encap_dport;
+ ka->smark = xfrm_smark_get(0, x);
+}
+
+static int nat_keepalive_send_ipv4(struct sk_buff *skb,
+ struct nat_keepalive *ka)
+{
+ struct net *net = ka->net;
+ struct flowi4 fl4;
+ struct rtable *rt;
+ struct sock *sk;
+ __u8 tos = 0;
+ int err;
+
+ flowi4_init_output(&fl4, 0 /* oif */, skb->mark, tos,
+ RT_SCOPE_UNIVERSE, IPPROTO_UDP, 0,
+ ka->daddr.a4, ka->saddr.a4, ka->encap_dport,
+ ka->encap_sport, sock_net_uid(net, NULL));
+
+ rt = ip_route_output_key(net, &fl4);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
+
+ skb_dst_set(skb, &rt->dst);
+
+ sk = *this_cpu_ptr(&nat_keepalive_sk_ipv4);
+ sock_net_set(sk, net);
+ err = ip_build_and_send_pkt(skb, sk, fl4.saddr, fl4.daddr, NULL, tos);
+ sock_net_set(sk, &init_net);
+ return err;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int nat_keepalive_send_ipv6(struct sk_buff *skb,
+ struct nat_keepalive *ka,
+ struct udphdr *uh)
+{
+ struct net *net = ka->net;
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+ struct sock *sk;
+ __wsum csum;
+ int err;
+
+ csum = skb_checksum(skb, 0, skb->len, 0);
+ uh->check = csum_ipv6_magic(&ka->saddr.in6, &ka->daddr.in6,
+ skb->len, IPPROTO_UDP, csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_mark = skb->mark;
+ fl6.saddr = ka->saddr.in6;
+ fl6.daddr = ka->daddr.in6;
+ fl6.flowi6_proto = IPPROTO_UDP;
+ fl6.fl6_sport = ka->encap_sport;
+ fl6.fl6_dport = ka->encap_dport;
+
+ sk = *this_cpu_ptr(&nat_keepalive_sk_ipv6);
+ sock_net_set(sk, net);
+ dst = ipv6_stub->ipv6_dst_lookup_flow(net, sk, &fl6, NULL);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
+
+ skb_dst_set(skb, dst);
+ err = ipv6_stub->ip6_xmit(sk, skb, &fl6, skb->mark, NULL, 0, 0);
+ sock_net_set(sk, &init_net);
+ return err;
+}
+#endif
+
+static void nat_keepalive_send(struct nat_keepalive *ka)
+{
+ const int nat_ka_hdrs_len = max(sizeof(struct iphdr),
+ sizeof(struct ipv6hdr)) +
+ sizeof(struct udphdr);
+ const u8 nat_ka_payload = 0xFF;
+ int err = -EAFNOSUPPORT;
+ struct sk_buff *skb;
+ struct udphdr *uh;
+
+ skb = alloc_skb(nat_ka_hdrs_len + sizeof(nat_ka_payload), GFP_ATOMIC);
+ if (unlikely(!skb))
+ return;
+
+ skb_reserve(skb, nat_ka_hdrs_len);
+
+ skb_put_u8(skb, nat_ka_payload);
+
+ uh = skb_push(skb, sizeof(*uh));
+ uh->source = ka->encap_sport;
+ uh->dest = ka->encap_dport;
+ uh->len = htons(skb->len);
+ uh->check = 0;
+
+ skb->mark = ka->smark;
+
+ switch (ka->family) {
+ case AF_INET:
+ err = nat_keepalive_send_ipv4(skb, ka);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ err = nat_keepalive_send_ipv6(skb, ka, uh);
+ break;
+#endif
+ }
+ if (err)
+ kfree_skb(skb);
+}
+
+struct nat_keepalive_work_ctx {
+ time64_t next_run;
+ time64_t now;
+};
+
+static int nat_keepalive_work_single(struct xfrm_state *x, int count, void *ptr)
+{
+ struct nat_keepalive_work_ctx *ctx = ptr;
+ bool send_keepalive = false;
+ struct nat_keepalive ka;
+ time64_t next_run;
+ u32 interval;
+ int delta;
+
+ interval = x->nat_keepalive_interval;
+ if (!interval)
+ return 0;
+
+ spin_lock(&x->lock);
+
+ delta = (int)(ctx->now - x->lastused);
+ if (delta < interval) {
+ x->nat_keepalive_expiration = ctx->now + interval - delta;
+ next_run = x->nat_keepalive_expiration;
+ } else if (x->nat_keepalive_expiration > ctx->now) {
+ next_run = x->nat_keepalive_expiration;
+ } else {
+ next_run = ctx->now + interval;
+ nat_keepalive_init(&ka, x);
+ send_keepalive = true;
+ }
+
+ spin_unlock(&x->lock);
+
+ if (send_keepalive)
+ nat_keepalive_send(&ka);
+
+ if (!ctx->next_run || next_run < ctx->next_run)
+ ctx->next_run = next_run;
+ return 0;
+}
+
+static void nat_keepalive_work(struct work_struct *work)
+{
+ struct nat_keepalive_work_ctx ctx;
+ struct xfrm_state_walk walk;
+ struct net *net;
+
+ ctx.next_run = 0;
+ ctx.now = ktime_get_real_seconds();
+
+ net = container_of(work, struct net, xfrm.nat_keepalive_work.work);
+ xfrm_state_walk_init(&walk, IPPROTO_ESP, NULL);
+ xfrm_state_walk(net, &walk, nat_keepalive_work_single, &ctx);
+ xfrm_state_walk_done(&walk, net);
+ if (ctx.next_run)
+ schedule_delayed_work(&net->xfrm.nat_keepalive_work,
+ (ctx.next_run - ctx.now) * HZ);
+}
+
+static int nat_keepalive_sk_init(struct sock * __percpu *socks,
+ unsigned short family)
+{
+ struct sock *sk;
+ int err, i;
+
+ for_each_possible_cpu(i) {
+ err = inet_ctl_sock_create(&sk, family, SOCK_RAW, IPPROTO_UDP,
+ &init_net);
+ if (err < 0)
+ goto err;
+
+ *per_cpu_ptr(socks, i) = sk;
+ }
+
+ return 0;
+err:
+ for_each_possible_cpu(i)
+ inet_ctl_sock_destroy(*per_cpu_ptr(socks, i));
+ return err;
+}
+
+static void nat_keepalive_sk_fini(struct sock * __percpu *socks)
+{
+ int i;
+
+ for_each_possible_cpu(i)
+ inet_ctl_sock_destroy(*per_cpu_ptr(socks, i));
+}
+
+void xfrm_nat_keepalive_state_updated(struct xfrm_state *x)
+{
+ struct net *net;
+
+ if (!x->nat_keepalive_interval)
+ return;
+
+ net = xs_net(x);
+ schedule_delayed_work(&net->xfrm.nat_keepalive_work, 0);
+}
+
+int __net_init xfrm_nat_keepalive_net_init(struct net *net)
+{
+ INIT_DELAYED_WORK(&net->xfrm.nat_keepalive_work, nat_keepalive_work);
+ return 0;
+}
+
+int xfrm_nat_keepalive_net_fini(struct net *net)
+{
+ cancel_delayed_work_sync(&net->xfrm.nat_keepalive_work);
+ return 0;
+}
+
+int xfrm_nat_keepalive_init(unsigned short family)
+{
+ int err = -EAFNOSUPPORT;
+
+ switch (family) {
+ case AF_INET:
+ err = nat_keepalive_sk_init(&nat_keepalive_sk_ipv4, PF_INET);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ err = nat_keepalive_sk_init(&nat_keepalive_sk_ipv6, PF_INET6);
+ break;
+#endif
+ }
+
+ if (err)
+ pr_err("xfrm nat keepalive init: failed to init err:%d\n", err);
+ return err;
+}
+EXPORT_SYMBOL_GPL(xfrm_nat_keepalive_init);
+
+void xfrm_nat_keepalive_fini(unsigned short family)
+{
+ switch (family) {
+ case AF_INET:
+ nat_keepalive_sk_fini(&nat_keepalive_sk_ipv4);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ nat_keepalive_sk_fini(&nat_keepalive_sk_ipv6);
+ break;
+#endif
+ }
+}
+EXPORT_SYMBOL_GPL(xfrm_nat_keepalive_fini);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 66e07de2de35..b35e11d80c27 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3718,12 +3718,15 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id);
if (!pol) {
+ const bool is_crypto_offload = sp &&
+ (xfrm_input_state(skb)->xso.type == XFRM_DEV_OFFLOAD_CRYPTO);
+
if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
return 0;
}
- if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) {
+ if (sp && secpath_has_nontransport(sp, 0, &xerr_idx) && !is_crypto_offload) {
xfrm_secpath_reject(xerr_idx, skb, &fl);
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
return 0;
@@ -4284,8 +4287,14 @@ static int __net_init xfrm_net_init(struct net *net)
if (rv < 0)
goto out_sysctl;
+ rv = xfrm_nat_keepalive_net_init(net);
+ if (rv < 0)
+ goto out_nat_keepalive;
+
return 0;
+out_nat_keepalive:
+ xfrm_sysctl_fini(net);
out_sysctl:
xfrm_policy_fini(net);
out_policy:
@@ -4298,6 +4307,7 @@ out_statistics:
static void __net_exit xfrm_net_exit(struct net *net)
{
+ xfrm_nat_keepalive_net_fini(net);
xfrm_sysctl_fini(net);
xfrm_policy_fini(net);
xfrm_state_fini(net);
@@ -4359,6 +4369,7 @@ void __init xfrm_init(void)
#endif
register_xfrm_state_bpf();
+ xfrm_nat_keepalive_init(AF_INET);
}
#ifdef CONFIG_AUDITSYSCALL
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 649bb739df0d..abadc857cd45 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -715,6 +715,7 @@ int __xfrm_state_delete(struct xfrm_state *x)
if (x->id.spi)
hlist_del_rcu(&x->byspi);
net->xfrm.state_num--;
+ xfrm_nat_keepalive_state_updated(x);
spin_unlock(&net->xfrm.xfrm_state_lock);
if (x->encap_sk)
@@ -1453,6 +1454,7 @@ static void __xfrm_state_insert(struct xfrm_state *x)
net->xfrm.state_num++;
xfrm_hash_grow_check(net, x->bydst.next != NULL);
+ xfrm_nat_keepalive_state_updated(x);
}
/* net->xfrm.xfrm_state_lock is held */
@@ -2871,6 +2873,21 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload,
goto error;
}
+ if (x->nat_keepalive_interval) {
+ if (x->dir != XFRM_SA_DIR_OUT) {
+ NL_SET_ERR_MSG(extack, "NAT keepalive is only supported for outbound SAs");
+ err = -EINVAL;
+ goto error;
+ }
+
+ if (!x->encap || x->encap->encap_type != UDP_ENCAP_ESPINUDP) {
+ NL_SET_ERR_MSG(extack,
+ "NAT keepalive is only supported for UDP encapsulation");
+ err = -EINVAL;
+ goto error;
+ }
+ }
+
error:
return err;
}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e83c687bd64e..a552cfa623ea 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -833,6 +833,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
if (attrs[XFRMA_SA_DIR])
x->dir = nla_get_u8(attrs[XFRMA_SA_DIR]);
+ if (attrs[XFRMA_NAT_KEEPALIVE_INTERVAL])
+ x->nat_keepalive_interval =
+ nla_get_u32(attrs[XFRMA_NAT_KEEPALIVE_INTERVAL]);
+
err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack);
if (err)
goto error;
@@ -1288,6 +1292,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
}
if (x->dir)
ret = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
+
+ if (x->nat_keepalive_interval) {
+ ret = nla_put_u32(skb, XFRMA_NAT_KEEPALIVE_INTERVAL,
+ x->nat_keepalive_interval);
+ if (ret)
+ goto out;
+ }
out:
return ret;
}
@@ -3165,6 +3176,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_IF_ID] = { .type = NLA_U32 },
[XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
[XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
+ [XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 },
};
EXPORT_SYMBOL_GPL(xfrma_policy);
@@ -3474,6 +3486,9 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
if (x->dir)
l += nla_total_size(sizeof(x->dir));
+ if (x->nat_keepalive_interval)
+ l += nla_total_size(sizeof(x->nat_keepalive_interval));
+
return l;
}