diff options
Diffstat (limited to 'net')
| -rw-r--r-- | net/8021q/vlan_dev.c | 2 | ||||
| -rw-r--r-- | net/core/dev_ioctl.c | 8 | ||||
| -rw-r--r-- | net/core/timestamping.c | 5 | ||||
| -rw-r--r-- | net/dsa/Kconfig | 6 | ||||
| -rw-r--r-- | net/dsa/Makefile | 1 | ||||
| -rw-r--r-- | net/dsa/tag_8021q.c | 84 | ||||
| -rw-r--r-- | net/dsa/tag_8021q.h | 7 | ||||
| -rw-r--r-- | net/dsa/tag_ocelot_8021q.c | 2 | ||||
| -rw-r--r-- | net/dsa/tag_sja1105.c | 72 | ||||
| -rw-r--r-- | net/dsa/tag_vsc73xx_8021q.c | 68 | ||||
| -rw-r--r-- | net/dsa/user.c | 2 | ||||
| -rw-r--r-- | net/ethtool/common.c | 11 | ||||
| -rw-r--r-- | net/ethtool/common.h | 2 | ||||
| -rw-r--r-- | net/ethtool/ioctl.c | 14 | ||||
| -rw-r--r-- | net/ethtool/tsinfo.c | 6 | ||||
| -rw-r--r-- | net/ipv4/esp4.c | 8 | ||||
| -rw-r--r-- | net/ipv4/esp4_offload.c | 17 | ||||
| -rw-r--r-- | net/ipv6/af_inet6.c | 1 | ||||
| -rw-r--r-- | net/ipv6/xfrm6_policy.c | 7 | ||||
| -rw-r--r-- | net/sched/sch_taprio.c | 2 | ||||
| -rw-r--r-- | net/xfrm/Makefile | 3 | ||||
| -rw-r--r-- | net/xfrm/xfrm_compat.c | 6 | ||||
| -rw-r--r-- | net/xfrm/xfrm_device.c | 6 | ||||
| -rw-r--r-- | net/xfrm/xfrm_input.c | 3 | ||||
| -rw-r--r-- | net/xfrm/xfrm_nat_keepalive.c | 292 | ||||
| -rw-r--r-- | net/xfrm/xfrm_policy.c | 13 | ||||
| -rw-r--r-- | net/xfrm/xfrm_state.c | 17 | ||||
| -rw-r--r-- | net/xfrm/xfrm_user.c | 15 |
28 files changed, 575 insertions, 105 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 3efba4f857ac..217be32426b5 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -677,7 +677,7 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev, } static int vlan_ethtool_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); return ethtool_get_ts_info_by_layer(vlan->real_dev, info); diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index b9719ed3c3fd..8592c052c0f4 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -259,9 +259,7 @@ static int dev_eth_ioctl(struct net_device *dev, * @dev: Network device * @cfg: Timestamping configuration structure * - * Helper for enforcing a common policy that phylib timestamping, if available, - * should take precedence in front of hardware timestamping provided by the - * netdev. + * Helper for calling the default hardware provider timestamping. * * Note: phy_mii_ioctl() only handles SIOCSHWTSTAMP (not SIOCGHWTSTAMP), and * there only exists a phydev->mii_ts->hwtstamp() method. So this will return @@ -271,7 +269,7 @@ static int dev_eth_ioctl(struct net_device *dev, static int dev_get_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg) { - if (phy_has_hwtstamp(dev->phydev)) + if (phy_is_default_hwtstamp(dev->phydev)) return phy_hwtstamp_get(dev->phydev, cfg); return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg); @@ -327,7 +325,7 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; - bool phy_ts = phy_has_hwtstamp(dev->phydev); + bool phy_ts = phy_is_default_hwtstamp(dev->phydev); struct kernel_hwtstamp_config old_cfg = {}; bool changed = false; int err; diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 04840697fe79..3717fb152ecc 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -25,7 +25,8 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) struct sk_buff *clone; unsigned int type; - if (!skb->sk) + if (!skb->sk || !skb->dev || + !phy_is_default_hwtstamp(skb->dev->phydev)) return; type = classify(skb); @@ -47,7 +48,7 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) struct mii_timestamper *mii_ts; unsigned int type; - if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->mii_ts) + if (!skb->dev || !phy_is_default_hwtstamp(skb->dev->phydev)) return false; if (skb_headroom(skb) < ETH_HLEN) diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 8d5bf869eb14..2dfe9063613f 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -166,6 +166,12 @@ config NET_DSA_TAG_TRAILER Say Y or M if you want to enable support for tagging frames at with a trailed. e.g. Marvell 88E6060. +config NET_DSA_TAG_VSC73XX_8021Q + tristate "Tag driver for Microchip/Vitesse VSC73xx family of switches, using VLAN" + help + Say Y or M if you want to enable support for tagging frames with a + custom VLAN-based header. + config NET_DSA_TAG_XRS700X tristate "Tag driver for XRS700x switches" help diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 8a1894a42552..555c07cfeb71 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o obj-$(CONFIG_NET_DSA_TAG_RZN1_A5PSW) += tag_rzn1_a5psw.o obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o +obj-$(CONFIG_NET_DSA_TAG_VSC73XX_8021Q) += tag_vsc73xx_8021q.o obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o # for tracing framework to find trace.h diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 71b26ae6db39..3ee53e28ec2e 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -286,7 +286,8 @@ int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, * be used for VLAN-unaware bridging. */ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port, - struct dsa_bridge bridge) + struct dsa_bridge bridge, bool *tx_fwd_offload, + struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_to_port(ds, port); u16 standalone_vid, bridge_vid; @@ -304,6 +305,8 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port, dsa_port_tag_8021q_vlan_del(dp, standalone_vid, false); + *tx_fwd_offload = true; + return 0; } EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_join); @@ -468,8 +471,8 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, } EXPORT_SYMBOL_GPL(dsa_8021q_xmit); -struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit, - int vbid) +static struct net_device * +dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit, int vbid) { struct dsa_port *cpu_dp = conduit->dsa_ptr; struct dsa_switch_tree *dst = cpu_dp->dst; @@ -495,30 +498,91 @@ struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit, return NULL; } -EXPORT_SYMBOL_GPL(dsa_tag_8021q_find_port_by_vbid); +struct net_device *dsa_tag_8021q_find_user(struct net_device *conduit, + int source_port, int switch_id, + int vid, int vbid) +{ + /* Always prefer precise source port information, if available */ + if (source_port != -1 && switch_id != -1) + return dsa_conduit_find_user(conduit, switch_id, source_port); + else if (vbid >= 1) + return dsa_tag_8021q_find_port_by_vbid(conduit, vbid); + + return dsa_find_designated_bridge_port_by_vid(conduit, vid); +} +EXPORT_SYMBOL_GPL(dsa_tag_8021q_find_user); + +/** + * dsa_8021q_rcv - Decode source information from tag_8021q header + * @skb: RX socket buffer + * @source_port: pointer to storage for precise source port information. + * If this is known already from outside tag_8021q, the pre-initialized + * value is preserved. If not known, pass -1. + * @switch_id: similar to source_port. + * @vbid: pointer to storage for imprecise bridge ID. Must be pre-initialized + * with -1. If a positive value is returned, the source_port and switch_id + * are invalid. + * @vid: pointer to storage for original VID, in case tag_8021q decoding failed. + * + * If the packet has a tag_8021q header, decode it and set @source_port, + * @switch_id and @vbid, and strip the header. Otherwise set @vid and keep the + * header in the hwaccel area of the packet. + */ void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, - int *vbid) + int *vbid, int *vid) { - u16 vid, tci; + int tmp_source_port, tmp_switch_id, tmp_vbid; + __be16 vlan_proto; + u16 tmp_vid, tci; if (skb_vlan_tag_present(skb)) { + vlan_proto = skb->vlan_proto; tci = skb_vlan_tag_get(skb); __vlan_hwaccel_clear_tag(skb); } else { + struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); + + vlan_proto = hdr->h_vlan_proto; skb_push_rcsum(skb, ETH_HLEN); __skb_vlan_pop(skb, &tci); skb_pull_rcsum(skb, ETH_HLEN); } - vid = tci & VLAN_VID_MASK; + tmp_vid = tci & VLAN_VID_MASK; + if (!vid_is_dsa_8021q(tmp_vid)) { + /* Not a tag_8021q frame, so return the VID to the + * caller for further processing, and put the tag back + */ + if (vid) + *vid = tmp_vid; + + __vlan_hwaccel_put_tag(skb, vlan_proto, tci); + + return; + } - *source_port = dsa_8021q_rx_source_port(vid); - *switch_id = dsa_8021q_rx_switch_id(vid); + tmp_source_port = dsa_8021q_rx_source_port(tmp_vid); + tmp_switch_id = dsa_8021q_rx_switch_id(tmp_vid); + tmp_vbid = dsa_tag_8021q_rx_vbid(tmp_vid); + + /* Precise source port information is unknown when receiving from a + * VLAN-unaware bridging domain, and tmp_source_port and tmp_switch_id + * are zeroes in this case. + * + * Preserve the source information from hardware-specific mechanisms, + * if available. This allows us to not overwrite a valid source port + * and switch ID with less precise values. + */ + if (tmp_vbid == 0 && *source_port == -1) + *source_port = tmp_source_port; + if (tmp_vbid == 0 && *switch_id == -1) + *switch_id = tmp_switch_id; if (vbid) - *vbid = dsa_tag_8021q_rx_vbid(vid); + *vbid = tmp_vbid; skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + return; } EXPORT_SYMBOL_GPL(dsa_8021q_rcv); diff --git a/net/dsa/tag_8021q.h b/net/dsa/tag_8021q.h index 41f7167ac520..27b8906f99ec 100644 --- a/net/dsa/tag_8021q.h +++ b/net/dsa/tag_8021q.h @@ -14,10 +14,11 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, u16 tpid, u16 tci); void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, - int *vbid); + int *vbid, int *vid); -struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit, - int vbid); +struct net_device *dsa_tag_8021q_find_user(struct net_device *conduit, + int source_port, int switch_id, + int vid, int vbid); int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, struct dsa_notifier_tag_8021q_vlan_info *info); diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index b059381310fe..8e8b1bef6af6 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -81,7 +81,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, { int src_port, switch_id; - dsa_8021q_rcv(skb, &src_port, &switch_id, NULL); + dsa_8021q_rcv(skb, &src_port, &switch_id, NULL, NULL); skb->dev = dsa_conduit_find_user(netdev, switch_id, src_port); if (!skb->dev) diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 1aba1d05c27a..3e902af7eea6 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -472,37 +472,14 @@ static bool sja1110_skb_has_inband_control_extension(const struct sk_buff *skb) return ntohs(eth_hdr(skb)->h_proto) == ETH_P_SJA1110; } -/* If the VLAN in the packet is a tag_8021q one, set @source_port and - * @switch_id and strip the header. Otherwise set @vid and keep it in the - * packet. - */ -static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port, - int *switch_id, int *vbid, u16 *vid) -{ - struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); - u16 vlan_tci; - - if (skb_vlan_tag_present(skb)) - vlan_tci = skb_vlan_tag_get(skb); - else - vlan_tci = ntohs(hdr->h_vlan_TCI); - - if (vid_is_dsa_8021q(vlan_tci & VLAN_VID_MASK)) - return dsa_8021q_rcv(skb, source_port, switch_id, vbid); - - /* Try our best with imprecise RX */ - *vid = vlan_tci & VLAN_VID_MASK; -} - static struct sk_buff *sja1105_rcv(struct sk_buff *skb, struct net_device *netdev) { - int source_port = -1, switch_id = -1, vbid = -1; + int source_port = -1, switch_id = -1, vbid = -1, vid = -1; struct sja1105_meta meta = {0}; struct ethhdr *hdr; bool is_link_local; bool is_meta; - u16 vid; hdr = eth_hdr(skb); is_link_local = sja1105_is_link_local(skb); @@ -524,37 +501,16 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, /* Normal data plane traffic and link-local frames are tagged with * a tag_8021q VLAN which we have to strip */ - if (sja1105_skb_has_tag_8021q(skb)) { - int tmp_source_port = -1, tmp_switch_id = -1; - - sja1105_vlan_rcv(skb, &tmp_source_port, &tmp_switch_id, &vbid, - &vid); - /* Preserve the source information from the INCL_SRCPT option, - * if available. This allows us to not overwrite a valid source - * port and switch ID with zeroes when receiving link-local - * frames from a VLAN-unaware bridged port (non-zero vbid) or a - * VLAN-aware bridged port (non-zero vid). Furthermore, the - * tag_8021q source port information is only of trust when the - * vbid is 0 (precise port). Otherwise, tmp_source_port and - * tmp_switch_id will be zeroes. - */ - if (vbid == 0 && source_port == -1) - source_port = tmp_source_port; - if (vbid == 0 && switch_id == -1) - switch_id = tmp_switch_id; - } else if (source_port == -1 && switch_id == -1) { + if (sja1105_skb_has_tag_8021q(skb)) + dsa_8021q_rcv(skb, &source_port, &switch_id, &vbid, &vid); + else if (source_port == -1 && switch_id == -1) /* Packets with no source information have no chance of * getting accepted, drop them straight away. */ return NULL; - } - if (source_port != -1 && switch_id != -1) - skb->dev = dsa_conduit_find_user(netdev, switch_id, source_port); - else if (vbid >= 1) - skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid); - else - skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); + skb->dev = dsa_tag_8021q_find_user(netdev, source_port, switch_id, + vid, vbid); if (!skb->dev) { netdev_warn(netdev, "Couldn't decode source port\n"); return NULL; @@ -677,9 +633,8 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb, static struct sk_buff *sja1110_rcv(struct sk_buff *skb, struct net_device *netdev) { - int source_port = -1, switch_id = -1, vbid = -1; + int source_port = -1, switch_id = -1, vbid = -1, vid = -1; bool host_only = false; - u16 vid = 0; if (sja1110_skb_has_inband_control_extension(skb)) { skb = sja1110_rcv_inband_control_extension(skb, &source_port, @@ -691,14 +646,11 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb, /* Packets with in-band control extensions might still have RX VLANs */ if (likely(sja1105_skb_has_tag_8021q(skb))) - sja1105_vlan_rcv(skb, &source_port, &switch_id, &vbid, &vid); - - if (vbid >= 1) - skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid); - else if (source_port == -1 || switch_id == -1) - skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); - else - skb->dev = dsa_conduit_find_user(netdev, switch_id, source_port); + dsa_8021q_rcv(skb, &source_port, &switch_id, &vbid, &vid); + + skb->dev = dsa_tag_8021q_find_user(netdev, source_port, switch_id, + vid, vbid); + if (!skb->dev) { netdev_warn(netdev, "Couldn't decode source port\n"); return NULL; diff --git a/net/dsa/tag_vsc73xx_8021q.c b/net/dsa/tag_vsc73xx_8021q.c new file mode 100644 index 000000000000..af121a9aff7f --- /dev/null +++ b/net/dsa/tag_vsc73xx_8021q.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* Copyright (C) 2024 Pawel Dembicki <paweldembicki@gmail.com> + */ +#include <linux/dsa/8021q.h> + +#include "tag.h" +#include "tag_8021q.h" + +#define VSC73XX_8021Q_NAME "vsc73xx-8021q" + +static struct sk_buff * +vsc73xx_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct dsa_port *dp = dsa_user_to_port(netdev); + u16 queue_mapping = skb_get_queue_mapping(skb); + u16 tx_vid = dsa_tag_8021q_standalone_vid(dp); + u8 pcp; + + if (skb->offload_fwd_mark) { + unsigned int bridge_num = dsa_port_bridge_num_get(dp); + struct net_device *br = dsa_port_bridge_dev_get(dp); + + if (br_vlan_enabled(br)) + return skb; + + tx_vid = dsa_tag_8021q_bridge_vid(bridge_num); + } + + pcp = netdev_txq_to_tc(netdev, queue_mapping); + + return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q, + ((pcp << VLAN_PRIO_SHIFT) | tx_vid)); +} + +static struct sk_buff * +vsc73xx_rcv(struct sk_buff *skb, struct net_device *netdev) +{ + int src_port = -1, switch_id = -1, vbid = -1, vid = -1; + + dsa_8021q_rcv(skb, &src_port, &switch_id, &vbid, &vid); + + skb->dev = dsa_tag_8021q_find_user(netdev, src_port, switch_id, + vid, vbid); + if (!skb->dev) { + dev_warn_ratelimited(&netdev->dev, + "Couldn't decode source port\n"); + return NULL; + } + + dsa_default_offload_fwd_mark(skb); + + return skb; +} + +static const struct dsa_device_ops vsc73xx_8021q_netdev_ops = { + .name = VSC73XX_8021Q_NAME, + .proto = DSA_TAG_PROTO_VSC73XX_8021Q, + .xmit = vsc73xx_xmit, + .rcv = vsc73xx_rcv, + .needed_headroom = VLAN_HLEN, + .promisc_on_conduit = true, +}; + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("DSA tag driver for VSC73XX family of switches, using VLAN"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_VSC73XX_8021Q, VSC73XX_8021Q_NAME); + +module_dsa_tag_driver(vsc73xx_8021q_netdev_ops); diff --git a/net/dsa/user.c b/net/dsa/user.c index e8f56a40b614..f5adfa1d978a 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -1729,7 +1729,7 @@ static int dsa_user_set_rxnfc(struct net_device *dev, } static int dsa_user_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *ts) + struct kernel_ethtool_ts_info *ts) { struct dsa_user_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->dp->ds; diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 67d06cd002a5..07032babd1b6 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -211,6 +211,7 @@ const char link_mode_names[][ETH_GSTRING_LEN] = { __DEFINE_LINK_MODE_NAME(10, T1S, Full), __DEFINE_LINK_MODE_NAME(10, T1S, Half), __DEFINE_LINK_MODE_NAME(10, T1S_P2MP, Half), + __DEFINE_LINK_MODE_NAME(10, T1BRR, Full), }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -251,6 +252,7 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_T1S_P2MP 1 #define __LINK_MODE_LANES_VR8 8 #define __LINK_MODE_LANES_DR8_2 8 +#define __LINK_MODE_LANES_T1BRR 1 #define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \ [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \ @@ -374,6 +376,7 @@ const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(10, T1S, Full), __DEFINE_LINK_MODE_PARAMS(10, T1S, Half), __DEFINE_LINK_MODE_PARAMS(10, T1S_P2MP, Half), + __DEFINE_LINK_MODE_PARAMS(10, T1BRR, Full), }; static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -658,7 +661,7 @@ int ethtool_check_ops(const struct ethtool_ops *ops) return 0; } -int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) +int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) { const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; @@ -666,7 +669,7 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) memset(info, 0, sizeof(*info)); info->cmd = ETHTOOL_GET_TS_INFO; - if (phy_has_tsinfo(phydev)) + if (phy_is_default_hwtstamp(phydev) && phy_has_tsinfo(phydev)) return phy_ts_info(phydev, info); if (ops->get_ts_info) return ops->get_ts_info(dev, info); @@ -680,7 +683,7 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index) { - struct ethtool_ts_info info = { }; + struct kernel_ethtool_ts_info info = { }; int num = 0; if (!__ethtool_get_ts_info(dev, &info)) @@ -690,7 +693,7 @@ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index) } EXPORT_SYMBOL(ethtool_get_phc_vclocks); -int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info) +int ethtool_get_ts_info_by_layer(struct net_device *dev, struct kernel_ethtool_ts_info *info) { return __ethtool_get_ts_info(dev, info); } diff --git a/net/ethtool/common.h b/net/ethtool/common.h index b55705a9ad5a..863806fcf01a 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -44,7 +44,7 @@ bool convert_legacy_settings_to_link_ksettings( const struct ethtool_cmd *legacy_settings); u32 ethtool_get_max_rxfh_channel(struct net_device *dev); int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max); -int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info); +int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info); extern const struct ethtool_phy_ops *ethtool_phy_ops; extern const struct ethtool_pse_ops *ethtool_pse_ops; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 5ff128eacc9b..983fee76f5cf 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -65,7 +65,8 @@ u32 ethtool_op_get_link(struct net_device *dev) } EXPORT_SYMBOL(ethtool_op_get_link); -int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) +int ethtool_op_get_ts_info(struct net_device *dev, + struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | @@ -2594,13 +2595,20 @@ out: static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr) { - struct ethtool_ts_info info; + struct kernel_ethtool_ts_info kernel_info; + struct ethtool_ts_info info = {}; int err; - err = __ethtool_get_ts_info(dev, &info); + err = __ethtool_get_ts_info(dev, &kernel_info); if (err) return err; + info.cmd = kernel_info.cmd; + info.so_timestamping = kernel_info.so_timestamping; + info.phc_index = kernel_info.phc_index; + info.tx_types = kernel_info.tx_types; + info.rx_filters = kernel_info.rx_filters; + if (copy_to_user(useraddr, &info, sizeof(info))) return -EFAULT; diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index 57d496287e52..03d12d6f79ca 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -12,7 +12,7 @@ struct tsinfo_req_info { struct tsinfo_reply_data { struct ethnl_reply_data base; - struct ethtool_ts_info ts_info; + struct kernel_ethtool_ts_info ts_info; struct ethtool_ts_stats stats; }; @@ -55,7 +55,7 @@ static int tsinfo_reply_size(const struct ethnl_req_info *req_base, { const struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base); bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; - const struct ethtool_ts_info *ts_info = &data->ts_info; + const struct kernel_ethtool_ts_info *ts_info = &data->ts_info; int len = 0; int ret; @@ -136,7 +136,7 @@ static int tsinfo_fill_reply(struct sk_buff *skb, { const struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base); bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; - const struct ethtool_ts_info *ts_info = &data->ts_info; + const struct kernel_ethtool_ts_info *ts_info = &data->ts_info; int ret; if (ts_info->so_timestamping) { diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 3968d3f98e08..73981595f062 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -349,6 +349,7 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb, { struct udphdr *uh; unsigned int len; + struct xfrm_offload *xo = xfrm_offload(skb); len = skb->len + esp->tailen - skb_transport_offset(skb); if (len + sizeof(struct iphdr) > IP_MAX_MTU) @@ -360,7 +361,12 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb, uh->len = htons(len); uh->check = 0; - *skb_mac_header(skb) = IPPROTO_UDP; + /* For IPv4 ESP with UDP encapsulation, if xo is not null, the skb is in the crypto offload + * data path, which means that esp_output_udp_encap is called outside of the XFRM stack. + * In this case, the mac header doesn't point to the IPv4 protocol field, so don't set it. + */ + if (!xo || encap_type != UDP_ENCAP_ESPINUDP) + *skb_mac_header(skb) = IPPROTO_UDP; return (struct ip_esp_hdr *)(uh + 1); } diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index b3271957ad9a..a37d18858c72 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -264,6 +264,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ struct esp_info esp; bool hw_offload = true; __u32 seq; + int encap_type = 0; esp.inplace = true; @@ -296,8 +297,10 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ esp.esph = ip_esp_hdr(skb); + if (x->encap) + encap_type = x->encap->encap_type; - if (!hw_offload || !skb_is_gso(skb)) { + if (!hw_offload || !skb_is_gso(skb) || (hw_offload && encap_type == UDP_ENCAP_ESPINUDP)) { esp.nfrags = esp_output_head(x, skb, &esp); if (esp.nfrags < 0) return esp.nfrags; @@ -324,6 +327,18 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32)); + if (hw_offload && encap_type == UDP_ENCAP_ESPINUDP) { + /* In the XFRM stack, the encapsulation protocol is set to iphdr->protocol by + * setting *skb_mac_header(skb) (see esp_output_udp_encap()) where skb->mac_header + * points to iphdr->protocol (see xfrm4_tunnel_encap_add()). + * However, in esp_xmit(), skb->mac_header doesn't point to iphdr->protocol. + * Therefore, the protocol field needs to be corrected. + */ + ip_hdr(skb)->protocol = IPPROTO_UDP; + + esph->seq_no = htonl(seq); + } + ip_hdr(skb)->tot_len = htons(skb->len); ip_send_check(ip_hdr(skb)); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e03fb9a1dbeb..90d2c7e3f5e9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1060,6 +1060,7 @@ static const struct ipv6_stub ipv6_stub_impl = { .nd_tbl = &nd_tbl, .ipv6_fragment = ip6_fragment, .ipv6_dev_find = ipv6_dev_find, + .ip6_xmit = ip6_xmit, }; static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 2f1ea5f999a2..b1d81c4270ab 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -290,8 +290,14 @@ int __init xfrm6_init(void) ret = register_pernet_subsys(&xfrm6_net_ops); if (ret) goto out_protocol; + + ret = xfrm_nat_keepalive_init(AF_INET6); + if (ret) + goto out_nat_keepalive; out: return ret; +out_nat_keepalive: + unregister_pernet_subsys(&xfrm6_net_ops); out_protocol: xfrm6_protocol_fini(); out_state: @@ -303,6 +309,7 @@ out_policy: void xfrm6_fini(void) { + xfrm_nat_keepalive_fini(AF_INET6); unregister_pernet_subsys(&xfrm6_net_ops); xfrm6_protocol_fini(); xfrm6_policy_fini(); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index b284a06b5a75..cc2df9f8c14a 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1610,7 +1610,7 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb, if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { const struct ethtool_ops *ops = dev->ethtool_ops; - struct ethtool_ts_info info = { + struct kernel_ethtool_ts_info info = { .cmd = ETHTOOL_GET_TS_INFO, .phc_index = -1, }; diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 547cec77ba03..512e0b2f8514 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -13,7 +13,8 @@ endif obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o \ - xfrm_sysctl.o xfrm_replay.o xfrm_device.o + xfrm_sysctl.o xfrm_replay.o xfrm_device.o \ + xfrm_nat_keepalive.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index 703d4172c7d7..91357ccaf4af 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -131,6 +131,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = { [XFRMA_IF_ID] = { .type = NLA_U32 }, [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 }, [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT), + [XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 }, }; static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb, @@ -280,9 +281,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) case XFRMA_IF_ID: case XFRMA_MTIMER_THRESH: case XFRMA_SA_DIR: + case XFRMA_NAT_KEEPALIVE_INTERVAL: return xfrm_nla_cpy(dst, src, nla_len(src)); default: - BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_DIR); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL); pr_warn_once("unsupported nla_type %d\n", src->nla_type); return -EOPNOTSUPP; } @@ -437,7 +439,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, int err; if (type > XFRMA_MAX) { - BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_DIR); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL); NL_SET_ERR_MSG(extack, "Bad attribute"); return -EOPNOTSUPP; } diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 2455a76a1cff..9a44d363ba62 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -261,9 +261,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET; - /* We don't yet support UDP encapsulation and TFC padding. */ - if ((!is_packet_offload && x->encap) || x->tfcpad) { - NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded"); + /* We don't yet support TFC padding. */ + if (x->tfcpad) { + NL_SET_ERR_MSG(extack, "TFC padding can't be offloaded"); return -EINVAL; } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index d2ea18dcb0cb..7cee9c0a2cdc 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -471,7 +471,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) struct xfrm_offload *xo = xfrm_offload(skb); struct sec_path *sp; - if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) { + if (encap_type < 0 || (xo && (xo->flags & XFRM_GRO || encap_type == 0 || + encap_type == UDP_ENCAP_ESPINUDP))) { x = xfrm_input_state(skb); if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) { diff --git a/net/xfrm/xfrm_nat_keepalive.c b/net/xfrm/xfrm_nat_keepalive.c new file mode 100644 index 000000000000..82f0a301683f --- /dev/null +++ b/net/xfrm/xfrm_nat_keepalive.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * xfrm_nat_keepalive.c + * + * (c) 2024 Eyal Birger <eyal.birger@gmail.com> + */ + +#include <net/inet_common.h> +#include <net/ip6_checksum.h> +#include <net/xfrm.h> + +static DEFINE_PER_CPU(struct sock *, nat_keepalive_sk_ipv4); +#if IS_ENABLED(CONFIG_IPV6) +static DEFINE_PER_CPU(struct sock *, nat_keepalive_sk_ipv6); +#endif + +struct nat_keepalive { + struct net *net; + u16 family; + xfrm_address_t saddr; + xfrm_address_t daddr; + __be16 encap_sport; + __be16 encap_dport; + __u32 smark; +}; + +static void nat_keepalive_init(struct nat_keepalive *ka, struct xfrm_state *x) +{ + ka->net = xs_net(x); + ka->family = x->props.family; + ka->saddr = x->props.saddr; + ka->daddr = x->id.daddr; + ka->encap_sport = x->encap->encap_sport; + ka->encap_dport = x->encap->encap_dport; + ka->smark = xfrm_smark_get(0, x); +} + +static int nat_keepalive_send_ipv4(struct sk_buff *skb, + struct nat_keepalive *ka) +{ + struct net *net = ka->net; + struct flowi4 fl4; + struct rtable *rt; + struct sock *sk; + __u8 tos = 0; + int err; + + flowi4_init_output(&fl4, 0 /* oif */, skb->mark, tos, + RT_SCOPE_UNIVERSE, IPPROTO_UDP, 0, + ka->daddr.a4, ka->saddr.a4, ka->encap_dport, + ka->encap_sport, sock_net_uid(net, NULL)); + + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + skb_dst_set(skb, &rt->dst); + + sk = *this_cpu_ptr(&nat_keepalive_sk_ipv4); + sock_net_set(sk, net); + err = ip_build_and_send_pkt(skb, sk, fl4.saddr, fl4.daddr, NULL, tos); + sock_net_set(sk, &init_net); + return err; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int nat_keepalive_send_ipv6(struct sk_buff *skb, + struct nat_keepalive *ka, + struct udphdr *uh) +{ + struct net *net = ka->net; + struct dst_entry *dst; + struct flowi6 fl6; + struct sock *sk; + __wsum csum; + int err; + + csum = skb_checksum(skb, 0, skb->len, 0); + uh->check = csum_ipv6_magic(&ka->saddr.in6, &ka->daddr.in6, + skb->len, IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_mark = skb->mark; + fl6.saddr = ka->saddr.in6; + fl6.daddr = ka->daddr.in6; + fl6.flowi6_proto = IPPROTO_UDP; + fl6.fl6_sport = ka->encap_sport; + fl6.fl6_dport = ka->encap_dport; + + sk = *this_cpu_ptr(&nat_keepalive_sk_ipv6); + sock_net_set(sk, net); + dst = ipv6_stub->ipv6_dst_lookup_flow(net, sk, &fl6, NULL); + if (IS_ERR(dst)) + return PTR_ERR(dst); + + skb_dst_set(skb, dst); + err = ipv6_stub->ip6_xmit(sk, skb, &fl6, skb->mark, NULL, 0, 0); + sock_net_set(sk, &init_net); + return err; +} +#endif + +static void nat_keepalive_send(struct nat_keepalive *ka) +{ + const int nat_ka_hdrs_len = max(sizeof(struct iphdr), + sizeof(struct ipv6hdr)) + + sizeof(struct udphdr); + const u8 nat_ka_payload = 0xFF; + int err = -EAFNOSUPPORT; + struct sk_buff *skb; + struct udphdr *uh; + + skb = alloc_skb(nat_ka_hdrs_len + sizeof(nat_ka_payload), GFP_ATOMIC); + if (unlikely(!skb)) + return; + + skb_reserve(skb, nat_ka_hdrs_len); + + skb_put_u8(skb, nat_ka_payload); + + uh = skb_push(skb, sizeof(*uh)); + uh->source = ka->encap_sport; + uh->dest = ka->encap_dport; + uh->len = htons(skb->len); + uh->check = 0; + + skb->mark = ka->smark; + + switch (ka->family) { + case AF_INET: + err = nat_keepalive_send_ipv4(skb, ka); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + err = nat_keepalive_send_ipv6(skb, ka, uh); + break; +#endif + } + if (err) + kfree_skb(skb); +} + +struct nat_keepalive_work_ctx { + time64_t next_run; + time64_t now; +}; + +static int nat_keepalive_work_single(struct xfrm_state *x, int count, void *ptr) +{ + struct nat_keepalive_work_ctx *ctx = ptr; + bool send_keepalive = false; + struct nat_keepalive ka; + time64_t next_run; + u32 interval; + int delta; + + interval = x->nat_keepalive_interval; + if (!interval) + return 0; + + spin_lock(&x->lock); + + delta = (int)(ctx->now - x->lastused); + if (delta < interval) { + x->nat_keepalive_expiration = ctx->now + interval - delta; + next_run = x->nat_keepalive_expiration; + } else if (x->nat_keepalive_expiration > ctx->now) { + next_run = x->nat_keepalive_expiration; + } else { + next_run = ctx->now + interval; + nat_keepalive_init(&ka, x); + send_keepalive = true; + } + + spin_unlock(&x->lock); + + if (send_keepalive) + nat_keepalive_send(&ka); + + if (!ctx->next_run || next_run < ctx->next_run) + ctx->next_run = next_run; + return 0; +} + +static void nat_keepalive_work(struct work_struct *work) +{ + struct nat_keepalive_work_ctx ctx; + struct xfrm_state_walk walk; + struct net *net; + + ctx.next_run = 0; + ctx.now = ktime_get_real_seconds(); + + net = container_of(work, struct net, xfrm.nat_keepalive_work.work); + xfrm_state_walk_init(&walk, IPPROTO_ESP, NULL); + xfrm_state_walk(net, &walk, nat_keepalive_work_single, &ctx); + xfrm_state_walk_done(&walk, net); + if (ctx.next_run) + schedule_delayed_work(&net->xfrm.nat_keepalive_work, + (ctx.next_run - ctx.now) * HZ); +} + +static int nat_keepalive_sk_init(struct sock * __percpu *socks, + unsigned short family) +{ + struct sock *sk; + int err, i; + + for_each_possible_cpu(i) { + err = inet_ctl_sock_create(&sk, family, SOCK_RAW, IPPROTO_UDP, + &init_net); + if (err < 0) + goto err; + + *per_cpu_ptr(socks, i) = sk; + } + + return 0; +err: + for_each_possible_cpu(i) + inet_ctl_sock_destroy(*per_cpu_ptr(socks, i)); + return err; +} + +static void nat_keepalive_sk_fini(struct sock * __percpu *socks) +{ + int i; + + for_each_possible_cpu(i) + inet_ctl_sock_destroy(*per_cpu_ptr(socks, i)); +} + +void xfrm_nat_keepalive_state_updated(struct xfrm_state *x) +{ + struct net *net; + + if (!x->nat_keepalive_interval) + return; + + net = xs_net(x); + schedule_delayed_work(&net->xfrm.nat_keepalive_work, 0); +} + +int __net_init xfrm_nat_keepalive_net_init(struct net *net) +{ + INIT_DELAYED_WORK(&net->xfrm.nat_keepalive_work, nat_keepalive_work); + return 0; +} + +int xfrm_nat_keepalive_net_fini(struct net *net) +{ + cancel_delayed_work_sync(&net->xfrm.nat_keepalive_work); + return 0; +} + +int xfrm_nat_keepalive_init(unsigned short family) +{ + int err = -EAFNOSUPPORT; + + switch (family) { + case AF_INET: + err = nat_keepalive_sk_init(&nat_keepalive_sk_ipv4, PF_INET); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + err = nat_keepalive_sk_init(&nat_keepalive_sk_ipv6, PF_INET6); + break; +#endif + } + + if (err) + pr_err("xfrm nat keepalive init: failed to init err:%d\n", err); + return err; +} +EXPORT_SYMBOL_GPL(xfrm_nat_keepalive_init); + +void xfrm_nat_keepalive_fini(unsigned short family) +{ + switch (family) { + case AF_INET: + nat_keepalive_sk_fini(&nat_keepalive_sk_ipv4); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + nat_keepalive_sk_fini(&nat_keepalive_sk_ipv6); + break; +#endif + } +} +EXPORT_SYMBOL_GPL(xfrm_nat_keepalive_fini); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 66e07de2de35..b35e11d80c27 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3718,12 +3718,15 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id); if (!pol) { + const bool is_crypto_offload = sp && + (xfrm_input_state(skb)->xso.type == XFRM_DEV_OFFLOAD_CRYPTO); + if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; } - if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) { + if (sp && secpath_has_nontransport(sp, 0, &xerr_idx) && !is_crypto_offload) { xfrm_secpath_reject(xerr_idx, skb, &fl); XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; @@ -4284,8 +4287,14 @@ static int __net_init xfrm_net_init(struct net *net) if (rv < 0) goto out_sysctl; + rv = xfrm_nat_keepalive_net_init(net); + if (rv < 0) + goto out_nat_keepalive; + return 0; +out_nat_keepalive: + xfrm_sysctl_fini(net); out_sysctl: xfrm_policy_fini(net); out_policy: @@ -4298,6 +4307,7 @@ out_statistics: static void __net_exit xfrm_net_exit(struct net *net) { + xfrm_nat_keepalive_net_fini(net); xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); @@ -4359,6 +4369,7 @@ void __init xfrm_init(void) #endif register_xfrm_state_bpf(); + xfrm_nat_keepalive_init(AF_INET); } #ifdef CONFIG_AUDITSYSCALL diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 649bb739df0d..abadc857cd45 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -715,6 +715,7 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->id.spi) hlist_del_rcu(&x->byspi); net->xfrm.state_num--; + xfrm_nat_keepalive_state_updated(x); spin_unlock(&net->xfrm.xfrm_state_lock); if (x->encap_sk) @@ -1453,6 +1454,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) net->xfrm.state_num++; xfrm_hash_grow_check(net, x->bydst.next != NULL); + xfrm_nat_keepalive_state_updated(x); } /* net->xfrm.xfrm_state_lock is held */ @@ -2871,6 +2873,21 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload, goto error; } + if (x->nat_keepalive_interval) { + if (x->dir != XFRM_SA_DIR_OUT) { + NL_SET_ERR_MSG(extack, "NAT keepalive is only supported for outbound SAs"); + err = -EINVAL; + goto error; + } + + if (!x->encap || x->encap->encap_type != UDP_ENCAP_ESPINUDP) { + NL_SET_ERR_MSG(extack, + "NAT keepalive is only supported for UDP encapsulation"); + err = -EINVAL; + goto error; + } + } + error: return err; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e83c687bd64e..a552cfa623ea 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -833,6 +833,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, if (attrs[XFRMA_SA_DIR]) x->dir = nla_get_u8(attrs[XFRMA_SA_DIR]); + if (attrs[XFRMA_NAT_KEEPALIVE_INTERVAL]) + x->nat_keepalive_interval = + nla_get_u32(attrs[XFRMA_NAT_KEEPALIVE_INTERVAL]); + err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack); if (err) goto error; @@ -1288,6 +1292,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x, } if (x->dir) ret = nla_put_u8(skb, XFRMA_SA_DIR, x->dir); + + if (x->nat_keepalive_interval) { + ret = nla_put_u32(skb, XFRMA_NAT_KEEPALIVE_INTERVAL, + x->nat_keepalive_interval); + if (ret) + goto out; + } out: return ret; } @@ -3165,6 +3176,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_IF_ID] = { .type = NLA_U32 }, [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 }, [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT), + [XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 }, }; EXPORT_SYMBOL_GPL(xfrma_policy); @@ -3474,6 +3486,9 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) if (x->dir) l += nla_total_size(sizeof(x->dir)); + if (x->nat_keepalive_interval) + l += nla_total_size(sizeof(x->nat_keepalive_interval)); + return l; } |
