summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-04-04 09:15:35 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-04-04 09:15:35 -0700
commit61f96e684edd28ca40555ec49ea1555df31ba619 (patch)
tree5042c3c391a8bcb6fc5bab65253adcd23d30a366 /net
parent96364527357980ea68bb8bc7ec1490e22b9ed0cd (diff)
parent94f68c0f99a548d33a102672690100bf76a7c460 (diff)
Merge tag 'net-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski: "Including fixes from netfilter. Current release - regressions: - four fixes for the netdev per-instance locking Current release - new code bugs: - consolidate more code between existing Rx zero-copy and uring so that the latter doesn't miss / have to duplicate the safety checks Previous releases - regressions: - ipv6: fix omitted Netlink attributes when using SKIP_STATS Previous releases - always broken: - net: fix geneve_opt length integer overflow - udp: fix multiple wrap arounds of sk->sk_rmem_alloc when it approaches INT_MAX - dsa: mvpp2: add a lock to avoid corruption of the shared TCAM - dsa: airoha: fix issues with traffic QoS configuration / offload, and flow table offload Misc: - touch up the Netlink YAML specs of old families to make them usable for user space C codegen" * tag 'net-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (56 commits) selftests: net: amt: indicate progress in the stress test netlink: specs: rt_route: pull the ifa- prefix out of the names netlink: specs: rt_addr: pull the ifa- prefix out of the names netlink: specs: rt_addr: fix get multi command name netlink: specs: rt_addr: fix the spec format / schema failures net: avoid false positive warnings in __net_mp_close_rxq() net: move mp dev config validation to __net_mp_open_rxq() net: ibmveth: make veth_pool_store stop hanging arcnet: Add NULL check in com20020pci_probe() ipv6: Do not consider link down nexthops in path selection ipv6: Start path selection from the first nexthop usbnet:fix NPE during rx_complete net: octeontx2: Handle XDP_ABORTED and XDP invalid as XDP_DROP net: fix geneve_opt length integer overflow io_uring/zcrx: fix selftests w/ updated netdev Python helpers selftests: net: use netdevsim in netns test docs: net: document netdev notifier expectations net: dummy: request ops lock netdevsim: add dummy device notifiers net: rename rtnl_net_debug to lock_debug ...
Diffstat (limited to 'net')
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/dev.c15
-rw-r--r--net/core/dev_api.c8
-rw-r--r--net/core/devmem.c62
-rw-r--r--net/core/dst.c8
-rw-r--r--net/core/lock_debug.c (renamed from net/core/rtnl_net_debug.c)16
-rw-r--r--net/core/netdev-genl.c6
-rw-r--r--net/core/netdev_rx_queue.c53
-rw-r--r--net/core/rtnetlink.c8
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/ip_tunnel_core.c4
-rw-r--r--net/ipv4/udp.c42
-rw-r--r--net/ipv6/addrconf.c52
-rw-r--r--net/ipv6/calipso.c21
-rw-r--r--net/ipv6/route.c42
-rw-r--r--net/netfilter/nf_tables_api.c4
-rw-r--r--net/netfilter/nft_set_hash.c3
-rw-r--r--net/netfilter/nft_tunnel.c6
-rw-r--r--net/openvswitch/actions.c6
-rw-r--r--net/sched/act_tunnel_key.c2
-rw-r--r--net/sched/cls_flower.c2
-rw-r--r--net/sched/sch_skbprio.c3
-rw-r--r--net/sctp/sysctl.c4
-rw-r--r--net/vmw_vsock/af_vsock.c6
24 files changed, 231 insertions, 146 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index a10c3bd96798..b2a76ce33932 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -45,5 +45,5 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
obj-$(CONFIG_OF) += of_net.o
obj-$(CONFIG_NET_TEST) += net_test.o
obj-$(CONFIG_NET_DEVMEM) += devmem.o
-obj-$(CONFIG_DEBUG_NET_SMALL_RTNL) += rtnl_net_debug.o
+obj-$(CONFIG_DEBUG_NET) += lock_debug.o
obj-$(CONFIG_FAIL_SKB_REALLOC) += skb_fault_injection.o
diff --git a/net/core/dev.c b/net/core/dev.c
index be17e0660144..0608605cfc24 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1771,6 +1771,7 @@ void netif_disable_lro(struct net_device *dev)
netdev_unlock_ops(lower_dev);
}
}
+EXPORT_IPV6_MOD(netif_disable_lro);
/**
* dev_disable_gro_hw - disable HW Generic Receive Offload on a device
@@ -1858,7 +1859,9 @@ static int call_netdevice_register_net_notifiers(struct notifier_block *nb,
int err;
for_each_netdev(net, dev) {
+ netdev_lock_ops(dev);
err = call_netdevice_register_notifiers(nb, dev);
+ netdev_unlock_ops(dev);
if (err)
goto rollback;
}
@@ -10284,7 +10287,9 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
goto unlock;
}
+ netdev_lock_ops(dev);
err = dev_xdp_attach_link(dev, &extack, link);
+ netdev_unlock_ops(dev);
rtnl_unlock();
if (err) {
@@ -11045,7 +11050,9 @@ int register_netdevice(struct net_device *dev)
memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
/* Notify protocols, that a new device appeared. */
+ netdev_lock_ops(dev);
ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
+ netdev_unlock_ops(dev);
ret = notifier_to_errno(ret);
if (ret) {
/* Expect explicit free_netdev() on failure */
@@ -12057,7 +12064,7 @@ void unregister_netdev(struct net_device *dev)
}
EXPORT_SYMBOL(unregister_netdev);
-int netif_change_net_namespace(struct net_device *dev, struct net *net,
+int __dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat, int new_ifindex,
struct netlink_ext_ack *extack)
{
@@ -12142,11 +12149,12 @@ int netif_change_net_namespace(struct net_device *dev, struct net *net,
* And now a mini version of register_netdevice unregister_netdevice.
*/
+ netdev_lock_ops(dev);
/* If device is running close it first. */
netif_close(dev);
-
/* And unlink it from device chain */
unlist_netdevice(dev);
+ netdev_unlock_ops(dev);
synchronize_net();
@@ -12208,11 +12216,12 @@ int netif_change_net_namespace(struct net_device *dev, struct net *net,
err = netdev_change_owner(dev, net_old, net);
WARN_ON(err);
+ netdev_lock_ops(dev);
/* Add the device back in the hashes */
list_netdevice(dev);
-
/* Notify protocols, that a new device appeared. */
call_netdevice_notifiers(NETDEV_REGISTER, dev);
+ netdev_unlock_ops(dev);
/*
* Prevent userspace races by waiting until the network
diff --git a/net/core/dev_api.c b/net/core/dev_api.c
index 8dbc60612100..90bafb0b1b8c 100644
--- a/net/core/dev_api.c
+++ b/net/core/dev_api.c
@@ -117,13 +117,7 @@ EXPORT_SYMBOL(dev_set_mac_address_user);
int dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat)
{
- int ret;
-
- netdev_lock_ops(dev);
- ret = netif_change_net_namespace(dev, net, pat, 0, NULL);
- netdev_unlock_ops(dev);
-
- return ret;
+ return __dev_change_net_namespace(dev, net, pat, 0, NULL);
}
EXPORT_SYMBOL_GPL(dev_change_net_namespace);
diff --git a/net/core/devmem.c b/net/core/devmem.c
index ee145a2aa41c..6e27a47d0493 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -8,7 +8,6 @@
*/
#include <linux/dma-buf.h>
-#include <linux/ethtool_netlink.h>
#include <linux/genalloc.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
@@ -117,21 +116,19 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
struct netdev_rx_queue *rxq;
unsigned long xa_idx;
unsigned int rxq_idx;
- int err;
if (binding->list.next)
list_del(&binding->list);
xa_for_each(&binding->bound_rxqs, xa_idx, rxq) {
- WARN_ON(rxq->mp_params.mp_priv != binding);
-
- rxq->mp_params.mp_priv = NULL;
- rxq->mp_params.mp_ops = NULL;
+ const struct pp_memory_provider_params mp_params = {
+ .mp_priv = binding,
+ .mp_ops = &dmabuf_devmem_ops,
+ };
rxq_idx = get_netdev_rx_queue_index(rxq);
- err = netdev_rx_queue_restart(binding->dev, rxq_idx);
- WARN_ON(err && err != -ENETDOWN);
+ __net_mp_close_rxq(binding->dev, rxq_idx, &mp_params);
}
xa_erase(&net_devmem_dmabuf_bindings, binding->id);
@@ -143,57 +140,28 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
struct net_devmem_dmabuf_binding *binding,
struct netlink_ext_ack *extack)
{
+ struct pp_memory_provider_params mp_params = {
+ .mp_priv = binding,
+ .mp_ops = &dmabuf_devmem_ops,
+ };
struct netdev_rx_queue *rxq;
u32 xa_idx;
int err;
- if (rxq_idx >= dev->real_num_rx_queues) {
- NL_SET_ERR_MSG(extack, "rx queue index out of range");
- return -ERANGE;
- }
-
- if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
- NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
- return -EINVAL;
- }
-
- if (dev->cfg->hds_thresh) {
- NL_SET_ERR_MSG(extack, "hds-thresh is not zero");
- return -EINVAL;
- }
+ err = __net_mp_open_rxq(dev, rxq_idx, &mp_params, extack);
+ if (err)
+ return err;
rxq = __netif_get_rx_queue(dev, rxq_idx);
- if (rxq->mp_params.mp_ops) {
- NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
- return -EEXIST;
- }
-
-#ifdef CONFIG_XDP_SOCKETS
- if (rxq->pool) {
- NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP");
- return -EBUSY;
- }
-#endif
-
err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
GFP_KERNEL);
if (err)
- return err;
-
- rxq->mp_params.mp_priv = binding;
- rxq->mp_params.mp_ops = &dmabuf_devmem_ops;
-
- err = netdev_rx_queue_restart(dev, rxq_idx);
- if (err)
- goto err_xa_erase;
+ goto err_close_rxq;
return 0;
-err_xa_erase:
- rxq->mp_params.mp_priv = NULL;
- rxq->mp_params.mp_ops = NULL;
- xa_erase(&binding->bound_rxqs, xa_idx);
-
+err_close_rxq:
+ __net_mp_close_rxq(dev, rxq_idx, &mp_params);
return err;
}
diff --git a/net/core/dst.c b/net/core/dst.c
index c99b95cf9cbb..795ca07e28a4 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -165,6 +165,14 @@ static void dst_count_dec(struct dst_entry *dst)
void dst_release(struct dst_entry *dst)
{
if (dst && rcuref_put(&dst->__rcuref)) {
+#ifdef CONFIG_DST_CACHE
+ if (dst->flags & DST_METADATA) {
+ struct metadata_dst *md_dst = (struct metadata_dst *)dst;
+
+ if (md_dst->type == METADATA_IP_TUNNEL)
+ dst_cache_reset_now(&md_dst->u.tun_info.dst_cache);
+ }
+#endif
dst_count_dec(dst);
call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu);
}
diff --git a/net/core/rtnl_net_debug.c b/net/core/lock_debug.c
index 7ecd28cc1c22..b7f22dc92a6f 100644
--- a/net/core/rtnl_net_debug.c
+++ b/net/core/lock_debug.c
@@ -6,10 +6,11 @@
#include <linux/notifier.h>
#include <linux/rtnetlink.h>
#include <net/net_namespace.h>
+#include <net/netdev_lock.h>
#include <net/netns/generic.h>
-static int rtnl_net_debug_event(struct notifier_block *nb,
- unsigned long event, void *ptr)
+int netdev_debug_event(struct notifier_block *nb, unsigned long event,
+ void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
@@ -17,11 +18,13 @@ static int rtnl_net_debug_event(struct notifier_block *nb,
/* Keep enum and don't add default to trigger -Werror=switch */
switch (cmd) {
+ case NETDEV_REGISTER:
case NETDEV_UP:
+ netdev_ops_assert_locked(dev);
+ fallthrough;
case NETDEV_DOWN:
case NETDEV_REBOOT:
case NETDEV_CHANGE:
- case NETDEV_REGISTER:
case NETDEV_UNREGISTER:
case NETDEV_CHANGEMTU:
case NETDEV_CHANGEADDR:
@@ -66,6 +69,7 @@ static int rtnl_net_debug_event(struct notifier_block *nb,
return NOTIFY_DONE;
}
+EXPORT_SYMBOL_NS_GPL(netdev_debug_event, "NETDEV_INTERNAL");
static int rtnl_net_debug_net_id;
@@ -74,7 +78,7 @@ static int __net_init rtnl_net_debug_net_init(struct net *net)
struct notifier_block *nb;
nb = net_generic(net, rtnl_net_debug_net_id);
- nb->notifier_call = rtnl_net_debug_event;
+ nb->notifier_call = netdev_debug_event;
return register_netdevice_notifier_net(net, nb);
}
@@ -95,14 +99,14 @@ static struct pernet_operations rtnl_net_debug_net_ops __net_initdata = {
};
static struct notifier_block rtnl_net_debug_block = {
- .notifier_call = rtnl_net_debug_event,
+ .notifier_call = netdev_debug_event,
};
static int __init rtnl_net_debug_init(void)
{
int ret;
- ret = register_pernet_device(&rtnl_net_debug_net_ops);
+ ret = register_pernet_subsys(&rtnl_net_debug_net_ops);
if (ret)
return ret;
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 3afeaa8c5dc5..5d7af50fe702 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -874,12 +874,6 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
goto err_unlock;
}
- if (dev_xdp_prog_count(netdev)) {
- NL_SET_ERR_MSG(info->extack, "unable to bind dmabuf to device with XDP program attached");
- err = -EEXIST;
- goto err_unlock;
- }
-
binding = net_devmem_bind_dmabuf(netdev, dmabuf_fd, info->extack);
if (IS_ERR(binding)) {
err = PTR_ERR(binding);
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index 3af716f77a13..d126f10197bf 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/ethtool_netlink.h>
#include <linux/netdevice.h>
#include <net/netdev_lock.h>
#include <net/netdev_queues.h>
@@ -86,8 +87,9 @@ err_free_new_mem:
}
EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL");
-static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
- struct pp_memory_provider_params *p)
+int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
+ const struct pp_memory_provider_params *p,
+ struct netlink_ext_ack *extack)
{
struct netdev_rx_queue *rxq;
int ret;
@@ -95,16 +97,41 @@ static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
if (!netdev_need_ops_lock(dev))
return -EOPNOTSUPP;
- if (ifq_idx >= dev->real_num_rx_queues)
+ if (rxq_idx >= dev->real_num_rx_queues)
return -EINVAL;
- ifq_idx = array_index_nospec(ifq_idx, dev->real_num_rx_queues);
+ rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
- rxq = __netif_get_rx_queue(dev, ifq_idx);
- if (rxq->mp_params.mp_ops)
+ if (rxq_idx >= dev->real_num_rx_queues) {
+ NL_SET_ERR_MSG(extack, "rx queue index out of range");
+ return -ERANGE;
+ }
+ if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
+ NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
+ return -EINVAL;
+ }
+ if (dev->cfg->hds_thresh) {
+ NL_SET_ERR_MSG(extack, "hds-thresh is not zero");
+ return -EINVAL;
+ }
+ if (dev_xdp_prog_count(dev)) {
+ NL_SET_ERR_MSG(extack, "unable to custom memory provider to device with XDP program attached");
return -EEXIST;
+ }
+
+ rxq = __netif_get_rx_queue(dev, rxq_idx);
+ if (rxq->mp_params.mp_ops) {
+ NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
+ return -EEXIST;
+ }
+#ifdef CONFIG_XDP_SOCKETS
+ if (rxq->pool) {
+ NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP");
+ return -EBUSY;
+ }
+#endif
rxq->mp_params = *p;
- ret = netdev_rx_queue_restart(dev, ifq_idx);
+ ret = netdev_rx_queue_restart(dev, rxq_idx);
if (ret) {
rxq->mp_params.mp_ops = NULL;
rxq->mp_params.mp_priv = NULL;
@@ -112,21 +139,22 @@ static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
return ret;
}
-int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
+int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
struct pp_memory_provider_params *p)
{
int ret;
netdev_lock(dev);
- ret = __net_mp_open_rxq(dev, ifq_idx, p);
+ ret = __net_mp_open_rxq(dev, rxq_idx, p, NULL);
netdev_unlock(dev);
return ret;
}
-static void __net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
- struct pp_memory_provider_params *old_p)
+void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
+ const struct pp_memory_provider_params *old_p)
{
struct netdev_rx_queue *rxq;
+ int err;
if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
return;
@@ -146,7 +174,8 @@ static void __net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
rxq->mp_params.mp_ops = NULL;
rxq->mp_params.mp_priv = NULL;
- WARN_ON(netdev_rx_queue_restart(dev, ifq_idx));
+ err = netdev_rx_queue_restart(dev, ifq_idx);
+ WARN_ON(err && err != -ENETDOWN);
}
void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 334db17be37d..c23852835050 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3025,8 +3025,6 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev,
char ifname[IFNAMSIZ];
int err;
- netdev_lock_ops(dev);
-
err = validate_linkmsg(dev, tb, extack);
if (err < 0)
goto errout;
@@ -3042,14 +3040,16 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev,
new_ifindex = nla_get_s32_default(tb[IFLA_NEW_IFINDEX], 0);
- err = netif_change_net_namespace(dev, tgt_net, pat,
+ err = __dev_change_net_namespace(dev, tgt_net, pat,
new_ifindex, extack);
if (err)
- goto errout;
+ return err;
status |= DO_SETLINK_MODIFIED;
}
+ netdev_lock_ops(dev);
+
if (tb[IFLA_MAP]) {
struct rtnl_link_ifmap *u_map;
struct ifmap k_map;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 754f60fb6e25..77e5705ac799 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -281,7 +281,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
if (!in_dev->arp_parms)
goto out_kfree;
if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
- dev_disable_lro(dev);
+ netif_disable_lro(dev);
/* Reference in_dev->dev */
netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
/* Account for reference dev->ip_ptr (below) */
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index a3676155be78..f65d2f727381 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -416,7 +416,7 @@ int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst,
skb_dst_update_pmtu_no_confirm(skb, mtu);
- if (!reply || skb->pkt_type == PACKET_HOST)
+ if (!reply)
return 0;
if (skb->protocol == htons(ETH_P_IP))
@@ -451,7 +451,7 @@ static const struct nla_policy
geneve_opt_policy[LWTUNNEL_IP_OPT_GENEVE_MAX + 1] = {
[LWTUNNEL_IP_OPT_GENEVE_CLASS] = { .type = NLA_U16 },
[LWTUNNEL_IP_OPT_GENEVE_TYPE] = { .type = NLA_U8 },
- [LWTUNNEL_IP_OPT_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 },
+ [LWTUNNEL_IP_OPT_GENEVE_DATA] = { .type = NLA_BINARY, .len = 127 },
};
static const struct nla_policy
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d0bffcfa56d8..2742cc7602bb 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1625,12 +1625,12 @@ static bool udp_skb_has_head_state(struct sk_buff *skb)
}
/* fully reclaim rmem/fwd memory allocated for skb */
-static void udp_rmem_release(struct sock *sk, int size, int partial,
- bool rx_queue_lock_held)
+static void udp_rmem_release(struct sock *sk, unsigned int size,
+ int partial, bool rx_queue_lock_held)
{
struct udp_sock *up = udp_sk(sk);
struct sk_buff_head *sk_queue;
- int amt;
+ unsigned int amt;
if (likely(partial)) {
up->forward_deficit += size;
@@ -1650,10 +1650,8 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
if (!rx_queue_lock_held)
spin_lock(&sk_queue->lock);
-
- sk_forward_alloc_add(sk, size);
- amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1);
- sk_forward_alloc_add(sk, -amt);
+ amt = (size + sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1);
+ sk_forward_alloc_add(sk, size - amt);
if (amt)
__sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT);
@@ -1725,17 +1723,25 @@ static int udp_rmem_schedule(struct sock *sk, int size)
int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
{
struct sk_buff_head *list = &sk->sk_receive_queue;
- int rmem, err = -ENOMEM;
+ unsigned int rmem, rcvbuf;
spinlock_t *busy = NULL;
- int size, rcvbuf;
+ int size, err = -ENOMEM;
- /* Immediately drop when the receive queue is full.
- * Always allow at least one packet.
- */
rmem = atomic_read(&sk->sk_rmem_alloc);
rcvbuf = READ_ONCE(sk->sk_rcvbuf);
- if (rmem > rcvbuf)
- goto drop;
+ size = skb->truesize;
+
+ /* Immediately drop when the receive queue is full.
+ * Cast to unsigned int performs the boundary check for INT_MAX.
+ */
+ if (rmem + size > rcvbuf) {
+ if (rcvbuf > INT_MAX >> 1)
+ goto drop;
+
+ /* Always allow at least one packet for small buffer. */
+ if (rmem > rcvbuf)
+ goto drop;
+ }
/* Under mem pressure, it might be helpful to help udp_recvmsg()
* having linear skbs :
@@ -1745,10 +1751,10 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
*/
if (rmem > (rcvbuf >> 1)) {
skb_condense(skb);
-
+ size = skb->truesize;
busy = busylock_acquire(sk);
}
- size = skb->truesize;
+
udp_set_dev_scratch(skb);
atomic_add(size, &sk->sk_rmem_alloc);
@@ -1835,7 +1841,7 @@ EXPORT_IPV6_MOD_GPL(skb_consume_udp);
static struct sk_buff *__first_packet_length(struct sock *sk,
struct sk_buff_head *rcvq,
- int *total)
+ unsigned int *total)
{
struct sk_buff *skb;
@@ -1868,8 +1874,8 @@ static int first_packet_length(struct sock *sk)
{
struct sk_buff_head *rcvq = &udp_sk(sk)->reader_queue;
struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
+ unsigned int total = 0;
struct sk_buff *skb;
- int total = 0;
int res;
spin_lock_bh(&rcvq->lock);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ac8cc1076536..c3b908fccbc1 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -80,6 +80,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/l3mdev.h>
+#include <net/netdev_lock.h>
#include <linux/if_tunnel.h>
#include <linux/rtnetlink.h>
#include <linux/netconf.h>
@@ -377,6 +378,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
int err = -ENOMEM;
ASSERT_RTNL();
+ netdev_ops_assert_locked(dev);
if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev)
return ERR_PTR(-EINVAL);
@@ -402,7 +404,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
return ERR_PTR(err);
}
if (ndev->cnf.forwarding)
- dev_disable_lro(dev);
+ netif_disable_lro(dev);
/* We refer to the device */
netdev_hold(dev, &ndev->dev_tracker, GFP_KERNEL);
@@ -3152,10 +3154,12 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
rtnl_net_lock(net);
dev = __dev_get_by_index(net, ireq.ifr6_ifindex);
+ netdev_lock_ops(dev);
if (dev)
err = inet6_addr_add(net, dev, &cfg, 0, 0, NULL);
else
err = -ENODEV;
+ netdev_unlock_ops(dev);
rtnl_net_unlock(net);
return err;
}
@@ -5026,9 +5030,10 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!dev) {
NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface");
err = -ENODEV;
- goto unlock;
+ goto unlock_rtnl;
}
+ netdev_lock_ops(dev);
idev = ipv6_find_idev(dev);
if (IS_ERR(idev)) {
err = PTR_ERR(idev);
@@ -5065,6 +5070,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
in6_ifa_put(ifa);
unlock:
+ netdev_unlock_ops(dev);
+unlock_rtnl:
rtnl_net_unlock(net);
return err;
@@ -5784,6 +5791,27 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
}
}
+static int inet6_fill_ifla6_stats_attrs(struct sk_buff *skb,
+ struct inet6_dev *idev)
+{
+ struct nlattr *nla;
+
+ nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
+ if (!nla)
+ goto nla_put_failure;
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
+
+ nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
+ if (!nla)
+ goto nla_put_failure;
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
u32 ext_filter_mask)
{
@@ -5806,18 +5834,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
/* XXX - MC not implemented */
- if (ext_filter_mask & RTEXT_FILTER_SKIP_STATS)
- return 0;
-
- nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
- if (!nla)
- goto nla_put_failure;
- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
-
- nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
- if (!nla)
- goto nla_put_failure;
- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
+ if (!(ext_filter_mask & RTEXT_FILTER_SKIP_STATS)) {
+ if (inet6_fill_ifla6_stats_attrs(skb, idev) < 0)
+ goto nla_put_failure;
+ }
nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
if (!nla)
@@ -6503,7 +6523,9 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write,
if (idev->cnf.addr_gen_mode != new_val) {
WRITE_ONCE(idev->cnf.addr_gen_mode, new_val);
+ netdev_lock_ops(idev->dev);
addrconf_init_auto_addrs(idev->dev);
+ netdev_unlock_ops(idev->dev);
}
} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
struct net_device *dev;
@@ -6515,7 +6537,9 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write,
idev->cnf.addr_gen_mode != new_val) {
WRITE_ONCE(idev->cnf.addr_gen_mode,
new_val);
+ netdev_lock_ops(idev->dev);
addrconf_init_auto_addrs(idev->dev);
+ netdev_unlock_ops(idev->dev);
}
}
}
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index dbcea9fee626..62618a058b8f 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -1072,8 +1072,13 @@ static int calipso_sock_getattr(struct sock *sk,
struct ipv6_opt_hdr *hop;
int opt_len, len, ret_val = -ENOMSG, offset;
unsigned char *opt;
- struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk));
+ struct ipv6_pinfo *pinfo = inet6_sk(sk);
+ struct ipv6_txoptions *txopts;
+
+ if (!pinfo)
+ return -EAFNOSUPPORT;
+ txopts = txopt_get(pinfo);
if (!txopts || !txopts->hopopt)
goto done;
@@ -1125,8 +1130,13 @@ static int calipso_sock_setattr(struct sock *sk,
{
int ret_val;
struct ipv6_opt_hdr *old, *new;
- struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk));
+ struct ipv6_pinfo *pinfo = inet6_sk(sk);
+ struct ipv6_txoptions *txopts;
+
+ if (!pinfo)
+ return -EAFNOSUPPORT;
+ txopts = txopt_get(pinfo);
old = NULL;
if (txopts)
old = txopts->hopopt;
@@ -1153,8 +1163,13 @@ static int calipso_sock_setattr(struct sock *sk,
static void calipso_sock_delattr(struct sock *sk)
{
struct ipv6_opt_hdr *new_hop;
- struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk));
+ struct ipv6_pinfo *pinfo = inet6_sk(sk);
+ struct ipv6_txoptions *txopts;
+
+ if (!pinfo)
+ return;
+ txopts = txopt_get(pinfo);
if (!txopts || !txopts->hopopt)
goto done;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c3406a0d45bd..ab12b816ab94 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -412,12 +412,37 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
}
+static struct fib6_info *
+rt6_multipath_first_sibling_rcu(const struct fib6_info *rt)
+{
+ struct fib6_info *iter;
+ struct fib6_node *fn;
+
+ fn = rcu_dereference(rt->fib6_node);
+ if (!fn)
+ goto out;
+ iter = rcu_dereference(fn->leaf);
+ if (!iter)
+ goto out;
+
+ while (iter) {
+ if (iter->fib6_metric == rt->fib6_metric &&
+ rt6_qualify_for_ecmp(iter))
+ return iter;
+ iter = rcu_dereference(iter->fib6_next);
+ }
+
+out:
+ return NULL;
+}
+
void fib6_select_path(const struct net *net, struct fib6_result *res,
struct flowi6 *fl6, int oif, bool have_oif_match,
const struct sk_buff *skb, int strict)
{
- struct fib6_info *match = res->f6i;
+ struct fib6_info *first, *match = res->f6i;
struct fib6_info *sibling;
+ int hash;
if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
goto out;
@@ -440,16 +465,25 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
return;
}
- if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
+ first = rt6_multipath_first_sibling_rcu(match);
+ if (!first)
goto out;
- list_for_each_entry_rcu(sibling, &match->fib6_siblings,
+ hash = fl6->mp_hash;
+ if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound) &&
+ rt6_score_route(first->fib6_nh, first->fib6_flags, oif,
+ strict) >= 0) {
+ match = first;
+ goto out;
+ }
+
+ list_for_each_entry_rcu(sibling, &first->fib6_siblings,
fib6_siblings) {
const struct fib6_nh *nh = sibling->fib6_nh;
int nh_upper_bound;
nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
- if (fl6->mp_hash > nh_upper_bound)
+ if (hash > nh_upper_bound)
continue;
if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
break;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c2df81b7e950..a133e1c175ce 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2839,11 +2839,11 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
err = nft_netdev_register_hooks(ctx->net, &hook.list);
if (err < 0)
goto err_hooks;
+
+ unregister = true;
}
}
- unregister = true;
-
if (nla[NFTA_CHAIN_COUNTERS]) {
if (!nft_is_base_chain(chain)) {
err = -EOPNOTSUPP;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 8bfac4185ac7..abb0c8ec6371 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -309,7 +309,8 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set,
nft_setelem_expr_foreach(expr, elem_expr, size) {
if (expr->ops->gc &&
- expr->ops->gc(read_pnet(&set->net), expr))
+ expr->ops->gc(read_pnet(&set->net), expr) &&
+ set->flags & NFT_SET_EVAL)
return true;
}
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 681301b46aa4..0c63d1367cf7 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -335,13 +335,13 @@ static int nft_tunnel_obj_erspan_init(const struct nlattr *attr,
static const struct nla_policy nft_tunnel_opts_geneve_policy[NFTA_TUNNEL_KEY_GENEVE_MAX + 1] = {
[NFTA_TUNNEL_KEY_GENEVE_CLASS] = { .type = NLA_U16 },
[NFTA_TUNNEL_KEY_GENEVE_TYPE] = { .type = NLA_U8 },
- [NFTA_TUNNEL_KEY_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 },
+ [NFTA_TUNNEL_KEY_GENEVE_DATA] = { .type = NLA_BINARY, .len = 127 },
};
static int nft_tunnel_obj_geneve_init(const struct nlattr *attr,
struct nft_tunnel_opts *opts)
{
- struct geneve_opt *opt = (struct geneve_opt *)opts->u.data + opts->len;
+ struct geneve_opt *opt = (struct geneve_opt *)(opts->u.data + opts->len);
struct nlattr *tb[NFTA_TUNNEL_KEY_GENEVE_MAX + 1];
int err, data_len;
@@ -625,7 +625,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
if (!inner)
goto failure;
while (opts->len > offset) {
- opt = (struct geneve_opt *)opts->u.data + offset;
+ opt = (struct geneve_opt *)(opts->u.data + offset);
if (nla_put_be16(skb, NFTA_TUNNEL_KEY_GENEVE_CLASS,
opt->opt_class) ||
nla_put_u8(skb, NFTA_TUNNEL_KEY_GENEVE_TYPE,
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 704c858cf209..61fea7baae5d 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -947,12 +947,6 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
pskb_trim(skb, ovs_mac_header_len(key));
}
- /* Need to set the pkt_type to involve the routing layer. The
- * packet movement through the OVS datapath doesn't generally
- * use routing, but this is needed for tunnel cases.
- */
- skb->pkt_type = PACKET_OUTGOING;
-
if (likely(!mru ||
(skb->len <= mru + vport->dev->hard_header_len))) {
ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index ae5dea7c48a8..2cef4b08befb 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -68,7 +68,7 @@ geneve_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1] = {
[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 },
[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 },
[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY,
- .len = 128 },
+ .len = 127 },
};
static const struct nla_policy
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 03505673d523..099ff6a3e1f5 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -766,7 +766,7 @@ geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY,
- .len = 128 },
+ .len = 127 },
};
static const struct nla_policy
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index 20ff7386b74b..f485f62ab721 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -123,8 +123,6 @@ static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* Check to update highest and lowest priorities. */
if (skb_queue_empty(lp_qdisc)) {
if (q->lowest_prio == q->highest_prio) {
- /* The incoming packet is the only packet in queue. */
- BUG_ON(sch->q.qlen != 1);
q->lowest_prio = prio;
q->highest_prio = prio;
} else {
@@ -156,7 +154,6 @@ static struct sk_buff *skbprio_dequeue(struct Qdisc *sch)
/* Update highest priority field. */
if (skb_queue_empty(hpq)) {
if (q->lowest_prio == q->highest_prio) {
- BUG_ON(sch->q.qlen);
q->highest_prio = 0;
q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
} else {
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 8e1e97be4df7..ee3eac338a9d 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -525,6 +525,8 @@ static int proc_sctp_do_auth(const struct ctl_table *ctl, int write,
return ret;
}
+static DEFINE_MUTEX(sctp_sysctl_mutex);
+
static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
@@ -549,6 +551,7 @@ static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write,
if (new_value > max || new_value < min)
return -EINVAL;
+ mutex_lock(&sctp_sysctl_mutex);
net->sctp.udp_port = new_value;
sctp_udp_sock_stop(net);
if (new_value) {
@@ -561,6 +564,7 @@ static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write,
lock_sock(sk);
sctp_sk(sk)->udp_port = htons(net->sctp.udp_port);
release_sock(sk);
+ mutex_unlock(&sctp_sysctl_mutex);
}
return ret;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 7e3db87ae433..fc6afbc8d680 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1551,7 +1551,11 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
timeout = vsk->connect_timeout;
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) {
+ /* If the socket is already closing or it is in an error state, there
+ * is no point in waiting.
+ */
+ while (sk->sk_state != TCP_ESTABLISHED &&
+ sk->sk_state != TCP_CLOSING && sk->sk_err == 0) {
if (flags & O_NONBLOCK) {
/* If we're not going to block, we schedule a timeout
* function to generate a timeout on the connection