summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c31
-rw-r--r--net/core/devmem.c93
-rw-r--r--net/core/devmem.h49
-rw-r--r--net/core/neighbour.c9
-rw-r--r--net/core/net-sysfs.c392
-rw-r--r--net/core/netdev-genl.c11
-rw-r--r--net/core/netdev_rx_queue.c69
-rw-r--r--net/core/page_pool.c51
-rw-r--r--net/core/page_pool_user.c7
-rw-r--r--net/core/rtnetlink.c5
10 files changed, 509 insertions, 208 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index b91658e8aedb..d5ab9a4b318e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -159,6 +159,7 @@
#include <net/netdev_rx_queue.h>
#include <net/page_pool/types.h>
#include <net/page_pool/helpers.h>
+#include <net/page_pool/memory_provider.h>
#include <net/rps.h>
#include <linux/phy_link_topology.h>
@@ -6119,16 +6120,18 @@ EXPORT_SYMBOL(netif_receive_skb_list);
static void flush_backlog(struct work_struct *work)
{
struct sk_buff *skb, *tmp;
+ struct sk_buff_head list;
struct softnet_data *sd;
+ __skb_queue_head_init(&list);
local_bh_disable();
sd = this_cpu_ptr(&softnet_data);
backlog_lock_irq_disable(sd);
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
- if (skb->dev->reg_state == NETREG_UNREGISTERING) {
+ if (READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->input_pkt_queue);
- dev_kfree_skb_irq(skb);
+ __skb_queue_tail(&list, skb);
rps_input_queue_head_incr(sd);
}
}
@@ -6136,14 +6139,16 @@ static void flush_backlog(struct work_struct *work)
local_lock_nested_bh(&softnet_data.process_queue_bh_lock);
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
- if (skb->dev->reg_state == NETREG_UNREGISTERING) {
+ if (READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->process_queue);
- kfree_skb(skb);
+ __skb_queue_tail(&list, skb);
rps_input_queue_head_incr(sd);
}
}
local_unlock_nested_bh(&softnet_data.process_queue_bh_lock);
local_bh_enable();
+
+ __skb_queue_purge_reason(&list, SKB_DROP_REASON_DEV_READY);
}
static bool flush_required(int cpu)
@@ -7071,6 +7076,9 @@ void __netif_napi_del_locked(struct napi_struct *napi)
if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
return;
+ /* Make sure NAPI is disabled (or was never enabled). */
+ WARN_ON(!test_bit(NAPI_STATE_SCHED, &napi->state));
+
if (napi->config) {
napi->index = -1;
napi->config = NULL;
@@ -11738,6 +11746,19 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
}
EXPORT_SYMBOL(unregister_netdevice_queue);
+static void dev_memory_provider_uninstall(struct net_device *dev)
+{
+ unsigned int i;
+
+ for (i = 0; i < dev->real_num_rx_queues; i++) {
+ struct netdev_rx_queue *rxq = &dev->_rx[i];
+ struct pp_memory_provider_params *p = &rxq->mp_params;
+
+ if (p->mp_ops && p->mp_ops->uninstall)
+ p->mp_ops->uninstall(rxq->mp_params.mp_priv, rxq);
+ }
+}
+
void unregister_netdevice_many_notify(struct list_head *head,
u32 portid, const struct nlmsghdr *nlh)
{
@@ -11792,7 +11813,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev_tcx_uninstall(dev);
dev_xdp_uninstall(dev);
bpf_dev_bound_netdev_unregister(dev);
- dev_dmabuf_uninstall(dev);
+ dev_memory_provider_uninstall(dev);
netdev_offload_xstats_disable_all(dev);
diff --git a/net/core/devmem.c b/net/core/devmem.c
index 3bba3f018df0..7c6e0b5b6acb 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -16,6 +16,7 @@
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
+#include <net/page_pool/memory_provider.h>
#include <trace/events/page_pool.h>
#include "devmem.h"
@@ -27,20 +28,28 @@
/* Protected by rtnl_lock() */
static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
+static const struct memory_provider_ops dmabuf_devmem_ops;
+
+bool net_is_devmem_iov(struct net_iov *niov)
+{
+ return niov->pp->mp_ops == &dmabuf_devmem_ops;
+}
+
static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
struct gen_pool_chunk *chunk,
void *not_used)
{
struct dmabuf_genpool_chunk_owner *owner = chunk->owner;
- kvfree(owner->niovs);
+ kvfree(owner->area.niovs);
kfree(owner);
}
static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov)
{
- struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);
+ struct dmabuf_genpool_chunk_owner *owner;
+ owner = net_devmem_iov_to_chunk_owner(niov);
return owner->base_dma_addr +
((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
}
@@ -83,7 +92,7 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
offset = dma_addr - owner->base_dma_addr;
index = offset / PAGE_SIZE;
- niov = &owner->niovs[index];
+ niov = &owner->area.niovs[index];
niov->pp_magic = 0;
niov->pp = NULL;
@@ -94,7 +103,7 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
void net_devmem_free_dmabuf(struct net_iov *niov)
{
- struct net_devmem_dmabuf_binding *binding = net_iov_binding(niov);
+ struct net_devmem_dmabuf_binding *binding = net_devmem_iov_binding(niov);
unsigned long dma_addr = net_devmem_get_dma_addr(niov);
if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr,
@@ -117,6 +126,7 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
WARN_ON(rxq->mp_params.mp_priv != binding);
rxq->mp_params.mp_priv = NULL;
+ rxq->mp_params.mp_ops = NULL;
rxq_idx = get_netdev_rx_queue_index(rxq);
@@ -152,7 +162,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
}
rxq = __netif_get_rx_queue(dev, rxq_idx);
- if (rxq->mp_params.mp_priv) {
+ if (rxq->mp_params.mp_ops) {
NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
return -EEXIST;
}
@@ -170,6 +180,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
return err;
rxq->mp_params.mp_priv = binding;
+ rxq->mp_params.mp_ops = &dmabuf_devmem_ops;
err = netdev_rx_queue_restart(dev, rxq_idx);
if (err)
@@ -179,6 +190,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
err_xa_erase:
rxq->mp_params.mp_priv = NULL;
+ rxq->mp_params.mp_ops = NULL;
xa_erase(&binding->bound_rxqs, xa_idx);
return err;
@@ -261,9 +273,9 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
goto err_free_chunks;
}
- owner->base_virtual = virtual;
+ owner->area.base_virtual = virtual;
owner->base_dma_addr = dma_addr;
- owner->num_niovs = len / PAGE_SIZE;
+ owner->area.num_niovs = len / PAGE_SIZE;
owner->binding = binding;
err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
@@ -275,17 +287,17 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
goto err_free_chunks;
}
- owner->niovs = kvmalloc_array(owner->num_niovs,
- sizeof(*owner->niovs),
- GFP_KERNEL);
- if (!owner->niovs) {
+ owner->area.niovs = kvmalloc_array(owner->area.num_niovs,
+ sizeof(*owner->area.niovs),
+ GFP_KERNEL);
+ if (!owner->area.niovs) {
err = -ENOMEM;
goto err_free_chunks;
}
- for (i = 0; i < owner->num_niovs; i++) {
- niov = &owner->niovs[i];
- niov->owner = owner;
+ for (i = 0; i < owner->area.num_niovs; i++) {
+ niov = &owner->area.niovs[i];
+ niov->owner = &owner->area;
page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
net_devmem_get_dma_addr(niov));
}
@@ -313,26 +325,6 @@ err_put_dmabuf:
return ERR_PTR(err);
}
-void dev_dmabuf_uninstall(struct net_device *dev)
-{
- struct net_devmem_dmabuf_binding *binding;
- struct netdev_rx_queue *rxq;
- unsigned long xa_idx;
- unsigned int i;
-
- for (i = 0; i < dev->real_num_rx_queues; i++) {
- binding = dev->_rx[i].mp_params.mp_priv;
- if (!binding)
- continue;
-
- xa_for_each(&binding->bound_rxqs, xa_idx, rxq)
- if (rxq == &dev->_rx[i]) {
- xa_erase(&binding->bound_rxqs, xa_idx);
- break;
- }
- }
-}
-
/*** "Dmabuf devmem memory provider" ***/
int mp_dmabuf_devmem_init(struct page_pool *pool)
@@ -398,3 +390,36 @@ bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem)
/* We don't want the page pool put_page()ing our net_iovs. */
return false;
}
+
+static int mp_dmabuf_devmem_nl_fill(void *mp_priv, struct sk_buff *rsp,
+ struct netdev_rx_queue *rxq)
+{
+ const struct net_devmem_dmabuf_binding *binding = mp_priv;
+ int type = rxq ? NETDEV_A_QUEUE_DMABUF : NETDEV_A_PAGE_POOL_DMABUF;
+
+ return nla_put_u32(rsp, type, binding->id);
+}
+
+static void mp_dmabuf_devmem_uninstall(void *mp_priv,
+ struct netdev_rx_queue *rxq)
+{
+ struct net_devmem_dmabuf_binding *binding = mp_priv;
+ struct netdev_rx_queue *bound_rxq;
+ unsigned long xa_idx;
+
+ xa_for_each(&binding->bound_rxqs, xa_idx, bound_rxq) {
+ if (bound_rxq == rxq) {
+ xa_erase(&binding->bound_rxqs, xa_idx);
+ break;
+ }
+ }
+}
+
+static const struct memory_provider_ops dmabuf_devmem_ops = {
+ .init = mp_dmabuf_devmem_init,
+ .destroy = mp_dmabuf_devmem_destroy,
+ .alloc_netmems = mp_dmabuf_devmem_alloc_netmems,
+ .release_netmem = mp_dmabuf_devmem_release_page,
+ .nl_fill = mp_dmabuf_devmem_nl_fill,
+ .uninstall = mp_dmabuf_devmem_uninstall,
+};
diff --git a/net/core/devmem.h b/net/core/devmem.h
index 76099ef9c482..7fc158d52729 100644
--- a/net/core/devmem.h
+++ b/net/core/devmem.h
@@ -10,6 +10,8 @@
#ifndef _NET_DEVMEM_H
#define _NET_DEVMEM_H
+#include <net/netmem.h>
+
struct netlink_ext_ack;
struct net_devmem_dmabuf_binding {
@@ -51,17 +53,11 @@ struct net_devmem_dmabuf_binding {
* allocations from this chunk.
*/
struct dmabuf_genpool_chunk_owner {
- /* Offset into the dma-buf where this chunk starts. */
- unsigned long base_virtual;
+ struct net_iov_area area;
+ struct net_devmem_dmabuf_binding *binding;
/* dma_addr of the start of the chunk. */
dma_addr_t base_dma_addr;
-
- /* Array of net_iovs for this chunk. */
- struct net_iov *niovs;
- size_t num_niovs;
-
- struct net_devmem_dmabuf_binding *binding;
};
void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding);
@@ -72,38 +68,34 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding);
int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
struct net_devmem_dmabuf_binding *binding,
struct netlink_ext_ack *extack);
-void dev_dmabuf_uninstall(struct net_device *dev);
static inline struct dmabuf_genpool_chunk_owner *
-net_iov_owner(const struct net_iov *niov)
+net_devmem_iov_to_chunk_owner(const struct net_iov *niov)
{
- return niov->owner;
+ struct net_iov_area *owner = net_iov_owner(niov);
+
+ return container_of(owner, struct dmabuf_genpool_chunk_owner, area);
}
-static inline unsigned int net_iov_idx(const struct net_iov *niov)
+static inline struct net_devmem_dmabuf_binding *
+net_devmem_iov_binding(const struct net_iov *niov)
{
- return niov - net_iov_owner(niov)->niovs;
+ return net_devmem_iov_to_chunk_owner(niov)->binding;
}
-static inline struct net_devmem_dmabuf_binding *
-net_iov_binding(const struct net_iov *niov)
+static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov)
{
- return net_iov_owner(niov)->binding;
+ return net_devmem_iov_binding(niov)->id;
}
static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
{
- struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);
+ struct net_iov_area *owner = net_iov_owner(niov);
return owner->base_virtual +
((unsigned long)net_iov_idx(niov) << PAGE_SHIFT);
}
-static inline u32 net_iov_binding_id(const struct net_iov *niov)
-{
- return net_iov_owner(niov)->binding->id;
-}
-
static inline void
net_devmem_dmabuf_binding_get(struct net_devmem_dmabuf_binding *binding)
{
@@ -123,6 +115,8 @@ struct net_iov *
net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding);
void net_devmem_free_dmabuf(struct net_iov *ppiov);
+bool net_is_devmem_iov(struct net_iov *niov);
+
#else
struct net_devmem_dmabuf_binding;
@@ -152,10 +146,6 @@ net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
return -EOPNOTSUPP;
}
-static inline void dev_dmabuf_uninstall(struct net_device *dev)
-{
-}
-
static inline struct net_iov *
net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
@@ -171,10 +161,15 @@ static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
return 0;
}
-static inline u32 net_iov_binding_id(const struct net_iov *niov)
+static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov)
{
return 0;
}
+
+static inline bool net_is_devmem_iov(struct net_iov *niov)
+{
+ return false;
+}
#endif
#endif /* _NET_DEVMEM_H */
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index bd0251bd74a1..d8dd686b5287 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -832,12 +832,10 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
return -ENOENT;
}
-static void neigh_parms_destroy(struct neigh_parms *parms);
-
static inline void neigh_parms_put(struct neigh_parms *parms)
{
if (refcount_dec_and_test(&parms->refcnt))
- neigh_parms_destroy(parms);
+ kfree(parms);
}
/*
@@ -1713,11 +1711,6 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
}
EXPORT_SYMBOL(neigh_parms_release);
-static void neigh_parms_destroy(struct neigh_parms *parms)
-{
- kfree(parms);
-}
-
static struct lock_class_key neigh_table_proxy_queue_class;
static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 07cb99b114bd..3fe2c521e574 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -42,6 +42,87 @@ static inline int dev_isalive(const struct net_device *dev)
return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED;
}
+/* There is a possible ABBA deadlock between rtnl_lock and kernfs_node->active,
+ * when unregistering a net device and accessing associated sysfs files. The
+ * potential deadlock is as follow:
+ *
+ * CPU 0 CPU 1
+ *
+ * rtnl_lock vfs_read
+ * unregister_netdevice_many kernfs_seq_start
+ * device_del / kobject_put kernfs_get_active (kn->active++)
+ * kernfs_drain sysfs_kf_seq_show
+ * wait_event( rtnl_lock
+ * kn->active == KN_DEACTIVATED_BIAS) -> waits on CPU 0 to release
+ * -> waits on CPU 1 to decrease kn->active the rtnl lock.
+ *
+ * The historical fix was to use rtnl_trylock with restart_syscall to bail out
+ * of sysfs operations when the lock couldn't be taken. This fixed the above
+ * issue as it allowed CPU 1 to bail out of the ABBA situation.
+ *
+ * But it came with performances issues, as syscalls are being restarted in
+ * loops when there was contention on the rtnl lock, with huge slow downs in
+ * specific scenarios (e.g. lots of virtual interfaces created and userspace
+ * daemons querying their attributes).
+ *
+ * The idea below is to bail out of the active kernfs_node protection
+ * (kn->active) while trying to take the rtnl lock.
+ *
+ * This replaces rtnl_lock() and still has to be used with rtnl_unlock(). The
+ * net device is guaranteed to be alive if this returns successfully.
+ */
+static int sysfs_rtnl_lock(struct kobject *kobj, struct attribute *attr,
+ struct net_device *ndev)
+{
+ struct kernfs_node *kn;
+ int ret = 0;
+
+ /* First, we hold a reference to the net device as the unregistration
+ * path might run in parallel. This will ensure the net device and the
+ * associated sysfs objects won't be freed while we try to take the rtnl
+ * lock.
+ */
+ dev_hold(ndev);
+ /* sysfs_break_active_protection was introduced to allow self-removal of
+ * devices and their associated sysfs files by bailing out of the
+ * sysfs/kernfs protection. We do this here to allow the unregistration
+ * path to complete in parallel. The following takes a reference on the
+ * kobject and the kernfs_node being accessed.
+ *
+ * This works because we hold a reference onto the net device and the
+ * unregistration path will wait for us eventually in netdev_run_todo
+ * (outside an rtnl lock section).
+ */
+ kn = sysfs_break_active_protection(kobj, attr);
+ /* We can now try to take the rtnl lock. This can't deadlock us as the
+ * unregistration path is able to drain sysfs files (kernfs_node) thanks
+ * to the above dance.
+ */
+ if (rtnl_lock_interruptible()) {
+ ret = -ERESTARTSYS;
+ goto unbreak;
+ }
+ /* Check dismantle on the device hasn't started, otherwise deny the
+ * operation.
+ */
+ if (!dev_isalive(ndev)) {
+ rtnl_unlock();
+ ret = -ENODEV;
+ goto unbreak;
+ }
+ /* We are now sure the device dismantle hasn't started nor that it can
+ * start before we exit the locking section as we hold the rtnl lock.
+ * There's no need to keep unbreaking the sysfs protection nor to hold
+ * a net device reference from that point; that was only needed to take
+ * the rtnl lock.
+ */
+unbreak:
+ sysfs_unbreak_active_protection(kn);
+ dev_put(ndev);
+
+ return ret;
+}
+
/* use same locking rules as GIF* ioctl's */
static ssize_t netdev_show(const struct device *dev,
struct device_attribute *attr, char *buf,
@@ -95,14 +176,14 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
if (ret)
goto err;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ goto err;
+
+ ret = (*set)(netdev, new);
+ if (ret == 0)
+ ret = len;
- if (dev_isalive(netdev)) {
- ret = (*set)(netdev, new);
- if (ret == 0)
- ret = len;
- }
rtnl_unlock();
err:
return ret;
@@ -220,7 +301,7 @@ static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
struct net_device *netdev = to_net_dev(dev);
/* The check is also done in change_carrier; this helps returning early
- * without hitting the trylock/restart in netdev_store.
+ * without hitting the locking section in netdev_store.
*/
if (!netdev->netdev_ops->ndo_change_carrier)
return -EOPNOTSUPP;
@@ -234,8 +315,9 @@ static ssize_t carrier_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
int ret = -EINVAL;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
if (netif_running(netdev)) {
/* Synchronize carrier state with link watch,
@@ -245,8 +327,8 @@ static ssize_t carrier_show(struct device *dev,
ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev));
}
- rtnl_unlock();
+ rtnl_unlock();
return ret;
}
static DEVICE_ATTR_RW(carrier);
@@ -258,13 +340,14 @@ static ssize_t speed_show(struct device *dev,
int ret = -EINVAL;
/* The check is also done in __ethtool_get_link_ksettings; this helps
- * returning early without hitting the trylock/restart below.
+ * returning early without hitting the locking section below.
*/
if (!netdev->ethtool_ops->get_link_ksettings)
return ret;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
if (netif_running(netdev)) {
struct ethtool_link_ksettings cmd;
@@ -284,13 +367,14 @@ static ssize_t duplex_show(struct device *dev,
int ret = -EINVAL;
/* The check is also done in __ethtool_get_link_ksettings; this helps
- * returning early without hitting the trylock/restart below.
+ * returning early without hitting the locking section below.
*/
if (!netdev->ethtool_ops->get_link_ksettings)
return ret;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
if (netif_running(netdev)) {
struct ethtool_link_ksettings cmd;
@@ -490,16 +574,15 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
if (len > 0 && buf[len - 1] == '\n')
--count;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
- if (dev_isalive(netdev)) {
- ret = dev_set_alias(netdev, buf, count);
- if (ret < 0)
- goto err;
- ret = len;
- netdev_state_change(netdev);
- }
+ ret = dev_set_alias(netdev, buf, count);
+ if (ret < 0)
+ goto err;
+ ret = len;
+ netdev_state_change(netdev);
err:
rtnl_unlock();
@@ -551,24 +634,23 @@ static ssize_t phys_port_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
+ struct netdev_phys_item_id ppid;
ssize_t ret = -EINVAL;
/* The check is also done in dev_get_phys_port_id; this helps returning
- * early without hitting the trylock/restart below.
+ * early without hitting the locking section below.
*/
if (!netdev->netdev_ops->ndo_get_phys_port_id)
return -EOPNOTSUPP;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
- if (dev_isalive(netdev)) {
- struct netdev_phys_item_id ppid;
+ ret = dev_get_phys_port_id(netdev, &ppid);
+ if (!ret)
+ ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
- ret = dev_get_phys_port_id(netdev, &ppid);
- if (!ret)
- ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
- }
rtnl_unlock();
return ret;
@@ -580,24 +662,23 @@ static ssize_t phys_port_name_show(struct device *dev,
{
struct net_device *netdev = to_net_dev(dev);
ssize_t ret = -EINVAL;
+ char name[IFNAMSIZ];
/* The checks are also done in dev_get_phys_port_name; this helps
- * returning early without hitting the trylock/restart below.
+ * returning early without hitting the locking section below.
*/
if (!netdev->netdev_ops->ndo_get_phys_port_name &&
!netdev->devlink_port)
return -EOPNOTSUPP;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
- if (dev_isalive(netdev)) {
- char name[IFNAMSIZ];
+ ret = dev_get_phys_port_name(netdev, name, sizeof(name));
+ if (!ret)
+ ret = sysfs_emit(buf, "%s\n", name);
- ret = dev_get_phys_port_name(netdev, name, sizeof(name));
- if (!ret)
- ret = sysfs_emit(buf, "%s\n", name);
- }
rtnl_unlock();
return ret;
@@ -608,26 +689,25 @@ static ssize_t phys_switch_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
+ struct netdev_phys_item_id ppid = { };
ssize_t ret = -EINVAL;
/* The checks are also done in dev_get_phys_port_name; this helps
- * returning early without hitting the trylock/restart below. This works
+ * returning early without hitting the locking section below. This works
* because recurse is false when calling dev_get_port_parent_id.
*/
if (!netdev->netdev_ops->ndo_get_port_parent_id &&
!netdev->devlink_port)
return -EOPNOTSUPP;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
+ if (ret)
+ return ret;
- if (dev_isalive(netdev)) {
- struct netdev_phys_item_id ppid = { };
+ ret = dev_get_port_parent_id(netdev, &ppid, false);
+ if (!ret)
+ ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
- ret = dev_get_port_parent_id(netdev, &ppid, false);
- if (!ret)
- ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
- }
rtnl_unlock();
return ret;
@@ -1108,7 +1188,6 @@ static void rx_queue_get_ownership(const struct kobject *kobj,
static const struct kobj_type rx_queue_ktype = {
.sysfs_ops = &rx_queue_sysfs_ops,
.release = rx_queue_release,
- .default_groups = rx_queue_default_groups,
.namespace = rx_queue_namespace,
.get_ownership = rx_queue_get_ownership,
};
@@ -1131,6 +1210,22 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
struct kobject *kobj = &queue->kobj;
int error = 0;
+ /* Rx queues are cleared in rx_queue_release to allow later
+ * re-registration. This is triggered when their kobj refcount is
+ * dropped.
+ *
+ * If a queue is removed while both a read (or write) operation and a
+ * the re-addition of the same queue are pending (waiting on rntl_lock)
+ * it might happen that the re-addition will execute before the read,
+ * making the initial removal to never happen (queue's kobj refcount
+ * won't drop enough because of the pending read). In such rare case,
+ * return to allow the removal operation to complete.
+ */
+ if (unlikely(kobj->state_initialized)) {
+ netdev_warn_once(dev, "Cannot re-add rx queues before their removal completed");
+ return -EAGAIN;
+ }
+
/* Kobject_put later will trigger rx_queue_release call which
* decreases dev refcount: Take that reference here
*/
@@ -1142,20 +1237,27 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
if (error)
goto err;
+ queue->groups = rx_queue_default_groups;
+ error = sysfs_create_groups(kobj, queue->groups);
+ if (error)
+ goto err;
+
if (dev->sysfs_rx_queue_group) {
error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
if (error)
- goto err;
+ goto err_default_groups;
}
error = rx_queue_default_mask(dev, queue);
if (error)
- goto err;
+ goto err_default_groups;
kobject_uevent(kobj, KOBJ_ADD);
return error;
+err_default_groups:
+ sysfs_remove_groups(kobj, queue->groups);
err:
kobject_put(kobj);
return error;
@@ -1200,12 +1302,14 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
}
while (--i >= new_num) {
- struct kobject *kobj = &dev->_rx[i].kobj;
+ struct netdev_rx_queue *queue = &dev->_rx[i];
+ struct kobject *kobj = &queue->kobj;
if (!refcount_read(&dev_net(dev)->ns.count))
kobj->uevent_suppress = 1;
if (dev->sysfs_rx_queue_group)
sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
+ sysfs_remove_groups(kobj, queue->groups);
kobject_put(kobj);
}
@@ -1244,9 +1348,11 @@ static int net_rx_queue_change_owner(struct net_device *dev, int num,
*/
struct netdev_queue_attribute {
struct attribute attr;
- ssize_t (*show)(struct netdev_queue *queue, char *buf);
- ssize_t (*store)(struct netdev_queue *queue,
- const char *buf, size_t len);
+ ssize_t (*show)(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf);
+ ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len);
};
#define to_netdev_queue_attr(_attr) \
container_of(_attr, struct netdev_queue_attribute, attr)
@@ -1263,7 +1369,7 @@ static ssize_t netdev_queue_attr_show(struct kobject *kobj,
if (!attribute->show)
return -EIO;
- return attribute->show(queue, buf);
+ return attribute->show(kobj, attr, queue, buf);
}
static ssize_t netdev_queue_attr_store(struct kobject *kobj,
@@ -1277,7 +1383,7 @@ static ssize_t netdev_queue_attr_store(struct kobject *kobj,
if (!attribute->store)
return -EIO;
- return attribute->store(queue, buf, count);
+ return attribute->store(kobj, attr, queue, buf, count);
}
static const struct sysfs_ops netdev_queue_sysfs_ops = {
@@ -1285,7 +1391,8 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
.store = netdev_queue_attr_store,
};
-static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf)
+static ssize_t tx_timeout_show(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout);
@@ -1303,18 +1410,18 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
return i;
}
-static ssize_t traffic_class_show(struct netdev_queue *queue,
- char *buf)
+static ssize_t traffic_class_show(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct net_device *dev = queue->dev;
- int num_tc, tc;
- int index;
+ int num_tc, tc, index, ret;
if (!netif_is_multiqueue(dev))
return -ENOENT;
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(kobj, attr, queue->dev);
+ if (ret)
+ return ret;
index = get_netdev_queue_index(queue);
@@ -1341,24 +1448,25 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
}
#ifdef CONFIG_XPS
-static ssize_t tx_maxrate_show(struct netdev_queue *queue,
- char *buf)
+static ssize_t tx_maxrate_show(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
return sysfs_emit(buf, "%lu\n", queue->tx_maxrate);
}
-static ssize_t tx_maxrate_store(struct netdev_queue *queue,
- const char *buf, size_t len)
+static ssize_t tx_maxrate_store(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len)
{
- struct net_device *dev = queue->dev;
int err, index = get_netdev_queue_index(queue);
+ struct net_device *dev = queue->dev;
u32 rate = 0;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
/* The check is also done later; this helps returning early without
- * hitting the trylock/restart below.
+ * hitting the locking section below.
*/
if (!dev->netdev_ops->ndo_set_tx_maxrate)
return -EOPNOTSUPP;
@@ -1367,18 +1475,21 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue,
if (err < 0)
return err;
- if (!rtnl_trylock())
- return restart_syscall();
+ err = sysfs_rtnl_lock(kobj, attr, dev);
+ if (err)
+ return err;
err = -EOPNOTSUPP;
if (dev->netdev_ops->ndo_set_tx_maxrate)
err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
- rtnl_unlock();
if (!err) {
queue->tx_maxrate = rate;
+ rtnl_unlock();
return len;
}
+
+ rtnl_unlock();
return err;
}
@@ -1422,16 +1533,17 @@ static ssize_t bql_set(const char *buf, const size_t count,
return count;
}
-static ssize_t bql_show_hold_time(struct netdev_queue *queue,
- char *buf)
+static ssize_t bql_show_hold_time(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
}
-static ssize_t bql_set_hold_time(struct netdev_queue *queue,
- const char *buf, size_t len)
+static ssize_t bql_set_hold_time(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len)
{
struct dql *dql = &queue->dql;
unsigned int value;
@@ -1450,15 +1562,17 @@ static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
= __ATTR(hold_time, 0644,
bql_show_hold_time, bql_set_hold_time);
-static ssize_t bql_show_stall_thrs(struct netdev_queue *queue, char *buf)
+static ssize_t bql_show_stall_thrs(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs));
}
-static ssize_t bql_set_stall_thrs(struct netdev_queue *queue,
- const char *buf, size_t len)
+static ssize_t bql_set_stall_thrs(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len)
{
struct dql *dql = &queue->dql;
unsigned int value;
@@ -1484,13 +1598,15 @@ static ssize_t bql_set_stall_thrs(struct netdev_queue *queue,
static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init =
__ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs);
-static ssize_t bql_show_stall_max(struct netdev_queue *queue, char *buf)
+static ssize_t bql_show_stall_max(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
return sysfs_emit(buf, "%u\n", READ_ONCE(queue->dql.stall_max));
}
-static ssize_t bql_set_stall_max(struct netdev_queue *queue,
- const char *buf, size_t len)
+static ssize_t bql_set_stall_max(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len)
{
WRITE_ONCE(queue->dql.stall_max, 0);
return len;
@@ -1499,7 +1615,8 @@ static ssize_t bql_set_stall_max(struct netdev_queue *queue,
static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init =
__ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max);
-static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf)
+static ssize_t bql_show_stall_cnt(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
@@ -1509,8 +1626,8 @@ static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf)
static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init =
__ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL);
-static ssize_t bql_show_inflight(struct netdev_queue *queue,
- char *buf)
+static ssize_t bql_show_inflight(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct dql *dql = &queue->dql;
@@ -1521,13 +1638,16 @@ static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
__ATTR(inflight, 0444, bql_show_inflight, NULL);
#define BQL_ATTR(NAME, FIELD) \
-static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \
- char *buf) \
+static ssize_t bql_show_ ## NAME(struct kobject *kobj, \
+ struct attribute *attr, \
+ struct netdev_queue *queue, char *buf) \
{ \
return bql_show(buf, queue->dql.FIELD); \
} \
\
-static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \
+static ssize_t bql_set_ ## NAME(struct kobject *kobj, \
+ struct attribute *attr, \
+ struct netdev_queue *queue, \
const char *buf, size_t len) \
{ \
return bql_set(buf, len, &queue->dql.FIELD); \
@@ -1613,19 +1733,21 @@ out_no_maps:
return len < PAGE_SIZE ? len : -EINVAL;
}
-static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf)
+static ssize_t xps_cpus_show(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct net_device *dev = queue->dev;
unsigned int index;
- int len, tc;
+ int len, tc, ret;
if (!netif_is_multiqueue(dev))
return -ENOENT;
index = get_netdev_queue_index(queue);
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(kobj, attr, queue->dev);
+ if (ret)
+ return ret;
/* If queue belongs to subordinate dev use its map */
dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
@@ -1636,18 +1758,21 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf)
return -EINVAL;
}
- /* Make sure the subordinate device can't be freed */
- get_device(&dev->dev);
+ /* Increase the net device refcnt to make sure it won't be freed while
+ * xps_queue_show is running.
+ */
+ dev_hold(dev);
rtnl_unlock();
len = xps_queue_show(dev, index, tc, buf, XPS_CPUS);
- put_device(&dev->dev);
+ dev_put(dev);
return len;
}
-static ssize_t xps_cpus_store(struct netdev_queue *queue,
- const char *buf, size_t len)
+static ssize_t xps_cpus_store(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
+ size_t len)
{
struct net_device *dev = queue->dev;
unsigned int index;
@@ -1671,9 +1796,10 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
return err;
}
- if (!rtnl_trylock()) {
+ err = sysfs_rtnl_lock(kobj, attr, dev);
+ if (err) {
free_cpumask_var(mask);
- return restart_syscall();
+ return err;
}
err = netif_set_xps_queue(dev, mask, index);
@@ -1687,26 +1813,34 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
= __ATTR_RW(xps_cpus);
-static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
+static ssize_t xps_rxqs_show(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, char *buf)
{
struct net_device *dev = queue->dev;
unsigned int index;
- int tc;
+ int tc, ret;
index = get_netdev_queue_index(queue);
- if (!rtnl_trylock())
- return restart_syscall();
+ ret = sysfs_rtnl_lock(kobj, attr, dev);
+ if (ret)
+ return ret;
tc = netdev_txq_to_tc(dev, index);
+
+ /* Increase the net device refcnt to make sure it won't be freed while
+ * xps_queue_show is running.
+ */
+ dev_hold(dev);
rtnl_unlock();
- if (tc < 0)
- return -EINVAL;
- return xps_queue_show(dev, index, tc, buf, XPS_RXQS);
+ ret = tc >= 0 ? xps_queue_show(dev, index, tc, buf, XPS_RXQS) : -EINVAL;
+ dev_put(dev);
+ return ret;
}
-static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
+static ssize_t xps_rxqs_store(struct kobject *kobj, struct attribute *attr,
+ struct netdev_queue *queue, const char *buf,
size_t len)
{
struct net_device *dev = queue->dev;
@@ -1730,9 +1864,10 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
return err;
}
- if (!rtnl_trylock()) {
+ err = sysfs_rtnl_lock(kobj, attr, dev);
+ if (err) {
bitmap_free(mask);
- return restart_syscall();
+ return err;
}
cpus_read_lock();
@@ -1792,7 +1927,6 @@ static void netdev_queue_get_ownership(const struct kobject *kobj,
static const struct kobj_type netdev_queue_ktype = {
.sysfs_ops = &netdev_queue_sysfs_ops,
.release = netdev_queue_release,
- .default_groups = netdev_queue_default_groups,
.namespace = netdev_queue_namespace,
.get_ownership = netdev_queue_get_ownership,
};
@@ -1811,6 +1945,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
struct kobject *kobj = &queue->kobj;
int error = 0;
+ /* Tx queues are cleared in netdev_queue_release to allow later
+ * re-registration. This is triggered when their kobj refcount is
+ * dropped.
+ *
+ * If a queue is removed while both a read (or write) operation and a
+ * the re-addition of the same queue are pending (waiting on rntl_lock)
+ * it might happen that the re-addition will execute before the read,
+ * making the initial removal to never happen (queue's kobj refcount
+ * won't drop enough because of the pending read). In such rare case,
+ * return to allow the removal operation to complete.
+ */
+ if (unlikely(kobj->state_initialized)) {
+ netdev_warn_once(dev, "Cannot re-add tx queues before their removal completed");
+ return -EAGAIN;
+ }
+
/* Kobject_put later will trigger netdev_queue_release call
* which decreases dev refcount: Take that reference here
*/
@@ -1822,15 +1972,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
if (error)
goto err;
+ queue->groups = netdev_queue_default_groups;
+ error = sysfs_create_groups(kobj, queue->groups);
+ if (error)
+ goto err;
+
if (netdev_uses_bql(dev)) {
error = sysfs_create_group(kobj, &dql_group);
if (error)
- goto err;
+ goto err_default_groups;
}
kobject_uevent(kobj, KOBJ_ADD);
return 0;
+err_default_groups:
+ sysfs_remove_groups(kobj, queue->groups);
err:
kobject_put(kobj);
return error;
@@ -1885,6 +2042,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
if (netdev_uses_bql(dev))
sysfs_remove_group(&queue->kobj, &dql_group);
+ sysfs_remove_groups(&queue->kobj, queue->groups);
kobject_put(&queue->kobj);
}
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 715f85c6b62e..5b459b4fef46 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -10,6 +10,7 @@
#include <net/sock.h>
#include <net/xdp.h>
#include <net/xdp_sock.h>
+#include <net/page_pool/memory_provider.h>
#include "dev.h"
#include "devmem.h"
@@ -368,7 +369,7 @@ static int
netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
u32 q_idx, u32 q_type, const struct genl_info *info)
{
- struct net_devmem_dmabuf_binding *binding;
+ struct pp_memory_provider_params *params;
struct netdev_rx_queue *rxq;
struct netdev_queue *txq;
void *hdr;
@@ -385,15 +386,15 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
switch (q_type) {
case NETDEV_QUEUE_TYPE_RX:
rxq = __netif_get_rx_queue(netdev, q_idx);
+
if (rxq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID,
rxq->napi->napi_id))
goto nla_put_failure;
- binding = rxq->mp_params.mp_priv;
- if (binding &&
- nla_put_u32(rsp, NETDEV_A_QUEUE_DMABUF, binding->id))
+ params = &rxq->mp_params;
+ if (params->mp_ops &&
+ params->mp_ops->nl_fill(params->mp_priv, rsp, rxq))
goto nla_put_failure;
-
break;
case NETDEV_QUEUE_TYPE_TX:
txq = netdev_get_tx_queue(netdev, q_idx);
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index db82786fa0c4..db46880f37cc 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -3,6 +3,7 @@
#include <linux/netdevice.h>
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
+#include <net/page_pool/memory_provider.h>
#include "page_pool_priv.h"
@@ -80,3 +81,71 @@ err_free_new_mem:
return err;
}
EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL");
+
+static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *p)
+{
+ struct netdev_rx_queue *rxq;
+ int ret;
+
+ if (ifq_idx >= dev->real_num_rx_queues)
+ return -EINVAL;
+ ifq_idx = array_index_nospec(ifq_idx, dev->real_num_rx_queues);
+
+ rxq = __netif_get_rx_queue(dev, ifq_idx);
+ if (rxq->mp_params.mp_ops)
+ return -EEXIST;
+
+ rxq->mp_params = *p;
+ ret = netdev_rx_queue_restart(dev, ifq_idx);
+ if (ret) {
+ rxq->mp_params.mp_ops = NULL;
+ rxq->mp_params.mp_priv = NULL;
+ }
+ return ret;
+}
+
+int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *p)
+{
+ int ret;
+
+ rtnl_lock();
+ ret = __net_mp_open_rxq(dev, ifq_idx, p);
+ rtnl_unlock();
+ return ret;
+}
+
+static void __net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *old_p)
+{
+ struct netdev_rx_queue *rxq;
+
+ if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
+ return;
+
+ rxq = __netif_get_rx_queue(dev, ifq_idx);
+
+ /* Callers holding a netdev ref may get here after we already
+ * went thru shutdown via dev_memory_provider_uninstall().
+ */
+ if (dev->reg_state > NETREG_REGISTERED &&
+ !rxq->mp_params.mp_ops)
+ return;
+
+ if (WARN_ON_ONCE(rxq->mp_params.mp_ops != old_p->mp_ops ||
+ rxq->mp_params.mp_priv != old_p->mp_priv))
+ return;
+
+ rxq->mp_params.mp_ops = NULL;
+ rxq->mp_params.mp_priv = NULL;
+ WARN_ON(netdev_rx_queue_restart(dev, ifq_idx));
+}
+
+void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
+ struct pp_memory_provider_params *old_p)
+{
+ rtnl_lock();
+ __net_mp_close_rxq(dev, ifq_idx, old_p);
+ rtnl_unlock();
+}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index f5e908c9e7ad..686bd4a117d9 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -13,6 +13,7 @@
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
+#include <net/page_pool/memory_provider.h>
#include <net/xdp.h>
#include <linux/dma-direction.h>
@@ -285,13 +286,19 @@ static int page_pool_init(struct page_pool *pool,
rxq = __netif_get_rx_queue(pool->slow.netdev,
pool->slow.queue_idx);
pool->mp_priv = rxq->mp_params.mp_priv;
+ pool->mp_ops = rxq->mp_params.mp_ops;
}
- if (pool->mp_priv) {
+ if (pool->mp_ops) {
if (!pool->dma_map || !pool->dma_sync)
return -EOPNOTSUPP;
- err = mp_dmabuf_devmem_init(pool);
+ if (WARN_ON(!is_kernel_rodata((unsigned long)pool->mp_ops))) {
+ err = -EFAULT;
+ goto free_ptr_ring;
+ }
+
+ err = pool->mp_ops->init(pool);
if (err) {
pr_warn("%s() mem-provider init failed %d\n", __func__,
err);
@@ -587,8 +594,8 @@ netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp)
return netmem;
/* Slow-path: cache empty, do real allocation */
- if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv)
- netmem = mp_dmabuf_devmem_alloc_netmems(pool, gfp);
+ if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
+ netmem = pool->mp_ops->alloc_netmems(pool, gfp);
else
netmem = __page_pool_alloc_pages_slow(pool, gfp);
return netmem;
@@ -679,8 +686,8 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
bool put;
put = true;
- if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv)
- put = mp_dmabuf_devmem_release_page(pool, netmem);
+ if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
+ put = pool->mp_ops->release_netmem(pool, netmem);
else
__page_pool_release_page_dma(pool, netmem);
@@ -1048,8 +1055,8 @@ static void __page_pool_destroy(struct page_pool *pool)
page_pool_unlist(pool);
page_pool_uninit(pool);
- if (pool->mp_priv) {
- mp_dmabuf_devmem_destroy(pool);
+ if (pool->mp_ops) {
+ pool->mp_ops->destroy(pool);
static_branch_dec(&page_pool_mem_providers);
}
@@ -1190,3 +1197,31 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
}
}
EXPORT_SYMBOL(page_pool_update_nid);
+
+bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr)
+{
+ return page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), addr);
+}
+
+/* Associate a niov with a page pool. Should follow with a matching
+ * net_mp_niov_clear_page_pool()
+ */
+void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov)
+{
+ netmem_ref netmem = net_iov_to_netmem(niov);
+
+ page_pool_set_pp_info(pool, netmem);
+
+ pool->pages_state_hold_cnt++;
+ trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt);
+}
+
+/* Disassociate a niov from a page pool. Should only be used in the
+ * ->release_netmem() path.
+ */
+void net_mp_niov_clear_page_pool(struct net_iov *niov)
+{
+ netmem_ref netmem = net_iov_to_netmem(niov);
+
+ page_pool_clear_pp_info(netmem);
+}
diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c
index 6677e0c2e256..9d8a3d8597fa 100644
--- a/net/core/page_pool_user.c
+++ b/net/core/page_pool_user.c
@@ -8,9 +8,9 @@
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <net/page_pool/types.h>
+#include <net/page_pool/memory_provider.h>
#include <net/sock.h>
-#include "devmem.h"
#include "page_pool_priv.h"
#include "netdev-genl-gen.h"
@@ -216,7 +216,6 @@ static int
page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
const struct genl_info *info)
{
- struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
size_t inflight, refsz;
unsigned int napi_id;
void *hdr;
@@ -249,7 +248,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
pool->user.detach_time))
goto err_cancel;
- if (binding && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id))
+ if (pool->mp_ops && pool->mp_ops->nl_fill(pool->mp_priv, rsp, NULL))
goto err_cancel;
genlmsg_end(rsp, hdr);
@@ -356,7 +355,7 @@ void page_pool_unlist(struct page_pool *pool)
int page_pool_check_memory_provider(struct net_device *dev,
struct netdev_rx_queue *rxq)
{
- struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv;
+ void *binding = rxq->mp_params.mp_priv;
struct page_pool *pool;
struct hlist_node *n;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d1e559fce918..abe1a461ea67 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -80,6 +80,11 @@ void rtnl_lock(void)
}
EXPORT_SYMBOL(rtnl_lock);
+int rtnl_lock_interruptible(void)
+{
+ return mutex_lock_interruptible(&rtnl_mutex);
+}
+
int rtnl_lock_killable(void)
{
return mutex_lock_killable(&rtnl_mutex);