diff options
author | Jakub Kicinski <kuba@kernel.org> | 2025-02-07 17:21:05 -0800 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2025-02-07 17:21:06 -0800 |
commit | acdefab0dcbc3833b5a734ab80d792bb778517a0 (patch) | |
tree | 173c9371158b93ff7588f9bc2000299abda2a83d | |
parent | 6a0ca73e5144a5d1c1f84cbfd96f4bc656c2ae6c (diff) | |
parent | 285b3f78eabd951e59e98f01f86abaaa6c76cd44 (diff) |
Merge branch 'net-improve-core-queue-api-handling-while-device-is-down'
Jakub Kicinski says:
====================
net: improve core queue API handling while device is down
The core netdev_rx_queue_restart() doesn't currently take into account
that the device may be down. The current and proposed queue API
implementations deal with this by rejecting queue API calls while
the device is down. We can do better, in theory we can still allow
devmem binding when the device is down - we shouldn't stop and start
the queues just try to allocate the memory. The reason we allocate
the memory is that memory provider binding checks if any compatible
page pool has been created (page_pool_check_memory_provider()).
Alternatively we could reject installing MP while the device is down
but the MP assignment survives ifdown (so presumably MP doesn't cease
to exist while down), and in general we allow configuration while down.
Previously I thought we need this as a fix, but gve rejects page pool
calls while down, and so did Saeed in the patches he posted. So this
series just makes the core act more sensibly but practically should
be a noop for now.
v1: https://lore.kernel.org/20250205190131.564456-1-kuba@kernel.org
====================
Link: https://patch.msgid.link/20250206225638.1387810-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- | drivers/net/netdevsim/netdev.c | 10 | ||||
-rw-r--r-- | include/net/netdev_queues.h | 4 | ||||
-rw-r--r-- | net/core/dev.h | 12 | ||||
-rw-r--r-- | net/core/netdev_rx_queue.c | 37 | ||||
-rw-r--r-- | net/core/page_pool.c | 7 | ||||
-rwxr-xr-x | tools/testing/selftests/net/nl_netdev.py | 18 |
6 files changed, 59 insertions, 29 deletions
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 42f247cbdcee..9b394ddc5206 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -645,8 +645,11 @@ nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx) if (ns->rq_reset_mode > 3) return -EINVAL; - if (ns->rq_reset_mode == 1) + if (ns->rq_reset_mode == 1) { + if (!netif_running(ns->netdev)) + return -ENETDOWN; return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi); + } qmem->rq = nsim_queue_alloc(); if (!qmem->rq) @@ -754,11 +757,6 @@ nsim_qreset_write(struct file *file, const char __user *data, return -EINVAL; rtnl_lock(); - if (!netif_running(ns->netdev)) { - ret = -ENETDOWN; - goto exit_unlock; - } - if (queue >= ns->netdev->real_num_rx_queues) { ret = -EINVAL; goto exit_unlock; diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index b02bb9f109d5..73d3401261a6 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -117,6 +117,10 @@ struct netdev_stat_ops { * * @ndo_queue_stop: Stop the RX queue at the specified index. The stopped * queue's memory is written at the specified address. + * + * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while + * the interface is closed. @ndo_queue_start and @ndo_queue_stop will only + * be called for an interface which is open. */ struct netdev_queue_mgmt_ops { size_t ndo_queue_mem_size; diff --git a/net/core/dev.h b/net/core/dev.h index a5b166bbd169..caa13e431a6b 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -299,6 +299,18 @@ void xdp_do_check_flushed(struct napi_struct *napi); static inline void xdp_do_check_flushed(struct napi_struct *napi) { } #endif +/* Best effort check that NAPI is not idle (can't be scheduled to run) */ +static inline void napi_assert_will_not_race(const struct napi_struct *napi) +{ + /* uninitialized instance, can't race */ + if (!napi->poll_list.next) + return; + + /* SCHED bit is set on disabled instances */ + WARN_ON(!test_bit(NAPI_STATE_SCHED, &napi->state)); + WARN_ON(READ_ONCE(napi->list_owner) != -1); +} + void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu); #define XMIT_RECURSION_LIMIT 8 diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index db46880f37cc..ddd54e1e7289 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -10,28 +10,27 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) { struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, rxq_idx); + const struct netdev_queue_mgmt_ops *qops = dev->queue_mgmt_ops; void *new_mem, *old_mem; int err; - if (!dev->queue_mgmt_ops || !dev->queue_mgmt_ops->ndo_queue_stop || - !dev->queue_mgmt_ops->ndo_queue_mem_free || - !dev->queue_mgmt_ops->ndo_queue_mem_alloc || - !dev->queue_mgmt_ops->ndo_queue_start) + if (!qops || !qops->ndo_queue_stop || !qops->ndo_queue_mem_free || + !qops->ndo_queue_mem_alloc || !qops->ndo_queue_start) return -EOPNOTSUPP; ASSERT_RTNL(); - new_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL); + new_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL); if (!new_mem) return -ENOMEM; - old_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL); + old_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL); if (!old_mem) { err = -ENOMEM; goto err_free_new_mem; } - err = dev->queue_mgmt_ops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx); + err = qops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx); if (err) goto err_free_old_mem; @@ -39,15 +38,19 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) if (err) goto err_free_new_queue_mem; - err = dev->queue_mgmt_ops->ndo_queue_stop(dev, old_mem, rxq_idx); - if (err) - goto err_free_new_queue_mem; + if (netif_running(dev)) { + err = qops->ndo_queue_stop(dev, old_mem, rxq_idx); + if (err) + goto err_free_new_queue_mem; - err = dev->queue_mgmt_ops->ndo_queue_start(dev, new_mem, rxq_idx); - if (err) - goto err_start_queue; + err = qops->ndo_queue_start(dev, new_mem, rxq_idx); + if (err) + goto err_start_queue; + } else { + swap(new_mem, old_mem); + } - dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem); + qops->ndo_queue_mem_free(dev, old_mem); kvfree(old_mem); kvfree(new_mem); @@ -62,15 +65,15 @@ err_start_queue: * WARN if we fail to recover the old rx queue, and at least free * old_mem so we don't also leak that. */ - if (dev->queue_mgmt_ops->ndo_queue_start(dev, old_mem, rxq_idx)) { + if (qops->ndo_queue_start(dev, old_mem, rxq_idx)) { WARN(1, "Failed to restart old queue in error path. RX queue %d may be unhealthy.", rxq_idx); - dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem); + qops->ndo_queue_mem_free(dev, old_mem); } err_free_new_queue_mem: - dev->queue_mgmt_ops->ndo_queue_mem_free(dev, new_mem); + qops->ndo_queue_mem_free(dev, new_mem); err_free_old_mem: kvfree(old_mem); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 686bd4a117d9..1c6fec08bc43 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -26,6 +26,7 @@ #include <trace/events/page_pool.h> +#include "dev.h" #include "mp_dmabuf_devmem.h" #include "netmem_priv.h" #include "page_pool_priv.h" @@ -1147,11 +1148,7 @@ void page_pool_disable_direct_recycling(struct page_pool *pool) if (!pool->p.napi) return; - /* To avoid races with recycling and additional barriers make sure - * pool and NAPI are unlinked when NAPI is disabled. - */ - WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state)); - WARN_ON(READ_ONCE(pool->p.napi->list_owner) != -1); + napi_assert_will_not_race(pool->p.napi); mutex_lock(&page_pools_lock); WRITE_ONCE(pool->p.napi, NULL); diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index 93e8cb671c3d..beaee5e4e2aa 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -35,6 +35,21 @@ def napi_list_check(nf) -> None: comment=f"queue count after reset queue {q} mode {i}") +def nsim_rxq_reset_down(nf) -> None: + """ + Test that the queue API supports resetting a queue + while the interface is down. We should convert this + test to testing real HW once more devices support + queue API. + """ + with NetdevSimDev(queue_count=4) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} down") + for i in [0, 2, 3]: + nsim.dfs_write("queue_reset", f"1 {i}") + + def page_pool_check(nf) -> None: with NetdevSimDev() as nsimdev: nsim = nsimdev.nsims[0] @@ -106,7 +121,8 @@ def page_pool_check(nf) -> None: def main() -> None: nf = NetdevFamily() - ksft_run([empty_check, lo_check, page_pool_check, napi_list_check], + ksft_run([empty_check, lo_check, page_pool_check, napi_list_check, + nsim_rxq_reset_down], args=(nf, )) ksft_exit() |