author     Pavel Begunkov <asml.silence@gmail.com>  2025-02-14 16:09:40 -0800
committer  Jens Axboe <axboe@kernel.dk>             2025-02-17 05:41:09 -0700
commit     db070446f5af8c7a384b89367a10cddbf5498717 (patch)
tree       59d6b852d0d3c94c11f278fcc1d313c9ce84b855
parent     34a3e60821ab9f335a58d43a88cccdbefdebdec3 (diff)
io_uring/zcrx: dma-map area for the device
Set up DMA mappings for the area into which we intend to receive data
later on. We know the device we want to attach to even before we get a
page pool and can pre-map in advance. All net_iov are synchronised for
the device when allocated, see page_pool_mp_return_in_cache().

Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: David Wei <dw@davidwei.uk>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20250215000947.789731-6-dw@davidwei.uk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--  io_uring/zcrx.c  82
-rw-r--r--  io_uring/zcrx.h  1
2 files changed, 82 insertions(+), 1 deletion(-)
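The core of the patch is a map-then-unwind loop: every page of the area is
DMA-mapped up front at registration time, and if any mapping fails, exactly
the mappings that succeeded so far are torn down again. A minimal
self-contained sketch of that idiom follows; my_map_area() is a hypothetical
name, and the real io_zcrx_map_area() in the diff below additionally stores
each address in its net_iov via net_mp_niov_set_dma_addr().

#include <linux/dma-mapping.h>
#include <linux/mm.h>

/*
 * Hypothetical helper sketching the map-then-unwind idiom of
 * io_zcrx_map_area()/__io_zcrx_unmap_area(). DMA_ATTR_SKIP_CPU_SYNC
 * defers the CPU cache sync to allocation time, as the commit
 * message notes.
 */
static int my_map_area(struct device *dev, struct page **pages,
		       dma_addr_t *addrs, int nr_pages)
{
	int i;

	for (i = 0; i < nr_pages; i++) {
		addrs[i] = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
					      DMA_FROM_DEVICE,
					      DMA_ATTR_SKIP_CPU_SYNC);
		if (dma_mapping_error(dev, addrs[i]))
			break;
	}

	if (i != nr_pages) {
		/* unwind only the i mappings that actually succeeded */
		while (i--)
			dma_unmap_page_attrs(dev, addrs[i], PAGE_SIZE,
					     DMA_FROM_DEVICE,
					     DMA_ATTR_SKIP_CPU_SYNC);
		return -EINVAL;
	}
	return 0;
}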
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index f9e924cfa829..9d14fdf7a568 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
+#include <linux/dma-map-ops.h>
#include <linux/mm.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>
@@ -21,6 +22,73 @@
#include "zcrx.h"
#include "rsrc.h"
+#define IO_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
+static void __io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
+ struct io_zcrx_area *area, int nr_mapped)
+{
+ int i;
+
+ for (i = 0; i < nr_mapped; i++) {
+ struct net_iov *niov = &area->nia.niovs[i];
+ dma_addr_t dma;
+
+ dma = page_pool_get_dma_addr_netmem(net_iov_to_netmem(niov));
+ dma_unmap_page_attrs(ifq->dev, dma, PAGE_SIZE,
+ DMA_FROM_DEVICE, IO_DMA_ATTR);
+ net_mp_niov_set_dma_addr(niov, 0);
+ }
+}
+
+static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
+{
+ if (area->is_mapped)
+ __io_zcrx_unmap_area(ifq, area, area->nia.num_niovs);
+}
+
+static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
+{
+ int i;
+
+ for (i = 0; i < area->nia.num_niovs; i++) {
+ struct net_iov *niov = &area->nia.niovs[i];
+ dma_addr_t dma;
+
+ dma = dma_map_page_attrs(ifq->dev, area->pages[i], 0, PAGE_SIZE,
+ DMA_FROM_DEVICE, IO_DMA_ATTR);
+ if (dma_mapping_error(ifq->dev, dma))
+ break;
+ if (net_mp_niov_set_dma_addr(niov, dma)) {
+ dma_unmap_page_attrs(ifq->dev, dma, PAGE_SIZE,
+ DMA_FROM_DEVICE, IO_DMA_ATTR);
+ break;
+ }
+ }
+
+ if (i != area->nia.num_niovs) {
+ __io_zcrx_unmap_area(ifq, area, i);
+ return -EINVAL;
+ }
+
+ area->is_mapped = true;
+ return 0;
+}
+
+static void io_zcrx_sync_for_device(const struct page_pool *pool,
+ struct net_iov *niov)
+{
+#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
+ dma_addr_t dma_addr;
+
+ if (!dma_dev_need_sync(pool->p.dev))
+ return;
+
+ dma_addr = page_pool_get_dma_addr_netmem(net_iov_to_netmem(niov));
+ __dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset,
+ PAGE_SIZE, pool->p.dma_dir);
+#endif
+}
+
#define IO_RQ_MAX_ENTRIES 32768
__maybe_unused
@@ -83,6 +151,8 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
static void io_zcrx_free_area(struct io_zcrx_area *area)
{
+ io_zcrx_unmap_area(area->ifq, area);
+
kvfree(area->freelist);
kvfree(area->nia.niovs);
kvfree(area->user_refs);
@@ -272,6 +342,10 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
return -EOPNOTSUPP;
get_device(ifq->dev);
+ ret = io_zcrx_map_area(ifq, ifq->area);
+ if (ret)
+ goto err;
+
reg.offsets.rqes = sizeof(struct io_uring);
reg.offsets.head = offsetof(struct io_uring, head);
reg.offsets.tail = offsetof(struct io_uring, tail);
@@ -422,6 +496,7 @@ static void io_zcrx_ring_refill(struct page_pool *pp,
continue;
}
+ io_zcrx_sync_for_device(pp, niov);
net_mp_netmem_place_in_cache(pp, netmem);
} while (--entries);
@@ -439,6 +514,7 @@ static void io_zcrx_refill_slow(struct page_pool *pp, struct io_zcrx_ifq *ifq)
netmem_ref netmem = net_iov_to_netmem(niov);
net_mp_niov_set_page_pool(pp, niov);
+ io_zcrx_sync_for_device(pp, niov);
net_mp_netmem_place_in_cache(pp, netmem);
}
spin_unlock_bh(&area->freelist_lock);
@@ -482,10 +558,14 @@ static int io_pp_zc_init(struct page_pool *pp)
if (WARN_ON_ONCE(!ifq))
return -EINVAL;
- if (pp->dma_map)
+ if (WARN_ON_ONCE(ifq->dev != pp->p.dev))
+ return -EINVAL;
+ if (WARN_ON_ONCE(!pp->dma_map))
return -EOPNOTSUPP;
if (pp->p.order != 0)
return -EOPNOTSUPP;
+ if (pp->p.dma_dir != DMA_FROM_DEVICE)
+ return -EOPNOTSUPP;
percpu_ref_get(&ifq->ctx->refs);
return 0;
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 6c808240ac91..1b6363591f72 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -11,6 +11,7 @@ struct io_zcrx_area {
struct io_zcrx_ifq *ifq;
atomic_t *user_refs;
+ bool is_mapped;
u16 area_id;
struct page **pages;
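As a closing note, io_zcrx_sync_for_device() above is the allocation-time
counterpart to DMA_ATTR_SKIP_CPU_SYNC: buffers are synced for the device only
when handed to the page pool cache, and only on devices that actually need a
sync. A minimal sketch of the pattern, with the hypothetical name
my_sync_for_device(); the real function pulls the device, offset and DMA
direction from the page pool's params and is compiled out entirely unless
CONFIG_HAS_DMA and CONFIG_DMA_NEED_SYNC are set.

/*
 * Hypothetical consumer-side sync mirroring io_zcrx_sync_for_device():
 * a no-op on DMA-coherent devices, a cache sync for the device otherwise.
 */
static void my_sync_for_device(struct device *dev, dma_addr_t addr,
			       size_t len)
{
	if (!dma_dev_need_sync(dev))
		return;
	__dma_sync_single_for_device(dev, addr, len, DMA_FROM_DEVICE);
}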