diff options
author | Pavel Begunkov <asml.silence@gmail.com> | 2024-11-29 13:34:31 +0000 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2024-12-23 08:17:16 -0700 |
commit | 1e21df691ffa2c277e0b1a4928c9da0e86e9a2be (patch) | |
tree | 4da08ab96df80e557a6f9ef8d7a9db9c7300cab8 | |
parent | 4b851d20d325dc59f9abdce55d42dc4b68179db0 (diff) |
io_uring/memmap: implement kernel allocated regions
Allow the kernel to allocate memory for a region. That's the classical
way SQ/CQ are allocated. It's not yet useful to user space as there
is no way to mmap it, which is why it's explicitly disabled in
io_register_mem_region().
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/7b8c40e6542546bbf93f4842a9a42a7373b81e0d.1732886067.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- | io_uring/memmap.c | 43 | ||||
-rw-r--r-- | io_uring/register.c | 2 |
2 files changed, 42 insertions, 3 deletions
diff --git a/io_uring/memmap.c b/io_uring/memmap.c index a37ccb167258..0908a71bf57e 100644 --- a/io_uring/memmap.c +++ b/io_uring/memmap.c @@ -273,6 +273,39 @@ static int io_region_pin_pages(struct io_ring_ctx *ctx, return 0; } +static int io_region_allocate_pages(struct io_ring_ctx *ctx, + struct io_mapped_region *mr, + struct io_uring_region_desc *reg) +{ + gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN; + unsigned long size = mr->nr_pages << PAGE_SHIFT; + unsigned long nr_allocated; + struct page **pages; + void *p; + + pages = kvmalloc_array(mr->nr_pages, sizeof(*pages), gfp); + if (!pages) + return -ENOMEM; + + p = io_mem_alloc_compound(pages, mr->nr_pages, size, gfp); + if (!IS_ERR(p)) { + mr->flags |= IO_REGION_F_SINGLE_REF; + mr->pages = pages; + return 0; + } + + nr_allocated = alloc_pages_bulk_array_node(gfp, NUMA_NO_NODE, + mr->nr_pages, pages); + if (nr_allocated != mr->nr_pages) { + if (nr_allocated) + release_pages(pages, nr_allocated); + kvfree(pages); + return -ENOMEM; + } + mr->pages = pages; + return 0; +} + int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr, struct io_uring_region_desc *reg) { @@ -283,9 +316,10 @@ int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr, return -EFAULT; if (memchr_inv(®->__resv, 0, sizeof(reg->__resv))) return -EINVAL; - if (reg->flags != IORING_MEM_REGION_TYPE_USER) + if (reg->flags & ~IORING_MEM_REGION_TYPE_USER) return -EINVAL; - if (!reg->user_addr) + /* user_addr should be set IFF it's a user memory backed region */ + if ((reg->flags & IORING_MEM_REGION_TYPE_USER) != !!reg->user_addr) return -EFAULT; if (!reg->size || reg->mmap_offset || reg->id) return -EINVAL; @@ -304,7 +338,10 @@ int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr, } mr->nr_pages = nr_pages; - ret = io_region_pin_pages(ctx, mr, reg); + if (reg->flags & IORING_MEM_REGION_TYPE_USER) + ret = io_region_pin_pages(ctx, mr, reg); + else + ret = io_region_allocate_pages(ctx, mr, reg); if (ret) goto out_free; diff --git a/io_uring/register.c b/io_uring/register.c index 77743e3f6751..47992bc1ffcb 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -589,6 +589,8 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg) if (copy_from_user(&rd, rd_uptr, sizeof(rd))) return -EFAULT; + if (!(rd.flags & IORING_MEM_REGION_TYPE_USER)) + return -EINVAL; if (memchr_inv(®.__resv, 0, sizeof(reg.__resv))) return -EINVAL; if (reg.flags & ~IORING_MEM_REGION_REG_WAIT_ARG) |