diff options
Diffstat (limited to 'io_uring/kbuf.c')
-rw-r--r-- | io_uring/kbuf.c | 200 |
1 files changed, 88 insertions, 112 deletions
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 8e72de7712ac..098109259671 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -20,7 +20,8 @@ /* BIDs are addressed by a 16-bit field in a CQE */ #define MAX_BIDS_PER_BGID (1 << 16) -struct kmem_cache *io_buf_cachep; +/* Mapped buffer ring, return io_uring_buf from head */ +#define io_ring_head_to_buf(br, head, mask) &(br)->bufs[(head) & (mask)] struct io_provide_buf { struct file *file; @@ -31,6 +32,41 @@ struct io_provide_buf { __u16 bid; }; +static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len) +{ + while (len) { + struct io_uring_buf *buf; + u32 this_len; + + buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask); + this_len = min_t(int, len, buf->len); + buf->len -= this_len; + if (buf->len) { + buf->addr += this_len; + return false; + } + bl->head++; + len -= this_len; + } + return true; +} + +bool io_kbuf_commit(struct io_kiocb *req, + struct io_buffer_list *bl, int len, int nr) +{ + if (unlikely(!(req->flags & REQ_F_BUFFERS_COMMIT))) + return true; + + req->flags &= ~REQ_F_BUFFERS_COMMIT; + + if (unlikely(len < 0)) + return true; + if (bl->flags & IOBL_INC) + return io_kbuf_inc_commit(bl, len); + bl->head += nr; + return true; +} + static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, unsigned int bgid) { @@ -52,6 +88,16 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx, return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL)); } +void io_kbuf_drop_legacy(struct io_kiocb *req) +{ + if (WARN_ON_ONCE(!(req->flags & REQ_F_BUFFER_SELECTED))) + return; + req->buf_index = req->kbuf->bgid; + req->flags &= ~REQ_F_BUFFER_SELECTED; + kfree(req->kbuf); + req->kbuf = NULL; +} + bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) { struct io_ring_ctx *ctx = req->ctx; @@ -70,33 +116,6 @@ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) return true; } -void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags) -{ - /* - * We can add this buffer back to two lists: - * - * 1) The io_buffers_cache list. This one is protected by the - * ctx->uring_lock. If we already hold this lock, add back to this - * list as we can grab it from issue as well. - * 2) The io_buffers_comp list. This one is protected by the - * ctx->completion_lock. - * - * We migrate buffers from the comp_list to the issue cache list - * when we need one. - */ - if (issue_flags & IO_URING_F_UNLOCKED) { - struct io_ring_ctx *ctx = req->ctx; - - spin_lock(&ctx->completion_lock); - __io_put_kbuf_list(req, len, &ctx->io_buffers_comp); - spin_unlock(&ctx->completion_lock); - } else { - lockdep_assert_held(&req->ctx->uring_lock); - - __io_put_kbuf_list(req, len, &req->ctx->io_buffers_cache); - } -} - static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len, struct io_buffer_list *bl) { @@ -214,25 +233,14 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, buf = io_ring_head_to_buf(br, head, bl->mask); if (arg->max_len) { u32 len = READ_ONCE(buf->len); + size_t needed; if (unlikely(!len)) return -ENOBUFS; - /* - * Limit incremental buffers to 1 segment. No point trying - * to peek ahead and map more than we need, when the buffers - * themselves should be large when setup with - * IOU_PBUF_RING_INC. - */ - if (bl->flags & IOBL_INC) { - nr_avail = 1; - } else { - size_t needed; - - needed = (arg->max_len + len - 1) / len; - needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT); - if (nr_avail > needed) - nr_avail = needed; - } + needed = (arg->max_len + len - 1) / len; + needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT); + if (nr_avail > needed) + nr_avail = needed; } /* @@ -342,6 +350,35 @@ int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg) return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs); } +static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr) +{ + struct io_buffer_list *bl = req->buf_list; + bool ret = true; + + if (bl) { + ret = io_kbuf_commit(req, bl, len, nr); + req->buf_index = bl->bgid; + } + req->flags &= ~REQ_F_BUFFER_RING; + return ret; +} + +unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs) +{ + unsigned int ret; + + ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT); + + if (unlikely(!(req->flags & REQ_F_BUFFER_RING))) { + io_kbuf_drop_legacy(req); + return ret; + } + + if (!__io_put_kbuf_ring(req, len, nbufs)) + ret |= IORING_CQE_F_BUF_MORE; + return ret; +} + static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer_list *bl, unsigned nbufs) { @@ -367,7 +404,9 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *nxt; nxt = list_first_entry(&bl->buf_list, struct io_buffer, list); - list_move(&nxt->list, &ctx->io_buffers_cache); + list_del(&nxt->list); + kfree(nxt); + if (++i == nbufs) return i; cond_resched(); @@ -385,8 +424,6 @@ static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl) void io_destroy_buffers(struct io_ring_ctx *ctx) { struct io_buffer_list *bl; - struct list_head *item, *tmp; - struct io_buffer *buf; while (1) { unsigned long index = 0; @@ -400,19 +437,6 @@ void io_destroy_buffers(struct io_ring_ctx *ctx) break; io_put_bl(ctx, bl); } - - /* - * Move deferred locked entries to cache before pruning - */ - spin_lock(&ctx->completion_lock); - if (!list_empty(&ctx->io_buffers_comp)) - list_splice_init(&ctx->io_buffers_comp, &ctx->io_buffers_cache); - spin_unlock(&ctx->completion_lock); - - list_for_each_safe(item, tmp, &ctx->io_buffers_cache) { - buf = list_entry(item, struct io_buffer, list); - kmem_cache_free(io_buf_cachep, buf); - } } static void io_destroy_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl) @@ -501,53 +525,6 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe return 0; } -#define IO_BUFFER_ALLOC_BATCH 64 - -static int io_refill_buffer_cache(struct io_ring_ctx *ctx) -{ - struct io_buffer *bufs[IO_BUFFER_ALLOC_BATCH]; - int allocated; - - /* - * Completions that don't happen inline (eg not under uring_lock) will - * add to ->io_buffers_comp. If we don't have any free buffers, check - * the completion list and splice those entries first. - */ - if (!list_empty_careful(&ctx->io_buffers_comp)) { - spin_lock(&ctx->completion_lock); - if (!list_empty(&ctx->io_buffers_comp)) { - list_splice_init(&ctx->io_buffers_comp, - &ctx->io_buffers_cache); - spin_unlock(&ctx->completion_lock); - return 0; - } - spin_unlock(&ctx->completion_lock); - } - - /* - * No free buffers and no completion entries either. Allocate a new - * batch of buffer entries and add those to our freelist. - */ - - allocated = kmem_cache_alloc_bulk(io_buf_cachep, GFP_KERNEL_ACCOUNT, - ARRAY_SIZE(bufs), (void **) bufs); - if (unlikely(!allocated)) { - /* - * Bulk alloc is all-or-nothing. If we fail to get a batch, - * retry single alloc to be on the safe side. - */ - bufs[0] = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL); - if (!bufs[0]) - return -ENOMEM; - allocated = 1; - } - - while (allocated) - list_add_tail(&bufs[--allocated]->list, &ctx->io_buffers_cache); - - return 0; -} - static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf, struct io_buffer_list *bl) { @@ -556,12 +533,11 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf, int i, bid = pbuf->bid; for (i = 0; i < pbuf->nbufs; i++) { - if (list_empty(&ctx->io_buffers_cache) && - io_refill_buffer_cache(ctx)) + buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT); + if (!buf) break; - buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer, - list); - list_move_tail(&buf->list, &bl->buf_list); + + list_add_tail(&buf->list, &bl->buf_list); buf->addr = addr; buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT); buf->bid = bid; |