Diffstat (limited to 'io_uring/kbuf.c')
-rw-r--r--  io_uring/kbuf.c | 200
1 file changed, 88 insertions(+), 112 deletions(-)
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 8e72de7712ac..098109259671 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -20,7 +20,8 @@
/* BIDs are addressed by a 16-bit field in a CQE */
#define MAX_BIDS_PER_BGID (1 << 16)
-struct kmem_cache *io_buf_cachep;
+/* Mapped buffer ring, return io_uring_buf from head */
+#define io_ring_head_to_buf(br, head, mask) &(br)->bufs[(head) & (mask)]
struct io_provide_buf {
struct file *file;
@@ -31,6 +32,41 @@ struct io_provide_buf {
__u16 bid;
};
+static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len)
+{
+ while (len) {
+ struct io_uring_buf *buf;
+ u32 this_len;
+
+ buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
+ this_len = min_t(int, len, buf->len);
+ buf->len -= this_len;
+ if (buf->len) {
+ buf->addr += this_len;
+ return false;
+ }
+ bl->head++;
+ len -= this_len;
+ }
+ return true;
+}
+
+bool io_kbuf_commit(struct io_kiocb *req,
+ struct io_buffer_list *bl, int len, int nr)
+{
+ if (unlikely(!(req->flags & REQ_F_BUFFERS_COMMIT)))
+ return true;
+
+ req->flags &= ~REQ_F_BUFFERS_COMMIT;
+
+ if (unlikely(len < 0))
+ return true;
+ if (bl->flags & IOBL_INC)
+ return io_kbuf_inc_commit(bl, len);
+ bl->head += nr;
+ return true;
+}
+
static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
unsigned int bgid)
{
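For illustration only (not part of the patch), the incremental-commit logic added above can be modeled in plain userspace C; model_buf, model_ring and model_inc_commit below are stand-in names, not kernel structures:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for struct io_uring_buf and the buffer-ring head/mask. */
struct model_buf {
	uint64_t addr;
	uint32_t len;
};

struct model_ring {
	struct model_buf bufs[4];
	uint16_t head;
	uint16_t mask;
};

/* Mirrors io_kbuf_inc_commit(): returns true only once a buffer is fully drained. */
static bool model_inc_commit(struct model_ring *r, int len)
{
	while (len) {
		struct model_buf *buf = &r->bufs[r->head & r->mask];
		uint32_t this_len = (uint32_t)len < buf->len ? (uint32_t)len : buf->len;

		buf->len -= this_len;
		if (buf->len) {
			/* Partial use: keep the buffer, just advance its window. */
			buf->addr += this_len;
			return false;
		}
		/* Fully consumed: hand the slot back and move to the next entry. */
		r->head++;
		len -= this_len;
	}
	return true;
}

int main(void)
{
	struct model_ring r = { .bufs = { { .addr = 0x1000, .len = 4096 } }, .mask = 3 };

	printf("done=%d head=%u\n", model_inc_commit(&r, 1000), (unsigned)r.head); /* done=0 head=0 */
	printf("done=%d head=%u\n", model_inc_commit(&r, 3096), (unsigned)r.head); /* done=1 head=1 */
	return 0;
}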
@@ -52,6 +88,16 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx,
return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
}
+void io_kbuf_drop_legacy(struct io_kiocb *req)
+{
+ if (WARN_ON_ONCE(!(req->flags & REQ_F_BUFFER_SELECTED)))
+ return;
+ req->buf_index = req->kbuf->bgid;
+ req->flags &= ~REQ_F_BUFFER_SELECTED;
+ kfree(req->kbuf);
+ req->kbuf = NULL;
+}
+
bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -70,33 +116,6 @@ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
return true;
}
-void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags)
-{
- /*
- * We can add this buffer back to two lists:
- *
- * 1) The io_buffers_cache list. This one is protected by the
- * ctx->uring_lock. If we already hold this lock, add back to this
- * list as we can grab it from issue as well.
- * 2) The io_buffers_comp list. This one is protected by the
- * ctx->completion_lock.
- *
- * We migrate buffers from the comp_list to the issue cache list
- * when we need one.
- */
- if (issue_flags & IO_URING_F_UNLOCKED) {
- struct io_ring_ctx *ctx = req->ctx;
-
- spin_lock(&ctx->completion_lock);
- __io_put_kbuf_list(req, len, &ctx->io_buffers_comp);
- spin_unlock(&ctx->completion_lock);
- } else {
- lockdep_assert_held(&req->ctx->uring_lock);
-
- __io_put_kbuf_list(req, len, &req->ctx->io_buffers_cache);
- }
-}
-
static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
struct io_buffer_list *bl)
{
@@ -214,25 +233,14 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
buf = io_ring_head_to_buf(br, head, bl->mask);
if (arg->max_len) {
u32 len = READ_ONCE(buf->len);
+ size_t needed;
if (unlikely(!len))
return -ENOBUFS;
- /*
- * Limit incremental buffers to 1 segment. No point trying
- * to peek ahead and map more than we need, when the buffers
- * themselves should be large when setup with
- * IOU_PBUF_RING_INC.
- */
- if (bl->flags & IOBL_INC) {
- nr_avail = 1;
- } else {
- size_t needed;
-
- needed = (arg->max_len + len - 1) / len;
- needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT);
- if (nr_avail > needed)
- nr_avail = needed;
- }
+ needed = (arg->max_len + len - 1) / len;
+ needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT);
+ if (nr_avail > needed)
+ nr_avail = needed;
}
/*
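A worked example of the clamp above (numbers purely illustrative): with arg->max_len = 65536 and ring buffers of len = 4096, needed = (65536 + 4096 - 1) / 4096 = 16, so at most 16 entries are peeked even when nr_avail is larger; PEEK_MAX_IMPORT still bounds the count when the provided buffers are much smaller than max_len.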
@@ -342,6 +350,35 @@ int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg)
return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs);
}
+static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr)
+{
+ struct io_buffer_list *bl = req->buf_list;
+ bool ret = true;
+
+ if (bl) {
+ ret = io_kbuf_commit(req, bl, len, nr);
+ req->buf_index = bl->bgid;
+ }
+ req->flags &= ~REQ_F_BUFFER_RING;
+ return ret;
+}
+
+unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs)
+{
+ unsigned int ret;
+
+ ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
+
+ if (unlikely(!(req->flags & REQ_F_BUFFER_RING))) {
+ io_kbuf_drop_legacy(req);
+ return ret;
+ }
+
+ if (!__io_put_kbuf_ring(req, len, nbufs))
+ ret |= IORING_CQE_F_BUF_MORE;
+ return ret;
+}
+
static int __io_remove_buffers(struct io_ring_ctx *ctx,
struct io_buffer_list *bl, unsigned nbufs)
{
@@ -367,7 +404,9 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
struct io_buffer *nxt;
nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
- list_move(&nxt->list, &ctx->io_buffers_cache);
+ list_del(&nxt->list);
+ kfree(nxt);
+
if (++i == nbufs)
return i;
cond_resched();
@@ -385,8 +424,6 @@ static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
void io_destroy_buffers(struct io_ring_ctx *ctx)
{
struct io_buffer_list *bl;
- struct list_head *item, *tmp;
- struct io_buffer *buf;
while (1) {
unsigned long index = 0;
@@ -400,19 +437,6 @@ void io_destroy_buffers(struct io_ring_ctx *ctx)
break;
io_put_bl(ctx, bl);
}
-
- /*
- * Move deferred locked entries to cache before pruning
- */
- spin_lock(&ctx->completion_lock);
- if (!list_empty(&ctx->io_buffers_comp))
- list_splice_init(&ctx->io_buffers_comp, &ctx->io_buffers_cache);
- spin_unlock(&ctx->completion_lock);
-
- list_for_each_safe(item, tmp, &ctx->io_buffers_cache) {
- buf = list_entry(item, struct io_buffer, list);
- kmem_cache_free(io_buf_cachep, buf);
- }
}
static void io_destroy_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
@@ -501,53 +525,6 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
return 0;
}
-#define IO_BUFFER_ALLOC_BATCH 64
-
-static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
-{
- struct io_buffer *bufs[IO_BUFFER_ALLOC_BATCH];
- int allocated;
-
- /*
- * Completions that don't happen inline (eg not under uring_lock) will
- * add to ->io_buffers_comp. If we don't have any free buffers, check
- * the completion list and splice those entries first.
- */
- if (!list_empty_careful(&ctx->io_buffers_comp)) {
- spin_lock(&ctx->completion_lock);
- if (!list_empty(&ctx->io_buffers_comp)) {
- list_splice_init(&ctx->io_buffers_comp,
- &ctx->io_buffers_cache);
- spin_unlock(&ctx->completion_lock);
- return 0;
- }
- spin_unlock(&ctx->completion_lock);
- }
-
- /*
- * No free buffers and no completion entries either. Allocate a new
- * batch of buffer entries and add those to our freelist.
- */
-
- allocated = kmem_cache_alloc_bulk(io_buf_cachep, GFP_KERNEL_ACCOUNT,
- ARRAY_SIZE(bufs), (void **) bufs);
- if (unlikely(!allocated)) {
- /*
- * Bulk alloc is all-or-nothing. If we fail to get a batch,
- * retry single alloc to be on the safe side.
- */
- bufs[0] = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL);
- if (!bufs[0])
- return -ENOMEM;
- allocated = 1;
- }
-
- while (allocated)
- list_add_tail(&bufs[--allocated]->list, &ctx->io_buffers_cache);
-
- return 0;
-}
-
static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
struct io_buffer_list *bl)
{
@@ -556,12 +533,11 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
int i, bid = pbuf->bid;
for (i = 0; i < pbuf->nbufs; i++) {
- if (list_empty(&ctx->io_buffers_cache) &&
- io_refill_buffer_cache(ctx))
+ buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
+ if (!buf)
break;
- buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer,
- list);
- list_move_tail(&buf->list, &bl->buf_list);
+
+ list_add_tail(&buf->list, &bl->buf_list);
buf->addr = addr;
buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
buf->bid = bid;
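For context, the legacy path reworked in the last two hunks is driven from userspace by IORING_OP_PROVIDE_BUFFERS; a minimal liburing sketch (NR_BUFS, BUF_LEN, BGID and provide_buffers() are arbitrary illustration, not from this patch):

#include <errno.h>
#include <liburing.h>

#define NR_BUFS 8
#define BUF_LEN 4096
#define BGID    0

/* Hand NR_BUFS kernel-selectable buffers (classic, non-ring variant) to the ring. */
static int provide_buffers(struct io_uring *ring, void *base)
{
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	int ret;

	if (!sqe)
		return -EAGAIN;
	/* base points at NR_BUFS contiguous buffers of BUF_LEN bytes each. */
	io_uring_prep_provide_buffers(sqe, base, BUF_LEN, NR_BUFS, BGID, 0);
	io_uring_submit(ring);

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret)
		return ret;
	ret = cqe->res;		/* negative errno on failure */
	io_uring_cqe_seen(ring, cqe);
	return ret < 0 ? ret : 0;
}

Each of these buffers ends up as a struct io_buffer allocated by io_add_buffers() above and is later reported back through IORING_CQE_F_BUFFER in the CQE flags.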