-rw-r--r--  fs/io_uring.c                  | 138
-rw-r--r--  include/uapi/linux/io_uring.h  |  10
2 files changed, 145 insertions(+), 3 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1f3ae208f6a6..1a58f2042815 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -447,6 +447,15 @@ struct io_splice {
unsigned int flags;
};
+struct io_provide_buf {
+ struct file *file;
+ __u64 addr;
+ __s32 len;
+ __u32 bgid;
+ __u16 nbufs;
+ __u16 bid;
+};
+
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -572,6 +581,7 @@ struct io_kiocb {
struct io_madvise madvise;
struct io_epoll epoll;
struct io_splice splice;
+ struct io_provide_buf pbuf;
};
struct io_async_ctx *io;
@@ -799,7 +809,8 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
.hash_reg_file = 1,
.unbound_nonreg_file = 1,
- }
+ },
+ [IORING_OP_PROVIDE_BUFFERS] = {},
};
static void io_wq_submit_work(struct io_wq_work **workptr);
@@ -2785,6 +2796,120 @@ static int io_openat(struct io_kiocb *req, bool force_nonblock)
return io_openat2(req, force_nonblock);
}
+static int io_provide_buffers_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_provide_buf *p = &req->pbuf;
+ u64 tmp;
+
+ if (sqe->ioprio || sqe->rw_flags)
+ return -EINVAL;
+
+ tmp = READ_ONCE(sqe->fd);
+ if (!tmp || tmp > USHRT_MAX)
+ return -E2BIG;
+ p->nbufs = tmp;
+ p->addr = READ_ONCE(sqe->addr);
+ p->len = READ_ONCE(sqe->len);
+
+ if (!access_ok(u64_to_user_ptr(p->addr), p->len))
+ return -EFAULT;
+
+ p->bgid = READ_ONCE(sqe->buf_group);
+ tmp = READ_ONCE(sqe->off);
+ if (tmp > USHRT_MAX)
+ return -E2BIG;
+ p->bid = tmp;
+ return 0;
+}
+
+static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
+{
+ struct io_buffer *buf;
+ u64 addr = pbuf->addr;
+ int i, bid = pbuf->bid;
+
+ for (i = 0; i < pbuf->nbufs; i++) {
+ buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+ if (!buf)
+ break;
+
+ buf->addr = addr;
+ buf->len = pbuf->len;
+ buf->bid = bid;
+ addr += pbuf->len;
+ bid++;
+ if (!*head) {
+ INIT_LIST_HEAD(&buf->list);
+ *head = buf;
+ } else {
+ list_add_tail(&buf->list, &(*head)->list);
+ }
+ }
+
+ return i ? i : -ENOMEM;
+}
+
+static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock)
+{
+ if (needs_lock)
+ mutex_unlock(&ctx->uring_lock);
+}
+
+static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock)
+{
+ /*
+ * "Normal" inline submissions always hold the uring_lock, since we
+ * grab it from the system call. Same is true for the SQPOLL offload.
+ * The only exception is when we've detached the request and issue it
+ * from an async worker thread; grab the lock for that case.
+ */
+ if (needs_lock)
+ mutex_lock(&ctx->uring_lock);
+}
+
+static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock)
+{
+ struct io_provide_buf *p = &req->pbuf;
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_buffer *head, *list;
+ int ret = 0;
+
+ io_ring_submit_lock(ctx, !force_nonblock);
+
+ lockdep_assert_held(&ctx->uring_lock);
+
+ list = head = idr_find(&ctx->io_buffer_idr, p->bgid);
+
+ ret = io_add_buffers(p, &head);
+ if (ret < 0)
+ goto out;
+
+ if (!list) {
+ ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1,
+ GFP_KERNEL);
+ if (ret < 0) {
+ while (!list_empty(&head->list)) {
+ struct io_buffer *buf;
+
+ buf = list_first_entry(&head->list,
+ struct io_buffer, list);
+ list_del(&buf->list);
+ kfree(buf);
+ }
+ kfree(head);
+ goto out;
+ }
+ }
+out:
+ io_ring_submit_unlock(ctx, !force_nonblock);
+ if (ret < 0)
+ req_set_fail_links(req);
+ io_cqring_add_event(req, ret);
+ io_put_req(req);
+ return 0;
+}
+
static int io_epoll_ctl_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
@@ -4392,6 +4517,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
case IORING_OP_SPLICE:
ret = io_splice_prep(req, sqe);
break;
+ case IORING_OP_PROVIDE_BUFFERS:
+ ret = io_provide_buffers_prep(req, sqe);
+ break;
default:
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
req->opcode);
@@ -4669,6 +4797,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
}
ret = io_splice(req, force_nonblock);
break;
+ case IORING_OP_PROVIDE_BUFFERS:
+ if (sqe) {
+ ret = io_provide_buffers_prep(req, sqe);
+ if (ret)
+ break;
+ }
+ ret = io_provide_buffers(req, force_nonblock);
+ break;
default:
ret = -EINVAL;
break;
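
For reference: the SQE encoding established by io_provide_buffers_prep()
above maps out as follows from userspace. This is a minimal sketch, not
part of the patch; prep_provide_buffers() is a hypothetical helper, and
the ring setup and submission plumbing are assumed to exist elsewhere.

#include <string.h>
#include <linux/io_uring.h>

/*
 * Queue up 'nbufs' buffers of 'buf_len' bytes each, carved out of one
 * contiguous allocation at 'base', into buffer group 'bgid' with buffer
 * ids starting at 'bid'. Mirrors io_provide_buffers_prep(): fd carries
 * the buffer count, addr the base address, len the per-buffer length,
 * off the first buffer id, and buf_group the group id.
 */
static void prep_provide_buffers(struct io_uring_sqe *sqe, void *base,
				 int buf_len, int nbufs, int bgid, int bid)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_PROVIDE_BUFFERS;
	sqe->fd = nbufs;		/* 1..USHRT_MAX, else -E2BIG */
	sqe->addr = (unsigned long) base;	/* base of the buffer block */
	sqe->len = buf_len;		/* length of each individual buffer */
	sqe->off = bid;			/* id of the first buffer */
	sqe->buf_group = bgid;		/* group id, see uapi change below */
}

Note the partial-failure semantics in io_add_buffers(): if some but not
all of the kmalloc() calls fail, the operation still completes with the
number of buffers actually added as its result; only adding zero buffers
yields -ENOMEM.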
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 53b36311cdac..bc34a57a660b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -45,8 +45,13 @@ struct io_uring_sqe {
__u64 user_data; /* data to be passed back at completion time */
union {
struct {
- /* index into fixed buffers, if used */
- __u16 buf_index;
+ /* pack this to avoid bogus arm OABI complaints */
+ union {
+ /* index into fixed buffers, if used */
+ __u16 buf_index;
+ /* for grouped buffer selection */
+ __u16 buf_group;
+ } __attribute__((packed));
/* personality to use, if used */
__u16 personality;
__s32 splice_fd_in;
@@ -119,6 +124,7 @@ enum {
IORING_OP_OPENAT2,
IORING_OP_EPOLL_CTL,
IORING_OP_SPLICE,
+ IORING_OP_PROVIDE_BUFFERS,
/* this goes last, obviously */
IORING_OP_LAST,
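
Since buf_group is added as a member of a packed anonymous union
aliasing buf_index, the SQE layout and size are unchanged. A quick
sanity sketch (assuming these updated headers are installed; not part
of the patch):

#include <stdio.h>
#include <stddef.h>
#include <linux/io_uring.h>

int main(void)
{
	/* both names should resolve to the same 16-bit slot */
	printf("buf_index offset: %zu\n", offsetof(struct io_uring_sqe, buf_index));
	printf("buf_group offset: %zu\n", offsetof(struct io_uring_sqe, buf_group));
	printf("sqe size: %zu\n", sizeof(struct io_uring_sqe)); /* expect 64 */
	return 0;
}

The __attribute__((packed)) exists only to keep ARM OABI from padding
the union and shifting the fields that follow, per the comment above.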