summaryrefslogtreecommitdiff
path: root/io_uring
diff options
context:
space:
mode:
Diffstat (limited to 'io_uring')
-rw-r--r--io_uring/cmd_net.c2
-rw-r--r--io_uring/io_uring.c2
-rw-r--r--io_uring/memmap.c2
-rw-r--r--io_uring/mock_file.c43
-rw-r--r--io_uring/net.c6
-rw-r--r--io_uring/query.c2
-rw-r--r--io_uring/register.c3
-rw-r--r--io_uring/rsrc.c27
-rw-r--r--io_uring/rw.c19
-rw-r--r--io_uring/zcrx.c68
-rw-r--r--io_uring/zcrx.h7
11 files changed, 48 insertions, 133 deletions
diff --git a/io_uring/cmd_net.c b/io_uring/cmd_net.c
index 27a09aa4c9d0..3b75931bd569 100644
--- a/io_uring/cmd_net.c
+++ b/io_uring/cmd_net.c
@@ -127,7 +127,7 @@ static int io_uring_cmd_timestamp(struct socket *sock,
if (!unlikely(skb_queue_empty(&list))) {
scoped_guard(spinlock_irqsave, &q->lock)
- skb_queue_splice(q, &list);
+ skb_queue_splice(&list, q);
}
return -EAGAIN;
}
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 296667ba712c..02339b74ba8d 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -634,6 +634,8 @@ static void __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool dying)
is_cqe32 = true;
cqe_size <<= 1;
}
+ if (ctx->flags & IORING_SETUP_CQE32)
+ is_cqe32 = false;
if (!dying) {
if (!io_get_cqe_overflow(ctx, &cqe, true, is_cqe32))
diff --git a/io_uring/memmap.c b/io_uring/memmap.c
index 2e99dffddfc5..add03ca75cb9 100644
--- a/io_uring/memmap.c
+++ b/io_uring/memmap.c
@@ -135,7 +135,7 @@ static int io_region_pin_pages(struct io_ring_ctx *ctx,
struct io_mapped_region *mr,
struct io_uring_region_desc *reg)
{
- unsigned long size = mr->nr_pages << PAGE_SHIFT;
+ unsigned long size = (size_t) mr->nr_pages << PAGE_SHIFT;
struct page **pages;
int nr_pages;
diff --git a/io_uring/mock_file.c b/io_uring/mock_file.c
index 45d3735b2708..3ffac8f72974 100644
--- a/io_uring/mock_file.c
+++ b/io_uring/mock_file.c
@@ -211,10 +211,9 @@ static int io_create_mock_file(struct io_uring_cmd *cmd, unsigned int issue_flag
const struct file_operations *fops = &io_mock_fops;
const struct io_uring_sqe *sqe = cmd->sqe;
struct io_uring_mock_create mc, __user *uarg;
- struct io_mock_file *mf = NULL;
- struct file *file = NULL;
+ struct file *file;
+ struct io_mock_file *mf __free(kfree) = NULL;
size_t uarg_size;
- int fd = -1, ret;
/*
* It's a testing only driver that allows exercising edge cases
@@ -246,10 +245,6 @@ static int io_create_mock_file(struct io_uring_cmd *cmd, unsigned int issue_flag
if (!mf)
return -ENOMEM;
- ret = fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
- if (fd < 0)
- goto fail;
-
init_waitqueue_head(&mf->poll_wq);
mf->size = mc.file_size;
mf->rw_delay_ns = mc.rw_delay_ns;
@@ -258,33 +253,25 @@ static int io_create_mock_file(struct io_uring_cmd *cmd, unsigned int issue_flag
mf->pollable = true;
}
- file = anon_inode_create_getfile("[io_uring_mock]", fops,
- mf, O_RDWR | O_CLOEXEC, NULL);
- if (IS_ERR(file)) {
- ret = PTR_ERR(file);
- goto fail;
- }
+ FD_PREPARE(fdf, O_RDWR | O_CLOEXEC,
+ anon_inode_create_getfile("[io_uring_mock]", fops, mf,
+ O_RDWR | O_CLOEXEC, NULL));
+ if (fdf.err)
+ return fdf.err;
- file->f_mode |= FMODE_READ | FMODE_CAN_READ |
- FMODE_WRITE | FMODE_CAN_WRITE |
- FMODE_LSEEK;
+ retain_and_null_ptr(mf);
+ file = fd_prepare_file(fdf);
+ file->f_mode |= FMODE_READ | FMODE_CAN_READ | FMODE_WRITE |
+ FMODE_CAN_WRITE | FMODE_LSEEK;
if (mc.flags & IORING_MOCK_CREATE_F_SUPPORT_NOWAIT)
file->f_mode |= FMODE_NOWAIT;
- mc.out_fd = fd;
- if (copy_to_user(uarg, &mc, uarg_size)) {
- fput(file);
- ret = -EFAULT;
- goto fail;
- }
+ mc.out_fd = fd_prepare_fd(fdf);
+ if (copy_to_user(uarg, &mc, uarg_size))
+ return -EFAULT;
- fd_install(fd, file);
+ fd_publish(fdf);
return 0;
-fail:
- if (fd >= 0)
- put_unused_fd(fd);
- kfree(mf);
- return ret;
}
static int io_probe_mock(struct io_uring_cmd *cmd)
diff --git a/io_uring/net.c b/io_uring/net.c
index a95cc9ca2a4d..43d77f95db51 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1532,8 +1532,10 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs;
int ret;
- ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter, req,
- &kmsg->vec, uvec_segs, issue_flags);
+ sr->notif->buf_index = req->buf_index;
+ ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter,
+ sr->notif, &kmsg->vec, uvec_segs,
+ issue_flags);
if (unlikely(ret))
return ret;
req->flags &= ~REQ_F_IMPORT_BUFFER;
diff --git a/io_uring/query.c b/io_uring/query.c
index 645301bd2c82..cf02893ba911 100644
--- a/io_uring/query.c
+++ b/io_uring/query.c
@@ -20,6 +20,8 @@ static ssize_t io_query_ops(void *data)
e->ring_setup_flags = IORING_SETUP_FLAGS;
e->enter_flags = IORING_ENTER_FLAGS;
e->sqe_flags = SQE_VALID_FLAGS;
+ e->nr_query_opcodes = __IO_URING_QUERY_MAX;
+ e->__pad = 0;
return sizeof(*e);
}
diff --git a/io_uring/register.c b/io_uring/register.c
index 2e4717f1357c..d189b266b8cc 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -827,9 +827,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
case IORING_REGISTER_QUERY:
ret = io_query(ctx, arg, nr_args);
break;
- case IORING_REGISTER_ZCRX_REFILL:
- ret = io_zcrx_return_bufs(ctx, arg, nr_args);
- break;
default:
ret = -EINVAL;
break;
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index d787c16dc1c3..0010c4992490 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -943,8 +943,8 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
struct req_iterator rq_iter;
struct io_mapped_ubuf *imu;
struct io_rsrc_node *node;
- struct bio_vec bv, *bvec;
- u16 nr_bvecs;
+ struct bio_vec bv;
+ unsigned int nr_bvecs = 0;
int ret = 0;
io_ring_submit_lock(ctx, issue_flags);
@@ -965,8 +965,11 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
goto unlock;
}
- nr_bvecs = blk_rq_nr_phys_segments(rq);
- imu = io_alloc_imu(ctx, nr_bvecs);
+ /*
+ * blk_rq_nr_phys_segments() may overestimate the number of bvecs
+ * but avoids needing to iterate over the bvecs
+ */
+ imu = io_alloc_imu(ctx, blk_rq_nr_phys_segments(rq));
if (!imu) {
kfree(node);
ret = -ENOMEM;
@@ -977,16 +980,15 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
imu->len = blk_rq_bytes(rq);
imu->acct_pages = 0;
imu->folio_shift = PAGE_SHIFT;
- imu->nr_bvecs = nr_bvecs;
refcount_set(&imu->refs, 1);
imu->release = release;
imu->priv = rq;
imu->is_kbuf = true;
imu->dir = 1 << rq_data_dir(rq);
- bvec = imu->bvec;
rq_for_each_bvec(bv, rq, rq_iter)
- *bvec++ = bv;
+ imu->bvec[nr_bvecs++] = bv;
+ imu->nr_bvecs = nr_bvecs;
node->buf = imu;
data->nodes[index] = node;
@@ -1403,8 +1405,11 @@ static int io_estimate_bvec_size(struct iovec *iov, unsigned nr_iovs,
size_t max_segs = 0;
unsigned i;
- for (i = 0; i < nr_iovs; i++)
+ for (i = 0; i < nr_iovs; i++) {
max_segs += (iov[i].iov_len >> shift) + 2;
+ if (max_segs > INT_MAX)
+ return -EOVERFLOW;
+ }
return max_segs;
}
@@ -1510,7 +1515,11 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter,
if (unlikely(ret))
return ret;
} else {
- nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu);
+ int ret = io_estimate_bvec_size(iov, nr_iovs, imu);
+
+ if (ret < 0)
+ return ret;
+ nr_segs = ret;
}
if (sizeof(struct bio_vec) > sizeof(struct iovec)) {
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 5b2241a5813c..6310a3d08409 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -277,7 +277,6 @@ static int __io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
} else {
rw->kiocb.ki_ioprio = get_current_ioprio();
}
- rw->kiocb.dio_complete = NULL;
rw->kiocb.ki_flags = 0;
rw->kiocb.ki_write_stream = READ_ONCE(sqe->write_stream);
@@ -463,7 +462,10 @@ int io_read_mshot_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
void io_readv_writev_cleanup(struct io_kiocb *req)
{
+ struct io_async_rw *rw = req->async_data;
+
lockdep_assert_held(&req->ctx->uring_lock);
+ io_vec_free(&rw->vec);
io_rw_recycle(req, 0);
}
@@ -566,15 +568,6 @@ static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
void io_req_rw_complete(struct io_kiocb *req, io_tw_token_t tw)
{
- struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
- struct kiocb *kiocb = &rw->kiocb;
-
- if ((kiocb->ki_flags & IOCB_DIO_CALLER_COMP) && kiocb->dio_complete) {
- long res = kiocb->dio_complete(rw->kiocb.private);
-
- io_req_set_res(req, io_fixup_rw_res(req, res), 0);
- }
-
io_req_io_end(req);
if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))
@@ -589,10 +582,8 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb);
struct io_kiocb *req = cmd_to_io_kiocb(rw);
- if (!kiocb->dio_complete || !(kiocb->ki_flags & IOCB_DIO_CALLER_COMP)) {
- __io_complete_rw_common(req, res);
- io_req_set_res(req, io_fixup_rw_res(req, res), 0);
- }
+ __io_complete_rw_common(req, res);
+ io_req_set_res(req, io_fixup_rw_res(req, res), 0);
req->io_task_work.func = io_req_rw_complete;
__io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
}
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index a816f5902091..b1b723222cdb 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -928,74 +928,6 @@ static const struct memory_provider_ops io_uring_pp_zc_ops = {
.uninstall = io_pp_uninstall,
};
-#define IO_ZCRX_MAX_SYS_REFILL_BUFS (1 << 16)
-#define IO_ZCRX_SYS_REFILL_BATCH 32
-
-static void io_return_buffers(struct io_zcrx_ifq *ifq,
- struct io_uring_zcrx_rqe *rqes, unsigned nr)
-{
- int i;
-
- for (i = 0; i < nr; i++) {
- struct net_iov *niov;
- netmem_ref netmem;
-
- if (!io_parse_rqe(&rqes[i], ifq, &niov))
- continue;
-
- scoped_guard(spinlock_bh, &ifq->rq_lock) {
- if (!io_zcrx_put_niov_uref(niov))
- continue;
- }
-
- netmem = net_iov_to_netmem(niov);
- if (!page_pool_unref_and_test(netmem))
- continue;
- io_zcrx_return_niov(niov);
- }
-}
-
-int io_zcrx_return_bufs(struct io_ring_ctx *ctx,
- void __user *arg, unsigned nr_arg)
-{
- struct io_uring_zcrx_rqe rqes[IO_ZCRX_SYS_REFILL_BATCH];
- struct io_uring_zcrx_rqe __user *user_rqes;
- struct io_uring_zcrx_sync_refill zr;
- struct io_zcrx_ifq *ifq;
- unsigned nr, i;
-
- if (nr_arg)
- return -EINVAL;
- if (copy_from_user(&zr, arg, sizeof(zr)))
- return -EFAULT;
- if (!zr.nr_entries || zr.nr_entries > IO_ZCRX_MAX_SYS_REFILL_BUFS)
- return -EINVAL;
- if (!mem_is_zero(&zr.__resv, sizeof(zr.__resv)))
- return -EINVAL;
-
- ifq = xa_load(&ctx->zcrx_ctxs, zr.zcrx_id);
- if (!ifq)
- return -EINVAL;
- nr = zr.nr_entries;
- user_rqes = u64_to_user_ptr(zr.rqes);
-
- for (i = 0; i < nr;) {
- unsigned batch = min(nr - i, IO_ZCRX_SYS_REFILL_BATCH);
- size_t size = batch * sizeof(rqes[0]);
-
- if (copy_from_user(rqes, user_rqes + i, size))
- return i ? i : -EFAULT;
- io_return_buffers(ifq, rqes, batch);
-
- i += batch;
-
- if (fatal_signal_pending(current))
- return i;
- cond_resched();
- }
- return nr;
-}
-
static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
struct io_zcrx_ifq *ifq, int off, int len)
{
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 33ef61503092..a48871b5adad 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -63,8 +63,6 @@ struct io_zcrx_ifq {
};
#if defined(CONFIG_IO_URING_ZCRX)
-int io_zcrx_return_bufs(struct io_ring_ctx *ctx,
- void __user *arg, unsigned nr_arg);
int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
struct io_uring_zcrx_ifq_reg __user *arg);
void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx);
@@ -97,11 +95,6 @@ static inline struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ct
{
return NULL;
}
-static inline int io_zcrx_return_bufs(struct io_ring_ctx *ctx,
- void __user *arg, unsigned nr_arg)
-{
- return -EOPNOTSUPP;
-}
#endif
int io_recvzc(struct io_kiocb *req, unsigned int issue_flags);