diff options
Diffstat (limited to 'io_uring')
| -rw-r--r-- | io_uring/cmd_net.c | 2 | ||||
| -rw-r--r-- | io_uring/io_uring.c | 2 | ||||
| -rw-r--r-- | io_uring/memmap.c | 2 | ||||
| -rw-r--r-- | io_uring/mock_file.c | 43 | ||||
| -rw-r--r-- | io_uring/net.c | 6 | ||||
| -rw-r--r-- | io_uring/query.c | 2 | ||||
| -rw-r--r-- | io_uring/register.c | 3 | ||||
| -rw-r--r-- | io_uring/rsrc.c | 27 | ||||
| -rw-r--r-- | io_uring/rw.c | 19 | ||||
| -rw-r--r-- | io_uring/zcrx.c | 68 | ||||
| -rw-r--r-- | io_uring/zcrx.h | 7 |
11 files changed, 48 insertions, 133 deletions
diff --git a/io_uring/cmd_net.c b/io_uring/cmd_net.c index 27a09aa4c9d0..3b75931bd569 100644 --- a/io_uring/cmd_net.c +++ b/io_uring/cmd_net.c @@ -127,7 +127,7 @@ static int io_uring_cmd_timestamp(struct socket *sock, if (!unlikely(skb_queue_empty(&list))) { scoped_guard(spinlock_irqsave, &q->lock) - skb_queue_splice(q, &list); + skb_queue_splice(&list, q); } return -EAGAIN; } diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 296667ba712c..02339b74ba8d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -634,6 +634,8 @@ static void __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool dying) is_cqe32 = true; cqe_size <<= 1; } + if (ctx->flags & IORING_SETUP_CQE32) + is_cqe32 = false; if (!dying) { if (!io_get_cqe_overflow(ctx, &cqe, true, is_cqe32)) diff --git a/io_uring/memmap.c b/io_uring/memmap.c index 2e99dffddfc5..add03ca75cb9 100644 --- a/io_uring/memmap.c +++ b/io_uring/memmap.c @@ -135,7 +135,7 @@ static int io_region_pin_pages(struct io_ring_ctx *ctx, struct io_mapped_region *mr, struct io_uring_region_desc *reg) { - unsigned long size = mr->nr_pages << PAGE_SHIFT; + unsigned long size = (size_t) mr->nr_pages << PAGE_SHIFT; struct page **pages; int nr_pages; diff --git a/io_uring/mock_file.c b/io_uring/mock_file.c index 45d3735b2708..3ffac8f72974 100644 --- a/io_uring/mock_file.c +++ b/io_uring/mock_file.c @@ -211,10 +211,9 @@ static int io_create_mock_file(struct io_uring_cmd *cmd, unsigned int issue_flag const struct file_operations *fops = &io_mock_fops; const struct io_uring_sqe *sqe = cmd->sqe; struct io_uring_mock_create mc, __user *uarg; - struct io_mock_file *mf = NULL; - struct file *file = NULL; + struct file *file; + struct io_mock_file *mf __free(kfree) = NULL; size_t uarg_size; - int fd = -1, ret; /* * It's a testing only driver that allows exercising edge cases @@ -246,10 +245,6 @@ static int io_create_mock_file(struct io_uring_cmd *cmd, unsigned int issue_flag if (!mf) return -ENOMEM; - ret = fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); - if (fd < 0) - goto fail; - init_waitqueue_head(&mf->poll_wq); mf->size = mc.file_size; mf->rw_delay_ns = mc.rw_delay_ns; @@ -258,33 +253,25 @@ static int io_create_mock_file(struct io_uring_cmd *cmd, unsigned int issue_flag mf->pollable = true; } - file = anon_inode_create_getfile("[io_uring_mock]", fops, - mf, O_RDWR | O_CLOEXEC, NULL); - if (IS_ERR(file)) { - ret = PTR_ERR(file); - goto fail; - } + FD_PREPARE(fdf, O_RDWR | O_CLOEXEC, + anon_inode_create_getfile("[io_uring_mock]", fops, mf, + O_RDWR | O_CLOEXEC, NULL)); + if (fdf.err) + return fdf.err; - file->f_mode |= FMODE_READ | FMODE_CAN_READ | - FMODE_WRITE | FMODE_CAN_WRITE | - FMODE_LSEEK; + retain_and_null_ptr(mf); + file = fd_prepare_file(fdf); + file->f_mode |= FMODE_READ | FMODE_CAN_READ | FMODE_WRITE | + FMODE_CAN_WRITE | FMODE_LSEEK; if (mc.flags & IORING_MOCK_CREATE_F_SUPPORT_NOWAIT) file->f_mode |= FMODE_NOWAIT; - mc.out_fd = fd; - if (copy_to_user(uarg, &mc, uarg_size)) { - fput(file); - ret = -EFAULT; - goto fail; - } + mc.out_fd = fd_prepare_fd(fdf); + if (copy_to_user(uarg, &mc, uarg_size)) + return -EFAULT; - fd_install(fd, file); + fd_publish(fdf); return 0; -fail: - if (fd >= 0) - put_unused_fd(fd); - kfree(mf); - return ret; } static int io_probe_mock(struct io_uring_cmd *cmd) diff --git a/io_uring/net.c b/io_uring/net.c index a95cc9ca2a4d..43d77f95db51 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1532,8 +1532,10 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs; int ret; - ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter, req, - &kmsg->vec, uvec_segs, issue_flags); + sr->notif->buf_index = req->buf_index; + ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter, + sr->notif, &kmsg->vec, uvec_segs, + issue_flags); if (unlikely(ret)) return ret; req->flags &= ~REQ_F_IMPORT_BUFFER; diff --git a/io_uring/query.c b/io_uring/query.c index 645301bd2c82..cf02893ba911 100644 --- a/io_uring/query.c +++ b/io_uring/query.c @@ -20,6 +20,8 @@ static ssize_t io_query_ops(void *data) e->ring_setup_flags = IORING_SETUP_FLAGS; e->enter_flags = IORING_ENTER_FLAGS; e->sqe_flags = SQE_VALID_FLAGS; + e->nr_query_opcodes = __IO_URING_QUERY_MAX; + e->__pad = 0; return sizeof(*e); } diff --git a/io_uring/register.c b/io_uring/register.c index 2e4717f1357c..d189b266b8cc 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -827,9 +827,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, case IORING_REGISTER_QUERY: ret = io_query(ctx, arg, nr_args); break; - case IORING_REGISTER_ZCRX_REFILL: - ret = io_zcrx_return_bufs(ctx, arg, nr_args); - break; default: ret = -EINVAL; break; diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index d787c16dc1c3..0010c4992490 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -943,8 +943,8 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, struct req_iterator rq_iter; struct io_mapped_ubuf *imu; struct io_rsrc_node *node; - struct bio_vec bv, *bvec; - u16 nr_bvecs; + struct bio_vec bv; + unsigned int nr_bvecs = 0; int ret = 0; io_ring_submit_lock(ctx, issue_flags); @@ -965,8 +965,11 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, goto unlock; } - nr_bvecs = blk_rq_nr_phys_segments(rq); - imu = io_alloc_imu(ctx, nr_bvecs); + /* + * blk_rq_nr_phys_segments() may overestimate the number of bvecs + * but avoids needing to iterate over the bvecs + */ + imu = io_alloc_imu(ctx, blk_rq_nr_phys_segments(rq)); if (!imu) { kfree(node); ret = -ENOMEM; @@ -977,16 +980,15 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, imu->len = blk_rq_bytes(rq); imu->acct_pages = 0; imu->folio_shift = PAGE_SHIFT; - imu->nr_bvecs = nr_bvecs; refcount_set(&imu->refs, 1); imu->release = release; imu->priv = rq; imu->is_kbuf = true; imu->dir = 1 << rq_data_dir(rq); - bvec = imu->bvec; rq_for_each_bvec(bv, rq, rq_iter) - *bvec++ = bv; + imu->bvec[nr_bvecs++] = bv; + imu->nr_bvecs = nr_bvecs; node->buf = imu; data->nodes[index] = node; @@ -1403,8 +1405,11 @@ static int io_estimate_bvec_size(struct iovec *iov, unsigned nr_iovs, size_t max_segs = 0; unsigned i; - for (i = 0; i < nr_iovs; i++) + for (i = 0; i < nr_iovs; i++) { max_segs += (iov[i].iov_len >> shift) + 2; + if (max_segs > INT_MAX) + return -EOVERFLOW; + } return max_segs; } @@ -1510,7 +1515,11 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter, if (unlikely(ret)) return ret; } else { - nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu); + int ret = io_estimate_bvec_size(iov, nr_iovs, imu); + + if (ret < 0) + return ret; + nr_segs = ret; } if (sizeof(struct bio_vec) > sizeof(struct iovec)) { diff --git a/io_uring/rw.c b/io_uring/rw.c index 5b2241a5813c..6310a3d08409 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -277,7 +277,6 @@ static int __io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, } else { rw->kiocb.ki_ioprio = get_current_ioprio(); } - rw->kiocb.dio_complete = NULL; rw->kiocb.ki_flags = 0; rw->kiocb.ki_write_stream = READ_ONCE(sqe->write_stream); @@ -463,7 +462,10 @@ int io_read_mshot_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) void io_readv_writev_cleanup(struct io_kiocb *req) { + struct io_async_rw *rw = req->async_data; + lockdep_assert_held(&req->ctx->uring_lock); + io_vec_free(&rw->vec); io_rw_recycle(req, 0); } @@ -566,15 +568,6 @@ static inline int io_fixup_rw_res(struct io_kiocb *req, long res) void io_req_rw_complete(struct io_kiocb *req, io_tw_token_t tw) { - struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); - struct kiocb *kiocb = &rw->kiocb; - - if ((kiocb->ki_flags & IOCB_DIO_CALLER_COMP) && kiocb->dio_complete) { - long res = kiocb->dio_complete(rw->kiocb.private); - - io_req_set_res(req, io_fixup_rw_res(req, res), 0); - } - io_req_io_end(req); if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) @@ -589,10 +582,8 @@ static void io_complete_rw(struct kiocb *kiocb, long res) struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb); struct io_kiocb *req = cmd_to_io_kiocb(rw); - if (!kiocb->dio_complete || !(kiocb->ki_flags & IOCB_DIO_CALLER_COMP)) { - __io_complete_rw_common(req, res); - io_req_set_res(req, io_fixup_rw_res(req, res), 0); - } + __io_complete_rw_common(req, res); + io_req_set_res(req, io_fixup_rw_res(req, res), 0); req->io_task_work.func = io_req_rw_complete; __io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE); } diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index a816f5902091..b1b723222cdb 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -928,74 +928,6 @@ static const struct memory_provider_ops io_uring_pp_zc_ops = { .uninstall = io_pp_uninstall, }; -#define IO_ZCRX_MAX_SYS_REFILL_BUFS (1 << 16) -#define IO_ZCRX_SYS_REFILL_BATCH 32 - -static void io_return_buffers(struct io_zcrx_ifq *ifq, - struct io_uring_zcrx_rqe *rqes, unsigned nr) -{ - int i; - - for (i = 0; i < nr; i++) { - struct net_iov *niov; - netmem_ref netmem; - - if (!io_parse_rqe(&rqes[i], ifq, &niov)) - continue; - - scoped_guard(spinlock_bh, &ifq->rq_lock) { - if (!io_zcrx_put_niov_uref(niov)) - continue; - } - - netmem = net_iov_to_netmem(niov); - if (!page_pool_unref_and_test(netmem)) - continue; - io_zcrx_return_niov(niov); - } -} - -int io_zcrx_return_bufs(struct io_ring_ctx *ctx, - void __user *arg, unsigned nr_arg) -{ - struct io_uring_zcrx_rqe rqes[IO_ZCRX_SYS_REFILL_BATCH]; - struct io_uring_zcrx_rqe __user *user_rqes; - struct io_uring_zcrx_sync_refill zr; - struct io_zcrx_ifq *ifq; - unsigned nr, i; - - if (nr_arg) - return -EINVAL; - if (copy_from_user(&zr, arg, sizeof(zr))) - return -EFAULT; - if (!zr.nr_entries || zr.nr_entries > IO_ZCRX_MAX_SYS_REFILL_BUFS) - return -EINVAL; - if (!mem_is_zero(&zr.__resv, sizeof(zr.__resv))) - return -EINVAL; - - ifq = xa_load(&ctx->zcrx_ctxs, zr.zcrx_id); - if (!ifq) - return -EINVAL; - nr = zr.nr_entries; - user_rqes = u64_to_user_ptr(zr.rqes); - - for (i = 0; i < nr;) { - unsigned batch = min(nr - i, IO_ZCRX_SYS_REFILL_BATCH); - size_t size = batch * sizeof(rqes[0]); - - if (copy_from_user(rqes, user_rqes + i, size)) - return i ? i : -EFAULT; - io_return_buffers(ifq, rqes, batch); - - i += batch; - - if (fatal_signal_pending(current)) - return i; - cond_resched(); - } - return nr; -} - static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov, struct io_zcrx_ifq *ifq, int off, int len) { diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h index 33ef61503092..a48871b5adad 100644 --- a/io_uring/zcrx.h +++ b/io_uring/zcrx.h @@ -63,8 +63,6 @@ struct io_zcrx_ifq { }; #if defined(CONFIG_IO_URING_ZCRX) -int io_zcrx_return_bufs(struct io_ring_ctx *ctx, - void __user *arg, unsigned nr_arg); int io_register_zcrx_ifq(struct io_ring_ctx *ctx, struct io_uring_zcrx_ifq_reg __user *arg); void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx); @@ -97,11 +95,6 @@ static inline struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ct { return NULL; } -static inline int io_zcrx_return_bufs(struct io_ring_ctx *ctx, - void __user *arg, unsigned nr_arg) -{ - return -EOPNOTSUPP; -} #endif int io_recvzc(struct io_kiocb *req, unsigned int issue_flags); |
