diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-28 15:07:04 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-28 15:07:04 -0700 |
commit | eff5f16bfd87ae48c56751741af41a825d5d4618 (patch) | |
tree | 9b1e58d1038902a754107b35621d428ba24f5165 /io_uring/rsrc.c | |
parent | 6df9d086ffcb6b0521872fef5f9f4dd1907abb9a (diff) | |
parent | 6889ae1b4df1579bcdffef023e2ea9a982565dff (diff) |
Merge tag 'for-6.15/io_uring-reg-vec-20250327' of git://git.kernel.dk/linux
Pull more io_uring updates from Jens Axboe:
"Final separate updates for io_uring.
This started out as a series of cleanups and improvements
for registered buffers, but as the last series of the io_uring changes
for 6.15, it also collected a few fixes for the other branches on top:
- Add support for vectored fixed/registered buffers.
Previously only single segments have been supported for commands,
now vectored variants are supported as well. This series includes
networking and file read/write support.
- Small series unifying return codes across multi and single shot.
- Small series cleaning up registered buffer importing.
- Adding support for vectored registered buffers for uring_cmd.
- Fix for io-wq handling of command reissue.
- Various little fixes and tweaks"
* tag 'for-6.15/io_uring-reg-vec-20250327' of git://git.kernel.dk/linux: (25 commits)
io_uring/net: fix io_req_post_cqe abuse by send bundle
io_uring/net: use REQ_F_IMPORT_BUFFER for send_zc
io_uring: move min_events sanitisation
io_uring: rename "min" arg in io_iopoll_check()
io_uring: open code __io_post_aux_cqe()
io_uring: defer iowq cqe overflow via task_work
io_uring: fix retry handling off iowq
io_uring/net: only import send_zc buffer once
io_uring/cmd: introduce io_uring_cmd_import_fixed_vec
io_uring/cmd: add iovec cache for commands
io_uring/cmd: don't expose entire cmd async data
io_uring: rename the data cmd cache
io_uring: rely on io_prep_reg_vec for iovec placement
io_uring: introduce io_prep_reg_iovec()
io_uring: unify STOP_MULTISHOT with IOU_OK
io_uring: return -EAGAIN to continue multishot
io_uring: cap cached iovec/bvec size
io_uring/net: implement vectored reg bufs for zctx
io_uring/net: convert to struct iou_vec
io_uring/net: pull vec alloc out of msghdr import
...
Diffstat (limited to 'io_uring/rsrc.c')
-rw-r--r-- | io_uring/rsrc.c | 163 |
1 files changed, 163 insertions, 0 deletions
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index a59563fbb4ad..3f195e24777e 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1262,3 +1262,166 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg) fput(file); return ret; } + +void io_vec_free(struct iou_vec *iv) +{ + if (!iv->iovec) + return; + kfree(iv->iovec); + iv->iovec = NULL; + iv->nr = 0; +} + +int io_vec_realloc(struct iou_vec *iv, unsigned nr_entries) +{ + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; + struct iovec *iov; + + iov = kmalloc_array(nr_entries, sizeof(iov[0]), gfp); + if (!iov) + return -ENOMEM; + + io_vec_free(iv); + iv->iovec = iov; + iv->nr = nr_entries; + return 0; +} + +static int io_vec_fill_bvec(int ddir, struct iov_iter *iter, + struct io_mapped_ubuf *imu, + struct iovec *iovec, unsigned nr_iovs, + struct iou_vec *vec) +{ + unsigned long folio_size = 1 << imu->folio_shift; + unsigned long folio_mask = folio_size - 1; + u64 folio_addr = imu->ubuf & ~folio_mask; + struct bio_vec *res_bvec = vec->bvec; + size_t total_len = 0; + unsigned bvec_idx = 0; + unsigned iov_idx; + + for (iov_idx = 0; iov_idx < nr_iovs; iov_idx++) { + size_t iov_len = iovec[iov_idx].iov_len; + u64 buf_addr = (u64)(uintptr_t)iovec[iov_idx].iov_base; + struct bio_vec *src_bvec; + size_t offset; + u64 buf_end; + + if (unlikely(check_add_overflow(buf_addr, (u64)iov_len, &buf_end))) + return -EFAULT; + if (unlikely(buf_addr < imu->ubuf || buf_end > (imu->ubuf + imu->len))) + return -EFAULT; + if (unlikely(!iov_len)) + return -EFAULT; + if (unlikely(check_add_overflow(total_len, iov_len, &total_len))) + return -EOVERFLOW; + + /* by using folio address it also accounts for bvec offset */ + offset = buf_addr - folio_addr; + src_bvec = imu->bvec + (offset >> imu->folio_shift); + offset &= folio_mask; + + for (; iov_len; offset = 0, bvec_idx++, src_bvec++) { + size_t seg_size = min_t(size_t, iov_len, + folio_size - offset); + + bvec_set_page(&res_bvec[bvec_idx], + src_bvec->bv_page, seg_size, offset); 
+ iov_len -= seg_size; + } + } + if (total_len > MAX_RW_COUNT) + return -EINVAL; + + iov_iter_bvec(iter, ddir, res_bvec, bvec_idx, total_len); + return 0; +} + +static int io_estimate_bvec_size(struct iovec *iov, unsigned nr_iovs, + struct io_mapped_ubuf *imu) +{ + unsigned shift = imu->folio_shift; + size_t max_segs = 0; + unsigned i; + + for (i = 0; i < nr_iovs; i++) + max_segs += (iov[i].iov_len >> shift) + 2; + return max_segs; +} + +int io_import_reg_vec(int ddir, struct iov_iter *iter, + struct io_kiocb *req, struct iou_vec *vec, + unsigned nr_iovs, unsigned issue_flags) +{ + struct io_rsrc_node *node; + struct io_mapped_ubuf *imu; + unsigned iovec_off; + struct iovec *iov; + unsigned nr_segs; + + node = io_find_buf_node(req, issue_flags); + if (!node) + return -EFAULT; + imu = node->buf; + if (imu->is_kbuf) + return -EOPNOTSUPP; + if (!(imu->dir & (1 << ddir))) + return -EFAULT; + + iovec_off = vec->nr - nr_iovs; + iov = vec->iovec + iovec_off; + nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu); + + if (sizeof(struct bio_vec) > sizeof(struct iovec)) { + size_t bvec_bytes; + + bvec_bytes = nr_segs * sizeof(struct bio_vec); + nr_segs = (bvec_bytes + sizeof(*iov) - 1) / sizeof(*iov); + nr_segs += nr_iovs; + } + + if (nr_segs > vec->nr) { + struct iou_vec tmp_vec = {}; + int ret; + + ret = io_vec_realloc(&tmp_vec, nr_segs); + if (ret) + return ret; + + iovec_off = tmp_vec.nr - nr_iovs; + memcpy(tmp_vec.iovec + iovec_off, iov, sizeof(*iov) * nr_iovs); + io_vec_free(vec); + + *vec = tmp_vec; + iov = vec->iovec + iovec_off; + req->flags |= REQ_F_NEED_CLEANUP; + } + + return io_vec_fill_bvec(ddir, iter, imu, iov, nr_iovs, vec); +} + +int io_prep_reg_iovec(struct io_kiocb *req, struct iou_vec *iv, + const struct iovec __user *uvec, size_t uvec_segs) +{ + struct iovec *iov; + int iovec_off, ret; + void *res; + + if (uvec_segs > iv->nr) { + ret = io_vec_realloc(iv, uvec_segs); + if (ret) + return ret; + req->flags |= REQ_F_NEED_CLEANUP; + } + + /* pad iovec to the 
right */ + iovec_off = iv->nr - uvec_segs; + iov = iv->iovec + iovec_off; + res = iovec_from_user(uvec, uvec_segs, uvec_segs, iov, + io_is_compat(req->ctx)); + if (IS_ERR(res)) + return PTR_ERR(res); + + req->flags |= REQ_F_IMPORT_BUFFER; + return 0; +} |