diff options
Diffstat (limited to 'io_uring')
-rw-r--r-- | io_uring/io_uring.c | 55 | ||||
-rw-r--r-- | io_uring/io_uring.h | 1 | ||||
-rw-r--r-- | io_uring/kbuf.c | 56 | ||||
-rw-r--r-- | io_uring/napi.c | 4 | ||||
-rw-r--r-- | io_uring/net.c | 5 | ||||
-rw-r--r-- | io_uring/rsrc.c | 4 | ||||
-rw-r--r-- | io_uring/rw.c | 7 | ||||
-rw-r--r-- | io_uring/timeout.c | 13 |
8 files changed, 77 insertions, 68 deletions
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 4e362c8542a7..3ba49c628337 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2444,7 +2444,7 @@ static enum hrtimer_restart io_cqring_min_timer_wakeup(struct hrtimer *timer) goto out_wake; } - iowq->t.function = io_cqring_timer_wakeup; + hrtimer_update_function(&iowq->t, io_cqring_timer_wakeup); hrtimer_set_expires(timer, iowq->timeout); return HRTIMER_RESTART; out_wake: @@ -2479,8 +2479,18 @@ static int io_cqring_schedule_timeout(struct io_wait_queue *iowq, return READ_ONCE(iowq->hit_timeout) ? -ETIME : 0; } +struct ext_arg { + size_t argsz; + struct timespec64 ts; + const sigset_t __user *sig; + ktime_t min_time; + bool ts_set; + bool iowait; +}; + static int __io_cqring_wait_schedule(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, + struct ext_arg *ext_arg, ktime_t start_time) { int ret = 0; @@ -2490,7 +2500,7 @@ static int __io_cqring_wait_schedule(struct io_ring_ctx *ctx, * can take into account that the task is waiting for IO - turns out * to be important for low QD IO. */ - if (current_pending_io()) + if (ext_arg->iowait && current_pending_io()) current->in_iowait = 1; if (iowq->timeout != KTIME_MAX || iowq->min_timeout) ret = io_cqring_schedule_timeout(iowq, ctx->clockid, start_time); @@ -2503,6 +2513,7 @@ static int __io_cqring_wait_schedule(struct io_ring_ctx *ctx, /* If this returns > 0, the caller should retry */ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, + struct ext_arg *ext_arg, ktime_t start_time) { if (unlikely(READ_ONCE(ctx->check_cq))) @@ -2516,17 +2527,9 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, if (unlikely(io_should_wake(iowq))) return 0; - return __io_cqring_wait_schedule(ctx, iowq, start_time); + return __io_cqring_wait_schedule(ctx, iowq, ext_arg, start_time); } -struct ext_arg { - size_t argsz; - struct timespec64 ts; - const sigset_t __user *sig; - ktime_t min_time; - bool ts_set; -}; - /* * Wait until events become available, if we don't already have some. The * application must reap them itself, as they reside on the shared cq ring. @@ -2606,7 +2609,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, TASK_INTERRUPTIBLE); } - ret = io_cqring_wait_schedule(ctx, &iowq, start_time); + ret = io_cqring_wait_schedule(ctx, &iowq, ext_arg, start_time); __set_current_state(TASK_RUNNING); atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT); @@ -3263,6 +3266,8 @@ static int io_get_ext_arg(struct io_ring_ctx *ctx, unsigned flags, const struct io_uring_getevents_arg __user *uarg = argp; struct io_uring_getevents_arg arg; + ext_arg->iowait = !(flags & IORING_ENTER_NO_IOWAIT); + /* * If EXT_ARG isn't set, then we have no timespec and the argp pointer * is just a pointer to the sigset_t. @@ -3340,7 +3345,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG | IORING_ENTER_REGISTERED_RING | IORING_ENTER_ABS_TIMER | - IORING_ENTER_EXT_ARG_REG))) + IORING_ENTER_EXT_ARG_REG | + IORING_ENTER_NO_IOWAIT))) return -EINVAL; /* @@ -3748,7 +3754,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP | IORING_FEAT_LINKED_FILE | IORING_FEAT_REG_REG_RING | IORING_FEAT_RECVSEND_BUNDLE | IORING_FEAT_MIN_TIMEOUT | - IORING_FEAT_RW_ATTR; + IORING_FEAT_RW_ATTR | IORING_FEAT_NO_IOWAIT; if (copy_to_user(params, p, sizeof(*p))) { ret = -EFAULT; @@ -3822,29 +3828,36 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params) return io_uring_create(entries, &p, params); } -static inline bool io_uring_allowed(void) +static inline int io_uring_allowed(void) { int disabled = READ_ONCE(sysctl_io_uring_disabled); kgid_t io_uring_group; if (disabled == 2) - return false; + return -EPERM; if (disabled == 0 || capable(CAP_SYS_ADMIN)) - return true; + goto allowed_lsm; io_uring_group = make_kgid(&init_user_ns, sysctl_io_uring_group); if (!gid_valid(io_uring_group)) - return false; + return -EPERM; - return in_group_p(io_uring_group); + if (!in_group_p(io_uring_group)) + return -EPERM; + +allowed_lsm: + return security_uring_allowed(); } SYSCALL_DEFINE2(io_uring_setup, u32, entries, struct io_uring_params __user *, params) { - if (!io_uring_allowed()) - return -EPERM; + int ret; + + ret = io_uring_allowed(); + if (ret) + return ret; return io_uring_setup(entries, params); } diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 2308f39ed915..87f883130286 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -91,7 +91,6 @@ struct file *io_file_get_fixed(struct io_kiocb *req, int fd, void __io_req_task_work_add(struct io_kiocb *req, unsigned flags); void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx, unsigned flags); -bool io_alloc_async_data(struct io_kiocb *req); void io_req_task_queue(struct io_kiocb *req); void io_req_task_complete(struct io_kiocb *req, io_tw_token_t tw); void io_req_task_queue_fail(struct io_kiocb *req, int ret); diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 3478be6d02ab..098109259671 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -32,6 +32,25 @@ struct io_provide_buf { __u16 bid; }; +static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len) +{ + while (len) { + struct io_uring_buf *buf; + u32 this_len; + + buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask); + this_len = min_t(int, len, buf->len); + buf->len -= this_len; + if (buf->len) { + buf->addr += this_len; + return false; + } + bl->head++; + len -= this_len; + } + return true; +} + bool io_kbuf_commit(struct io_kiocb *req, struct io_buffer_list *bl, int len, int nr) { @@ -42,20 +61,8 @@ bool io_kbuf_commit(struct io_kiocb *req, if (unlikely(len < 0)) return true; - - if (bl->flags & IOBL_INC) { - struct io_uring_buf *buf; - - buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask); - if (WARN_ON_ONCE(len > buf->len)) - len = buf->len; - buf->len -= len; - if (buf->len) { - buf->addr += len; - return false; - } - } - + if (bl->flags & IOBL_INC) + return io_kbuf_inc_commit(bl, len); bl->head += nr; return true; } @@ -226,25 +233,14 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, buf = io_ring_head_to_buf(br, head, bl->mask); if (arg->max_len) { u32 len = READ_ONCE(buf->len); + size_t needed; if (unlikely(!len)) return -ENOBUFS; - /* - * Limit incremental buffers to 1 segment. No point trying - * to peek ahead and map more than we need, when the buffers - * themselves should be large when setup with - * IOU_PBUF_RING_INC. - */ - if (bl->flags & IOBL_INC) { - nr_avail = 1; - } else { - size_t needed; - - needed = (arg->max_len + len - 1) / len; - needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT); - if (nr_avail > needed) - nr_avail = needed; - } + needed = (arg->max_len + len - 1) / len; + needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT); + if (nr_avail > needed) + nr_avail = needed; } /* diff --git a/io_uring/napi.c b/io_uring/napi.c index b1ade3fda30f..4a10de03e426 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -44,7 +44,7 @@ int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id) struct io_napi_entry *e; /* Non-NAPI IDs can be rejected. */ - if (napi_id < MIN_NAPI_ID) + if (!napi_id_valid(napi_id)) return -EINVAL; hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))]; @@ -87,7 +87,7 @@ static int __io_napi_del_id(struct io_ring_ctx *ctx, unsigned int napi_id) struct io_napi_entry *e; /* Non-NAPI IDs can be rejected. */ - if (napi_id < MIN_NAPI_ID) + if (!napi_id_valid(napi_id)) return -EINVAL; hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))]; diff --git a/io_uring/net.c b/io_uring/net.c index 616e953ef0ae..8944eb679024 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -157,7 +157,7 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) { req->async_data = NULL; - req->flags &= ~REQ_F_ASYNC_DATA; + req->flags &= ~(REQ_F_ASYNC_DATA|REQ_F_NEED_CLEANUP); } } @@ -464,7 +464,6 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) static void io_req_msg_cleanup(struct io_kiocb *req, unsigned int issue_flags) { - req->flags &= ~REQ_F_NEED_CLEANUP; io_netmsg_recycle(req, issue_flags); } @@ -1499,6 +1498,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) */ if (!(issue_flags & IO_URING_F_UNLOCKED)) { io_notif_flush(zc->notif); + zc->notif = NULL; io_req_msg_cleanup(req, 0); } io_req_set_res(req, ret, IORING_CQE_F_MORE); @@ -1572,6 +1572,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) */ if (!(issue_flags & IO_URING_F_UNLOCKED)) { io_notif_flush(sr->notif); + sr->notif = NULL; io_req_msg_cleanup(req, 0); } io_req_set_res(req, ret, IORING_CQE_F_MORE); diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 607b09bd8374..3f195e24777e 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1024,7 +1024,7 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, * and advance us to the beginning. */ offset = buf_addr - imu->ubuf; - iov_iter_bvec(iter, ddir, imu->bvec, imu->nr_bvecs, len); + iov_iter_bvec(iter, ddir, imu->bvec, imu->nr_bvecs, offset + len); if (offset) { /* @@ -1051,6 +1051,7 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, * to use the slow iter advance. */ if (offset < bvec->bv_len) { + iter->count -= offset; iter->iov_offset = offset; } else if (imu->is_kbuf) { iov_iter_advance(iter, offset); @@ -1063,6 +1064,7 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, iter->bvec += seg_skip; iter->nr_segs -= seg_skip; + iter->count -= bvec->bv_len + offset; iter->iov_offset = offset & ((1UL << imu->folio_shift) - 1); } } diff --git a/io_uring/rw.c b/io_uring/rw.c index 246b22225919..039e063f7091 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -599,11 +599,10 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res) if (kiocb->ki_flags & IOCB_WRITE) io_req_end_write(req); if (unlikely(res != req->cqe.res)) { - if (res == -EAGAIN && io_rw_should_reissue(req)) { + if (res == -EAGAIN && io_rw_should_reissue(req)) req->flags |= REQ_F_REISSUE | REQ_F_BL_NO_RECYCLE; - return; - } - req->cqe.res = res; + else + req->cqe.res = res; } /* order with io_iopoll_complete() checking ->iopoll_completed */ diff --git a/io_uring/timeout.c b/io_uring/timeout.c index fec6ec7beb62..2a107665230b 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -407,8 +407,7 @@ static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, io = req->async_data; if (hrtimer_try_to_cancel(&io->timer) == -1) return -EALREADY; - hrtimer_init(&io->timer, io_timeout_get_clock(io), mode); - io->timer.function = io_link_timeout_fn; + hrtimer_setup(&io->timer, io_link_timeout_fn, io_timeout_get_clock(io), mode); hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode); return 0; } @@ -430,8 +429,7 @@ static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, data->ts = *ts; list_add_tail(&timeout->list, &ctx->timeout_list); - hrtimer_init(&data->timer, io_timeout_get_clock(data), mode); - data->timer.function = io_timeout_fn; + hrtimer_setup(&data->timer, io_timeout_fn, io_timeout_get_clock(data), mode); hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), mode); return 0; } @@ -557,7 +555,6 @@ static int __io_timeout_prep(struct io_kiocb *req, return -EINVAL; data->mode = io_translate_timeout_mode(flags); - hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode); if (is_timeout_link) { struct io_submit_link *link = &req->ctx->submit_state.link; @@ -568,6 +565,10 @@ static int __io_timeout_prep(struct io_kiocb *req, return -EINVAL; timeout->head = link->last; link->last->flags |= REQ_F_ARM_LTIMEOUT; + hrtimer_setup(&data->timer, io_link_timeout_fn, io_timeout_get_clock(data), + data->mode); + } else { + hrtimer_setup(&data->timer, io_timeout_fn, io_timeout_get_clock(data), data->mode); } return 0; } @@ -627,7 +628,6 @@ int io_timeout(struct io_kiocb *req, unsigned int issue_flags) } add: list_add(&timeout->list, entry); - data->timer.function = io_timeout_fn; hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); raw_spin_unlock_irq(&ctx->timeout_lock); return IOU_ISSUE_SKIP_COMPLETE; @@ -646,7 +646,6 @@ void io_queue_linked_timeout(struct io_kiocb *req) if (timeout->head) { struct io_timeout_data *data = req->async_data; - data->timer.function = io_link_timeout_fn; hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); list_add_tail(&timeout->list, &ctx->ltimeout_list); |