summaryrefslogtreecommitdiff
path: root/io_uring
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-03-28 14:55:32 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-03-28 14:55:32 -0700
commit6df9d086ffcb6b0521872fef5f9f4dd1907abb9a (patch)
treebddb2a97f000f3652a946dff8a2153ffcf1737cd /io_uring
parentca0b04ba0b35d48e1473a280c2e8905e7f80e906 (diff)
parent19f7e942732766aec8a51d217ab5fb4a7fe3bb0d (diff)
Merge tag 'for-6.15/io_uring-epoll-wait-20250325' of git://git.kernel.dk/linux
Pull io_uring epoll support from Jens Axboe: "This adds support for reading epoll events via io_uring. While this may seem counter-intuitive (and/or productive), the reasoning here is that quite a few existing epoll event loops can easily do a partial conversion to a completion based model, but are still stuck with one (or few) event types that remain readiness based. For that case, they then need to add the io_uring fd to the epoll context, and continue to rely on epoll_wait(2) for waiting on events. This misses out on the finer grained waiting that io_uring can do, to reduce context switches and wait for multiple events in one batch reliably. With adding support for reaping epoll events via io_uring, the whole legacy readiness based event types can still be reaped via epoll, with the overall waiting in the loop be driven by io_uring" * tag 'for-6.15/io_uring-epoll-wait-20250325' of git://git.kernel.dk/linux: io_uring/epoll: add support for IORING_OP_EPOLL_WAIT io_uring/epoll: remove CONFIG_EPOLL guards
Diffstat (limited to 'io_uring')
-rw-r--r--io_uring/Makefile9
-rw-r--r--io_uring/epoll.c35
-rw-r--r--io_uring/epoll.h2
-rw-r--r--io_uring/opdef.c14
4 files changed, 54 insertions, 6 deletions
diff --git a/io_uring/Makefile b/io_uring/Makefile
index 98e48339d84d..3e28a741ca15 100644
--- a/io_uring/Makefile
+++ b/io_uring/Makefile
@@ -11,10 +11,11 @@ obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
eventfd.o uring_cmd.o openclose.o \
sqpoll.o xattr.o nop.o fs.o splice.o \
sync.o msg_ring.o advise.o openclose.o \
- epoll.o statx.o timeout.o fdinfo.o \
- cancel.o waitid.o register.o \
- truncate.o memmap.o alloc_cache.o
+ statx.o timeout.o fdinfo.o cancel.o \
+ waitid.o register.o truncate.o \
+ memmap.o alloc_cache.o
obj-$(CONFIG_IO_URING_ZCRX) += zcrx.o
obj-$(CONFIG_IO_WQ) += io-wq.o
obj-$(CONFIG_FUTEX) += futex.o
-obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o
+obj-$(CONFIG_EPOLL) += epoll.o
+obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o
diff --git a/io_uring/epoll.c b/io_uring/epoll.c
index 89bff2068a19..6d2c48ba1923 100644
--- a/io_uring/epoll.c
+++ b/io_uring/epoll.c
@@ -12,7 +12,6 @@
#include "io_uring.h"
#include "epoll.h"
-#if defined(CONFIG_EPOLL)
struct io_epoll {
struct file *file;
int epfd;
@@ -21,6 +20,12 @@ struct io_epoll {
struct epoll_event event;
};
+struct io_epoll_wait {
+ struct file *file;
+ int maxevents;
+ struct epoll_event __user *events;
+};
+
int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_epoll *epoll = io_kiocb_to_cmd(req, struct io_epoll);
@@ -58,4 +63,30 @@ int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
io_req_set_res(req, ret, 0);
return IOU_OK;
}
-#endif
+
+int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait);
+
+ if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+ return -EINVAL;
+
+ iew->maxevents = READ_ONCE(sqe->len);
+ iew->events = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ return 0;
+}
+
+int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait);
+ int ret;
+
+ ret = epoll_sendevents(req->file, iew->events, iew->maxevents);
+ if (ret == 0)
+ return -EAGAIN;
+ if (ret < 0)
+ req_set_fail(req);
+
+ io_req_set_res(req, ret, 0);
+ return IOU_OK;
+}
diff --git a/io_uring/epoll.h b/io_uring/epoll.h
index 870cce11ba98..4111997c360b 100644
--- a/io_uring/epoll.h
+++ b/io_uring/epoll.h
@@ -3,4 +3,6 @@
#if defined(CONFIG_EPOLL)
int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags);
+int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags);
#endif
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 9511262c513e..db77df513d55 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -529,6 +529,17 @@ const struct io_issue_def io_issue_defs[] = {
.prep = io_eopnotsupp_prep,
#endif
},
+ [IORING_OP_EPOLL_WAIT] = {
+ .needs_file = 1,
+ .audit_skip = 1,
+ .pollin = 1,
+#if defined(CONFIG_EPOLL)
+ .prep = io_epoll_wait_prep,
+ .issue = io_epoll_wait,
+#else
+ .prep = io_eopnotsupp_prep,
+#endif
+ },
};
const struct io_cold_def io_cold_defs[] = {
@@ -761,6 +772,9 @@ const struct io_cold_def io_cold_defs[] = {
[IORING_OP_RECV_ZC] = {
.name = "RECV_ZC",
},
+ [IORING_OP_EPOLL_WAIT] = {
+ .name = "EPOLL_WAIT",
+ },
};
const char *io_uring_get_opcode(u8 opcode)