summaryrefslogtreecommitdiff
path: root/drivers/block/ublk_drv.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/ublk_drv.c')
-rw-r--r--drivers/block/ublk_drv.c571
1 files changed, 413 insertions, 158 deletions
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index f9032076bc06..6f51072776f1 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -50,6 +50,8 @@
/* private ioctl command mirror */
#define UBLK_CMD_DEL_DEV_ASYNC _IOC_NR(UBLK_U_CMD_DEL_DEV_ASYNC)
+#define UBLK_CMD_UPDATE_SIZE _IOC_NR(UBLK_U_CMD_UPDATE_SIZE)
+#define UBLK_CMD_QUIESCE_DEV _IOC_NR(UBLK_U_CMD_QUIESCE_DEV)
#define UBLK_IO_REGISTER_IO_BUF _IOC_NR(UBLK_U_IO_REGISTER_IO_BUF)
#define UBLK_IO_UNREGISTER_IO_BUF _IOC_NR(UBLK_U_IO_UNREGISTER_IO_BUF)
@@ -64,7 +66,10 @@
| UBLK_F_CMD_IOCTL_ENCODE \
| UBLK_F_USER_COPY \
| UBLK_F_ZONED \
- | UBLK_F_USER_RECOVERY_FAIL_IO)
+ | UBLK_F_USER_RECOVERY_FAIL_IO \
+ | UBLK_F_UPDATE_SIZE \
+ | UBLK_F_AUTO_BUF_REG \
+ | UBLK_F_QUIESCE)
#define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \
| UBLK_F_USER_RECOVERY_REISSUE \
@@ -77,7 +82,11 @@
UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT)
struct ublk_rq_data {
- struct kref ref;
+ refcount_t ref;
+
+ /* for auto-unregister buffer in case of UBLK_F_AUTO_BUF_REG */
+ u16 buf_index;
+ void *buf_ctx_handle;
};
struct ublk_uring_cmd_pdu {
@@ -99,6 +108,9 @@ struct ublk_uring_cmd_pdu {
* setup in ublk uring_cmd handler
*/
struct ublk_queue *ubq;
+
+ struct ublk_auto_buf_reg buf;
+
u16 tag;
};
@@ -131,6 +143,14 @@ struct ublk_uring_cmd_pdu {
*/
#define UBLK_IO_FLAG_NEED_GET_DATA 0x08
+/*
+ * request buffer is registered automatically, so we have to unregister it
+ * before completing this request.
+ *
+ * io_uring will unregister buffer automatically for us during exiting.
+ */
+#define UBLK_IO_FLAG_AUTO_BUF_REG 0x10
+
/* atomic RW with ubq->cancel_lock */
#define UBLK_IO_FLAG_CANCELED 0x80000000
@@ -140,7 +160,12 @@ struct ublk_io {
unsigned int flags;
int res;
- struct io_uring_cmd *cmd;
+ union {
+ /* valid if UBLK_IO_FLAG_ACTIVE is set */
+ struct io_uring_cmd *cmd;
+ /* valid if UBLK_IO_FLAG_OWNED_BY_SRV is set */
+ struct request *req;
+ };
};
struct ublk_queue {
@@ -198,13 +223,19 @@ struct ublk_params_header {
__u32 types;
};
+static void ublk_io_release(void *priv);
static void ublk_stop_dev_unlocked(struct ublk_device *ub);
static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq);
static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
const struct ublk_queue *ubq, int tag, size_t offset);
static inline unsigned int ublk_req_build_flags(struct request *req);
-static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
- int tag);
+
+static inline struct ublksrv_io_desc *
+ublk_get_iod(const struct ublk_queue *ubq, unsigned tag)
+{
+ return &ubq->io_cmd_buf[tag];
+}
+
static inline bool ublk_dev_is_zoned(const struct ublk_device *ub)
{
return ub->dev_info.flags & UBLK_F_ZONED;
@@ -356,8 +387,7 @@ static int ublk_report_zones(struct gendisk *disk, sector_t sector,
if (ret)
goto free_req;
- ret = blk_rq_map_kern(disk->queue, req, buffer, buffer_length,
- GFP_KERNEL);
+ ret = blk_rq_map_kern(req, buffer, buffer_length, GFP_KERNEL);
if (ret)
goto erase_desc;
@@ -477,7 +507,6 @@ static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq,
#endif
static inline void __ublk_complete_rq(struct request *req);
-static void ublk_complete_rq(struct kref *ref);
static dev_t ublk_chr_devt;
static const struct class ublk_chr_class = {
@@ -609,6 +638,11 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq)
return ubq->flags & UBLK_F_SUPPORT_ZERO_COPY;
}
+static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq)
+{
+ return ubq->flags & UBLK_F_AUTO_BUF_REG;
+}
+
static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
{
return ubq->flags & UBLK_F_USER_COPY;
@@ -616,7 +650,8 @@ static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
static inline bool ublk_need_map_io(const struct ublk_queue *ubq)
{
- return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq);
+ return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq) &&
+ !ublk_support_auto_buf_reg(ubq);
}
static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
@@ -627,8 +662,13 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
*
* for zero copy, request buffer need to be registered to io_uring
* buffer table, so reference is needed
+ *
+ * For auto buffer register, ublk server still may issue
+ * UBLK_IO_COMMIT_AND_FETCH_REQ before one registered buffer is used up,
+ * so reference is required too.
*/
- return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq);
+ return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq) ||
+ ublk_support_auto_buf_reg(ubq);
}
static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
@@ -637,7 +677,7 @@ static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
if (ublk_need_req_ref(ubq)) {
struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
- kref_init(&data->ref);
+ refcount_set(&data->ref, 1);
}
}
@@ -647,7 +687,7 @@ static inline bool ublk_get_req_ref(const struct ublk_queue *ubq,
if (ublk_need_req_ref(ubq)) {
struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
- return kref_get_unless_zero(&data->ref);
+ return refcount_inc_not_zero(&data->ref);
}
return true;
@@ -659,7 +699,8 @@ static inline void ublk_put_req_ref(const struct ublk_queue *ubq,
if (ublk_need_req_ref(ubq)) {
struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
- kref_put(&data->ref, ublk_complete_rq);
+ if (refcount_dec_and_test(&data->ref))
+ __ublk_complete_rq(req);
} else {
__ublk_complete_rq(req);
}
@@ -695,12 +736,6 @@ static inline bool ublk_rq_has_data(const struct request *rq)
return bio_has_data(rq->bio);
}
-static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
- int tag)
-{
- return &ubq->io_cmd_buf[tag];
-}
-
static inline struct ublksrv_io_desc *
ublk_queue_cmd_buf(struct ublk_device *ub, int q_id)
{
@@ -1117,18 +1152,12 @@ exit:
blk_mq_end_request(req, res);
}
-static void ublk_complete_rq(struct kref *ref)
+static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req,
+ int res, unsigned issue_flags)
{
- struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data,
- ref);
- struct request *req = blk_mq_rq_from_pdu(data);
+ /* read cmd first because req will overwrite it */
+ struct io_uring_cmd *cmd = io->cmd;
- __ublk_complete_rq(req);
-}
-
-static void ubq_complete_io_cmd(struct ublk_io *io, int res,
- unsigned issue_flags)
-{
/* mark this cmd owned by ublksrv */
io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
@@ -1138,8 +1167,10 @@ static void ubq_complete_io_cmd(struct ublk_io *io, int res,
*/
io->flags &= ~UBLK_IO_FLAG_ACTIVE;
+ io->req = req;
+
/* tell ublksrv one io request is coming */
- io_uring_cmd_done(io->cmd, res, 0, issue_flags);
+ io_uring_cmd_done(cmd, res, 0, issue_flags);
}
#define UBLK_REQUEUE_DELAY_MS 3
@@ -1154,16 +1185,91 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
blk_mq_end_request(rq, BLK_STS_IOERR);
}
+static void ublk_auto_buf_reg_fallback(struct request *req)
+{
+ const struct ublk_queue *ubq = req->mq_hctx->driver_data;
+ struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag);
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ iod->op_flags |= UBLK_IO_F_NEED_REG_BUF;
+ refcount_set(&data->ref, 1);
+}
+
+static bool ublk_auto_buf_reg(struct request *req, struct ublk_io *io,
+ unsigned int issue_flags)
+{
+ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd);
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+ int ret;
+
+ ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release,
+ pdu->buf.index, issue_flags);
+ if (ret) {
+ if (pdu->buf.flags & UBLK_AUTO_BUF_REG_FALLBACK) {
+ ublk_auto_buf_reg_fallback(req);
+ return true;
+ }
+ blk_mq_end_request(req, BLK_STS_IOERR);
+ return false;
+ }
+ /* one extra reference is dropped by ublk_io_release */
+ refcount_set(&data->ref, 2);
+
+ data->buf_ctx_handle = io_uring_cmd_ctx_handle(io->cmd);
+ /* store buffer index in request payload */
+ data->buf_index = pdu->buf.index;
+ io->flags |= UBLK_IO_FLAG_AUTO_BUF_REG;
+ return true;
+}
+
+static bool ublk_prep_auto_buf_reg(struct ublk_queue *ubq,
+ struct request *req, struct ublk_io *io,
+ unsigned int issue_flags)
+{
+ if (ublk_support_auto_buf_reg(ubq) && ublk_rq_has_data(req))
+ return ublk_auto_buf_reg(req, io, issue_flags);
+
+ ublk_init_req_ref(ubq, req);
+ return true;
+}
+
+static bool ublk_start_io(const struct ublk_queue *ubq, struct request *req,
+ struct ublk_io *io)
+{
+ unsigned mapped_bytes = ublk_map_io(ubq, req, io);
+
+ /* partially mapped, update io descriptor */
+ if (unlikely(mapped_bytes != blk_rq_bytes(req))) {
+ /*
+ * Nothing mapped, retry until we succeed.
+ *
+ * We may never succeed in mapping any bytes here because
+ * of OOM. TODO: reserve one buffer with single page pinned
+ * for providing forward progress guarantee.
+ */
+ if (unlikely(!mapped_bytes)) {
+ blk_mq_requeue_request(req, false);
+ blk_mq_delay_kick_requeue_list(req->q,
+ UBLK_REQUEUE_DELAY_MS);
+ return false;
+ }
+
+ ublk_get_iod(ubq, req->tag)->nr_sectors =
+ mapped_bytes >> 9;
+ }
+
+ return true;
+}
+
static void ublk_dispatch_req(struct ublk_queue *ubq,
struct request *req,
unsigned int issue_flags)
{
int tag = req->tag;
struct ublk_io *io = &ubq->ios[tag];
- unsigned int mapped_bytes;
- pr_devel("%s: complete: op %d, qid %d tag %d io_flags %x addr %llx\n",
- __func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags,
+ pr_devel("%s: complete: qid %d tag %d io_flags %x addr %llx\n",
+ __func__, ubq->q_id, req->tag, io->flags,
ublk_get_iod(ubq, req->tag)->addr);
/*
@@ -1183,54 +1289,22 @@ static void ublk_dispatch_req(struct ublk_queue *ubq,
if (ublk_need_get_data(ubq) && ublk_need_map_req(req)) {
/*
* We have not handled UBLK_IO_NEED_GET_DATA command yet,
- * so immepdately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv
+ * so immediately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv
* and notify it.
*/
- if (!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA)) {
- io->flags |= UBLK_IO_FLAG_NEED_GET_DATA;
- pr_devel("%s: need get data. op %d, qid %d tag %d io_flags %x\n",
- __func__, io->cmd->cmd_op, ubq->q_id,
- req->tag, io->flags);
- ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA, issue_flags);
- return;
- }
- /*
- * We have handled UBLK_IO_NEED_GET_DATA command,
- * so clear UBLK_IO_FLAG_NEED_GET_DATA now and just
- * do the copy work.
- */
- io->flags &= ~UBLK_IO_FLAG_NEED_GET_DATA;
- /* update iod->addr because ublksrv may have passed a new io buffer */
- ublk_get_iod(ubq, req->tag)->addr = io->addr;
- pr_devel("%s: update iod->addr: op %d, qid %d tag %d io_flags %x addr %llx\n",
- __func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags,
- ublk_get_iod(ubq, req->tag)->addr);
+ io->flags |= UBLK_IO_FLAG_NEED_GET_DATA;
+ pr_devel("%s: need get data. qid %d tag %d io_flags %x\n",
+ __func__, ubq->q_id, req->tag, io->flags);
+ ublk_complete_io_cmd(io, req, UBLK_IO_RES_NEED_GET_DATA,
+ issue_flags);
+ return;
}
- mapped_bytes = ublk_map_io(ubq, req, io);
-
- /* partially mapped, update io descriptor */
- if (unlikely(mapped_bytes != blk_rq_bytes(req))) {
- /*
- * Nothing mapped, retry until we succeed.
- *
- * We may never succeed in mapping any bytes here because
- * of OOM. TODO: reserve one buffer with single page pinned
- * for providing forward progress guarantee.
- */
- if (unlikely(!mapped_bytes)) {
- blk_mq_requeue_request(req, false);
- blk_mq_delay_kick_requeue_list(req->q,
- UBLK_REQUEUE_DELAY_MS);
- return;
- }
-
- ublk_get_iod(ubq, req->tag)->nr_sectors =
- mapped_bytes >> 9;
- }
+ if (!ublk_start_io(ubq, req, io))
+ return;
- ublk_init_req_ref(ubq, req);
- ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
+ if (ublk_prep_auto_buf_reg(ubq, req, io, issue_flags))
+ ublk_complete_io_cmd(io, req, UBLK_IO_RES_OK, issue_flags);
}
static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd,
@@ -1590,30 +1664,6 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma)
return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
}
-static void ublk_commit_completion(struct ublk_device *ub,
- const struct ublksrv_io_cmd *ub_cmd)
-{
- u32 qid = ub_cmd->q_id, tag = ub_cmd->tag;
- struct ublk_queue *ubq = ublk_get_queue(ub, qid);
- struct ublk_io *io = &ubq->ios[tag];
- struct request *req;
-
- /* now this cmd slot is owned by nbd driver */
- io->flags &= ~UBLK_IO_FLAG_OWNED_BY_SRV;
- io->res = ub_cmd->result;
-
- /* find the io request and complete */
- req = blk_mq_tag_to_rq(ub->tag_set.tags[qid], tag);
- if (WARN_ON_ONCE(unlikely(!req)))
- return;
-
- if (req_op(req) == REQ_OP_ZONE_APPEND)
- req->__sector = ub_cmd->zone_append_lba;
-
- if (likely(!blk_should_fake_timeout(req->q)))
- ublk_put_req_ref(ubq, req);
-}
-
static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
struct request *req)
{
@@ -1642,17 +1692,8 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
for (i = 0; i < ubq->q_depth; i++) {
struct ublk_io *io = &ubq->ios[i];
- if (!(io->flags & UBLK_IO_FLAG_ACTIVE)) {
- struct request *rq;
-
- /*
- * Either we fail the request or ublk_rq_task_work_cb
- * will do it
- */
- rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i);
- if (rq && blk_mq_request_started(rq))
- __ublk_fail_req(ubq, io, rq);
- }
+ if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)
+ __ublk_fail_req(ubq, io, io->req);
}
}
@@ -1708,7 +1749,7 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag,
* that ublk_dispatch_req() is always called
*/
req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
- if (req && blk_mq_request_started(req))
+ if (req && blk_mq_request_started(req) && req->tag == tag)
return;
spin_lock(&ubq->cancel_lock);
@@ -1940,6 +1981,20 @@ static inline void ublk_prep_cancel(struct io_uring_cmd *cmd,
io_uring_cmd_mark_cancelable(cmd, issue_flags);
}
+static inline int ublk_set_auto_buf_reg(struct io_uring_cmd *cmd)
+{
+ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+
+ pdu->buf = ublk_sqe_addr_to_auto_buf_reg(READ_ONCE(cmd->sqe->addr));
+
+ if (pdu->buf.reserved0 || pdu->buf.reserved1)
+ return -EINVAL;
+
+ if (pdu->buf.flags & ~UBLK_AUTO_BUF_REG_F_MASK)
+ return -EINVAL;
+ return 0;
+}
+
static void ublk_io_release(void *priv)
{
struct request *rq = priv;
@@ -1953,16 +2008,12 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd,
unsigned int index, unsigned int issue_flags)
{
struct ublk_device *ub = cmd->file->private_data;
- const struct ublk_io *io = &ubq->ios[tag];
struct request *req;
int ret;
if (!ublk_support_zero_copy(ubq))
return -EINVAL;
- if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
- return -EINVAL;
-
req = __ublk_check_and_get_req(ub, ubq, tag, 0);
if (!req)
return -EINVAL;
@@ -1978,17 +2029,12 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd,
}
static int ublk_unregister_io_buf(struct io_uring_cmd *cmd,
- const struct ublk_queue *ubq, unsigned int tag,
+ const struct ublk_queue *ubq,
unsigned int index, unsigned int issue_flags)
{
- const struct ublk_io *io = &ubq->ios[tag];
-
if (!ublk_support_zero_copy(ubq))
return -EINVAL;
- if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
- return -EINVAL;
-
return io_buffer_unregister_bvec(cmd, index, issue_flags);
}
@@ -2031,6 +2077,12 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
goto out;
}
+ if (ublk_support_auto_buf_reg(ubq)) {
+ ret = ublk_set_auto_buf_reg(cmd);
+ if (ret)
+ goto out;
+ }
+
ublk_fill_io_cmd(io, cmd, buf_addr);
ublk_mark_io_ready(ub, ubq);
out:
@@ -2038,6 +2090,90 @@ out:
return ret;
}
+static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
+ struct ublk_io *io, struct io_uring_cmd *cmd,
+ const struct ublksrv_io_cmd *ub_cmd,
+ unsigned int issue_flags)
+{
+ struct request *req = io->req;
+
+ if (ublk_need_map_io(ubq)) {
+ /*
+ * COMMIT_AND_FETCH_REQ has to provide IO buffer if
+ * NEED GET DATA is not enabled or it is Read IO.
+ */
+ if (!ub_cmd->addr && (!ublk_need_get_data(ubq) ||
+ req_op(req) == REQ_OP_READ))
+ return -EINVAL;
+ } else if (req_op(req) != REQ_OP_ZONE_APPEND && ub_cmd->addr) {
+ /*
+ * User copy requires addr to be unset when command is
+ * not zone append
+ */
+ return -EINVAL;
+ }
+
+ if (ublk_support_auto_buf_reg(ubq)) {
+ int ret;
+
+ /*
+ * `UBLK_F_AUTO_BUF_REG` only works iff `UBLK_IO_FETCH_REQ`
+ * and `UBLK_IO_COMMIT_AND_FETCH_REQ` are issued from same
+ * `io_ring_ctx`.
+ *
+ * If this uring_cmd's io_ring_ctx isn't same with the
+ * one for registering the buffer, it is ublk server's
+ * responsibility for unregistering the buffer, otherwise
+ * this ublk request gets stuck.
+ */
+ if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG) {
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ if (data->buf_ctx_handle == io_uring_cmd_ctx_handle(cmd))
+ io_buffer_unregister_bvec(cmd, data->buf_index,
+ issue_flags);
+ io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG;
+ }
+
+ ret = ublk_set_auto_buf_reg(cmd);
+ if (ret)
+ return ret;
+ }
+
+ ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
+
+ /* now this cmd slot is owned by ublk driver */
+ io->flags &= ~UBLK_IO_FLAG_OWNED_BY_SRV;
+ io->res = ub_cmd->result;
+
+ if (req_op(req) == REQ_OP_ZONE_APPEND)
+ req->__sector = ub_cmd->zone_append_lba;
+
+ if (likely(!blk_should_fake_timeout(req->q)))
+ ublk_put_req_ref(ubq, req);
+
+ return 0;
+}
+
+static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io)
+{
+ struct request *req = io->req;
+
+ /*
+ * We have handled UBLK_IO_NEED_GET_DATA command,
+ * so clear UBLK_IO_FLAG_NEED_GET_DATA now and just
+ * do the copy work.
+ */
+ io->flags &= ~UBLK_IO_FLAG_NEED_GET_DATA;
+ /* update iod->addr because ublksrv may have passed a new io buffer */
+ ublk_get_iod(ubq, req->tag)->addr = io->addr;
+ pr_devel("%s: update iod->addr: qid %d tag %d io_flags %x addr %llx\n",
+ __func__, ubq->q_id, req->tag, io->flags,
+ ublk_get_iod(ubq, req->tag)->addr);
+
+ return ublk_start_io(ubq, req, io);
+}
+
static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
unsigned int issue_flags,
const struct ublksrv_io_cmd *ub_cmd)
@@ -2048,7 +2184,6 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
u32 cmd_op = cmd->cmd_op;
unsigned tag = ub_cmd->tag;
int ret = -EINVAL;
- struct request *req;
pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n",
__func__, cmd->cmd_op, ub_cmd->q_id, tag,
@@ -2058,9 +2193,6 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
goto out;
ubq = ublk_get_queue(ub, ub_cmd->q_id);
- if (!ubq || ub_cmd->q_id != ubq->q_id)
- goto out;
-
if (ubq->ubq_daemon && ubq->ubq_daemon != current)
goto out;
@@ -2075,6 +2207,11 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
goto out;
}
+ /* only UBLK_IO_FETCH_REQ is allowed if io is not OWNED_BY_SRV */
+ if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV) &&
+ _IOC_NR(cmd_op) != UBLK_IO_FETCH_REQ)
+ goto out;
+
/*
* ensure that the user issues UBLK_IO_NEED_GET_DATA
* iff the driver have set the UBLK_IO_FLAG_NEED_GET_DATA.
@@ -2092,45 +2229,23 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
case UBLK_IO_REGISTER_IO_BUF:
return ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags);
case UBLK_IO_UNREGISTER_IO_BUF:
- return ublk_unregister_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags);
+ return ublk_unregister_io_buf(cmd, ubq, ub_cmd->addr, issue_flags);
case UBLK_IO_FETCH_REQ:
ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr);
if (ret)
goto out;
break;
case UBLK_IO_COMMIT_AND_FETCH_REQ:
- req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
-
- if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
- goto out;
-
- if (ublk_need_map_io(ubq)) {
- /*
- * COMMIT_AND_FETCH_REQ has to provide IO buffer if
- * NEED GET DATA is not enabled or it is Read IO.
- */
- if (!ub_cmd->addr && (!ublk_need_get_data(ubq) ||
- req_op(req) == REQ_OP_READ))
- goto out;
- } else if (req_op(req) != REQ_OP_ZONE_APPEND && ub_cmd->addr) {
- /*
- * User copy requires addr to be unset when command is
- * not zone append
- */
- ret = -EINVAL;
+ ret = ublk_commit_and_fetch(ubq, io, cmd, ub_cmd, issue_flags);
+ if (ret)
goto out;
- }
-
- ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
- ublk_commit_completion(ub, ub_cmd);
break;
case UBLK_IO_NEED_GET_DATA:
- if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
- goto out;
- ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
- req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
- ublk_dispatch_req(ubq, req, issue_flags);
- return -EIOCBQUEUED;
+ io->addr = ub_cmd->addr;
+ if (!ublk_get_data(ubq, io))
+ return -EIOCBQUEUED;
+
+ return UBLK_IO_RES_OK;
default:
goto out;
}
@@ -2728,6 +2843,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
return -EINVAL;
}
+ if ((info.flags & UBLK_F_QUIESCE) && !(info.flags & UBLK_F_USER_RECOVERY)) {
+ pr_warn("UBLK_F_QUIESCE requires UBLK_F_USER_RECOVERY\n");
+ return -EINVAL;
+ }
+
/*
* unprivileged device can't be trusted, but RECOVERY and
* RECOVERY_REISSUE still may hang error handling, so can't
@@ -2744,8 +2864,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
* For USER_COPY, we depends on userspace to fill request
* buffer by pwrite() to ublk char device, which can't be
* used for unprivileged device
+ *
+ * Same with zero copy or auto buffer register.
*/
- if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
+ if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
+ UBLK_F_AUTO_BUF_REG))
return -EINVAL;
}
@@ -2803,7 +2926,8 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
UBLK_F_URING_CMD_COMP_IN_TASK;
/* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */
- if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
+ if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
+ UBLK_F_AUTO_BUF_REG))
ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA;
/*
@@ -3106,6 +3230,127 @@ static int ublk_ctrl_get_features(const struct ublksrv_ctrl_cmd *header)
return 0;
}
+static void ublk_ctrl_set_size(struct ublk_device *ub, const struct ublksrv_ctrl_cmd *header)
+{
+ struct ublk_param_basic *p = &ub->params.basic;
+ u64 new_size = header->data[0];
+
+ mutex_lock(&ub->mutex);
+ p->dev_sectors = new_size;
+ set_capacity_and_notify(ub->ub_disk, p->dev_sectors);
+ mutex_unlock(&ub->mutex);
+}
+
+struct count_busy {
+ const struct ublk_queue *ubq;
+ unsigned int nr_busy;
+};
+
+static bool ublk_count_busy_req(struct request *rq, void *data)
+{
+ struct count_busy *idle = data;
+
+ if (!blk_mq_request_started(rq) && rq->mq_hctx->driver_data == idle->ubq)
+ idle->nr_busy += 1;
+ return true;
+}
+
+/* uring_cmd is guaranteed to be active if the associated request is idle */
+static bool ubq_has_idle_io(const struct ublk_queue *ubq)
+{
+ struct count_busy data = {
+ .ubq = ubq,
+ };
+
+ blk_mq_tagset_busy_iter(&ubq->dev->tag_set, ublk_count_busy_req, &data);
+ return data.nr_busy < ubq->q_depth;
+}
+
+/* Wait until each hw queue has at least one idle IO */
+static int ublk_wait_for_idle_io(struct ublk_device *ub,
+ unsigned int timeout_ms)
+{
+ unsigned int elapsed = 0;
+ int ret;
+
+ while (elapsed < timeout_ms && !signal_pending(current)) {
+ unsigned int queues_cancelable = 0;
+ int i;
+
+ for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
+ struct ublk_queue *ubq = ublk_get_queue(ub, i);
+
+ queues_cancelable += !!ubq_has_idle_io(ubq);
+ }
+
+ /*
+ * Each queue needs at least one active command for
+ * notifying ublk server
+ */
+ if (queues_cancelable == ub->dev_info.nr_hw_queues)
+ break;
+
+ msleep(UBLK_REQUEUE_DELAY_MS);
+ elapsed += UBLK_REQUEUE_DELAY_MS;
+ }
+
+ if (signal_pending(current))
+ ret = -EINTR;
+ else if (elapsed >= timeout_ms)
+ ret = -EBUSY;
+ else
+ ret = 0;
+
+ return ret;
+}
+
+static int ublk_ctrl_quiesce_dev(struct ublk_device *ub,
+ const struct ublksrv_ctrl_cmd *header)
+{
+ /* zero means wait forever */
+ u64 timeout_ms = header->data[0];
+ struct gendisk *disk;
+ int i, ret = -ENODEV;
+
+ if (!(ub->dev_info.flags & UBLK_F_QUIESCE))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&ub->mutex);
+ disk = ublk_get_disk(ub);
+ if (!disk)
+ goto unlock;
+ if (ub->dev_info.state == UBLK_S_DEV_DEAD)
+ goto put_disk;
+
+ ret = 0;
+ /* already in expected state */
+ if (ub->dev_info.state != UBLK_S_DEV_LIVE)
+ goto put_disk;
+
+ /* Mark all queues as canceling */
+ blk_mq_quiesce_queue(disk->queue);
+ for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
+ struct ublk_queue *ubq = ublk_get_queue(ub, i);
+
+ ubq->canceling = true;
+ }
+ blk_mq_unquiesce_queue(disk->queue);
+
+ if (!timeout_ms)
+ timeout_ms = UINT_MAX;
+ ret = ublk_wait_for_idle_io(ub, timeout_ms);
+
+put_disk:
+ ublk_put_disk(disk);
+unlock:
+ mutex_unlock(&ub->mutex);
+
+ /* Cancel pending uring_cmd */
+ if (!ret)
+ ublk_cancel_dev(ub);
+ return ret;
+}
+
/*
* All control commands are sent via /dev/ublk-control, so we have to check
* the destination device's permission
@@ -3191,6 +3436,8 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
case UBLK_CMD_SET_PARAMS:
case UBLK_CMD_START_USER_RECOVERY:
case UBLK_CMD_END_USER_RECOVERY:
+ case UBLK_CMD_UPDATE_SIZE:
+ case UBLK_CMD_QUIESCE_DEV:
mask = MAY_READ | MAY_WRITE;
break;
default:
@@ -3282,6 +3529,13 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
case UBLK_CMD_END_USER_RECOVERY:
ret = ublk_ctrl_end_recovery(ub, header);
break;
+ case UBLK_CMD_UPDATE_SIZE:
+ ublk_ctrl_set_size(ub, header);
+ ret = 0;
+ break;
+ case UBLK_CMD_QUIESCE_DEV:
+ ret = ublk_ctrl_quiesce_dev(ub, header);
+ break;
default:
ret = -EOPNOTSUPP;
break;
@@ -3315,6 +3569,7 @@ static int __init ublk_init(void)
BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET +
UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET);
+ BUILD_BUG_ON(sizeof(struct ublk_auto_buf_reg) != 8);
init_waitqueue_head(&ublk_idr_wq);