Diffstat (limited to 'drivers/md'): 65 files changed, 1025 insertions, 780 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 104aa5355090..239c1744a926 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -299,6 +299,7 @@ config DM_CRYPT select CRYPTO select CRYPTO_CBC select CRYPTO_ESSIV + select CRYPTO_LIB_MD5 # needed by lmk IV mode help This device-mapper target allows you to create a device that transparently encrypts the data on it. You'll need to activate @@ -546,6 +547,7 @@ config DM_VERITY depends on BLK_DEV_DM select CRYPTO select CRYPTO_HASH + select CRYPTO_LIB_SHA256 select DM_BUFIO help This device-mapper target creates a read-only device that diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 48ce750bf70a..7708d92df23e 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -24,21 +24,18 @@ * Since the gens and priorities are all stored contiguously on disk, we can * batch this up: We fill up the free_inc list with freshly invalidated buckets, * call prio_write(), and when prio_write() finishes we pull buckets off the - * free_inc list and optionally discard them. + * free_inc list. * * free_inc isn't the only freelist - if it was, we'd often to sleep while * priorities and gens were being written before we could allocate. c->free is a * smaller freelist, and buckets on that list are always ready to be used. * - * If we've got discards enabled, that happens when a bucket moves from the - * free_inc list to the free list. - * * There is another freelist, because sometimes we have buckets that we know * have nothing pointing into them - these we can reuse without waiting for * priorities to be rewritten. These come from freed btree nodes and buckets * that garbage collection discovered no longer had valid keys pointing into * them (because they were overwritten). That's the unused list - buckets on the - * unused list move to the free list, optionally being discarded in the process. + * unused list move to the free list. * * It's also important to ensure that gens don't wrap around - with respect to * either the oldest gen in the btree or the gen on disk. This is quite @@ -118,8 +115,7 @@ void bch_rescale_priorities(struct cache_set *c, int sectors) /* * Background allocation thread: scans for buckets to be invalidated, * invalidates them, rewrites prios/gens (marking them as invalidated on disk), - * then optionally issues discard commands to the newly free buckets, then puts - * them on the various freelists. + * then puts them on the various freelists. 
*/ static inline bool can_inc_bucket_gen(struct bucket *b) @@ -321,8 +317,7 @@ static int bch_allocator_thread(void *arg) while (1) { /* * First, we pull buckets off of the unused and free_inc lists, - * possibly issue discards to them, then we add the bucket to - * the free list: + * then we add the bucket to the free list: */ while (1) { long bucket; @@ -330,14 +325,6 @@ static int bch_allocator_thread(void *arg) if (!fifo_pop(&ca->free_inc, bucket)) break; - if (ca->discard) { - mutex_unlock(&ca->set->bucket_lock); - blkdev_issue_discard(ca->bdev, - bucket_to_sector(ca->set, bucket), - ca->sb.bucket_size, GFP_KERNEL); - mutex_lock(&ca->set->bucket_lock); - } - allocator_wait(ca, bch_allocator_push(ca, bucket)); wake_up(&ca->set->btree_cache_wait); wake_up(&ca->set->bucket_wait); @@ -412,7 +399,11 @@ long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait) TASK_UNINTERRUPTIBLE); mutex_unlock(&ca->set->bucket_lock); + + atomic_inc(&ca->set->bucket_wait_cnt); schedule(); + atomic_dec(&ca->set->bucket_wait_cnt); + mutex_lock(&ca->set->bucket_lock); } while (!fifo_pop(&ca->free[RESERVE_NONE], r) && !fifo_pop(&ca->free[reserve], r)); diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 1d33e40d26ea..8ccacba85547 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -447,8 +447,7 @@ struct cache { * free_inc: Incoming buckets - these are buckets that currently have * cached data in them, and we can't reuse them until after we write * their new gen to disk. After prio_write() finishes writing the new - * gens/prios, they'll be moved to the free list (and possibly discarded - * in the process) + * gens/prios, they'll be moved to the free list. */ DECLARE_FIFO(long, free)[RESERVE_NR]; DECLARE_FIFO(long, free_inc); @@ -467,8 +466,6 @@ struct cache { */ unsigned int invalidate_needs_gc; - bool discard; /* Get rid of? */ - struct journal_device journal; /* The rest of this all shows up in sysfs */ @@ -607,6 +604,7 @@ struct cache_set { */ atomic_t prio_blocked; wait_queue_head_t bucket_wait; + atomic_t bucket_wait_cnt; /* * For any bio we don't skip we subtract the number of sectors from diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 011f6062c4c0..6ee2c6a506a2 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -327,9 +327,13 @@ struct btree_iter { /* Fixed-size btree_iter that can be allocated on the stack */ struct btree_iter_stack { - struct btree_iter iter; - struct btree_iter_set stack_data[MAX_BSETS]; + /* Must be last as it ends in a flexible-array member. 
*/ + TRAILING_OVERLAP(struct btree_iter, iter, data, + struct btree_iter_set stack_data[MAX_BSETS]; + ); }; +static_assert(offsetof(struct btree_iter_stack, iter.data) == + offsetof(struct btree_iter_stack, stack_data)); typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 210b59007d98..3ed39c823826 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -89,8 +89,9 @@ * Test module load/unload */ -#define MAX_GC_TIMES 100 -#define MIN_GC_NODES 100 +#define MAX_GC_TIMES_SHIFT 7 /* 128 loops */ +#define GC_NODES_MIN 10 +#define GC_SLEEP_MS_MIN 10 #define GC_SLEEP_MS 100 #define PTR_DIRTY_BIT (((uint64_t) 1 << 36)) @@ -371,7 +372,7 @@ static void do_btree_node_write(struct btree *b) SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_sector_offset(&b->keys, i)); - if (!bch_bio_alloc_pages(b->bio, __GFP_NOWARN|GFP_NOWAIT)) { + if (!bch_bio_alloc_pages(b->bio, GFP_NOWAIT)) { struct bio_vec *bv; void *addr = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); struct bvec_iter_all iter_all; @@ -1578,29 +1579,29 @@ static unsigned int btree_gc_count_keys(struct btree *b) static size_t btree_gc_min_nodes(struct cache_set *c) { - size_t min_nodes; + size_t min_nodes = GC_NODES_MIN; - /* - * Since incremental GC would stop 100ms when front - * side I/O comes, so when there are many btree nodes, - * if GC only processes constant (100) nodes each time, - * GC would last a long time, and the front side I/Os - * would run out of the buckets (since no new bucket - * can be allocated during GC), and be blocked again. - * So GC should not process constant nodes, but varied - * nodes according to the number of btree nodes, which - * realized by dividing GC into constant(100) times, - * so when there are many btree nodes, GC can process - * more nodes each time, otherwise, GC will process less - * nodes each time (but no less than MIN_GC_NODES) - */ - min_nodes = c->gc_stats.nodes / MAX_GC_TIMES; - if (min_nodes < MIN_GC_NODES) - min_nodes = MIN_GC_NODES; + if (atomic_read(&c->search_inflight) == 0) { + size_t n = c->gc_stats.nodes >> MAX_GC_TIMES_SHIFT; + + if (min_nodes < n) + min_nodes = n; + } return min_nodes; } +static uint64_t btree_gc_sleep_ms(struct cache_set *c) +{ + uint64_t sleep_ms; + + if (atomic_read(&c->bucket_wait_cnt) > 0) + sleep_ms = GC_SLEEP_MS_MIN; + else + sleep_ms = GC_SLEEP_MS; + + return sleep_ms; +} static int btree_gc_recurse(struct btree *b, struct btree_op *op, struct closure *writes, struct gc_stat *gc) @@ -1668,8 +1669,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, memmove(r + 1, r, sizeof(r[0]) * (GC_MERGE_NODES - 1)); r->b = NULL; - if (atomic_read(&b->c->search_inflight) && - gc->nodes >= gc->nodes_pre + btree_gc_min_nodes(b->c)) { + if (gc->nodes >= (gc->nodes_pre + btree_gc_min_nodes(b->c))) { gc->nodes_pre = gc->nodes; ret = -EAGAIN; break; @@ -1846,8 +1846,8 @@ static void bch_btree_gc(struct cache_set *c) cond_resched(); if (ret == -EAGAIN) - schedule_timeout_interruptible(msecs_to_jiffies - (GC_SLEEP_MS)); + schedule_timeout_interruptible( + msecs_to_jiffies(btree_gc_sleep_ms(c))); else if (ret) pr_warn("gc failed!\n"); } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags)); @@ -2822,7 +2822,8 @@ void bch_btree_exit(void) int __init bch_btree_init(void) { - btree_io_wq = alloc_workqueue("bch_btree_io", WQ_MEM_RECLAIM, 0); + btree_io_wq = alloc_workqueue("bch_btree_io", + WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!btree_io_wq) return 
-ENOMEM; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index d50eb82ccb4f..144693b7c46a 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -275,8 +275,7 @@ bsearch: * ja->cur_idx */ ja->cur_idx = i; - ja->last_idx = ja->discard_idx = (i + 1) % - ca->sb.njournal_buckets; + ja->last_idx = (i + 1) % ca->sb.njournal_buckets; } @@ -336,16 +335,6 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list) } } -static bool is_discard_enabled(struct cache_set *s) -{ - struct cache *ca = s->cache; - - if (ca->discard) - return true; - - return false; -} - int bch_journal_replay(struct cache_set *s, struct list_head *list) { int ret = 0, keys = 0, entries = 0; @@ -360,15 +349,10 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) BUG_ON(i->pin && atomic_read(i->pin) != 1); if (n != i->j.seq) { - if (n == start && is_discard_enabled(s)) - pr_info("journal entries %llu-%llu may be discarded! (replaying %llu-%llu)\n", - n, i->j.seq - 1, start, end); - else { - pr_err("journal entries %llu-%llu missing! (replaying %llu-%llu)\n", - n, i->j.seq - 1, start, end); - ret = -EIO; - goto err; - } + pr_err("journal entries %llu-%llu missing! (replaying %llu-%llu)\n", + n, i->j.seq - 1, start, end); + ret = -EIO; + goto err; } for (k = i->j.start; @@ -568,65 +552,6 @@ out: #define last_seq(j) ((j)->seq - fifo_used(&(j)->pin) + 1) -static void journal_discard_endio(struct bio *bio) -{ - struct journal_device *ja = - container_of(bio, struct journal_device, discard_bio); - struct cache *ca = container_of(ja, struct cache, journal); - - atomic_set(&ja->discard_in_flight, DISCARD_DONE); - - closure_wake_up(&ca->set->journal.wait); - closure_put(&ca->set->cl); -} - -static void journal_discard_work(struct work_struct *work) -{ - struct journal_device *ja = - container_of(work, struct journal_device, discard_work); - - submit_bio(&ja->discard_bio); -} - -static void do_journal_discard(struct cache *ca) -{ - struct journal_device *ja = &ca->journal; - struct bio *bio = &ja->discard_bio; - - if (!ca->discard) { - ja->discard_idx = ja->last_idx; - return; - } - - switch (atomic_read(&ja->discard_in_flight)) { - case DISCARD_IN_FLIGHT: - return; - - case DISCARD_DONE: - ja->discard_idx = (ja->discard_idx + 1) % - ca->sb.njournal_buckets; - - atomic_set(&ja->discard_in_flight, DISCARD_READY); - fallthrough; - - case DISCARD_READY: - if (ja->discard_idx == ja->last_idx) - return; - - atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT); - - bio_init_inline(bio, ca->bdev, 1, REQ_OP_DISCARD); - bio->bi_iter.bi_sector = bucket_to_sector(ca->set, - ca->sb.d[ja->discard_idx]); - bio->bi_iter.bi_size = bucket_bytes(ca); - bio->bi_end_io = journal_discard_endio; - - closure_get(&ca->set->cl); - INIT_WORK(&ja->discard_work, journal_discard_work); - queue_work(bch_journal_wq, &ja->discard_work); - } -} - static unsigned int free_journal_buckets(struct cache_set *c) { struct journal *j = &c->journal; @@ -635,10 +560,10 @@ static unsigned int free_journal_buckets(struct cache_set *c) unsigned int n; /* In case njournal_buckets is not power of 2 */ - if (ja->cur_idx >= ja->discard_idx) - n = ca->sb.njournal_buckets + ja->discard_idx - ja->cur_idx; + if (ja->cur_idx >= ja->last_idx) + n = ca->sb.njournal_buckets + ja->last_idx - ja->cur_idx; else - n = ja->discard_idx - ja->cur_idx; + n = ja->last_idx - ja->cur_idx; if (n > (1 + j->do_reserve)) return n - (1 + j->do_reserve); @@ -668,8 +593,6 @@ static void journal_reclaim(struct cache_set *c) ja->last_idx 
= (ja->last_idx + 1) % ca->sb.njournal_buckets; - do_journal_discard(ca); - if (c->journal.blocks_free) goto out; diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h index cd316b4a1e95..9e9d1b3016a5 100644 --- a/drivers/md/bcache/journal.h +++ b/drivers/md/bcache/journal.h @@ -139,19 +139,6 @@ struct journal_device { /* Last journal bucket that still contains an open journal entry */ unsigned int last_idx; - /* Next journal bucket to be discarded */ - unsigned int discard_idx; - -#define DISCARD_READY 0 -#define DISCARD_IN_FLIGHT 1 -#define DISCARD_DONE 2 - /* 1 - discard in flight, -1 - discard completed */ - atomic_t discard_in_flight; - - struct work_struct discard_work; - struct bio discard_bio; - struct bio_vec discard_bv; - /* Bio for journal reads/writes to this device */ struct bio bio; struct bio_vec bv[8]; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 6d250e366412..c17d4517af22 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1388,7 +1388,7 @@ static CLOSURE_CALLBACK(cached_dev_flush) bch_cache_accounting_destroy(&dc->accounting); kobject_del(&d->kobj); - continue_at(cl, cached_dev_free, system_wq); + continue_at(cl, cached_dev_free, system_percpu_wq); } static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) @@ -1400,7 +1400,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) __module_get(THIS_MODULE); INIT_LIST_HEAD(&dc->list); closure_init(&dc->disk.cl, NULL); - set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq); + set_closure_fn(&dc->disk.cl, cached_dev_flush, system_percpu_wq); kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype); INIT_WORK(&dc->detach, cached_dev_detach_finish); sema_init(&dc->sb_write_mutex, 1); @@ -1513,7 +1513,7 @@ static CLOSURE_CALLBACK(flash_dev_flush) bcache_device_unlink(d); mutex_unlock(&bch_register_lock); kobject_del(&d->kobj); - continue_at(cl, flash_dev_free, system_wq); + continue_at(cl, flash_dev_free, system_percpu_wq); } static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) @@ -1525,7 +1525,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) goto err_ret; closure_init(&d->cl, NULL); - set_closure_fn(&d->cl, flash_dev_flush, system_wq); + set_closure_fn(&d->cl, flash_dev_flush, system_percpu_wq); kobject_init(&d->kobj, &bch_flash_dev_ktype); @@ -1833,7 +1833,7 @@ static CLOSURE_CALLBACK(__cache_set_unregister) mutex_unlock(&bch_register_lock); - continue_at(cl, cache_set_flush, system_wq); + continue_at(cl, cache_set_flush, system_percpu_wq); } void bch_cache_set_stop(struct cache_set *c) @@ -1863,10 +1863,10 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) __module_get(THIS_MODULE); closure_init(&c->cl, NULL); - set_closure_fn(&c->cl, cache_set_free, system_wq); + set_closure_fn(&c->cl, cache_set_free, system_percpu_wq); closure_init(&c->caching, &c->cl); - set_closure_fn(&c->caching, __cache_set_unregister, system_wq); + set_closure_fn(&c->caching, __cache_set_unregister, system_percpu_wq); /* Maybe create continue_at_noreturn() and use it here? 
*/ closure_set_stopped(&c->cl); @@ -1939,7 +1939,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) if (!c->uuids) goto err; - c->moving_gc_wq = alloc_workqueue("bcache_gc", WQ_MEM_RECLAIM, 0); + c->moving_gc_wq = alloc_workqueue("bcache_gc", + WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!c->moving_gc_wq) goto err; @@ -2382,9 +2383,6 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, ca->bdev = file_bdev(bdev_file); ca->sb_disk = sb_disk; - if (bdev_max_discard_sectors(file_bdev(bdev_file))) - ca->discard = CACHE_DISCARD(&ca->sb); - ret = cache_alloc(ca); if (ret != 0) { if (ret == -ENOMEM) @@ -2531,7 +2529,7 @@ static void register_device_async(struct async_reg_args *args) INIT_DELAYED_WORK(&args->reg_work, register_cache_worker); /* 10 jiffies is enough for a delay */ - queue_delayed_work(system_wq, &args->reg_work, 10); + queue_delayed_work(system_percpu_wq, &args->reg_work, 10); } static void *alloc_holder_object(struct cache_sb *sb) @@ -2905,24 +2903,25 @@ static int __init bcache_init(void) if (bch_btree_init()) goto err; - bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0); + bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!bcache_wq) goto err; /* * Let's not make this `WQ_MEM_RECLAIM` for the following reasons: * - * 1. It used `system_wq` before which also does no memory reclaim. + * 1. It used `system_percpu_wq` before, which also does no memory reclaim. * 2. With `WQ_MEM_RECLAIM` desktop stalls, increased boot times, and * reduced throughput can be observed. * - * We still want to user our own queue to not congest the `system_wq`. + * We still want to use our own queue to not congest the `system_percpu_wq`. */ - bch_flush_wq = alloc_workqueue("bch_flush", 0, 0); + bch_flush_wq = alloc_workqueue("bch_flush", WQ_PERCPU, 0); if (!bch_flush_wq) goto err; - bch_journal_wq = alloc_workqueue("bch_journal", WQ_MEM_RECLAIM, 0); + bch_journal_wq = alloc_workqueue("bch_journal", + WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!bch_journal_wq) goto err; diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 826b14cae4e5..72f38e5b6f5c 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -134,7 +134,6 @@ read_attribute(partial_stripes_expensive); rw_attribute(synchronous); rw_attribute(journal_delay_ms); rw_attribute(io_disable); -rw_attribute(discard); rw_attribute(running); rw_attribute(label); rw_attribute(errors); @@ -1036,7 +1035,6 @@ SHOW(__bch_cache) sysfs_hprint(bucket_size, bucket_bytes(ca)); sysfs_hprint(block_size, block_bytes(ca)); sysfs_print(nbuckets, ca->sb.nbuckets); - sysfs_print(discard, ca->discard); sysfs_hprint(written, atomic_long_read(&ca->sectors_written) << 9); sysfs_hprint(btree_written, atomic_long_read(&ca->btree_sectors_written) << 9); @@ -1142,18 +1140,6 @@ STORE(__bch_cache) if (bcache_is_reboot) return -EBUSY; - if (attr == &sysfs_discard) { - bool v = strtoul_or_return(buf); - - if (bdev_max_discard_sectors(ca->bdev)) - ca->discard = v; - - if (v != CACHE_DISCARD(&ca->sb)) { - SET_CACHE_DISCARD(&ca->sb, v); - bcache_write_super(ca->set); - } - } - if (attr == &sysfs_cache_replacement_policy) { v = __sysfs_match_string(cache_replacement_policies, -1, buf); if (v < 0) @@ -1185,7 +1171,6 @@ static struct attribute *bch_cache_attrs[] = { &sysfs_block_size, &sysfs_nbuckets, &sysfs_priority_stats, - &sysfs_discard, &sysfs_written, &sysfs_btree_written, &sysfs_metadata_written, diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index
6ba73dc1a3df..4b237074f453 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -805,8 +805,7 @@ static int bch_writeback_thread(void *arg) * may set BCH_ENABLE_AUTO_GC via sysfs, then when * BCH_DO_AUTO_GC is set, garbage collection thread * will be wake up here. After moving gc, the shrunk - * btree and discarded free buckets SSD space may be - * helpful for following write requests. + * btree may be helpful for following write requests. */ if (c->gc_after_writeback == (BCH_ENABLE_AUTO_GC|BCH_DO_AUTO_GC)) { @@ -1076,7 +1075,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) int bch_cached_dev_writeback_start(struct cached_dev *dc) { dc->writeback_write_wq = alloc_workqueue("bcache_writeback_wq", - WQ_MEM_RECLAIM, 0); + WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!dc->writeback_write_wq) return -ENOMEM; diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index e6d28be11c5c..5235f3e4924b 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1374,7 +1374,7 @@ static void submit_io(struct dm_buffer *b, enum req_op op, unsigned short ioprio { unsigned int n_sectors; sector_t sector; - unsigned int offset, end; + unsigned int offset, end, align; b->end_io = end_io; @@ -1388,9 +1388,11 @@ static void submit_io(struct dm_buffer *b, enum req_op op, unsigned short ioprio b->c->write_callback(b); offset = b->write_start; end = b->write_end; - offset &= -DM_BUFIO_WRITE_ALIGN; - end += DM_BUFIO_WRITE_ALIGN - 1; - end &= -DM_BUFIO_WRITE_ALIGN; + align = max(DM_BUFIO_WRITE_ALIGN, + bdev_physical_block_size(b->c->bdev)); + offset &= -align; + end += align - 1; + end &= -align; if (unlikely(end > b->c->block_size)) end = b->c->block_size; diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index a3c9f74fe2dc..1cda8618d74d 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -139,7 +139,6 @@ struct mapped_device { struct srcu_struct io_barrier; #ifdef CONFIG_BLK_DEV_ZONED - unsigned int nr_zones; void *zone_revalidate_map; struct task_struct *revalidate_map_task; #endif diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 5ef43231fe77..79704fbc523b 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -21,6 +21,7 @@ #include <linux/mempool.h> #include <linux/slab.h> #include <linux/crypto.h> +#include <linux/fips.h> #include <linux/workqueue.h> #include <linux/kthread.h> #include <linux/backing-dev.h> @@ -120,7 +121,6 @@ struct iv_benbi_private { #define LMK_SEED_SIZE 64 /* hash + 0 */ struct iv_lmk_private { - struct crypto_shash *hash_tfm; u8 *seed; }; @@ -254,22 +254,15 @@ static unsigned int max_write_size = 0; module_param(max_write_size, uint, 0644); MODULE_PARM_DESC(max_write_size, "Maximum size of a write request"); -static unsigned get_max_request_sectors(struct dm_target *ti, struct bio *bio) +static unsigned get_max_request_sectors(struct dm_target *ti, struct bio *bio, bool no_split) { struct crypt_config *cc = ti->private; unsigned val, sector_align; bool wrt = op_is_write(bio_op(bio)); - if (wrt) { - /* - * For zoned devices, splitting write operations creates the - * risk of deadlocking queue freeze operations with zone write - * plugging BIO work when the reminder of a split BIO is - * issued. So always allow the entire BIO to proceed. 
- */ - if (ti->emulate_zone_append) - return bio_sectors(bio); - + if (no_split) { + val = -1; + } else if (wrt) { val = min_not_zero(READ_ONCE(max_write_size), DM_CRYPT_DEFAULT_MAX_WRITE_SIZE); } else { @@ -465,10 +458,6 @@ static void crypt_iv_lmk_dtr(struct crypt_config *cc) { struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk; - if (lmk->hash_tfm && !IS_ERR(lmk->hash_tfm)) - crypto_free_shash(lmk->hash_tfm); - lmk->hash_tfm = NULL; - kfree_sensitive(lmk->seed); lmk->seed = NULL; } @@ -483,11 +472,10 @@ static int crypt_iv_lmk_ctr(struct crypt_config *cc, struct dm_target *ti, return -EINVAL; } - lmk->hash_tfm = crypto_alloc_shash("md5", 0, - CRYPTO_ALG_ALLOCATES_MEMORY); - if (IS_ERR(lmk->hash_tfm)) { - ti->error = "Error initializing LMK hash"; - return PTR_ERR(lmk->hash_tfm); + if (fips_enabled) { + ti->error = "LMK support is disabled due to FIPS"; + /* ... because it uses MD5. */ + return -EINVAL; } /* No seed in LMK version 2 */ @@ -498,7 +486,6 @@ static int crypt_iv_lmk_ctr(struct crypt_config *cc, struct dm_target *ti, lmk->seed = kzalloc(LMK_SEED_SIZE, GFP_KERNEL); if (!lmk->seed) { - crypt_iv_lmk_dtr(cc); ti->error = "Error kmallocing seed storage in LMK"; return -ENOMEM; } @@ -514,7 +501,7 @@ static int crypt_iv_lmk_init(struct crypt_config *cc) /* LMK seed is on the position of LMK_KEYS + 1 key */ if (lmk->seed) memcpy(lmk->seed, cc->key + (cc->tfms_count * subkey_size), - crypto_shash_digestsize(lmk->hash_tfm)); + MD5_DIGEST_SIZE); return 0; } @@ -529,55 +516,31 @@ static int crypt_iv_lmk_wipe(struct crypt_config *cc) return 0; } -static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv, - struct dm_crypt_request *dmreq, - u8 *data) +static void crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv, + struct dm_crypt_request *dmreq, u8 *data) { struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk; - SHASH_DESC_ON_STACK(desc, lmk->hash_tfm); - union { - struct md5_state md5state; - u8 state[CRYPTO_MD5_STATESIZE]; - } u; + struct md5_ctx ctx; __le32 buf[4]; - int i, r; - desc->tfm = lmk->hash_tfm; + md5_init(&ctx); - r = crypto_shash_init(desc); - if (r) - return r; - - if (lmk->seed) { - r = crypto_shash_update(desc, lmk->seed, LMK_SEED_SIZE); - if (r) - return r; - } + if (lmk->seed) + md5_update(&ctx, lmk->seed, LMK_SEED_SIZE); /* Sector is always 512B, block size 16, add data of blocks 1-31 */ - r = crypto_shash_update(desc, data + 16, 16 * 31); - if (r) - return r; + md5_update(&ctx, data + 16, 16 * 31); /* Sector is cropped to 56 bits here */ buf[0] = cpu_to_le32(dmreq->iv_sector & 0xFFFFFFFF); buf[1] = cpu_to_le32((((u64)dmreq->iv_sector >> 32) & 0x00FFFFFF) | 0x80000000); buf[2] = cpu_to_le32(4024); buf[3] = 0; - r = crypto_shash_update(desc, (u8 *)buf, sizeof(buf)); - if (r) - return r; + md5_update(&ctx, (u8 *)buf, sizeof(buf)); /* No MD5 padding here */ - r = crypto_shash_export(desc, &u.md5state); - if (r) - return r; - - for (i = 0; i < MD5_HASH_WORDS; i++) - __cpu_to_le32s(&u.md5state.hash[i]); - memcpy(iv, &u.md5state.hash, cc->iv_size); - - return 0; + cpu_to_le32_array(ctx.state.h, ARRAY_SIZE(ctx.state.h)); + memcpy(iv, ctx.state.h, cc->iv_size); } static int crypt_iv_lmk_gen(struct crypt_config *cc, u8 *iv, @@ -585,17 +548,15 @@ static int crypt_iv_lmk_gen(struct crypt_config *cc, u8 *iv, { struct scatterlist *sg; u8 *src; - int r = 0; if (bio_data_dir(dmreq->ctx->bio_in) == WRITE) { sg = crypt_get_sg_data(cc, dmreq->sg_in); src = kmap_local_page(sg_page(sg)); - r = crypt_iv_lmk_one(cc, iv, dmreq, src + sg->offset); + crypt_iv_lmk_one(cc, iv, dmreq, src + 
sg->offset); kunmap_local(src); } else memset(iv, 0, cc->iv_size); - - return r; + return 0; } static int crypt_iv_lmk_post(struct crypt_config *cc, u8 *iv, @@ -603,21 +564,19 @@ static int crypt_iv_lmk_post(struct crypt_config *cc, u8 *iv, { struct scatterlist *sg; u8 *dst; - int r; if (bio_data_dir(dmreq->ctx->bio_in) == WRITE) return 0; sg = crypt_get_sg_data(cc, dmreq->sg_out); dst = kmap_local_page(sg_page(sg)); - r = crypt_iv_lmk_one(cc, iv, dmreq, dst + sg->offset); + crypt_iv_lmk_one(cc, iv, dmreq, dst + sg->offset); /* Tweak the first block of plaintext sector */ - if (!r) - crypto_xor(dst + sg->offset, iv, cc->iv_size); + crypto_xor(dst + sg->offset, iv, cc->iv_size); kunmap_local(dst); - return r; + return 0; } static void crypt_iv_tcw_dtr(struct crypt_config *cc) @@ -1781,7 +1740,7 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) bio_for_each_folio_all(fi, clone) { if (folio_test_large(fi.folio)) { percpu_counter_sub(&cc->n_allocated_pages, - 1 << folio_order(fi.folio)); + folio_nr_pages(fi.folio)); folio_put(fi.folio); } else { mempool_free(&fi.folio->page, &cc->page_pool); } @@ -3496,6 +3455,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) struct dm_crypt_io *io; struct crypt_config *cc = ti->private; unsigned max_sectors; + bool no_split; /* * If bio is REQ_PREFLUSH or REQ_OP_DISCARD, just bypass crypt queues. @@ -3513,10 +3473,20 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) /* * Check if bio is too large, split as needed. + * + * For zoned devices, splitting write operations creates the + * risk of deadlocking queue freeze operations with zone write + * plugging BIO work when the remainder of a split BIO is + * issued. So always allow the entire BIO to proceed. */ - max_sectors = get_max_request_sectors(ti, bio); - if (unlikely(bio_sectors(bio) > max_sectors)) + no_split = (ti->emulate_zone_append && op_is_write(bio_op(bio))) || + (bio->bi_opf & REQ_ATOMIC); + max_sectors = get_max_request_sectors(ti, bio, no_split); + if (unlikely(bio_sectors(bio) > max_sectors)) { + if (unlikely(no_split)) + return DM_MAPIO_KILL; dm_accept_partial_bio(bio, max_sectors); + } /* * Ensure that bio is a multiple of internal sector encryption size @@ -3762,15 +3732,20 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) if (ti->emulate_zone_append) limits->max_hw_sectors = min(limits->max_hw_sectors, BIO_MAX_VECS << PAGE_SECTORS_SHIFT); + + limits->atomic_write_hw_unit_max = min(limits->atomic_write_hw_unit_max, + BIO_MAX_VECS << PAGE_SHIFT); + limits->atomic_write_hw_max = min(limits->atomic_write_hw_max, + BIO_MAX_VECS << PAGE_SHIFT); } static struct target_type crypt_target = { .name = "crypt", - .version = {1, 28, 0}, + .version = {1, 29, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, - .features = DM_TARGET_ZONED_HM, + .features = DM_TARGET_ZONED_HM | DM_TARGET_ATOMIC_WRITES, .report_zones = crypt_report_zones, .map = crypt_map, .status = crypt_status, diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c index 6abb31ca9662..b354e74a670e 100644 --- a/drivers/md/dm-ebs-target.c +++ b/drivers/md/dm-ebs-target.c @@ -103,7 +103,7 @@ static int __ebs_rw_bvec(struct ebs_c *ec, enum req_op op, struct bio_vec *bv, } else { flush_dcache_page(bv->bv_page); memcpy(ba, pa, cur_len); - dm_bufio_mark_partial_buffer_dirty(b, buf_off, buf_off + cur_len); + dm_bufio_mark_buffer_dirty(b); } dm_bufio_release(b); diff --git a/drivers/md/dm-exception-store.h
b/drivers/md/dm-exception-store.h index b67976637538..061b4d310813 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -29,7 +29,7 @@ typedef sector_t chunk_t; * chunk within the device. */ struct dm_exception { - struct hlist_bl_node hash_list; + struct hlist_node hash_list; chunk_t old_chunk; chunk_t new_chunk; diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 7bb7174f8f4f..f0c84e7a5daa 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -432,6 +432,7 @@ static int log_writes_kthread(void *arg) struct log_writes_c *lc = arg; sector_t sector = 0; + set_freezable(); while (!kthread_should_stop()) { bool super = false; bool logging_enabled; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index aaf4a0a4b0eb..c18358271618 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -131,7 +131,7 @@ static void queue_if_no_path_timeout_work(struct timer_list *t); #define MPATHF_QUEUE_IO 0 /* Must we queue all I/O? */ #define MPATHF_QUEUE_IF_NO_PATH 1 /* Queue I/O if last path fails? */ #define MPATHF_SAVED_QUEUE_IF_NO_PATH 2 /* Saved state during suspension */ -#define MPATHF_RETAIN_ATTACHED_HW_HANDLER 3 /* If there's already a hw_handler present, don't change it. */ +/* MPATHF_RETAIN_ATTACHED_HW_HANDLER no longer has any effect */ #define MPATHF_PG_INIT_DISABLED 4 /* pg_init is not currently allowed */ #define MPATHF_PG_INIT_REQUIRED 5 /* pg_init needs calling? */ #define MPATHF_PG_INIT_DELAY_RETRY 6 /* Delay pg_init retry? */ @@ -237,16 +237,10 @@ static struct multipath *alloc_multipath(struct dm_target *ti) static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m) { - if (m->queue_mode == DM_TYPE_NONE) { + if (m->queue_mode == DM_TYPE_NONE) m->queue_mode = DM_TYPE_REQUEST_BASED; - } else if (m->queue_mode == DM_TYPE_BIO_BASED) { + else if (m->queue_mode == DM_TYPE_BIO_BASED) INIT_WORK(&m->process_queued_bios, process_queued_bios); - /* - * bio-based doesn't support any direct scsi_dh management; - * it just discovers if a scsi_dh is attached. - */ - set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags); - } dm_table_set_type(ti->table, m->queue_mode); @@ -887,36 +881,30 @@ static int setup_scsi_dh(struct block_device *bdev, struct multipath *m, struct request_queue *q = bdev_get_queue(bdev); int r; - if (mpath_double_check_test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, m)) { -retain: - if (*attached_handler_name) { - /* - * Clear any hw_handler_params associated with a - * handler that isn't already attached. - */ - if (m->hw_handler_name && strcmp(*attached_handler_name, m->hw_handler_name)) { - kfree(m->hw_handler_params); - m->hw_handler_params = NULL; - } - - /* - * Reset hw_handler_name to match the attached handler - * - * NB. This modifies the table line to show the actual - * handler instead of the original table passed in. - */ - kfree(m->hw_handler_name); - m->hw_handler_name = *attached_handler_name; - *attached_handler_name = NULL; + if (*attached_handler_name) { + /* + * Clear any hw_handler_params associated with a + * handler that isn't already attached. + */ + if (m->hw_handler_name && strcmp(*attached_handler_name, + m->hw_handler_name)) { + kfree(m->hw_handler_params); + m->hw_handler_params = NULL; } + + /* + * Reset hw_handler_name to match the attached handler + * + * NB. This modifies the table line to show the actual + * handler instead of the original table passed in. 
+ */ + kfree(m->hw_handler_name); + m->hw_handler_name = *attached_handler_name; + *attached_handler_name = NULL; } if (m->hw_handler_name) { r = scsi_dh_attach(q, m->hw_handler_name); - if (r == -EBUSY) { - DMINFO("retaining handler on device %pg", bdev); - goto retain; - } if (r < 0) { *error = "error attaching hardware handler"; return r; @@ -1138,7 +1126,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) } if (!strcasecmp(arg_name, "retain_attached_hw_handler")) { - set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags); + /* no longer has any effect */ continue; } @@ -1823,7 +1811,6 @@ static void multipath_status(struct dm_target *ti, status_type_t type, DMEMIT("%u ", test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) + (m->pg_init_retries > 0) * 2 + (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 + - test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) + (m->queue_mode != DM_TYPE_REQUEST_BASED) * 2); if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) @@ -1832,8 +1819,6 @@ static void multipath_status(struct dm_target *ti, status_type_t type, DMEMIT("pg_init_retries %u ", m->pg_init_retries); if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs); - if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) - DMEMIT("retain_attached_hw_handler "); if (m->queue_mode != DM_TYPE_REQUEST_BASED) { switch (m->queue_mode) { case DM_TYPE_BIO_BASED: @@ -2307,7 +2292,7 @@ static struct target_type multipath_target = { .name = "multipath", .version = {1, 15, 0}, .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE | - DM_TARGET_PASSES_INTEGRITY, + DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ATOMIC_WRITES, .module = THIS_MODULE, .ctr = multipath_ctr, .dtr = multipath_dtr, diff --git a/drivers/md/dm-pcache/Makefile b/drivers/md/dm-pcache/Makefile index 86776e4acad2..cedfd38854f6 100644 --- a/drivers/md/dm-pcache/Makefile +++ b/drivers/md/dm-pcache/Makefile @@ -1,3 +1,3 @@ dm-pcache-y := dm_pcache.o cache_dev.o segment.o backing_dev.o cache.o cache_gc.o cache_writeback.o cache_segment.o cache_key.o cache_req.o -obj-m += dm-pcache.o +obj-$(CONFIG_DM_PCACHE) += dm-pcache.o diff --git a/drivers/md/dm-pcache/cache.c b/drivers/md/dm-pcache/cache.c index d8e92367d947..534bf07b794f 100644 --- a/drivers/md/dm-pcache/cache.c +++ b/drivers/md/dm-pcache/cache.c @@ -10,7 +10,8 @@ struct kmem_cache *key_cache; static inline struct pcache_cache_info *get_cache_info_addr(struct pcache_cache *cache) { - return cache->cache_info_addr + cache->info_index; + return (struct pcache_cache_info *)((char *)cache->cache_info_addr + + (size_t)cache->info_index * PCACHE_CACHE_INFO_SIZE); } static void cache_info_write(struct pcache_cache *cache) @@ -21,10 +22,10 @@ static void cache_info_write(struct pcache_cache *cache) cache_info->header.crc = pcache_meta_crc(&cache_info->header, sizeof(struct pcache_cache_info)); + cache->info_index = (cache->info_index + 1) % PCACHE_META_INDEX_MAX; memcpy_flushcache(get_cache_info_addr(cache), cache_info, sizeof(struct pcache_cache_info)); - - cache->info_index = (cache->info_index + 1) % PCACHE_META_INDEX_MAX; + pmem_wmb(); } static void cache_info_init_default(struct pcache_cache *cache); @@ -49,6 +50,8 @@ static int cache_info_init(struct pcache_cache *cache, struct pcache_cache_optio return -EINVAL; } + cache->info_index = ((char *)cache_info_addr - (char *)cache->cache_info_addr) / PCACHE_CACHE_INFO_SIZE; + return 0; } @@ -93,10 +96,10 @@ void cache_pos_encode(struct pcache_cache *cache, 
pos_onmedia.header.seq = seq; pos_onmedia.header.crc = cache_pos_onmedia_crc(&pos_onmedia); + *index = (*index + 1) % PCACHE_META_INDEX_MAX; + memcpy_flushcache(pos_onmedia_addr, &pos_onmedia, sizeof(struct pcache_cache_pos_onmedia)); pmem_wmb(); - - *index = (*index + 1) % PCACHE_META_INDEX_MAX; } int cache_pos_decode(struct pcache_cache *cache, @@ -181,7 +184,7 @@ static void cache_info_init_default(struct pcache_cache *cache) { struct pcache_cache_info *cache_info = &cache->cache_info; - cache_info->header.seq = 0; + memset(cache_info, 0, sizeof(*cache_info)); cache_info->n_segs = cache->cache_dev->seg_num; cache_info_set_gc_percent(cache_info, PCACHE_CACHE_GC_PERCENT_DEFAULT); } @@ -411,7 +414,7 @@ void pcache_cache_stop(struct dm_pcache *pcache) { struct pcache_cache *cache = &pcache->cache; - cache_flush(cache); + pcache_cache_flush(cache); cancel_delayed_work_sync(&cache->gc_work); flush_work(&cache->clean_work); diff --git a/drivers/md/dm-pcache/cache.h b/drivers/md/dm-pcache/cache.h index 1136d86958c8..27613b56be54 100644 --- a/drivers/md/dm-pcache/cache.h +++ b/drivers/md/dm-pcache/cache.h @@ -339,7 +339,7 @@ void cache_seg_put(struct pcache_cache_segment *cache_seg); void cache_seg_set_next_seg(struct pcache_cache_segment *cache_seg, u32 seg_id); /* cache request*/ -int cache_flush(struct pcache_cache *cache); +int pcache_cache_flush(struct pcache_cache *cache); void miss_read_end_work_fn(struct work_struct *work); int pcache_cache_handle_req(struct pcache_cache *cache, struct pcache_request *pcache_req); diff --git a/drivers/md/dm-pcache/cache_req.c b/drivers/md/dm-pcache/cache_req.c index 27f94c1fa968..7854a30e07b7 100644 --- a/drivers/md/dm-pcache/cache_req.c +++ b/drivers/md/dm-pcache/cache_req.c @@ -790,7 +790,7 @@ err: } /** - * cache_flush - Flush all ksets to persist any pending cache data + * pcache_cache_flush - Flush all ksets to persist any pending cache data * @cache: Pointer to the cache structure * * This function iterates through all ksets associated with the provided `cache` @@ -802,7 +802,7 @@ err: * the respective error code, preventing the flush operation from proceeding to * subsequent ksets. 
*/ -int cache_flush(struct pcache_cache *cache) +int pcache_cache_flush(struct pcache_cache *cache) { struct pcache_cache_kset *kset; int ret; @@ -827,7 +827,7 @@ int pcache_cache_handle_req(struct pcache_cache *cache, struct pcache_request *p struct bio *bio = pcache_req->bio; if (unlikely(bio->bi_opf & REQ_PREFLUSH)) - return cache_flush(cache); + return pcache_cache_flush(cache); if (bio_data_dir(bio) == READ) return cache_read(cache, pcache_req); diff --git a/drivers/md/dm-pcache/cache_segment.c b/drivers/md/dm-pcache/cache_segment.c index f0b58980806e..9d92e2b067ed 100644 --- a/drivers/md/dm-pcache/cache_segment.c +++ b/drivers/md/dm-pcache/cache_segment.c @@ -26,11 +26,11 @@ static void cache_seg_info_write(struct pcache_cache_segment *cache_seg) seg_info->header.seq++; seg_info->header.crc = pcache_meta_crc(&seg_info->header, sizeof(struct pcache_segment_info)); + cache_seg->info_index = (cache_seg->info_index + 1) % PCACHE_META_INDEX_MAX; + seg_info_addr = get_seg_info_addr(cache_seg); memcpy_flushcache(seg_info_addr, seg_info, sizeof(struct pcache_segment_info)); pmem_wmb(); - - cache_seg->info_index = (cache_seg->info_index + 1) % PCACHE_META_INDEX_MAX; mutex_unlock(&cache_seg->info_lock); } @@ -56,7 +56,10 @@ static int cache_seg_info_load(struct pcache_cache_segment *cache_seg) ret = -EIO; goto out; } - cache_seg->info_index = cache_seg_info_addr - cache_seg_info_addr_base; + + cache_seg->info_index = + ((char *)cache_seg_info_addr - (char *)cache_seg_info_addr_base) / + PCACHE_SEG_INFO_SIZE; out: mutex_unlock(&cache_seg->info_lock); @@ -129,10 +132,10 @@ static void cache_seg_ctrl_write(struct pcache_cache_segment *cache_seg) cache_seg_gen.header.crc = pcache_meta_crc(&cache_seg_gen.header, sizeof(struct pcache_cache_seg_gen)); + cache_seg->gen_index = (cache_seg->gen_index + 1) % PCACHE_META_INDEX_MAX; + memcpy_flushcache(get_cache_seg_gen_addr(cache_seg), &cache_seg_gen, sizeof(struct pcache_cache_seg_gen)); pmem_wmb(); - - cache_seg->gen_index = (cache_seg->gen_index + 1) % PCACHE_META_INDEX_MAX; } static void cache_seg_ctrl_init(struct pcache_cache_segment *cache_seg) diff --git a/drivers/md/dm-pcache/pcache_internal.h b/drivers/md/dm-pcache/pcache_internal.h index d427e534727c..b7a3319d2bd3 100644 --- a/drivers/md/dm-pcache/pcache_internal.h +++ b/drivers/md/dm-pcache/pcache_internal.h @@ -99,7 +99,7 @@ static inline void __must_check *pcache_meta_find_latest(struct pcache_meta_head /* Update latest if a more recent sequence is found */ if (!latest || pcache_meta_seq_after(meta->seq, seq_latest)) { seq_latest = meta->seq; - latest = (void *)header + (i * meta_max_size); + latest = meta_addr; } } diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index c6f7129e43d3..4bacdc499984 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2287,6 +2287,8 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) mddev->reshape_position = le64_to_cpu(sb->reshape_position); rs->raid_type = get_raid_type_by_ll(mddev->level, mddev->layout); + if (!rs->raid_type) + return -EINVAL; } } else { diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index f40c18da4000..dbd148967de4 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -40,10 +40,15 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ (DM_TRACKED_CHUNK_HASH_SIZE - 1)) +struct dm_hlist_head { + struct hlist_head head; + spinlock_t lock; +}; + struct dm_exception_table { uint32_t hash_mask; unsigned 
int hash_shift; - struct hlist_bl_head *table; + struct dm_hlist_head *table; }; struct dm_snapshot { @@ -628,8 +633,8 @@ static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk); /* Lock to protect access to the completed and pending exception hash tables. */ struct dm_exception_table_lock { - struct hlist_bl_head *complete_slot; - struct hlist_bl_head *pending_slot; + spinlock_t *complete_slot; + spinlock_t *pending_slot; }; static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk, @@ -638,20 +643,20 @@ static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk, struct dm_exception_table *complete = &s->complete; struct dm_exception_table *pending = &s->pending; - lock->complete_slot = &complete->table[exception_hash(complete, chunk)]; - lock->pending_slot = &pending->table[exception_hash(pending, chunk)]; + lock->complete_slot = &complete->table[exception_hash(complete, chunk)].lock; + lock->pending_slot = &pending->table[exception_hash(pending, chunk)].lock; } static void dm_exception_table_lock(struct dm_exception_table_lock *lock) { - hlist_bl_lock(lock->complete_slot); - hlist_bl_lock(lock->pending_slot); + spin_lock_nested(lock->complete_slot, 1); + spin_lock_nested(lock->pending_slot, 2); } static void dm_exception_table_unlock(struct dm_exception_table_lock *lock) { - hlist_bl_unlock(lock->pending_slot); - hlist_bl_unlock(lock->complete_slot); + spin_unlock(lock->pending_slot); + spin_unlock(lock->complete_slot); } static int dm_exception_table_init(struct dm_exception_table *et, @@ -661,13 +666,15 @@ static int dm_exception_table_init(struct dm_exception_table *et, et->hash_shift = hash_shift; et->hash_mask = size - 1; - et->table = kvmalloc_array(size, sizeof(struct hlist_bl_head), + et->table = kvmalloc_array(size, sizeof(struct dm_hlist_head), GFP_KERNEL); if (!et->table) return -ENOMEM; - for (i = 0; i < size; i++) - INIT_HLIST_BL_HEAD(et->table + i); + for (i = 0; i < size; i++) { + INIT_HLIST_HEAD(&et->table[i].head); + spin_lock_init(&et->table[i].lock); + } return 0; } @@ -675,16 +682,17 @@ static int dm_exception_table_init(struct dm_exception_table *et, static void dm_exception_table_exit(struct dm_exception_table *et, struct kmem_cache *mem) { - struct hlist_bl_head *slot; + struct dm_hlist_head *slot; struct dm_exception *ex; - struct hlist_bl_node *pos, *n; + struct hlist_node *pos; int i, size; size = et->hash_mask + 1; for (i = 0; i < size; i++) { slot = et->table + i; - hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) { + hlist_for_each_entry_safe(ex, pos, &slot->head, hash_list) { + hlist_del(&ex->hash_list); kmem_cache_free(mem, ex); cond_resched(); } @@ -700,7 +708,7 @@ static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk) static void dm_remove_exception(struct dm_exception *e) { - hlist_bl_del(&e->hash_list); + hlist_del(&e->hash_list); } /* @@ -710,12 +718,11 @@ static void dm_remove_exception(struct dm_exception *e) static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et, chunk_t chunk) { - struct hlist_bl_head *slot; - struct hlist_bl_node *pos; + struct hlist_head *slot; struct dm_exception *e; - slot = &et->table[exception_hash(et, chunk)]; - hlist_bl_for_each_entry(e, pos, slot, hash_list) + slot = &et->table[exception_hash(et, chunk)].head; + hlist_for_each_entry(e, slot, hash_list) if (chunk >= e->old_chunk && chunk <= e->old_chunk + dm_consecutive_chunk_count(e)) return e; @@ -762,18 +769,17 @@ static void free_pending_exception(struct 
dm_snap_pending_exception *pe) static void dm_insert_exception(struct dm_exception_table *eh, struct dm_exception *new_e) { - struct hlist_bl_head *l; - struct hlist_bl_node *pos; + struct hlist_head *l; struct dm_exception *e = NULL; - l = &eh->table[exception_hash(eh, new_e->old_chunk)]; + l = &eh->table[exception_hash(eh, new_e->old_chunk)].head; /* Add immediately if this table doesn't support consecutive chunks */ if (!eh->hash_shift) goto out; /* List is ordered by old_chunk */ - hlist_bl_for_each_entry(e, pos, l, hash_list) { + hlist_for_each_entry(e, l, hash_list) { /* Insert after an existing chunk? */ if (new_e->old_chunk == (e->old_chunk + dm_consecutive_chunk_count(e) + 1) && @@ -804,13 +810,13 @@ out: * Either the table doesn't support consecutive chunks or slot * l is empty. */ - hlist_bl_add_head(&new_e->hash_list, l); + hlist_add_head(&new_e->hash_list, l); } else if (new_e->old_chunk < e->old_chunk) { /* Add before an existing exception */ - hlist_bl_add_before(&new_e->hash_list, &e->hash_list); + hlist_add_before(&new_e->hash_list, &e->hash_list); } else { /* Add to l's tail: e is the last exception in this slot */ - hlist_bl_add_behind(&new_e->hash_list, &e->hash_list); + hlist_add_behind(&new_e->hash_list, &e->hash_list); } } @@ -820,7 +826,6 @@ out: */ static int dm_add_exception(void *context, chunk_t old, chunk_t new) { - struct dm_exception_table_lock lock; struct dm_snapshot *s = context; struct dm_exception *e; @@ -833,17 +838,7 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) /* Consecutive_count is implicitly initialised to zero */ e->new_chunk = new; - /* - * Although there is no need to lock access to the exception tables - * here, if we don't then hlist_bl_add_head(), called by - * dm_insert_exception(), will complain about accessing the - * corresponding list without locking it first. - */ - dm_exception_table_lock_init(s, old, &lock); - - dm_exception_table_lock(&lock); dm_insert_exception(&s->complete, e); - dm_exception_table_unlock(&lock); return 0; } @@ -873,7 +868,7 @@ static int calc_max_buckets(void) /* use a fixed size of 2MB */ unsigned long mem = 2 * 1024 * 1024; - mem /= sizeof(struct hlist_bl_head); + mem /= sizeof(struct dm_hlist_head); return mem; } diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c index bfaef27ca79f..22bc70923a83 100644 --- a/drivers/md/dm-sysfs.c +++ b/drivers/md/dm-sysfs.c @@ -86,17 +86,13 @@ static ssize_t dm_attr_uuid_show(struct mapped_device *md, char *buf) static ssize_t dm_attr_suspended_show(struct mapped_device *md, char *buf) { - sprintf(buf, "%d\n", dm_suspended_md(md)); - - return strlen(buf); + return sysfs_emit(buf, "%d\n", dm_suspended_md(md)); } static ssize_t dm_attr_use_blk_mq_show(struct mapped_device *md, char *buf) { /* Purely for userspace compatibility */ - sprintf(buf, "%d\n", true); - - return strlen(buf); + return sysfs_emit(buf, "%d\n", true); } static DM_ATTR_RO(name); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index ad0a60a07b93..0522cd700e0e 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -2043,6 +2043,10 @@ bool dm_table_supports_size_change(struct dm_table *t, sector_t old_size, return true; } +/* + * This function will be skipped by noflush reloads of immutable request + * based devices (dm-mpath). 
+ */ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index c84149ba4e38..52ffb495f5a8 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -395,13 +395,13 @@ static void begin_discard(struct discard_op *op, struct thin_c *tc, struct bio * op->bio = NULL; } -static int issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t data_e) +static void issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t data_e) { struct thin_c *tc = op->tc; sector_t s = block_to_sectors(tc->pool, data_b); sector_t len = block_to_sectors(tc->pool, data_e - data_b); - return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOIO, &op->bio); + __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOIO, &op->bio); } static void end_discard(struct discard_op *op, int r) @@ -1113,9 +1113,7 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m break; } - r = issue_discard(&op, b, e); - if (r) - goto out; + issue_discard(&op, b, e); b = e; } @@ -1188,8 +1186,8 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m) struct discard_op op; begin_discard(&op, tc, discard_parent); - r = issue_discard(&op, m->data_block, data_end); - end_discard(&op, r); + issue_discard(&op, m->data_block, data_end); + end_discard(&op, 0); } } @@ -4383,11 +4381,8 @@ static void thin_postsuspend(struct dm_target *ti) { struct thin_c *tc = ti->private; - /* - * The dm_noflush_suspending flag has been cleared by now, so - * unfortunately we must always run this. - */ - noflush_work(tc, do_noflush_stop); + if (dm_noflush_suspending(ti)) + noflush_work(tc, do_noflush_stop); } static int thin_preresume(struct dm_target *ti) diff --git a/drivers/md/dm-vdo/action-manager.c b/drivers/md/dm-vdo/action-manager.c index a0e5e7077d13..e3bba0b28aad 100644 --- a/drivers/md/dm-vdo/action-manager.c +++ b/drivers/md/dm-vdo/action-manager.c @@ -43,7 +43,7 @@ struct action { * @actions: The two action slots. * @current_action: The current action slot. * @zones: The number of zones in which an action is to be applied. - * @Scheduler: A function to schedule a default next action. + * @scheduler: A function to schedule a default next action. * @get_zone_thread_id: A function to get the id of the thread on which to apply an action to a * zone. * @initiator_thread_id: The ID of the thread on which actions may be initiated. diff --git a/drivers/md/dm-vdo/admin-state.c b/drivers/md/dm-vdo/admin-state.c index 3f9dba525154..da153fef085e 100644 --- a/drivers/md/dm-vdo/admin-state.c +++ b/drivers/md/dm-vdo/admin-state.c @@ -149,7 +149,8 @@ const struct admin_state_code *VDO_ADMIN_STATE_RESUMING = &VDO_CODE_RESUMING; /** * get_next_state() - Determine the state which should be set after a given operation completes * based on the operation and the current state. - * @operation The operation to be started. + * @state: The current admin state. + * @operation: The operation to be started. * * Return: The state to set when the operation completes or NULL if the operation can not be * started in the current state. @@ -187,6 +188,8 @@ static const struct admin_state_code *get_next_state(const struct admin_state *s /** * vdo_finish_operation() - Finish the current operation. + * @state: The current admin state. + * @result: The result of the operation. * * Will notify the operation waiter if there is one. 
This method should be used for operations * started with vdo_start_operation(). For operations which were started with vdo_start_draining(), @@ -214,8 +217,10 @@ bool vdo_finish_operation(struct admin_state *state, int result) /** * begin_operation() - Begin an operation if it may be started given the current state. - * @waiter A completion to notify when the operation is complete; may be NULL. - * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL. + * @state: The current admin state. + * @operation: The operation to be started. + * @waiter: A completion to notify when the operation is complete; may be NULL. + * @initiator: The vdo_admin_initiator_fn to call if the operation may begin; may be NULL. * * Return: VDO_SUCCESS or an error. */ @@ -259,8 +264,10 @@ static int __must_check begin_operation(struct admin_state *state, /** * start_operation() - Start an operation if it may be started given the current state. - * @waiter A completion to notify when the operation is complete. - * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL. + * @state: The current admin state. + * @operation: The operation to be started. + * @waiter: A completion to notify when the operation is complete; may be NULL. + * @initiator: The vdo_admin_initiator_fn to call if the operation may begin; may be NULL. * * Return: true if the operation was started. */ @@ -274,10 +281,10 @@ static inline bool __must_check start_operation(struct admin_state *state, /** * check_code() - Check the result of a state validation. - * @valid true if the code is of an appropriate type. - * @code The code which failed to be of the correct type. - * @what What the code failed to be, for logging. - * @waiter The completion to notify of the error; may be NULL. + * @valid: True if the code is of an appropriate type. + * @code: The code which failed to be of the correct type. + * @what: What the code failed to be, for logging. + * @waiter: The completion to notify of the error; may be NULL. * * If the result failed, log an invalid state error and, if there is a waiter, notify it. * @@ -301,7 +308,8 @@ static bool check_code(bool valid, const struct admin_state_code *code, const ch /** * assert_vdo_drain_operation() - Check that an operation is a drain. - * @waiter The completion to finish with an error if the operation is not a drain. + * @operation: The operation to check. + * @waiter: The completion to finish with an error if the operation is not a drain. * * Return: true if the specified operation is a drain. */ @@ -313,9 +321,10 @@ static bool __must_check assert_vdo_drain_operation(const struct admin_state_cod /** * vdo_start_draining() - Initiate a drain operation if the current state permits it. - * @operation The type of drain to initiate. - * @waiter The completion to notify when the drain is complete. - * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL. + * @state: The current admin state. + * @operation: The type of drain to initiate. + * @waiter: The completion to notify when the drain is complete. + * @initiator: The vdo_admin_initiator_fn to call if the operation may begin; may be NULL. * * Return: true if the drain was initiated, if not the waiter will be notified. */ @@ -345,6 +354,7 @@ bool vdo_start_draining(struct admin_state *state, /** * vdo_finish_draining() - Finish a drain operation if one was in progress. + * @state: The current admin state. 
* * Return: true if the state was draining; will notify the waiter if so. */ @@ -355,6 +365,8 @@ bool vdo_finish_draining(struct admin_state *state) /** * vdo_finish_draining_with_result() - Finish a drain operation with a status code. + * @state: The current admin state. + * @result: The result of the drain operation. * * Return: true if the state was draining; will notify the waiter if so. */ @@ -365,7 +377,8 @@ bool vdo_finish_draining_with_result(struct admin_state *state, int result) /** * vdo_assert_load_operation() - Check that an operation is a load. - * @waiter The completion to finish with an error if the operation is not a load. + * @operation: The operation to check. + * @waiter: The completion to finish with an error if the operation is not a load. * * Return: true if the specified operation is a load. */ @@ -377,9 +390,10 @@ bool vdo_assert_load_operation(const struct admin_state_code *operation, /** * vdo_start_loading() - Initiate a load operation if the current state permits it. - * @operation The type of load to initiate. - * @waiter The completion to notify when the load is complete (may be NULL). - * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL. + * @state: The current admin state. + * @operation: The type of load to initiate. + * @waiter: The completion to notify when the load is complete; may be NULL. + * @initiator: The vdo_admin_initiator_fn to call if the operation may begin; may be NULL. * * Return: true if the load was initiated, if not the waiter will be notified. */ @@ -393,6 +407,7 @@ bool vdo_start_loading(struct admin_state *state, /** * vdo_finish_loading() - Finish a load operation if one was in progress. + * @state: The current admin state. * * Return: true if the state was loading; will notify the waiter if so. */ @@ -403,7 +418,8 @@ bool vdo_finish_loading(struct admin_state *state) /** * vdo_finish_loading_with_result() - Finish a load operation with a status code. - * @result The result of the load operation. + * @state: The current admin state. + * @result: The result of the load operation. * * Return: true if the state was loading; will notify the waiter if so. */ @@ -414,7 +430,8 @@ bool vdo_finish_loading_with_result(struct admin_state *state, int result) /** * assert_vdo_resume_operation() - Check whether an admin_state_code is a resume operation. - * @waiter The completion to notify if the operation is not a resume operation; may be NULL. + * @operation: The operation to check. + * @waiter: The completion to notify if the operation is not a resume operation; may be NULL. * * Return: true if the code is a resume operation. */ @@ -427,9 +444,10 @@ static bool __must_check assert_vdo_resume_operation(const struct admin_state_co /** * vdo_start_resuming() - Initiate a resume operation if the current state permits it. - * @operation The type of resume to start. - * @waiter The completion to notify when the resume is complete (may be NULL). - * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL. + * @state: The current admin state. + * @operation: The type of resume to start. + * @waiter: The completion to notify when the resume is complete; may be NULL. + * @initiator: The vdo_admin_initiator_fn to call if the operation may begin; may be NULL. * * Return: true if the resume was initiated, if not the waiter will be notified. */ @@ -443,6 +461,7 @@ bool vdo_start_resuming(struct admin_state *state, /** * vdo_finish_resuming() - Finish a resume operation if one was in progress. 
+ * @state: The current admin state. * * Return: true if the state was resuming; will notify the waiter if so. */ @@ -453,7 +472,8 @@ bool vdo_finish_resuming(struct admin_state *state) /** * vdo_finish_resuming_with_result() - Finish a resume operation with a status code. - * @result The result of the resume operation. + * @state: The current admin state. + * @result: The result of the resume operation. * * Return: true if the state was resuming; will notify the waiter if so. */ @@ -465,6 +485,7 @@ bool vdo_finish_resuming_with_result(struct admin_state *state, int result) /** * vdo_resume_if_quiescent() - Change the state to normal operation if the current state is * quiescent. + * @state: The current admin state. * * Return: VDO_SUCCESS if the state resumed, VDO_INVALID_ADMIN_STATE otherwise. */ @@ -479,6 +500,8 @@ int vdo_resume_if_quiescent(struct admin_state *state) /** * vdo_start_operation() - Attempt to start an operation. + * @state: The current admin state. + * @operation: The operation to attempt to start. * * Return: VDO_SUCCESS if the operation was started, VDO_INVALID_ADMIN_STATE if not */ @@ -490,8 +513,10 @@ int vdo_start_operation(struct admin_state *state, /** * vdo_start_operation_with_waiter() - Attempt to start an operation. - * @waiter the completion to notify when the operation completes or fails to start; may be NULL. - * @initiator The vdo_admin_initiator_fn to call if the operation may begin; may be NULL. + * @state: The current admin state. + * @operation: The operation to attempt to start. + * @waiter: The completion to notify when the operation completes or fails to start; may be NULL. + * @initiator: The vdo_admin_initiator_fn to call if the operation may begin; may be NULL. * * Return: VDO_SUCCESS if the operation was started, VDO_INVALID_ADMIN_STATE if not */ diff --git a/drivers/md/dm-vdo/block-map.c b/drivers/md/dm-vdo/block-map.c index baf683cabb1b..a7db5b41155e 100644 --- a/drivers/md/dm-vdo/block-map.c +++ b/drivers/md/dm-vdo/block-map.c @@ -174,6 +174,7 @@ static inline struct vdo_page_completion *page_completion_from_waiter(struct vdo /** * initialize_info() - Initialize all page info structures and put them on the free list. + * @cache: The page cache. * * Return: VDO_SUCCESS or an error. */ @@ -209,6 +210,7 @@ static int initialize_info(struct vdo_page_cache *cache) /** * allocate_cache_components() - Allocate components of the cache which require their own * allocation. + * @cache: The page cache. * * The caller is responsible for all clean up on errors. * * @@ -238,6 +240,8 @@ static int __must_check allocate_cache_components(struct vdo_page_cache *cache) /** * assert_on_cache_thread() - Assert that a function has been called on the VDO page cache's * thread. + * @cache: The page cache. + * @function_name: The function name to report if the assertion fails. */ static inline void assert_on_cache_thread(struct vdo_page_cache *cache, const char *function_name) @@ -271,6 +275,7 @@ static void report_cache_pressure(struct vdo_page_cache *cache) /** * get_page_state_name() - Return the name of a page state. + * @state: The page state to describe. * * If the page state is invalid a static string is returned and the invalid state is logged. * * @@ -342,6 +347,8 @@ static void update_lru(struct page_info *info) /** * set_info_state() - Set the state of a page_info and put it on the right list, adjusting * counters. + * @info: The page info to update. + * @new_state: The new state to set.
*/ static void set_info_state(struct page_info *info, enum vdo_page_buffer_state new_state) { @@ -416,6 +423,7 @@ static int reset_page_info(struct page_info *info) /** * find_free_page() - Find a free page. + * @cache: The page cache. * * Return: A pointer to the page info structure (if found), NULL otherwise. */ @@ -433,6 +441,7 @@ static struct page_info * __must_check find_free_page(struct vdo_page_cache *cac /** * find_page() - Find the page info (if any) associated with a given pbn. + * @cache: The page cache. * @pbn: The absolute physical block number of the page. * * Return: The page info for the page if available, or NULL if not. @@ -449,6 +458,7 @@ static struct page_info * __must_check find_page(struct vdo_page_cache *cache, /** * select_lru_page() - Determine which page is least recently used. + * @cache: The page cache. * * Picks the least recently used from among the non-busy entries at the front of each of the lru * list. Since whenever we mark a page busy we also put it to the end of the list it is unlikely @@ -523,6 +533,8 @@ static void complete_waiter_with_page(struct vdo_waiter *waiter, void *page_info /** * distribute_page_over_waitq() - Complete a waitq of VDO page completions with a page result. + * @info: The loaded page info. + * @waitq: The list of waiting data_vios. * * Upon completion the waitq will be empty. * @@ -548,7 +560,9 @@ static unsigned int distribute_page_over_waitq(struct page_info *info, /** * set_persistent_error() - Set a persistent error which all requests will receive in the future. + * @cache: The page cache. * @context: A string describing what triggered the error. + * @result: The error result to set on the cache. * * Once triggered, all enqueued completions will get this error. Any future requests will result in * this error as well. @@ -581,6 +595,7 @@ static void set_persistent_error(struct vdo_page_cache *cache, const char *conte /** * validate_completed_page() - Check that a page completion which is being freed to the cache * referred to a valid page and is in a valid state. + * @completion: The page completion to check. * @writable: Whether a writable page is required. * * Return: VDO_SUCCESS if the page was valid, otherwise as error @@ -758,6 +773,8 @@ static void load_cache_page_endio(struct bio *bio) /** * launch_page_load() - Begin the process of loading a page. + * @info: The page info to launch. + * @pbn: The absolute physical block number of the page to load. * * Return: VDO_SUCCESS or an error code. */ @@ -836,6 +853,7 @@ static void save_pages(struct vdo_page_cache *cache) /** * schedule_page_save() - Add a page to the outgoing list of pages waiting to be saved. + * @info: The page info to save. * * Once in the list, a page may not be used until it has been written out. */ @@ -854,6 +872,7 @@ static void schedule_page_save(struct page_info *info) /** * launch_page_save() - Add a page to outgoing pages waiting to be saved, and then start saving * pages if another save is not in progress. + * @info: The page info to save. */ static void launch_page_save(struct page_info *info) { @@ -864,6 +883,7 @@ static void launch_page_save(struct page_info *info) /** * completion_needs_page() - Determine whether a given vdo_page_completion (as a waiter) is * requesting a given page number. + * @waiter: The page completion waiter to check. * @context: A pointer to the pbn of the desired page. * * Implements waiter_match_fn. 
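The selection rule documented for select_lru_page() amounts to a single pass over the LRU list, skipping busy or in-flight entries; a minimal sketch, assuming the lru_list, busy, and is_in_flight() names suggested by the surrounding code:

static struct page_info *select_lru_page_sketch(struct vdo_page_cache *cache)
{
	struct page_info *info;

	/*
	 * The first non-busy, not-in-flight entry is the least recently used,
	 * since pages are moved to the tail whenever they are marked busy.
	 */
	list_for_each_entry(info, &cache->lru_list, lru_entry)
		if ((info->busy == 0) && !is_in_flight(info))
			return info;

	return NULL;
}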
@@ -880,6 +900,7 @@ static bool completion_needs_page(struct vdo_waiter *waiter, void *context) /** * allocate_free_page() - Allocate a free page to the first completion in the waiting queue, and * any other completions that match it in page number. + * @info: The page info to allocate a page for. */ static void allocate_free_page(struct page_info *info) { @@ -925,6 +946,7 @@ static void allocate_free_page(struct page_info *info) /** * discard_a_page() - Begin the process of discarding a page. + * @cache: The page cache. * * If no page is discardable, increments a count of deferred frees so that the next release of a * page which is no longer busy will kick off another discard cycle. This is an indication that the @@ -955,10 +977,6 @@ static void discard_a_page(struct vdo_page_cache *cache) launch_page_save(info); } -/** - * discard_page_for_completion() - Helper used to trigger a discard so that the completion can get - * a different page. - */ static void discard_page_for_completion(struct vdo_page_completion *vdo_page_comp) { struct vdo_page_cache *cache = vdo_page_comp->cache; @@ -1132,6 +1150,7 @@ static void write_pages(struct vdo_completion *flush_completion) /** * vdo_release_page_completion() - Release a VDO Page Completion. + * @completion: The page completion to release. * * The page referenced by this completion (if any) will no longer be held busy by this completion. * If a page becomes discardable and there are completions awaiting free pages then a new round of @@ -1172,10 +1191,6 @@ void vdo_release_page_completion(struct vdo_completion *completion) } } -/** - * load_page_for_completion() - Helper function to load a page as described by a VDO Page - * Completion. - */ static void load_page_for_completion(struct page_info *info, struct vdo_page_completion *vdo_page_comp) { @@ -1319,6 +1334,7 @@ int vdo_get_cached_page(struct vdo_completion *completion, /** * vdo_invalidate_page_cache() - Invalidate all entries in the VDO page cache. + * @cache: The page cache. * * There must not be any dirty pages in the cache. * @@ -1345,6 +1361,10 @@ int vdo_invalidate_page_cache(struct vdo_page_cache *cache) /** * get_tree_page_by_index() - Get the tree page for a given height and page index. + * @forest: The block map forest. + * @root_index: The root index of the tree to search. + * @height: The height in the tree. + * @page_index: The page index. * * Return: The requested page. */ @@ -2211,6 +2231,7 @@ static void allocate_block_map_page(struct block_map_zone *zone, /** * vdo_find_block_map_slot() - Find the block map slot in which the block map entry for a data_vio * resides and cache that result in the data_vio. + * @data_vio: The data vio. * * All ancestors in the tree will be allocated or loaded, as needed. */ @@ -2435,6 +2456,7 @@ static void deforest(struct forest *forest, size_t first_page_segment) /** * make_forest() - Make a collection of trees for a block_map, expanding the existing forest if * there is one. + * @map: The block map. * @entries: The number of entries the block map will hold. * * Return: VDO_SUCCESS or an error. @@ -2476,6 +2498,7 @@ static int make_forest(struct block_map *map, block_count_t entries) /** * replace_forest() - Replace a block_map's forest with the already-prepared larger forest. + * @map: The block map. */ static void replace_forest(struct block_map *map) { @@ -2492,6 +2515,7 @@ static void replace_forest(struct block_map *map) /** * finish_cursor() - Finish the traversal of a single tree. If it was the last cursor, finish the * traversal. 
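The "last cursor finishes the traversal" behavior just documented for finish_cursor() is a countdown across the per-root cursors; a sketch under assumed field names (parent, active_roots, completion):

static void finish_cursor_sketch(struct cursor *cursor)
{
	struct cursors *cursors = cursor->parent;
	struct vdo_completion *completion = cursors->completion;

	/* Each tree's cursor returns its vio when its traversal ends. */
	return_vio_to_pool(cursors->pool, cursor->vio);
	if (--cursors->active_roots > 0)
		return;

	/* This was the last active root, so the whole forest is done. */
	vdo_free(cursors);
	vdo_finish_completion(completion);
}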
+ * @cursor: The cursor to complete. */ static void finish_cursor(struct cursor *cursor) { @@ -2549,6 +2573,7 @@ static void traversal_endio(struct bio *bio) /** * traverse() - Traverse a single block map tree. + * @cursor: A cursor tracking traversal progress. * * This is the recursive heart of the traversal process. */ @@ -2619,6 +2644,7 @@ static void traverse(struct cursor *cursor) /** * launch_cursor() - Start traversing a single block map tree now that the cursor has a VIO with * which to load pages. + * @waiter: The parent of the cursor to launch. * @context: The pooled_vio just acquired. * * Implements waiter_callback_fn. @@ -2636,6 +2662,8 @@ static void launch_cursor(struct vdo_waiter *waiter, void *context) /** * compute_boundary() - Compute the number of pages used at each level of the given root's tree. + * @map: The block map. + * @root_index: The tree root index. * * Return: The list of page counts as a boundary structure. */ @@ -2668,6 +2696,7 @@ static struct boundary compute_boundary(struct block_map *map, root_count_t root /** * vdo_traverse_forest() - Walk the entire forest of a block map. + * @map: The block map. * @callback: A function to call with the pbn of each allocated node in the forest. * @completion: The completion to notify on each traversed PBN, and when traversal completes. */ @@ -2707,6 +2736,9 @@ void vdo_traverse_forest(struct block_map *map, vdo_entry_callback_fn callback, /** * initialize_block_map_zone() - Initialize the per-zone portions of the block map. + * @map: The block map. + * @zone_number: The zone to initialize. + * @cache_size: The total block map cache size. * @maximum_age: The number of journal blocks before a dirtied page is considered old and must be * written out. */ @@ -3091,6 +3123,7 @@ static void fetch_mapping_page(struct data_vio *data_vio, bool modifiable, /** * clear_mapped_location() - Clear a data_vio's mapped block location, setting it to be unmapped. + * @data_vio: The data vio. * * This indicates the block map entry for the logical block is either unmapped or corrupted. */ @@ -3104,6 +3137,8 @@ static void clear_mapped_location(struct data_vio *data_vio) /** * set_mapped_location() - Decode and validate a block map entry, and set the mapped location of a * data_vio. + * @data_vio: The data vio. + * @entry: The new mapped entry to set. * * Return: VDO_SUCCESS or VDO_BAD_MAPPING if the map entry is invalid or an error code for any * other failure diff --git a/drivers/md/dm-vdo/completion.c b/drivers/md/dm-vdo/completion.c index 5ad85334632d..2f00acbb3b2b 100644 --- a/drivers/md/dm-vdo/completion.c +++ b/drivers/md/dm-vdo/completion.c @@ -65,6 +65,8 @@ static inline void assert_incomplete(struct vdo_completion *completion) /** * vdo_set_completion_result() - Set the result of a completion. + * @completion: The completion to update. + * @result: The result to set. * * Older errors will not be masked. */ @@ -77,6 +79,7 @@ void vdo_set_completion_result(struct vdo_completion *completion, int result) /** * vdo_launch_completion_with_priority() - Run or enqueue a completion. + * @completion: The completion to launch. * @priority: The priority at which to enqueue the completion. * * If called on the correct thread (i.e. the one specified in the completion's callback_thread_id @@ -125,6 +128,8 @@ void vdo_enqueue_completion(struct vdo_completion *completion, /** * vdo_requeue_completion_if_needed() - Requeue a completion if not called on the specified thread. + * @completion: The completion to requeue. 
+ * @callback_thread_id: The thread on which to requeue the completion. * * Return: True if the completion was requeued; callers may not access the completion in this case. */ diff --git a/drivers/md/dm-vdo/data-vio.c b/drivers/md/dm-vdo/data-vio.c index 262e11581f2d..3333e1e5b02e 100644 --- a/drivers/md/dm-vdo/data-vio.c +++ b/drivers/md/dm-vdo/data-vio.c @@ -227,6 +227,7 @@ static inline u64 get_arrival_time(struct bio *bio) /** * check_for_drain_complete_locked() - Check whether a data_vio_pool has no outstanding data_vios * or waiters while holding the pool's lock. + * @pool: The data_vio pool. */ static bool check_for_drain_complete_locked(struct data_vio_pool *pool) { @@ -387,6 +388,7 @@ struct data_vio_compression_status advance_data_vio_compression_stage(struct dat /** * cancel_data_vio_compression() - Prevent this data_vio from being compressed or packed. + * @data_vio: The data_vio. * * Return: true if the data_vio is in the packer and the caller was the first caller to cancel it. */ @@ -483,6 +485,8 @@ static void attempt_logical_block_lock(struct vdo_completion *completion) /** * launch_data_vio() - (Re)initialize a data_vio to have a new logical block number, keeping the * same parent and other state and send it on its way. + * @data_vio: The data_vio to launch. + * @lbn: The logical block number. */ static void launch_data_vio(struct data_vio *data_vio, logical_block_number_t lbn) { @@ -641,6 +645,7 @@ static void update_limiter(struct limiter *limiter) /** * schedule_releases() - Ensure that release processing is scheduled. + * @pool: The data_vio pool. * * If this call switches the state to processing, enqueue. Otherwise, some other thread has already * done so. @@ -768,6 +773,8 @@ static void initialize_limiter(struct limiter *limiter, struct data_vio_pool *po /** * initialize_data_vio() - Allocate the components of a data_vio. + * @data_vio: The data_vio to initialize. + * @vdo: The vdo containing the data_vio. * * The caller is responsible for cleaning up the data_vio on error. * @@ -880,6 +887,7 @@ int make_data_vio_pool(struct vdo *vdo, data_vio_count_t pool_size, /** * free_data_vio_pool() - Free a data_vio_pool and the data_vios in it. + * @pool: The data_vio pool to free. * * All data_vios must be returned to the pool before calling this function. */ @@ -944,6 +952,8 @@ static void wait_permit(struct limiter *limiter, struct bio *bio) /** * vdo_launch_bio() - Acquire a data_vio from the pool, assign the bio to it, and launch it. + * @pool: The data_vio pool. + * @bio: The bio to launch. * * This will block if data_vios or discard permits are not available. */ @@ -994,6 +1004,7 @@ static void assert_on_vdo_cpu_thread(const struct vdo *vdo, const char *name) /** * drain_data_vio_pool() - Wait asynchronously for all data_vios to be returned to the pool. + * @pool: The data_vio pool. * @completion: The completion to notify when the pool has drained. */ void drain_data_vio_pool(struct data_vio_pool *pool, struct vdo_completion *completion) @@ -1005,6 +1016,7 @@ void drain_data_vio_pool(struct data_vio_pool *pool, struct vdo_completion *comp /** * resume_data_vio_pool() - Resume a data_vio pool. + * @pool: The data_vio pool. * @completion: The completion to notify when the pool has resumed. */ void resume_data_vio_pool(struct data_vio_pool *pool, struct vdo_completion *completion) @@ -1024,6 +1036,7 @@ static void dump_limiter(const char *name, struct limiter *limiter) /** * dump_data_vio_pool() - Dump a data_vio pool to the log. + * @pool: The data_vio pool. 
* @dump_vios: Whether to dump the details of each busy data_vio as well. */ void dump_data_vio_pool(struct data_vio_pool *pool, bool dump_vios) @@ -1114,6 +1127,7 @@ static void perform_cleanup_stage(struct data_vio *data_vio, /** * release_allocated_lock() - Release the PBN lock and/or the reference on the allocated block at * the end of processing a data_vio. + * @completion: The data_vio holding the lock. */ static void release_allocated_lock(struct vdo_completion *completion) { @@ -1194,6 +1208,7 @@ static void transfer_lock(struct data_vio *data_vio, struct lbn_lock *lock) /** * release_logical_lock() - Release the logical block lock and flush generation lock at the end of * processing a data_vio. + * @completion: The data_vio holding the lock. */ static void release_logical_lock(struct vdo_completion *completion) { @@ -1228,6 +1243,7 @@ static void clean_hash_lock(struct vdo_completion *completion) /** * finish_cleanup() - Make some assertions about a data_vio which has finished cleaning up. + * @data_vio: The data_vio. * * If it is part of a multi-block discard, starts on the next block, otherwise, returns it to the * pool. @@ -1342,6 +1358,7 @@ void handle_data_vio_error(struct vdo_completion *completion) /** * get_data_vio_operation_name() - Get the name of the last asynchronous operation performed on a * data_vio. + * @data_vio: The data_vio. */ const char *get_data_vio_operation_name(struct data_vio *data_vio) { @@ -1355,7 +1372,7 @@ const char *get_data_vio_operation_name(struct data_vio *data_vio) /** * data_vio_allocate_data_block() - Allocate a data block. - * + * @data_vio: The data_vio. * @write_lock_type: The type of write lock to obtain on the block. * @callback: The callback which will attempt an allocation in the current zone and continue if it * succeeds. @@ -1379,6 +1396,7 @@ void data_vio_allocate_data_block(struct data_vio *data_vio, /** * release_data_vio_allocation_lock() - Release the PBN lock on a data_vio's allocated block. + * @data_vio: The data_vio. * @reset: If true, the allocation will be reset (i.e. any allocated pbn will be forgotten). * * If the reference to the locked block is still provisional, it will be released as well. @@ -1399,6 +1417,7 @@ void release_data_vio_allocation_lock(struct data_vio *data_vio, bool reset) /** * uncompress_data_vio() - Uncompress the data a data_vio has just read. + * @data_vio: The data_vio. * @mapping_state: The mapping state indicating which fragment to decompress. * @buffer: The buffer to receive the uncompressed data. */ @@ -1519,6 +1538,7 @@ static void complete_zero_read(struct vdo_completion *completion) /** * read_block() - Read a block asynchronously. + * @completion: The data_vio doing the read. * * This is the callback registered in read_block_mapping(). */ @@ -1675,6 +1695,7 @@ static void journal_remapping(struct vdo_completion *completion) /** * read_old_block_mapping() - Get the previous PBN/LBN mapping of an in-progress write. + * @completion: The data_vio doing the read. * * Gets the previous PBN mapped to this LBN from the block map, so as to make an appropriate * journal entry referencing the removal of this LBN->PBN mapping. @@ -1704,6 +1725,7 @@ void update_metadata_for_data_vio_write(struct data_vio *data_vio, struct pbn_lo /** * pack_compressed_data() - Attempt to pack the compressed data_vio into a block. + * @completion: The data_vio. * * This is the callback registered in launch_compress_data_vio(). 
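Several helpers in this file, such as uncompress_data_vio() above, sit on top of the kernel LZ4 API; a sketch of the decompression path, assuming the compression.block field and using vdo_get_compressed_block_fragment(), which is documented later in this diff:

static int uncompress_sketch(struct data_vio *data_vio,
			     enum block_mapping_state mapping_state, char *buffer)
{
	u16 offset, size;
	int result = vdo_get_compressed_block_fragment(mapping_state,
						       data_vio->compression.block,
						       &offset, &size);
	if (result != VDO_SUCCESS)
		return result;

	/* A valid fragment must decompress to exactly one block. */
	if (LZ4_decompress_safe(data_vio->compression.block->data + offset,
				buffer, size, VDO_BLOCK_SIZE) != VDO_BLOCK_SIZE)
		return VDO_INVALID_FRAGMENT;

	return VDO_SUCCESS;
}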
*/ @@ -1725,6 +1747,7 @@ static void pack_compressed_data(struct vdo_completion *completion) /** * compress_data_vio() - Do the actual work of compressing the data on a CPU queue. + * @completion: The data_vio. * * This callback is registered in launch_compress_data_vio(). */ @@ -1754,6 +1777,7 @@ static void compress_data_vio(struct vdo_completion *completion) /** * launch_compress_data_vio() - Continue a write by attempting to compress the data. + * @data_vio: The data_vio. * * This is a re-entry point to vio_write used by hash locks. */ @@ -1796,7 +1820,8 @@ void launch_compress_data_vio(struct data_vio *data_vio) /** * hash_data_vio() - Hash the data in a data_vio and set the hash zone (which also flags the record * name as set). - + * @completion: The data_vio. + * * This callback is registered in prepare_for_dedupe(). */ static void hash_data_vio(struct vdo_completion *completion) @@ -1832,6 +1857,7 @@ static void prepare_for_dedupe(struct data_vio *data_vio) /** * write_bio_finished() - This is the bio_end_io function registered in write_block() to be called * when a data_vio's write to the underlying storage has completed. + * @bio: The bio to update. */ static void write_bio_finished(struct bio *bio) { @@ -1884,6 +1910,7 @@ void write_data_vio(struct data_vio *data_vio) /** * acknowledge_write_callback() - Acknowledge a write to the requestor. + * @completion: The data_vio. * * This callback is registered in allocate_block() and continue_write_with_block_map_slot(). */ @@ -1909,6 +1936,7 @@ static void acknowledge_write_callback(struct vdo_completion *completion) /** * allocate_block() - Attempt to allocate a block in the current allocation zone. + * @completion: The data_vio. * * This callback is registered in continue_write_with_block_map_slot(). */ @@ -1941,6 +1969,7 @@ static void allocate_block(struct vdo_completion *completion) /** * handle_allocation_error() - Handle an error attempting to allocate a block. + * @completion: The data_vio. * * This error handler is registered in continue_write_with_block_map_slot(). */ @@ -1970,6 +1999,7 @@ static int assert_is_discard(struct data_vio *data_vio) /** * continue_data_vio_with_block_map_slot() - Read the data_vio's mapping from the block map. + * @completion: The data_vio to continue. * * This callback is registered in launch_read_data_vio(). */ diff --git a/drivers/md/dm-vdo/dedupe.c b/drivers/md/dm-vdo/dedupe.c index 4d983092a152..75a26f3f4461 100644 --- a/drivers/md/dm-vdo/dedupe.c +++ b/drivers/md/dm-vdo/dedupe.c @@ -917,6 +917,8 @@ static int __must_check acquire_lock(struct hash_zone *zone, /** * enter_forked_lock() - Bind the data_vio to a new hash lock. + * @waiter: The data_vio's waiter link. + * @context: The new hash lock. * * Implements waiter_callback_fn. Binds the data_vio that was waiting to a new hash lock and waits * on that lock. @@ -971,7 +973,7 @@ static void fork_hash_lock(struct hash_lock *old_lock, struct data_vio *new_agen * path. * @lock: The hash lock. * @data_vio: The data_vio to deduplicate using the hash lock. - * @has_claim: true if the data_vio already has claimed an increment from the duplicate lock. + * @has_claim: True if the data_vio already has claimed an increment from the duplicate lock. * * If no increments are available, this will roll over to a new hash lock and launch the data_vio * as the writing agent for that lock. @@ -996,7 +998,7 @@ static void launch_dedupe(struct hash_lock *lock, struct data_vio *data_vio, * true copy of their data on disk. * @lock: The hash lock. 
* @agent: The data_vio acting as the agent for the lock. - * @agent_is_done: true only if the agent has already written or deduplicated against its data. + * @agent_is_done: True only if the agent has already written or deduplicated against its data. * * If the agent itself needs to deduplicate, an increment for it must already have been claimed * from the duplicate lock, ensuring the hash lock will still have a data_vio holding it. @@ -2146,8 +2148,8 @@ static void start_expiration_timer(struct dedupe_context *context) /** * report_dedupe_timeouts() - Record and eventually report that some dedupe requests reached their * expiration time without getting answers, so we timed them out. - * @zones: the hash zones. - * @timeouts: the number of newly timed out requests. + * @zones: The hash zones. + * @timeouts: The number of newly timed out requests. */ static void report_dedupe_timeouts(struct hash_zones *zones, unsigned int timeouts) { @@ -2509,6 +2511,8 @@ static void initiate_suspend_index(struct admin_state *state) /** * suspend_index() - Suspend the UDS index prior to draining hash zones. + * @context: Not used. + * @completion: The completion for the suspend operation. * * Implements vdo_action_preamble_fn */ @@ -2521,21 +2525,13 @@ static void suspend_index(void *context, struct vdo_completion *completion) initiate_suspend_index); } -/** - * initiate_drain() - Initiate a drain. - * - * Implements vdo_admin_initiator_fn. - */ +/** Implements vdo_admin_initiator_fn. */ static void initiate_drain(struct admin_state *state) { check_for_drain_complete(container_of(state, struct hash_zone, state)); } -/** - * drain_hash_zone() - Drain a hash zone. - * - * Implements vdo_zone_action_fn. - */ +/** Implements vdo_zone_action_fn. */ static void drain_hash_zone(void *context, zone_count_t zone_number, struct vdo_completion *parent) { @@ -2572,6 +2568,8 @@ static void launch_dedupe_state_change(struct hash_zones *zones) /** * resume_index() - Resume the UDS index prior to resuming hash zones. + * @context: Not used. + * @parent: The completion for the resume operation. * * Implements vdo_action_preamble_fn */ @@ -2602,11 +2600,7 @@ static void resume_index(void *context, struct vdo_completion *parent) vdo_finish_completion(parent); } -/** - * resume_hash_zone() - Resume a hash zone. - * - * Implements vdo_zone_action_fn. - */ +/** Implements vdo_zone_action_fn. */ static void resume_hash_zone(void *context, zone_count_t zone_number, struct vdo_completion *parent) { @@ -2634,7 +2628,7 @@ void vdo_resume_hash_zones(struct hash_zones *zones, struct vdo_completion *pare /** * get_hash_zone_statistics() - Add the statistics for this hash zone to the tally for all zones. * @zone: The hash zone to query. - * @tally: The tally + * @tally: The tally. */ static void get_hash_zone_statistics(const struct hash_zone *zone, struct hash_lock_statistics *tally) @@ -2680,8 +2674,8 @@ static void get_index_statistics(struct hash_zones *zones, /** * vdo_get_dedupe_statistics() - Tally the statistics from all the hash zones and the UDS index. - * @zones: The hash zones to query - * @stats: A structure to store the statistics + * @zones: The hash zones to query. + * @stats: A structure to store the statistics. * * Return: The sum of the hash lock statistics from all hash zones plus the statistics from the UDS * index @@ -2856,9 +2850,9 @@ void vdo_set_dedupe_index_min_timer_interval(unsigned int value) /** * acquire_context() - Acquire a dedupe context from a hash_zone if any are available. 
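acquire_context(), documented below, can be pictured as a two-source allocator: prefer the free list, then fall back to reclaiming contexts whose timed-out requests have since completed. A sketch, with the available list, active counter, and timed_out_complete funnel queue as assumed names:

static struct dedupe_context *acquire_context_sketch(struct hash_zone *zone)
{
	struct funnel_queue_entry *entry;

	if (!list_empty(&zone->available)) {
		struct dedupe_context *context =
			list_first_entry(&zone->available, struct dedupe_context,
					 list_entry);

		WRITE_ONCE(zone->active, zone->active + 1);
		list_del_init(&context->list_entry);
		return context;
	}

	/* Try to reclaim a context whose timed-out request finally answered. */
	entry = vdo_funnel_queue_poll(zone->timed_out_complete);
	return (entry == NULL) ? NULL :
	       container_of(entry, struct dedupe_context, queue_entry);
}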
- * @zone: the hash zone + * @zone: The hash zone. * - * Return: A dedupe_context or NULL if none are available + * Return: A dedupe_context or NULL if none are available. */ static struct dedupe_context * __must_check acquire_context(struct hash_zone *zone) { diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c index 0e04c2021682..6af40d40f255 100644 --- a/drivers/md/dm-vdo/dm-vdo-target.c +++ b/drivers/md/dm-vdo/dm-vdo-target.c @@ -1144,6 +1144,7 @@ static bool vdo_uses_device(struct vdo *vdo, const void *context) /** * get_thread_id_for_phase() - Get the thread id for the current phase of the admin operation in * progress. + * @vdo: The vdo. */ static thread_id_t __must_check get_thread_id_for_phase(struct vdo *vdo) { @@ -1188,9 +1189,9 @@ static struct vdo_completion *prepare_admin_completion(struct vdo *vdo, /** * advance_phase() - Increment the phase of the current admin operation and prepare the admin * completion to run on the thread for the next phase. - * @vdo: The on which an admin operation is being performed + * @vdo: The vdo on which an admin operation is being performed. * - * Return: The current phase + * Return: The current phase. */ static u32 advance_phase(struct vdo *vdo) { diff --git a/drivers/md/dm-vdo/encodings.c b/drivers/md/dm-vdo/encodings.c index b7cc0f41caca..dd59691be840 100644 --- a/drivers/md/dm-vdo/encodings.c +++ b/drivers/md/dm-vdo/encodings.c @@ -432,7 +432,10 @@ static void encode_block_map_state_2_0(u8 *buffer, size_t *offset, /** * vdo_compute_new_forest_pages() - Compute the number of pages which must be allocated at each * level in order to grow the forest to a new number of entries. + * @root_count: The number of block map roots. + * @old_sizes: The sizes of the old tree segments. * @entries: The new number of entries the block map must address. + * @new_sizes: The sizes of the new tree segments. * * Return: The total number of non-leaf pages required. */ @@ -462,6 +465,9 @@ block_count_t vdo_compute_new_forest_pages(root_count_t root_count, /** * encode_recovery_journal_state_7_0() - Encode the state of a recovery journal. + * @buffer: A buffer to store the encoding. + * @offset: The offset in the buffer at which to encode. + * @state: The recovery journal state to encode. * * Return: VDO_SUCCESS or an error code. */ @@ -484,6 +490,7 @@ static void encode_recovery_journal_state_7_0(u8 *buffer, size_t *offset, /** * decode_recovery_journal_state_7_0() - Decode the state of a recovery journal saved in a buffer. * @buffer: The buffer containing the saved state. + * @offset: The offset to start decoding from. * @state: A pointer to a recovery journal state to hold the result of a successful decode. * * Return: VDO_SUCCESS or an error code. @@ -544,6 +551,9 @@ const char *vdo_get_journal_operation_name(enum journal_operation operation) /** * encode_slab_depot_state_2_0() - Encode the state of a slab depot into a buffer. + * @buffer: A buffer to store the encoding. + * @offset: The offset in the buffer at which to encode. + * @state: The slab depot state to encode. */ static void encode_slab_depot_state_2_0(u8 *buffer, size_t *offset, struct slab_depot_state_2_0 state) @@ -570,6 +580,9 @@ static void encode_slab_depot_state_2_0(u8 *buffer, size_t *offset, /** * decode_slab_depot_state_2_0() - Decode slab depot component state version 2.0 from a buffer. + * @buffer: The buffer being decoded. + * @offset: The offset to start decoding from. + * @state: A pointer to a slab depot state to hold the decoded result. 
* * Return: VDO_SUCCESS or an error code. */ @@ -1156,6 +1169,9 @@ static struct vdo_component unpack_vdo_component_41_0(struct packed_vdo_componen /** * decode_vdo_component() - Decode the component data for the vdo itself out of the super block. + * @buffer: The buffer being decoded. + * @offset: The offset to start decoding from. + * @component: The vdo component structure to decode into. * * Return: VDO_SUCCESS or an error. */ @@ -1290,7 +1306,7 @@ void vdo_destroy_component_states(struct vdo_component_states *states) * understand. * @buffer: The buffer being decoded. * @offset: The offset to start decoding from. - * @geometry: The vdo geometry + * @geometry: The vdo geometry. * @states: An object to hold the successfully decoded state. * * Return: VDO_SUCCESS or an error. @@ -1329,7 +1345,7 @@ static int __must_check decode_components(u8 *buffer, size_t *offset, /** * vdo_decode_component_states() - Decode the payload of a super block. * @buffer: The buffer containing the encoded super block contents. - * @geometry: The vdo geometry + * @geometry: The vdo geometry. * @states: A pointer to hold the decoded states. * * Return: VDO_SUCCESS or an error. @@ -1383,6 +1399,9 @@ int vdo_validate_component_states(struct vdo_component_states *states, /** * vdo_encode_component_states() - Encode the state of all vdo components in the super block. + * @buffer: A buffer to store the encoding. + * @offset: The offset into the buffer to start the encoding. + * @states: The component states to encode. */ static void vdo_encode_component_states(u8 *buffer, size_t *offset, const struct vdo_component_states *states) @@ -1402,6 +1421,8 @@ static void vdo_encode_component_states(u8 *buffer, size_t *offset, /** * vdo_encode_super_block() - Encode a super block into its on-disk representation. + * @buffer: A buffer to store the encoding. + * @states: The component states to encode. */ void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states) { @@ -1426,6 +1447,7 @@ void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states) /** * vdo_decode_super_block() - Decode a super block from its on-disk representation. + * @buffer: The buffer to decode from. */ int vdo_decode_super_block(u8 *buffer) { diff --git a/drivers/md/dm-vdo/flush.c b/drivers/md/dm-vdo/flush.c index dd4fdee2ca0c..82a259ef1601 100644 --- a/drivers/md/dm-vdo/flush.c +++ b/drivers/md/dm-vdo/flush.c @@ -522,11 +522,7 @@ static void vdo_complete_flush(struct vdo_flush *flush) vdo_enqueue_completion(completion, BIO_Q_FLUSH_PRIORITY); } -/** - * initiate_drain() - Initiate a drain. - * - * Implements vdo_admin_initiator_fn. - */ +/** Implements vdo_admin_initiator_fn. */ static void initiate_drain(struct admin_state *state) { check_for_drain_complete(container_of(state, struct flusher, state)); diff --git a/drivers/md/dm-vdo/funnel-workqueue.c b/drivers/md/dm-vdo/funnel-workqueue.c index 0613c82bbe8e..8a79b33b8b09 100644 --- a/drivers/md/dm-vdo/funnel-workqueue.c +++ b/drivers/md/dm-vdo/funnel-workqueue.c @@ -372,6 +372,13 @@ static int make_simple_work_queue(const char *thread_name_prefix, const char *na /** * vdo_make_work_queue() - Create a work queue; if multiple threads are requested, completions will * be distributed to them in round-robin fashion. + * @thread_name_prefix: A prefix for the thread names to identify them as a vdo thread. + * @name: A base name to identify this queue. + * @owner: The vdo_thread structure to manage this queue. + * @type: The type of queue to create. 
+ * @thread_count: The number of actual threads handling this queue. + * @thread_privates: An array of private contexts, one for each thread; may be NULL. + * @queue_ptr: A pointer to return the new work queue. * * Each queue is associated with a struct vdo_thread which has a single vdo thread id. Regardless * of the actual number of queues and threads allocated here, code outside of the queue diff --git a/drivers/md/dm-vdo/io-submitter.c b/drivers/md/dm-vdo/io-submitter.c index 11d47770b54d..e26d75f8366d 100644 --- a/drivers/md/dm-vdo/io-submitter.c +++ b/drivers/md/dm-vdo/io-submitter.c @@ -118,6 +118,7 @@ static void send_bio_to_device(struct vio *vio, struct bio *bio) /** * vdo_submit_vio() - Submits a vio's bio to the underlying block device. May block if the device * is busy. This callback should be used by vios which did not attempt to merge. + * @completion: The vio to submit. */ void vdo_submit_vio(struct vdo_completion *completion) { @@ -133,7 +134,7 @@ void vdo_submit_vio(struct vdo_completion *completion) * The list will always contain at least one entry (the bio for the vio on which it is called), but * other bios may have been merged with it as well. * - * Return: bio The head of the bio list to submit. + * Return: The head of the bio list to submit. */ static struct bio *get_bio_list(struct vio *vio) { @@ -158,6 +159,7 @@ static struct bio *get_bio_list(struct vio *vio) /** * submit_data_vio() - Submit a data_vio's bio to the storage below along with * any bios that have been merged with it. + * @completion: The vio to submit. * * Context: This call may block and so should only be called from a bio thread. */ @@ -184,7 +186,7 @@ static void submit_data_vio(struct vdo_completion *completion) * There are two types of merging possible, forward and backward, which are distinguished by a flag * that uses kernel elevator terminology. * - * Return: the vio to merge to, NULL if no merging is possible. + * Return: The vio to merge to, NULL if no merging is possible. */ static struct vio *get_mergeable_locked(struct int_map *map, struct vio *vio, bool back_merge) @@ -262,7 +264,7 @@ static int merge_to_next_head(struct int_map *bio_map, struct vio *vio, * * Currently this is only used for data_vios, but is broken out for future use with metadata vios. * - * Return: whether or not the vio was merged. + * Return: Whether or not the vio was merged. */ static bool try_bio_map_merge(struct vio *vio) { @@ -306,7 +308,7 @@ static bool try_bio_map_merge(struct vio *vio) /** * vdo_submit_data_vio() - Submit I/O for a data_vio. - * @data_vio: the data_vio for which to issue I/O. + * @data_vio: The data_vio for which to issue I/O. * * If possible, this I/O will be merged other pending I/Os. Otherwise, the data_vio will be sent to * the appropriate bio zone directly. @@ -321,13 +323,13 @@ void vdo_submit_data_vio(struct data_vio *data_vio) /** * __submit_metadata_vio() - Submit I/O for a metadata vio. - * @vio: the vio for which to issue I/O - * @physical: the physical block number to read or write - * @callback: the bio endio function which will be called after the I/O completes - * @error_handler: the handler for submission or I/O errors (may be NULL) - * @operation: the type of I/O to perform - * @data: the buffer to read or write (may be NULL) - * @size: the I/O amount in bytes + * @vio: The vio for which to issue I/O. + * @physical: The physical block number to read or write. + * @callback: The bio endio function which will be called after the I/O completes. 
+ * @error_handler: The handler for submission or I/O errors; may be NULL. + * @operation: The type of I/O to perform. + * @data: The buffer to read or write; may be NULL. + * @size: The I/O amount in bytes. * * The vio is enqueued on a vdo bio queue so that bio submission (which may block) does not block * other vdo threads. @@ -441,7 +443,7 @@ int vdo_make_io_submitter(unsigned int thread_count, unsigned int rotation_inter /** * vdo_cleanup_io_submitter() - Tear down the io_submitter fields as needed for a physical layer. - * @io_submitter: The I/O submitter data to tear down (may be NULL). + * @io_submitter: The I/O submitter data to tear down; may be NULL. */ void vdo_cleanup_io_submitter(struct io_submitter *io_submitter) { diff --git a/drivers/md/dm-vdo/logger.c b/drivers/md/dm-vdo/logger.c index 3f7dc2cb6b98..76a987ccf926 100644 --- a/drivers/md/dm-vdo/logger.c +++ b/drivers/md/dm-vdo/logger.c @@ -34,7 +34,7 @@ static const char *get_current_interrupt_type(void) if (in_nmi()) return "NMI"; - if (in_irq()) + if (in_hardirq()) return "HI"; if (in_softirq()) diff --git a/drivers/md/dm-vdo/logical-zone.c b/drivers/md/dm-vdo/logical-zone.c index 026f031ffc9e..0a27e60a9dfd 100644 --- a/drivers/md/dm-vdo/logical-zone.c +++ b/drivers/md/dm-vdo/logical-zone.c @@ -159,21 +159,13 @@ static void check_for_drain_complete(struct logical_zone *zone) vdo_finish_draining(&zone->state); } -/** - * initiate_drain() - Initiate a drain. - * - * Implements vdo_admin_initiator_fn. - */ +/** Implements vdo_admin_initiator_fn. */ static void initiate_drain(struct admin_state *state) { check_for_drain_complete(container_of(state, struct logical_zone, state)); } -/** - * drain_logical_zone() - Drain a logical zone. - * - * Implements vdo_zone_action_fn. - */ +/** Implements vdo_zone_action_fn. */ static void drain_logical_zone(void *context, zone_count_t zone_number, struct vdo_completion *parent) { @@ -192,11 +184,7 @@ void vdo_drain_logical_zones(struct logical_zones *zones, parent); } -/** - * resume_logical_zone() - Resume a logical zone. - * - * Implements vdo_zone_action_fn. - */ +/** Implements vdo_zone_action_fn. */ static void resume_logical_zone(void *context, zone_count_t zone_number, struct vdo_completion *parent) { @@ -356,7 +344,7 @@ struct physical_zone *vdo_get_next_allocation_zone(struct logical_zone *zone) /** * vdo_dump_logical_zone() - Dump information about a logical zone to the log for debugging. - * @zone: The zone to dump + * @zone: The zone to dump. * * Context: the information is dumped in a thread-unsafe fashion. * diff --git a/drivers/md/dm-vdo/packer.c b/drivers/md/dm-vdo/packer.c index f70f5edabc10..666be6d557e1 100644 --- a/drivers/md/dm-vdo/packer.c +++ b/drivers/md/dm-vdo/packer.c @@ -35,10 +35,10 @@ static const struct version_number COMPRESSED_BLOCK_1_0 = { /** * vdo_get_compressed_block_fragment() - Get a reference to a compressed fragment from a compressed * block. - * @mapping_state [in] The mapping state for the look up. - * @compressed_block [in] The compressed block that was read from disk. - * @fragment_offset [out] The offset of the fragment within a compressed block. - * @fragment_size [out] The size of the fragment. + * @mapping_state: The mapping state describing the fragment. + * @block: The compressed block that was read from disk. + * @fragment_offset: The offset of the fragment within the compressed block. + * @fragment_size: The size of the fragment. 
* * Return: If a valid compressed fragment is found, VDO_SUCCESS; otherwise, VDO_INVALID_FRAGMENT if * the fragment is invalid. @@ -382,6 +382,7 @@ static void initialize_compressed_block(struct compressed_block *block, u16 size * @compression: The agent's compression_state to pack in to. * @data_vio: The data_vio to pack. * @offset: The offset into the compressed block at which to pack the fragment. + * @slot: The slot number in the compressed block. * @block: The compressed block which will be written out when batch is fully packed. * * Return: The new amount of space used. @@ -705,11 +706,7 @@ void vdo_increment_packer_flush_generation(struct packer *packer) vdo_flush_packer(packer); } -/** - * initiate_drain() - Initiate a drain. - * - * Implements vdo_admin_initiator_fn. - */ +/** Implements vdo_admin_initiator_fn. */ static void initiate_drain(struct admin_state *state) { struct packer *packer = container_of(state, struct packer, state); diff --git a/drivers/md/dm-vdo/physical-zone.c b/drivers/md/dm-vdo/physical-zone.c index a43b5c45fab7..686eb7d714e6 100644 --- a/drivers/md/dm-vdo/physical-zone.c +++ b/drivers/md/dm-vdo/physical-zone.c @@ -60,7 +60,7 @@ static inline bool has_lock_type(const struct pbn_lock *lock, enum pbn_lock_type * vdo_is_pbn_read_lock() - Check whether a pbn_lock is a read lock. * @lock: The lock to check. * - * Return: true if the lock is a read lock. + * Return: True if the lock is a read lock. */ bool vdo_is_pbn_read_lock(const struct pbn_lock *lock) { @@ -75,6 +75,7 @@ static inline void set_pbn_lock_type(struct pbn_lock *lock, enum pbn_lock_type t /** * vdo_downgrade_pbn_write_lock() - Downgrade a PBN write lock to a PBN read lock. * @lock: The PBN write lock to downgrade. + * @compressed_write: True if the written block was a compressed block. * * The lock holder count is cleared and the caller is responsible for setting the new count. */ @@ -582,7 +583,7 @@ static bool continue_allocating(struct data_vio *data_vio) * that fails try the next if possible. * @data_vio: The data_vio needing an allocation. * - * Return: true if a block was allocated, if not the data_vio will have been dispatched so the + * Return: True if a block was allocated, if not the data_vio will have been dispatched so the * caller must not touch it. */ bool vdo_allocate_block_in_zone(struct data_vio *data_vio) diff --git a/drivers/md/dm-vdo/recovery-journal.c b/drivers/md/dm-vdo/recovery-journal.c index de58184f538f..9cc0f0ff1664 100644 --- a/drivers/md/dm-vdo/recovery-journal.c +++ b/drivers/md/dm-vdo/recovery-journal.c @@ -109,7 +109,7 @@ static atomic_t *get_decrement_counter(struct recovery_journal *journal, * @journal: The recovery journal. * @lock_number: The lock to check. * - * Return: true if the journal zone is locked. + * Return: True if the journal zone is locked. */ static bool is_journal_zone_locked(struct recovery_journal *journal, block_count_t lock_number) @@ -217,7 +217,7 @@ static struct recovery_journal_block * __must_check pop_free_list(struct recover * Indicates it has any uncommitted entries, which includes both entries not written and entries * written but not yet acknowledged. * - * Return: true if the block has any uncommitted entries. + * Return: True if the block has any uncommitted entries. */ static inline bool __must_check is_block_dirty(const struct recovery_journal_block *block) { @@ -228,7 +228,7 @@ static inline bool __must_check is_block_dirty(const struct recovery_journal_blo * is_block_empty() - Check whether a journal block is empty. 
* @block: The block to check. * - * Return: true if the block has no entries. + * Return: True if the block has no entries. */ static inline bool __must_check is_block_empty(const struct recovery_journal_block *block) { @@ -239,7 +239,7 @@ static inline bool __must_check is_block_empty(const struct recovery_journal_blo * is_block_full() - Check whether a journal block is full. * @block: The block to check. * - * Return: true if the block is full. + * Return: True if the block is full. */ static inline bool __must_check is_block_full(const struct recovery_journal_block *block) { @@ -260,6 +260,8 @@ static void assert_on_journal_thread(struct recovery_journal *journal, /** * continue_waiter() - Release a data_vio from the journal. + * @waiter: The data_vio waiting on journal activity. + * @context: The result of the journal operation. * * Invoked whenever a data_vio is to be released from the journal, either because its entry was * committed to disk, or because there was an error. Implements waiter_callback_fn. @@ -273,7 +275,7 @@ static void continue_waiter(struct vdo_waiter *waiter, void *context) * has_block_waiters() - Check whether the journal has any waiters on any blocks. * @journal: The journal in question. * - * Return: true if any block has a waiter. + * Return: True if any block has a waiter. */ static inline bool has_block_waiters(struct recovery_journal *journal) { @@ -296,7 +298,7 @@ static void notify_commit_waiters(struct recovery_journal *journal); * suspend_lock_counter() - Prevent the lock counter from notifying. * @counter: The counter. * - * Return: true if the lock counter was not notifying and hence the suspend was efficacious. + * Return: True if the lock counter was not notifying and hence the suspend was efficacious. */ static bool suspend_lock_counter(struct lock_counter *counter) { @@ -416,7 +418,7 @@ sequence_number_t vdo_get_recovery_journal_current_sequence_number(struct recove * * The head is the lowest sequence number of the block map head and the slab journal head. * - * Return: the head of the journal. + * Return: The head of the journal. */ static inline sequence_number_t get_recovery_journal_head(const struct recovery_journal *journal) { @@ -535,7 +537,7 @@ static void initialize_journal_state(struct recovery_journal *journal) * vdo_get_recovery_journal_length() - Get the number of usable recovery journal blocks. * @journal_size: The size of the recovery journal in blocks. * - * Return: the number of recovery journal blocks usable for entries. + * Return: The number of recovery journal blocks usable for entries. */ block_count_t vdo_get_recovery_journal_length(block_count_t journal_size) { @@ -1078,6 +1080,8 @@ static void update_usages(struct recovery_journal *journal, struct data_vio *dat /** * assign_entry() - Assign an entry waiter to the active block. + * @waiter: The data_vio. + * @context: The recovery journal block. * * Implements waiter_callback_fn. */ @@ -1165,6 +1169,8 @@ static void recycle_journal_block(struct recovery_journal_block *block) /** * continue_committed_waiter() - invoked whenever a VIO is to be released from the journal because * its entry was committed to disk. + * @waiter: The data_vio waiting on a journal write. + * @context: A pointer to the recovery journal. * * Implements waiter_callback_fn. */ @@ -1362,6 +1368,8 @@ static void add_queued_recovery_entries(struct recovery_journal_block *block) /** * write_block() - Issue a block for writing. + * @waiter: The recovery journal block to write. + * @context: Not used. 
* * Implements waiter_callback_fn. */ @@ -1611,11 +1619,7 @@ void vdo_release_journal_entry_lock(struct recovery_journal *journal, smp_mb__after_atomic(); } -/** - * initiate_drain() - Initiate a drain. - * - * Implements vdo_admin_initiator_fn. - */ +/** Implements vdo_admin_initiator_fn. */ static void initiate_drain(struct admin_state *state) { check_for_drain_complete(container_of(state, struct recovery_journal, state)); diff --git a/drivers/md/dm-vdo/slab-depot.c b/drivers/md/dm-vdo/slab-depot.c index f3d80ff7bef5..034ecaa51f48 100644 --- a/drivers/md/dm-vdo/slab-depot.c +++ b/drivers/md/dm-vdo/slab-depot.c @@ -40,7 +40,7 @@ static const bool NORMAL_OPERATION = true; /** * get_lock() - Get the lock object for a slab journal block by sequence number. - * @journal: vdo_slab journal to retrieve from. + * @journal: The vdo_slab journal to retrieve from. * @sequence_number: Sequence number of the block. * * Return: The lock object for the given sequence number. @@ -110,7 +110,7 @@ static void initialize_journal_state(struct slab_journal *journal) * block_is_full() - Check whether a journal block is full. * @journal: The slab journal for the block. * - * Return: true if the tail block is full. + * Return: True if the tail block is full. */ static bool __must_check block_is_full(struct slab_journal *journal) { @@ -127,10 +127,11 @@ static void release_journal_locks(struct vdo_waiter *waiter, void *context); /** * is_slab_journal_blank() - Check whether a slab's journal is blank. + * @slab: The slab to check. * * A slab journal is blank if it has never had any entries recorded in it. * - * Return: true if the slab's journal has never been modified. + * Return: True if the slab's journal has never been modified. */ static bool is_slab_journal_blank(const struct vdo_slab *slab) { @@ -227,6 +228,7 @@ static u8 __must_check compute_fullness_hint(struct slab_depot *depot, /** * check_summary_drain_complete() - Check whether an allocators summary has finished draining. + * @allocator: The allocator to check. */ static void check_summary_drain_complete(struct block_allocator *allocator) { @@ -349,7 +351,7 @@ static void launch_write(struct slab_summary_block *block) /** * update_slab_summary_entry() - Update the entry for a slab. - * @slab: The slab whose entry is to be updated + * @slab: The slab whose entry is to be updated. * @waiter: The waiter that is updating the summary. * @tail_block_offset: The offset of the slab journal's tail block. * @load_ref_counts: Whether the reference counts must be loaded from disk on the vdo load. @@ -654,6 +656,7 @@ static void update_tail_block_location(struct slab_journal *journal) /** * reopen_slab_journal() - Reopen a slab's journal by emptying it and then adding pending entries. + * @slab: The slab to reopen. */ static void reopen_slab_journal(struct vdo_slab *slab) { @@ -839,8 +842,6 @@ static void commit_tail(struct slab_journal *journal) * @sbn: The slab block number of the entry to encode. * @operation: The type of the entry. * @increment: True if this is an increment. - * - * Exposed for unit tests. */ static void encode_slab_journal_entry(struct slab_journal_block_header *tail_header, slab_journal_payload *payload, @@ -951,7 +952,7 @@ static inline block_count_t journal_length(const struct slab_journal *journal) * @parent: The completion to notify when there is space to add the entry if the entry could not be * added immediately. * - * Return: true if the entry was added immediately. + * Return: True if the entry was added immediately. 
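The recovery journal head rule quoted earlier, "the lowest sequence number of the block map head and the slab journal head", reduces to a min(); a sketch using the field names implied by that description:

/* The journal cannot reap past whichever consumer is furthest behind. */
static inline sequence_number_t journal_head_sketch(const struct recovery_journal *journal)
{
	return min(journal->block_map_head, journal->slab_journal_head);
}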
*/ bool vdo_attempt_replay_into_slab(struct vdo_slab *slab, physical_block_number_t pbn, enum journal_operation operation, bool increment, @@ -1003,7 +1004,7 @@ bool vdo_attempt_replay_into_slab(struct vdo_slab *slab, physical_block_number_t * requires_reaping() - Check whether the journal must be reaped before adding new entries. * @journal: The journal to check. * - * Return: true if the journal must be reaped. + * Return: True if the journal must be reaped. */ static bool requires_reaping(const struct slab_journal *journal) { @@ -1275,6 +1276,8 @@ static void dirty_block(struct reference_block *block) /** * get_reference_block() - Get the reference block that covers the given block index. + * @slab: The slab containing the references. + * @index: The index of the physical block. */ static struct reference_block * __must_check get_reference_block(struct vdo_slab *slab, slab_block_number index) @@ -1379,7 +1382,8 @@ static void prioritize_slab(struct vdo_slab *slab) /** * adjust_free_block_count() - Adjust the free block count and (if needed) reprioritize the slab. - * @incremented: true if the free block count went up. + * @slab: The slab. + * @incremented: True if the free block count went up. */ static void adjust_free_block_count(struct vdo_slab *slab, bool incremented) { @@ -1885,6 +1889,7 @@ static void add_entries(struct slab_journal *journal) /** * reset_search_cursor() - Reset the free block search back to the first reference counter in the * first reference block of a slab. + * @slab: The slab. */ static void reset_search_cursor(struct vdo_slab *slab) { @@ -1892,17 +1897,17 @@ static void reset_search_cursor(struct vdo_slab *slab) cursor->block = cursor->first_block; cursor->index = 0; - /* Unit tests have slabs with only one reference block (and it's a runt). */ cursor->end_index = min_t(u32, COUNTS_PER_BLOCK, slab->block_count); } /** * advance_search_cursor() - Advance the search cursor to the start of the next reference block in - * a slab, + * a slab. + * @slab: The slab. * * Wraps around to the first reference block if the current block is the last reference block. * - * Return: true unless the cursor was at the last reference block. + * Return: True unless the cursor was at the last reference block. */ static bool advance_search_cursor(struct vdo_slab *slab) { @@ -1933,6 +1938,9 @@ static bool advance_search_cursor(struct vdo_slab *slab) /** * vdo_adjust_reference_count_for_rebuild() - Adjust the reference count of a block during rebuild. + * @depot: The slab depot. + * @pbn: The physical block number to adjust. + * @operation: The type of operation. * * Return: VDO_SUCCESS or an error. */ @@ -2038,9 +2046,7 @@ static inline slab_block_number find_zero_byte_in_word(const u8 *word_ptr, * @slab: The slab counters to scan. * @index_ptr: A pointer to hold the array index of the free block. * - * Exposed for unit testing. - * - * Return: true if a free block was found in the specified range. + * Return: True if a free block was found in the specified range. */ static bool find_free_block(const struct vdo_slab *slab, slab_block_number *index_ptr) { @@ -2097,7 +2103,7 @@ static bool find_free_block(const struct vdo_slab *slab, slab_block_number *inde * @slab: The slab to search. * @free_index_ptr: A pointer to receive the array index of the zero reference count. * - * Return: true if an unreferenced counter was found. + * Return: True if an unreferenced counter was found.
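match_bytes(), whose contract is documented above, is a classic SWAR byte-match. One well-known branch-free construction that yields 1 in each matching byte (a sketch, not necessarily the exact code in this file):

static inline u64 match_bytes_sketch(u64 input, u8 match)
{
	/* XOR turns each byte equal to 'match' into zero. */
	u64 temp = input ^ (match * 0x0101010101010101ULL);
	/* High bit of each byte set iff that byte's own high bit was clear. */
	u64 test_top = ~temp & 0x8080808080808080ULL;
	/*
	 * High bit of each byte set iff its low seven bits were all zero; no
	 * borrow crosses byte boundaries since each subtrahend is at most 0x7f.
	 */
	u64 test_low = 0x8080808080808080ULL - (temp & 0x7f7f7f7f7f7f7f7fULL);

	/* Both tests pass only for zero bytes; move each flag to bit 0. */
	return (test_top & test_low) >> 7;
}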
*/ static bool search_current_reference_block(const struct vdo_slab *slab, slab_block_number *free_index_ptr) @@ -2116,7 +2122,7 @@ static bool search_current_reference_block(const struct vdo_slab *slab, * counter index saved in the search cursor and searching up to the end of the last reference * block. The search does not wrap. * - * Return: true if an unreferenced counter was found. + * Return: True if an unreferenced counter was found. */ static bool search_reference_blocks(struct vdo_slab *slab, slab_block_number *free_index_ptr) @@ -2136,6 +2142,8 @@ static bool search_reference_blocks(struct vdo_slab *slab, /** * make_provisional_reference() - Do the bookkeeping for making a provisional reference. + * @slab: The slab. + * @block_number: The index for the physical block to reference. */ static void make_provisional_reference(struct vdo_slab *slab, slab_block_number block_number) @@ -2155,6 +2163,7 @@ static void make_provisional_reference(struct vdo_slab *slab, /** * dirty_all_reference_blocks() - Mark all reference count blocks in a slab as dirty. + * @slab: The slab. */ static void dirty_all_reference_blocks(struct vdo_slab *slab) { @@ -2173,10 +2182,10 @@ static inline bool journal_points_equal(struct journal_point first, /** * match_bytes() - Check an 8-byte word for bytes matching the value specified - * @input: A word to examine the bytes of - * @match: The byte value sought + * @input: A word to examine the bytes of. + * @match: The byte value sought. * - * Return: 1 in each byte when the corresponding input byte matched, 0 otherwise + * Return: 1 in each byte when the corresponding input byte matched, 0 otherwise. */ static inline u64 match_bytes(u64 input, u8 match) { @@ -2191,12 +2200,12 @@ static inline u64 match_bytes(u64 input, u8 match) /** * count_valid_references() - Process a newly loaded refcount array - * @counters: the array of counters from a metadata block + * @counters: The array of counters from a metadata block. * - * Scan a 8-byte-aligned array of counters, fixing up any "provisional" values that weren't - * cleaned up at shutdown, changing them internally to "empty". + * Scan an 8-byte-aligned array of counters, fixing up any provisional values that + * weren't cleaned up at shutdown, changing them internally to zero. * - * Return: the number of blocks that are referenced (counters not "empty") + * Return: The number of blocks with a non-zero reference count. */ static unsigned int count_valid_references(vdo_refcount_t *counters) { @@ -2351,6 +2360,7 @@ static void load_reference_block_group(struct vdo_waiter *waiter, void *context) /** * load_reference_blocks() - Load a slab's reference blocks from the underlying storage into a * pre-allocated reference counter. + * @slab: The slab. */ static void load_reference_blocks(struct vdo_slab *slab) { @@ -2375,6 +2385,7 @@ static void load_reference_blocks(struct vdo_slab *slab) /** * drain_slab() - Drain all reference count I/O. + * @slab: The slab. * * Depending upon the type of drain being performed (as recorded in the ref_count's vdo_slab), the * reference blocks may be loaded from disk or dirty reference blocks may be written out. @@ -2564,6 +2575,7 @@ static void read_slab_journal_tail(struct vdo_waiter *waiter, void *context) /** * load_slab_journal() - Load a slab's journal by reading the journal's tail. + * @slab: The slab. 
*/ static void load_slab_journal(struct vdo_slab *slab) { @@ -2663,11 +2675,7 @@ static void queue_slab(struct vdo_slab *slab) prioritize_slab(slab); } -/** - * initiate_slab_action() - Initiate a slab action. - * - * Implements vdo_admin_initiator_fn. - */ +/** Implements vdo_admin_initiator_fn. */ static void initiate_slab_action(struct admin_state *state) { struct vdo_slab *slab = container_of(state, struct vdo_slab, state); @@ -2720,7 +2728,7 @@ static struct vdo_slab *get_next_slab(struct slab_scrubber *scrubber) * has_slabs_to_scrub() - Check whether a scrubber has slabs to scrub. * @scrubber: The scrubber to check. * - * Return: true if the scrubber has slabs to scrub. + * Return: True if the scrubber has slabs to scrub. */ static inline bool __must_check has_slabs_to_scrub(struct slab_scrubber *scrubber) { @@ -2741,6 +2749,7 @@ static void uninitialize_scrubber_vio(struct slab_scrubber *scrubber) * finish_scrubbing() - Stop scrubbing, either because there are no more slabs to scrub or because * there's been an error. * @scrubber: The scrubber. + * @result: The result of the scrubbing operation. */ static void finish_scrubbing(struct slab_scrubber *scrubber, int result) { @@ -3132,11 +3141,13 @@ static struct vdo_slab *next_slab(struct slab_iterator *iterator) /** * abort_waiter() - Abort vios waiting to make journal entries when read-only. + * @waiter: A waiting data_vio. + * @context: Not used. * * This callback is invoked on all vios waiting to make slab journal entries after the VDO has gone * into read-only mode. Implements waiter_callback_fn. */ -static void abort_waiter(struct vdo_waiter *waiter, void *context __always_unused) +static void abort_waiter(struct vdo_waiter *waiter, void __always_unused *context) { struct reference_updater *updater = container_of(waiter, struct reference_updater, waiter); @@ -3536,7 +3547,7 @@ static void initiate_load(struct admin_state *state) /** * vdo_notify_slab_journals_are_recovered() - Inform a block allocator that its slab journals have * been recovered from the recovery journal. - * @completion The allocator completion + * @completion: The allocator completion. */ void vdo_notify_slab_journals_are_recovered(struct vdo_completion *completion) { @@ -3775,7 +3786,7 @@ static int initialize_slab_journal(struct vdo_slab *slab) * in the slab. * @allocator: The block allocator to which the slab belongs. * @slab_number: The slab number of the slab. - * @is_new: true if this slab is being allocated as part of a resize. + * @is_new: True if this slab is being allocated as part of a resize. * @slab_ptr: A pointer to receive the new slab. * * Return: VDO_SUCCESS or an error code. @@ -3894,11 +3905,7 @@ void vdo_abandon_new_slabs(struct slab_depot *depot) vdo_free(vdo_forget(depot->new_slabs)); } -/** - * get_allocator_thread_id() - Get the ID of the thread on which a given allocator operates. - * - * Implements vdo_zone_thread_getter_fn. - */ +/** Implements vdo_zone_thread_getter_fn. */ static thread_id_t get_allocator_thread_id(void *context, zone_count_t zone_number) { return ((struct slab_depot *) context)->allocators[zone_number].thread_id; @@ -3911,7 +3918,7 @@ static thread_id_t get_allocator_thread_id(void *context, zone_count_t zone_numb * @recovery_lock: The sequence number of the recovery journal block whose locks should be * released. * - * Return: true if the journal does hold a lock on the specified block (which it will release). + * Return: True if the journal released a lock on the specified block. 
*/ static bool __must_check release_recovery_journal_lock(struct slab_journal *journal, sequence_number_t recovery_lock) @@ -3955,6 +3962,8 @@ static void release_tail_block_locks(void *context, zone_count_t zone_number, /** * prepare_for_tail_block_commit() - Prepare to commit oldest tail blocks. + * @context: The slab depot. + * @parent: The parent operation. * * Implements vdo_action_preamble_fn. */ @@ -3968,6 +3977,7 @@ static void prepare_for_tail_block_commit(void *context, struct vdo_completion * /** * schedule_tail_block_commit() - Schedule a tail block commit if necessary. + * @context: The slab depot. * * This method should not be called directly. Rather, call vdo_schedule_default_action() on the * depot's action manager. @@ -4361,6 +4371,7 @@ struct slab_depot_state_2_0 vdo_record_slab_depot(const struct slab_depot *depot /** * vdo_allocate_reference_counters() - Allocate the reference counters for all slabs in the depot. + * @depot: The slab depot. * * Context: This method may be called only before entering normal operation from the load thread. * @@ -4615,7 +4626,9 @@ static void load_summary_endio(struct bio *bio) } /** - * load_slab_summary() - The preamble of a load operation. + * load_slab_summary() - Load the slab summary before the slab data. + * @context: The slab depot. + * @parent: The load operation. * * Implements vdo_action_preamble_fn. */ @@ -4731,7 +4744,7 @@ void vdo_update_slab_depot_size(struct slab_depot *depot) * vdo_prepare_to_grow_slab_depot() - Allocate new memory needed for a resize of a slab depot to * the given size. * @depot: The depot to prepare to resize. - * @partition: The new depot partition + * @partition: The new depot partition. * * Return: VDO_SUCCESS or an error. */ @@ -4781,6 +4794,7 @@ int vdo_prepare_to_grow_slab_depot(struct slab_depot *depot, /** * finish_registration() - Finish registering new slabs now that all of the allocators have * received their new slabs. + * @context: The slab depot. * * Implements vdo_action_conclusion_fn. */ diff --git a/drivers/md/dm-vdo/vdo.c b/drivers/md/dm-vdo/vdo.c index 80b608674022..09fd0628d18c 100644 --- a/drivers/md/dm-vdo/vdo.c +++ b/drivers/md/dm-vdo/vdo.c @@ -181,6 +181,8 @@ static void assign_thread_ids(struct thread_config *config, /** * initialize_thread_config() - Initialize the thread mapping + * @counts: The number and types of threads to create. + * @config: The thread_config to initialize. * * If the logical, physical, and hash zone counts are all 0, a single thread will be shared by all * three plus the packer and recovery journal. Otherwise, there must be at least one of each type, @@ -884,6 +886,7 @@ const struct admin_state_code *vdo_get_admin_state(const struct vdo *vdo) /** * record_vdo() - Record the state of the VDO for encoding in the super block. + * @vdo: The vdo. */ static void record_vdo(struct vdo *vdo) { @@ -1277,7 +1280,7 @@ void vdo_enter_read_only_mode(struct vdo *vdo, int error_code) * vdo_is_read_only() - Check whether the VDO is read-only. * @vdo: The vdo. * - * Return: true if the vdo is read-only. + * Return: True if the vdo is read-only. * * This method may be called from any thread, as opposed to examining the VDO's state field which * is only safe to check from the admin thread. @@ -1291,7 +1294,7 @@ bool vdo_is_read_only(struct vdo *vdo) * vdo_in_read_only_mode() - Check whether a vdo is in read-only mode. * @vdo: The vdo to query. * - * Return: true if the vdo is in read-only mode. + * Return: True if the vdo is in read-only mode. 
*/ bool vdo_in_read_only_mode(const struct vdo *vdo) { @@ -1302,7 +1305,7 @@ bool vdo_in_read_only_mode(const struct vdo *vdo) * vdo_in_recovery_mode() - Check whether the vdo is in recovery mode. * @vdo: The vdo to query. * - * Return: true if the vdo is in recovery mode. + * Return: True if the vdo is in recovery mode. */ bool vdo_in_recovery_mode(const struct vdo *vdo) { diff --git a/drivers/md/dm-vdo/vdo.h b/drivers/md/dm-vdo/vdo.h index 483ae873e002..1aaba73997b7 100644 --- a/drivers/md/dm-vdo/vdo.h +++ b/drivers/md/dm-vdo/vdo.h @@ -279,8 +279,10 @@ static inline bool vdo_uses_bio_ack_queue(struct vdo *vdo) /** * typedef vdo_filter_fn - Method type for vdo matching methods. + * @vdo: The vdo to match. + * @context: A parameter for the filter to use. * - * A filter function returns false if the vdo doesn't match. + * Return: True if the vdo matches the filter criteria, false if it doesn't. */ typedef bool (*vdo_filter_fn)(struct vdo *vdo, const void *context); diff --git a/drivers/md/dm-vdo/vio.c b/drivers/md/dm-vdo/vio.c index 8fc22fb14196..5ffc867d9c5e 100644 --- a/drivers/md/dm-vdo/vio.c +++ b/drivers/md/dm-vdo/vio.c @@ -398,8 +398,9 @@ void free_vio_pool(struct vio_pool *pool) /** * is_vio_pool_busy() - Check whether an vio pool has outstanding entries. + * @pool: The vio pool. * - * Return: true if the pool is busy. + * Return: True if the pool is busy. */ bool is_vio_pool_busy(struct vio_pool *pool) { diff --git a/drivers/md/dm-vdo/vio.h b/drivers/md/dm-vdo/vio.h index 4bfcb21901f1..7a8a6819aec4 100644 --- a/drivers/md/dm-vdo/vio.h +++ b/drivers/md/dm-vdo/vio.h @@ -156,8 +156,7 @@ static inline enum vdo_completion_priority get_metadata_priority(struct vio *vio /** * continue_vio() - Enqueue a vio to run its next callback. * @vio: The vio to continue. - * - * Return: The result of the current operation. + * @result: The result of the current operation. */ static inline void continue_vio(struct vio *vio, int result) { @@ -172,6 +171,9 @@ void vdo_count_completed_bios(struct bio *bio); /** * continue_vio_after_io() - Continue a vio now that its I/O has returned. + * @vio: The vio to continue. + * @callback: The next operation for this vio. + * @thread: Which thread to run the next operation on. */ static inline void continue_vio_after_io(struct vio *vio, vdo_action_fn callback, thread_id_t thread) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index d382a390d39a..c79de517afee 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -177,9 +177,11 @@ error: if (r < 0 && neras) DMERR_LIMIT("%s: FEC %llu: failed to correct: %d", v->data_dev->name, (unsigned long long)rsb, r); - else if (r > 0) + else if (r > 0) { DMWARN_LIMIT("%s: FEC %llu: corrected %d errors", v->data_dev->name, (unsigned long long)rsb, r); + atomic64_inc(&v->fec->corrected); + } return r; } @@ -188,14 +190,13 @@ error: * Locate data block erasures using verity hashes. 
*/ static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io, - u8 *want_digest, u8 *data) + const u8 *want_digest, const u8 *data) { if (unlikely(verity_hash(v, io, data, 1 << v->data_dev_block_bits, - verity_io_real_digest(v, io)))) + io->tmp_digest))) return 0; - return memcmp(verity_io_real_digest(v, io), want_digest, - v->digest_size) != 0; + return memcmp(io->tmp_digest, want_digest, v->digest_size) != 0; } /* @@ -320,11 +321,7 @@ static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio) if (fio->bufs[n]) continue; - fio->bufs[n] = mempool_alloc(&v->fec->prealloc_pool, GFP_NOWAIT); - if (unlikely(!fio->bufs[n])) { - DMERR("failed to allocate FEC buffer"); - return -ENOMEM; - } + fio->bufs[n] = mempool_alloc(&v->fec->prealloc_pool, GFP_NOIO); } /* try to allocate the maximum number of buffers */ @@ -332,7 +329,7 @@ static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio) if (fio->bufs[n]) continue; - fio->bufs[n] = mempool_alloc(&v->fec->extra_pool, GFP_NOWAIT); + fio->bufs[n] = kmem_cache_alloc(v->fec->cache, GFP_NOWAIT); /* we can manage with even one buffer if necessary */ if (unlikely(!fio->bufs[n])) break; @@ -366,7 +363,7 @@ static void fec_init_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio) */ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io, struct dm_verity_fec_io *fio, u64 rsb, u64 offset, - bool use_erasures) + const u8 *want_digest, bool use_erasures) { int r, neras = 0; unsigned int pos; @@ -392,12 +389,11 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io, /* Always re-validate the corrected block against the expected hash */ r = verity_hash(v, io, fio->output, 1 << v->data_dev_block_bits, - verity_io_real_digest(v, io)); + io->tmp_digest); if (unlikely(r < 0)) return r; - if (memcmp(verity_io_real_digest(v, io), verity_io_want_digest(v, io), - v->digest_size)) { + if (memcmp(io->tmp_digest, want_digest, v->digest_size)) { DMERR_LIMIT("%s: FEC %llu: failed to correct (%d erasures)", v->data_dev->name, (unsigned long long)rsb, neras); return -EILSEQ; @@ -408,7 +404,8 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io, /* Correct errors in a block. Copies corrected block to dest. */ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, - enum verity_block_type type, sector_t block, u8 *dest) + enum verity_block_type type, const u8 *want_digest, + sector_t block, u8 *dest) { int r; struct dm_verity_fec_io *fio = fec_io(io); @@ -417,10 +414,8 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, if (!verity_fec_is_enabled(v)) return -EOPNOTSUPP; - if (fio->level >= DM_VERITY_FEC_MAX_RECURSION) { - DMWARN_LIMIT("%s: FEC: recursion too deep", v->data_dev->name); + if (fio->level) return -EIO; - } fio->level++; @@ -451,9 +446,9 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, * them first. Do a second attempt with erasures if the corruption is * bad enough. 
*/ - r = fec_decode_rsb(v, io, fio, rsb, offset, false); + r = fec_decode_rsb(v, io, fio, rsb, offset, want_digest, false); if (r < 0) { - r = fec_decode_rsb(v, io, fio, rsb, offset, true); + r = fec_decode_rsb(v, io, fio, rsb, offset, want_digest, true); if (r < 0) goto done; } @@ -483,7 +478,8 @@ void verity_fec_finish_io(struct dm_verity_io *io) mempool_free(fio->bufs[n], &f->prealloc_pool); fec_for_each_extra_buffer(fio, n) - mempool_free(fio->bufs[n], &f->extra_pool); + if (fio->bufs[n]) + kmem_cache_free(f->cache, fio->bufs[n]); mempool_free(fio->output, &f->output_pool); } @@ -535,7 +531,6 @@ void verity_fec_dtr(struct dm_verity *v) mempool_exit(&f->rs_pool); mempool_exit(&f->prealloc_pool); - mempool_exit(&f->extra_pool); mempool_exit(&f->output_pool); kmem_cache_destroy(f->cache); @@ -788,12 +783,6 @@ int verity_fec_ctr(struct dm_verity *v) return ret; } - ret = mempool_init_slab_pool(&f->extra_pool, 0, f->cache); - if (ret) { - ti->error = "Cannot allocate FEC buffer extra pool"; - return ret; - } - /* Preallocate an output buffer for each thread */ ret = mempool_init_kmalloc_pool(&f->output_pool, num_online_cpus(), 1 << v->data_dev_block_bits); diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h index 09123a612953..5fd267873812 100644 --- a/drivers/md/dm-verity-fec.h +++ b/drivers/md/dm-verity-fec.h @@ -23,9 +23,6 @@ #define DM_VERITY_FEC_BUF_MAX \ (1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS)) -/* maximum recursion level for verity_fec_decode */ -#define DM_VERITY_FEC_MAX_RECURSION 4 - #define DM_VERITY_OPT_FEC_DEV "use_fec_from_device" #define DM_VERITY_OPT_FEC_BLOCKS "fec_blocks" #define DM_VERITY_OPT_FEC_START "fec_start" @@ -45,9 +42,9 @@ struct dm_verity_fec { unsigned char rsn; /* N of RS(M, N) */ mempool_t rs_pool; /* mempool for fio->rs */ mempool_t prealloc_pool; /* mempool for preallocated buffers */ - mempool_t extra_pool; /* mempool for extra buffers */ mempool_t output_pool; /* mempool for output */ struct kmem_cache *cache; /* cache for buffers */ + atomic64_t corrected; /* corrected errors */ }; /* per-bio data */ @@ -68,8 +65,8 @@ struct dm_verity_fec_io { extern bool verity_fec_is_enabled(struct dm_verity *v); extern int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, - enum verity_block_type type, sector_t block, - u8 *dest); + enum verity_block_type type, const u8 *want_digest, + sector_t block, u8 *dest); extern unsigned int verity_fec_status_table(struct dm_verity *v, unsigned int sz, char *result, unsigned int maxlen); @@ -99,6 +96,7 @@ static inline bool verity_fec_is_enabled(struct dm_verity *v) static inline int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, enum verity_block_type type, + const u8 *want_digest, sector_t block, u8 *dest) { return -EOPNOTSUPP; diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 66a00a8ccb39..5c17472d7896 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -117,11 +117,25 @@ static sector_t verity_position_at_level(struct dm_verity *v, sector_t block, int verity_hash(struct dm_verity *v, struct dm_verity_io *io, const u8 *data, size_t len, u8 *digest) { - struct shash_desc *desc = &io->hash_desc; + struct shash_desc *desc; int r; + if (likely(v->use_sha256_lib)) { + struct sha256_ctx *ctx = &io->hash_ctx.sha256; + + /* + * Fast path using SHA-256 library. This is enabled only for + * verity version 1, where the salt is at the beginning. 
+ */ + *ctx = *v->initial_hashstate.sha256; + sha256_update(ctx, data, len); + sha256_final(ctx, digest); + return 0; + } + + desc = &io->hash_ctx.shash; desc->tfm = v->shash_tfm; - if (unlikely(v->initial_hashstate == NULL)) { + if (unlikely(v->initial_hashstate.shash == NULL)) { /* Version 0: salt at end */ r = crypto_shash_init(desc) ?: crypto_shash_update(desc, data, len) ?: @@ -129,7 +143,7 @@ int verity_hash(struct dm_verity *v, struct dm_verity_io *io, crypto_shash_final(desc, digest); } else { /* Version 1: salt at beginning */ - r = crypto_shash_import(desc, v->initial_hashstate) ?: + r = crypto_shash_import(desc, v->initial_hashstate.shash) ?: crypto_shash_finup(desc, data, len, digest); } if (unlikely(r)) @@ -215,12 +229,12 @@ out: * Verify hash of a metadata block pertaining to the specified data block * ("block" argument) at a specified level ("level" argument). * - * On successful return, verity_io_want_digest(v, io) contains the hash value - * for a lower tree level or for the data block (if we're at the lowest level). + * On successful return, want_digest contains the hash value for a lower tree + * level or for the data block (if we're at the lowest level). * * If "skip_unverified" is true, unverified buffer is skipped and 1 is returned. * If "skip_unverified" is false, unverified buffer is hashed and verified - * against current value of verity_io_want_digest(v, io). + * against current value of want_digest. */ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, sector_t block, int level, bool skip_unverified, @@ -259,7 +273,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, if (IS_ERR(data)) return r; if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_METADATA, - hash_block, data) == 0) { + want_digest, hash_block, data) == 0) { aux = dm_bufio_get_aux_data(buf); aux->hash_verified = 1; goto release_ok; @@ -279,11 +293,11 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, } r = verity_hash(v, io, data, 1 << v->hash_dev_block_bits, - verity_io_real_digest(v, io)); + io->tmp_digest); if (unlikely(r < 0)) goto release_ret_r; - if (likely(memcmp(verity_io_real_digest(v, io), want_digest, + if (likely(memcmp(io->tmp_digest, want_digest, v->digest_size) == 0)) aux->hash_verified = 1; else if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) { @@ -294,7 +308,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, r = -EAGAIN; goto release_ret_r; } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_METADATA, - hash_block, data) == 0) + want_digest, hash_block, data) == 0) aux->hash_verified = 1; else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA, @@ -358,7 +372,8 @@ out: } static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, - sector_t cur_block, u8 *dest) + const u8 *want_digest, sector_t cur_block, + u8 *dest) { struct page *page; void *buffer; @@ -382,12 +397,11 @@ static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, goto free_ret; r = verity_hash(v, io, buffer, 1 << v->data_dev_block_bits, - verity_io_real_digest(v, io)); + io->tmp_digest); if (unlikely(r)) goto free_ret; - if (memcmp(verity_io_real_digest(v, io), - verity_io_want_digest(v, io), v->digest_size)) { + if (memcmp(io->tmp_digest, want_digest, v->digest_size)) { r = -EIO; goto free_ret; } @@ -402,9 +416,13 @@ free_ret: static int verity_handle_data_hash_mismatch(struct dm_verity *v, struct dm_verity_io *io, - struct bio *bio, 
sector_t blkno, - u8 *data) + struct bio *bio, + struct pending_block *block) { + const u8 *want_digest = block->want_digest; + sector_t blkno = block->blkno; + u8 *data = block->data; + if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) { /* * Error handling code (FEC included) cannot be run in the @@ -412,14 +430,14 @@ static int verity_handle_data_hash_mismatch(struct dm_verity *v, */ return -EAGAIN; } - if (verity_recheck(v, io, blkno, data) == 0) { + if (verity_recheck(v, io, want_digest, blkno, data) == 0) { if (v->validated_blocks) set_bit(blkno, v->validated_blocks); return 0; } #if defined(CONFIG_DM_VERITY_FEC) - if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, blkno, - data) == 0) + if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, want_digest, + blkno, data) == 0) return 0; #endif if (bio->bi_status) @@ -433,6 +451,58 @@ static int verity_handle_data_hash_mismatch(struct dm_verity *v, return 0; } +static void verity_clear_pending_blocks(struct dm_verity_io *io) +{ + int i; + + for (i = io->num_pending - 1; i >= 0; i--) { + kunmap_local(io->pending_blocks[i].data); + io->pending_blocks[i].data = NULL; + } + io->num_pending = 0; +} + +static int verity_verify_pending_blocks(struct dm_verity *v, + struct dm_verity_io *io, + struct bio *bio) +{ + const unsigned int block_size = 1 << v->data_dev_block_bits; + int i, r; + + if (io->num_pending == 2) { + /* num_pending == 2 implies that the algorithm is SHA-256 */ + sha256_finup_2x(v->initial_hashstate.sha256, + io->pending_blocks[0].data, + io->pending_blocks[1].data, block_size, + io->pending_blocks[0].real_digest, + io->pending_blocks[1].real_digest); + } else { + for (i = 0; i < io->num_pending; i++) { + r = verity_hash(v, io, io->pending_blocks[i].data, + block_size, + io->pending_blocks[i].real_digest); + if (unlikely(r)) + return r; + } + } + + for (i = 0; i < io->num_pending; i++) { + struct pending_block *block = &io->pending_blocks[i]; + + if (likely(memcmp(block->real_digest, block->want_digest, + v->digest_size) == 0)) { + if (v->validated_blocks) + set_bit(block->blkno, v->validated_blocks); + } else { + r = verity_handle_data_hash_mismatch(v, io, bio, block); + if (unlikely(r)) + return r; + } + } + verity_clear_pending_blocks(io); + return 0; +} + /* * Verify one "dm_verity_io" structure. */ @@ -440,10 +510,14 @@ static int verity_verify_io(struct dm_verity_io *io) { struct dm_verity *v = io->v; const unsigned int block_size = 1 << v->data_dev_block_bits; + const int max_pending = v->use_sha256_finup_2x ? 
2 : 1; struct bvec_iter iter_copy; struct bvec_iter *iter; struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); unsigned int b; + int r; + + io->num_pending = 0; if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) { /* @@ -457,21 +531,22 @@ static int verity_verify_io(struct dm_verity_io *io) for (b = 0; b < io->n_blocks; b++, bio_advance_iter(bio, iter, block_size)) { - int r; - sector_t cur_block = io->block + b; + sector_t blkno = io->block + b; + struct pending_block *block; bool is_zero; struct bio_vec bv; void *data; if (v->validated_blocks && bio->bi_status == BLK_STS_OK && - likely(test_bit(cur_block, v->validated_blocks))) + likely(test_bit(blkno, v->validated_blocks))) continue; - r = verity_hash_for_block(v, io, cur_block, - verity_io_want_digest(v, io), + block = &io->pending_blocks[io->num_pending]; + + r = verity_hash_for_block(v, io, blkno, block->want_digest, &is_zero); if (unlikely(r < 0)) - return r; + goto error; bv = bio_iter_iovec(bio, *iter); if (unlikely(bv.bv_len < block_size)) { @@ -482,7 +557,8 @@ static int verity_verify_io(struct dm_verity_io *io) * data block size to be greater than PAGE_SIZE. */ DMERR_LIMIT("unaligned io (data block spans pages)"); - return -EIO; + r = -EIO; + goto error; } data = bvec_kmap_local(&bv); @@ -496,29 +572,26 @@ static int verity_verify_io(struct dm_verity_io *io) kunmap_local(data); continue; } - - r = verity_hash(v, io, data, block_size, - verity_io_real_digest(v, io)); - if (unlikely(r < 0)) { - kunmap_local(data); - return r; + block->data = data; + block->blkno = blkno; + if (++io->num_pending == max_pending) { + r = verity_verify_pending_blocks(v, io, bio); + if (unlikely(r)) + goto error; } + } - if (likely(memcmp(verity_io_real_digest(v, io), - verity_io_want_digest(v, io), v->digest_size) == 0)) { - if (v->validated_blocks) - set_bit(cur_block, v->validated_blocks); - kunmap_local(data); - continue; - } - r = verity_handle_data_hash_mismatch(v, io, bio, cur_block, - data); - kunmap_local(data); + if (io->num_pending) { + r = verity_verify_pending_blocks(v, io, bio); if (unlikely(r)) - return r; + goto error; } return 0; + +error: + verity_clear_pending_blocks(io); + return r; } /* @@ -775,6 +848,10 @@ static void verity_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: DMEMIT("%c", v->hash_failed ? 
'C' : 'V'); + if (verity_fec_is_enabled(v)) + DMEMIT(" %lld", atomic64_read(&v->fec->corrected)); + else + DMEMIT(" -"); break; case STATUSTYPE_TABLE: DMEMIT("%u %s %s %u %u %llu %llu %s ", @@ -1004,7 +1081,7 @@ static void verity_dtr(struct dm_target *ti) kvfree(v->validated_blocks); kfree(v->salt); - kfree(v->initial_hashstate); + kfree(v->initial_hashstate.shash); kfree(v->root_digest); kfree(v->zero_digest); verity_free_sig(v); @@ -1069,8 +1146,7 @@ static int verity_alloc_zero_digest(struct dm_verity *v) if (!v->zero_digest) return r; - io = kmalloc(sizeof(*io) + crypto_shash_descsize(v->shash_tfm), - GFP_KERNEL); + io = kmalloc(v->ti->per_io_data_size, GFP_KERNEL); if (!io) return r; /* verity_dtr will free zero_digest */ @@ -1252,11 +1328,26 @@ static int verity_setup_hash_alg(struct dm_verity *v, const char *alg_name) } v->shash_tfm = shash; v->digest_size = crypto_shash_digestsize(shash); - DMINFO("%s using \"%s\"", alg_name, crypto_shash_driver_name(shash)); if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) { ti->error = "Digest size too big"; return -EINVAL; } + if (likely(v->version && strcmp(alg_name, "sha256") == 0)) { + /* + * Fast path: use the library API for reduced overhead and + * interleaved hashing support. + */ + v->use_sha256_lib = true; + if (sha256_finup_2x_is_optimized()) + v->use_sha256_finup_2x = true; + ti->per_io_data_size = + offsetofend(struct dm_verity_io, hash_ctx.sha256); + } else { + /* Fallback case: use the generic crypto API. */ + ti->per_io_data_size = + offsetofend(struct dm_verity_io, hash_ctx.shash) + + crypto_shash_descsize(shash); + } return 0; } @@ -1277,7 +1368,18 @@ static int verity_setup_salt_and_hashstate(struct dm_verity *v, const char *arg) return -EINVAL; } } - if (v->version) { /* Version 1: salt at beginning */ + if (likely(v->use_sha256_lib)) { + /* Implies version 1: salt at beginning */ + v->initial_hashstate.sha256 = + kmalloc(sizeof(struct sha256_ctx), GFP_KERNEL); + if (!v->initial_hashstate.sha256) { + ti->error = "Cannot allocate initial hash state"; + return -ENOMEM; + } + sha256_init(v->initial_hashstate.sha256); + sha256_update(v->initial_hashstate.sha256, + v->salt, v->salt_size); + } else if (v->version) { /* Version 1: salt at beginning */ SHASH_DESC_ON_STACK(desc, v->shash_tfm); int r; @@ -1285,16 +1387,16 @@ static int verity_setup_salt_and_hashstate(struct dm_verity *v, const char *arg) * Compute the pre-salted hash state that can be passed to * crypto_shash_import() for each block later. 
*/ - v->initial_hashstate = kmalloc( + v->initial_hashstate.shash = kmalloc( crypto_shash_statesize(v->shash_tfm), GFP_KERNEL); - if (!v->initial_hashstate) { + if (!v->initial_hashstate.shash) { ti->error = "Cannot allocate initial hash state"; return -ENOMEM; } desc->tfm = v->shash_tfm; r = crypto_shash_init(desc) ?: crypto_shash_update(desc, v->salt, v->salt_size) ?: - crypto_shash_export(desc, v->initial_hashstate); + crypto_shash_export(desc, v->initial_hashstate.shash); if (r) { ti->error = "Cannot set up initial hash state"; return r; @@ -1556,9 +1658,6 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ti->per_io_data_size = sizeof(struct dm_verity_io) + - crypto_shash_descsize(v->shash_tfm); - r = verity_fec_ctr(v); if (r) goto bad; @@ -1690,7 +1789,7 @@ static struct target_type verity_target = { .name = "verity", /* Note: the LSMs depend on the singleton and immutable features */ .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE, - .version = {1, 12, 0}, + .version = {1, 13, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 6d141abd965c..f975a9e5c5d6 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -16,6 +16,7 @@ #include <linux/device-mapper.h> #include <linux/interrupt.h> #include <crypto/hash.h> +#include <crypto/sha2.h> #define DM_VERITY_MAX_LEVELS 63 @@ -42,7 +43,10 @@ struct dm_verity { struct crypto_shash *shash_tfm; u8 *root_digest; /* digest of the root block */ u8 *salt; /* salt: its size is salt_size */ - u8 *initial_hashstate; /* salted initial state, if version >= 1 */ + union { + struct sha256_ctx *sha256; /* for use_sha256_lib=1 */ + u8 *shash; /* for use_sha256_lib=0 */ + } initial_hashstate; /* salted initial state, if version >= 1 */ u8 *zero_digest; /* digest for a zero block */ #ifdef CONFIG_SECURITY u8 *root_digest_sig; /* signature of the root digest */ @@ -59,6 +63,8 @@ struct dm_verity { unsigned char version; bool hash_failed:1; /* set if hash of any block failed */ bool use_bh_wq:1; /* try to verify in BH wq before normal work-queue */ + bool use_sha256_lib:1; /* use SHA-256 library instead of generic crypto API */ + bool use_sha256_finup_2x:1; /* use interleaved hashing optimization */ unsigned int digest_size; /* digest size for the current hash algorithm */ enum verity_mode mode; /* mode for handling verification errors */ enum verity_mode error_mode;/* mode for handling I/O errors */ @@ -78,6 +84,13 @@ struct dm_verity { mempool_t recheck_pool; }; +struct pending_block { + void *data; + sector_t blkno; + u8 want_digest[HASH_MAX_DIGESTSIZE]; + u8 real_digest[HASH_MAX_DIGESTSIZE]; +}; + struct dm_verity_io { struct dm_verity *v; @@ -94,28 +107,29 @@ struct dm_verity_io { struct work_struct work; struct work_struct bh_work; - u8 real_digest[HASH_MAX_DIGESTSIZE]; - u8 want_digest[HASH_MAX_DIGESTSIZE]; + u8 tmp_digest[HASH_MAX_DIGESTSIZE]; /* - * Temporary space for hashing. This is variable-length and must be at - * the end of the struct. struct shash_desc is just the fixed part; - * it's followed by a context of size crypto_shash_descsize(shash_tfm). + * This is the queue of data blocks that are pending verification. When + * the crypto layer supports interleaved hashing, we allow multiple + * blocks to be queued up in order to utilize it. This can improve + * performance significantly vs. sequential hashing of each block. 
*/ - struct shash_desc hash_desc; -}; + int num_pending; + struct pending_block pending_blocks[2]; -static inline u8 *verity_io_real_digest(struct dm_verity *v, - struct dm_verity_io *io) -{ - return io->real_digest; -} - -static inline u8 *verity_io_want_digest(struct dm_verity *v, - struct dm_verity_io *io) -{ - return io->want_digest; -} + /* + * Temporary space for hashing. Either sha256 or shash is used, + * depending on the value of use_sha256_lib. If shash is used, + * then this field is variable-length, with total size + * sizeof(struct shash_desc) + crypto_shash_descsize(shash_tfm). + * For this reason, this field must be the end of the struct. + */ + union { + struct sha256_ctx sha256; + struct shash_desc shash; + } hash_ctx; +}; extern int verity_hash(struct dm_verity *v, struct dm_verity_io *io, const u8 *data, size_t len, u8 *digest); diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index 78e17dd4d01b..c95e417194b3 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -17,33 +17,26 @@ * For internal zone reports bypassing the top BIO submission path. */ static int dm_blk_do_report_zones(struct mapped_device *md, struct dm_table *t, - sector_t sector, unsigned int nr_zones, - report_zones_cb cb, void *data) + unsigned int nr_zones, + struct dm_report_zones_args *args) { - struct gendisk *disk = md->disk; - int ret; - struct dm_report_zones_args args = { - .next_sector = sector, - .orig_data = data, - .orig_cb = cb, - }; - do { struct dm_target *tgt; + int ret; - tgt = dm_table_find_target(t, args.next_sector); + tgt = dm_table_find_target(t, args->next_sector); if (WARN_ON_ONCE(!tgt->type->report_zones)) return -EIO; - args.tgt = tgt; - ret = tgt->type->report_zones(tgt, &args, - nr_zones - args.zone_idx); + args->tgt = tgt; + ret = tgt->type->report_zones(tgt, args, + nr_zones - args->zone_idx); if (ret < 0) return ret; - } while (args.zone_idx < nr_zones && - args.next_sector < get_capacity(disk)); + } while (args->zone_idx < nr_zones && + args->next_sector < get_capacity(md->disk)); - return args.zone_idx; + return args->zone_idx; } /* @@ -52,7 +45,8 @@ static int dm_blk_do_report_zones(struct mapped_device *md, struct dm_table *t, * generally implemented by targets using dm_report_zones(). */ int dm_blk_report_zones(struct gendisk *disk, sector_t sector, - unsigned int nr_zones, report_zones_cb cb, void *data) + unsigned int nr_zones, + struct blk_report_zones_args *args) { struct mapped_device *md = disk->private_data; struct dm_table *map; @@ -76,9 +70,14 @@ int dm_blk_report_zones(struct gendisk *disk, sector_t sector, map = zone_revalidate_map; } - if (map) - ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, - data); + if (map) { + struct dm_report_zones_args dm_args = { + .disk = md->disk, + .next_sector = sector, + .rep_args = args, + }; + ret = dm_blk_do_report_zones(md, map, nr_zones, &dm_args); + } if (put_table) dm_put_live_table(md, srcu_idx); @@ -113,7 +112,18 @@ static int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx, } args->next_sector = zone->start + zone->len; - return args->orig_cb(zone, args->zone_idx++, args->orig_data); + + /* If we have an internal callback, call it first. 
*/ + if (args->cb) { + int ret; + + ret = args->cb(zone, args->zone_idx, args->data); + if (ret) + return ret; + } + + return disk_report_zone(args->disk, zone, args->zone_idx++, + args->rep_args); } /* @@ -193,8 +203,6 @@ int dm_revalidate_zones(struct dm_table *t, struct request_queue *q) return ret; } - md->nr_zones = disk->nr_zones; - return 0; } @@ -442,7 +450,6 @@ void dm_finalize_zone_settings(struct dm_table *t, struct queue_limits *lim) set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); } else { clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); - md->nr_zones = 0; md->disk->nr_zones = 0; } } @@ -492,10 +499,15 @@ int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t, sector_t sector, unsigned int nr_zones, unsigned long *need_reset) { + struct dm_report_zones_args args = { + .disk = md->disk, + .next_sector = sector, + .cb = dm_zone_need_reset_cb, + .data = need_reset, + }; int ret; - ret = dm_blk_do_report_zones(md, t, sector, nr_zones, - dm_zone_need_reset_cb, need_reset); + ret = dm_blk_do_report_zones(md, t, nr_zones, &args); if (ret != nr_zones) { DMERR("Get %s zone reset bitmap failed\n", md->disk->disk_name); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f5e5e59b232b..b63279202260 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -272,7 +272,7 @@ static int __init dm_init(void) int r, i; #if (IS_ENABLED(CONFIG_IMA) && !IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE)) - DMWARN("CONFIG_IMA_DISABLE_HTABLE is disabled." + DMINFO("CONFIG_IMA_DISABLE_HTABLE is disabled." " Duplicate IMA measurements will not be recorded in the IMA log."); #endif @@ -1321,6 +1321,7 @@ void dm_accept_partial_bio(struct bio *bio, unsigned int n_sectors) BUG_ON(dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO)); BUG_ON(bio_sectors > *tio->len_ptr); BUG_ON(n_sectors > bio_sectors); + BUG_ON(bio->bi_opf & REQ_ATOMIC); if (static_branch_unlikely(&zoned_enabled) && unlikely(bdev_is_zoned(bio->bi_bdev))) { @@ -1735,8 +1736,12 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci) ci->submit_as_polled = !!(ci->bio->bi_opf & REQ_POLLED); len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count); - if (ci->bio->bi_opf & REQ_ATOMIC && len != ci->sector_count) - return BLK_STS_IOERR; + if (ci->bio->bi_opf & REQ_ATOMIC) { + if (unlikely(!dm_target_supports_atomic_writes(ti->type))) + return BLK_STS_IOERR; + if (unlikely(len != ci->sector_count)) + return BLK_STS_IOERR; + } setup_split_accounting(ci, len); @@ -2005,7 +2010,7 @@ static void dm_split_and_process_bio(struct mapped_device *md, * linear target or multiple linear targets pointing to the same * device), we can send the flush with data directly to it. 
*/ - if (map->flush_bypasses_map) { + if (bio->bi_iter.bi_size && map->flush_bypasses_map) { struct list_head *devices = dm_table_get_devices(map); if (devices->next == devices->prev) goto send_preflush_with_data; @@ -2439,7 +2444,6 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, { struct dm_table *old_map; sector_t size, old_size; - int ret; lockdep_assert_held(&md->suspend_lock); @@ -2454,11 +2458,13 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, set_capacity(md->disk, size); - ret = dm_table_set_restrictions(t, md->queue, limits); - if (ret) { - set_capacity(md->disk, old_size); - old_map = ERR_PTR(ret); - goto out; + if (limits) { + int ret = dm_table_set_restrictions(t, md->queue, limits); + if (ret) { + set_capacity(md->disk, old_size); + old_map = ERR_PTR(ret); + goto out; + } } /* @@ -2836,6 +2842,7 @@ static void dm_wq_work(struct work_struct *work) static void dm_queue_flush(struct mapped_device *md) { + clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); smp_mb__after_atomic(); queue_work(md->wq, &md->work); @@ -2848,6 +2855,7 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) { struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL); struct queue_limits limits; + bool update_limits = true; int r; mutex_lock(&md->suspend_lock); @@ -2857,19 +2865,30 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) goto out; /* + * To avoid a potential deadlock locking the queue limits, disallow + * updating the queue limits during a table swap, when updating an + * immutable request-based dm device (dm-multipath) during a noflush + * suspend. It is userspace's responsibility to make sure that the new + * table uses the same limits as the existing table, if it asks for a + * noflush suspend. + */ + if (dm_request_based(md) && md->immutable_target && + __noflush_suspending(md)) + update_limits = false; + /* * If the new table has no data devices, retain the existing limits. * This helps multipath with queue_if_no_path if all paths disappear, * then new I/O is queued based on these limits, and then some paths * reappear. */ - if (dm_table_has_no_data_devices(table)) { + else if (dm_table_has_no_data_devices(table)) { live_map = dm_get_live_table_fast(md); if (live_map) limits = md->queue->limits; dm_put_live_table_fast(md); } - if (!live_map) { + if (update_limits && !live_map) { r = dm_calculate_queue_limits(table, &limits); if (r) { map = ERR_PTR(r); @@ -2877,7 +2896,7 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) } } - map = __bind(md, table, &limits); + map = __bind(md, table, update_limits ? &limits : NULL); dm_issue_global_event(); out: @@ -2930,7 +2949,6 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map, /* * DMF_NOFLUSH_SUSPENDING must be set before presuspend. - * This flag is cleared before dm_suspend returns. 
*/ if (noflush) set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); @@ -2993,8 +3011,6 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map, if (!r) set_bit(dmf_suspended_flag, &md->flags); - if (noflush) - clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); if (map) synchronize_srcu(&md->io_barrier); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 245f52b59215..7a795979ec72 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -109,7 +109,8 @@ void dm_finalize_zone_settings(struct dm_table *t, struct queue_limits *lim); void dm_zone_endio(struct dm_io *io, struct bio *clone); #ifdef CONFIG_BLK_DEV_ZONED int dm_blk_report_zones(struct gendisk *disk, sector_t sector, - unsigned int nr_zones, report_zones_cb cb, void *data); + unsigned int nr_zones, + struct blk_report_zones_args *args); bool dm_is_zone_write(struct mapped_device *md, struct bio *bio); int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t, sector_t sector, unsigned int nr_zones, diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c index 7033d982d377..8d7b82c4a723 100644 --- a/drivers/md/md-linear.c +++ b/drivers/md/md-linear.c @@ -72,9 +72,11 @@ static int linear_set_limits(struct mddev *mddev) md_init_stacking_limits(&lim); lim.max_hw_sectors = mddev->chunk_sectors; + lim.logical_block_size = mddev->logical_block_size; lim.max_write_zeroes_sectors = mddev->chunk_sectors; lim.max_hw_wzeroes_unmap_sectors = mddev->chunk_sectors; lim.io_min = mddev->chunk_sectors << 9; + lim.features |= BLK_FEAT_ATOMIC_WRITES; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); if (err) return err; diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c index 1eb434306162..9c1ade19b774 100644 --- a/drivers/md/md-llbitmap.c +++ b/drivers/md/md-llbitmap.c @@ -378,7 +378,7 @@ static void llbitmap_infect_dirty_bits(struct llbitmap *llbitmap, case BitClean: pctl->state[pos] = BitDirty; break; - }; + } } } diff --git a/drivers/md/md.c b/drivers/md/md.c index 41c476b40c7a..e5922a682953 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -99,7 +99,7 @@ static int remove_and_add_spares(struct mddev *mddev, struct md_rdev *this); static void mddev_detach(struct mddev *mddev); static void export_rdev(struct md_rdev *rdev, struct mddev *mddev); -static void md_wakeup_thread_directly(struct md_thread __rcu *thread); +static void md_wakeup_thread_directly(struct md_thread __rcu **thread); /* * Default number of read corrections we'll attempt on an rdev @@ -339,6 +339,7 @@ static int start_readonly; */ static bool create_on_open = true; static bool legacy_async_del_gendisk = true; +static bool check_new_feature = true; /* * We have a system wide 'event count' that is incremented @@ -730,6 +731,8 @@ static void mddev_clear_bitmap_ops(struct mddev *mddev) int mddev_init(struct mddev *mddev) { + int err = 0; + if (!IS_ENABLED(CONFIG_MD_BITMAP)) mddev->bitmap_id = ID_BITMAP_NONE; else @@ -741,10 +744,23 @@ int mddev_init(struct mddev *mddev) if (percpu_ref_init(&mddev->writes_pending, no_op, PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) { - percpu_ref_exit(&mddev->active_io); - return -ENOMEM; + err = -ENOMEM; + goto exit_active_io; } + err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); + if (err) + goto exit_writes_pending; + + err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); + if (err) + goto exit_bio_set; + + err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE, + offsetof(struct md_io_clone, bio_clone), 0); + if (err) + goto exit_sync_set; + /* We want
to start with the refcount at zero */ percpu_ref_put(&mddev->writes_pending); @@ -773,11 +789,24 @@ int mddev_init(struct mddev *mddev) INIT_WORK(&mddev->del_work, mddev_delayed_delete); return 0; + +exit_sync_set: + bioset_exit(&mddev->sync_set); +exit_bio_set: + bioset_exit(&mddev->bio_set); +exit_writes_pending: + percpu_ref_exit(&mddev->writes_pending); +exit_active_io: + percpu_ref_exit(&mddev->active_io); + return err; } EXPORT_SYMBOL_GPL(mddev_init); void mddev_destroy(struct mddev *mddev) { + bioset_exit(&mddev->bio_set); + bioset_exit(&mddev->sync_set); + bioset_exit(&mddev->io_clone_set); percpu_ref_exit(&mddev->active_io); percpu_ref_exit(&mddev->writes_pending); } @@ -941,8 +970,11 @@ void mddev_unlock(struct mddev *mddev) * do_md_stop. dm raid only uses md_stop to stop. So dm raid * doesn't need to check MD_DELETED when getting reconfig lock */ - if (test_bit(MD_DELETED, &mddev->flags)) + if (test_bit(MD_DELETED, &mddev->flags) && + !test_and_set_bit(MD_DO_DELETE, &mddev->flags)) { + kobject_del(&mddev->kobj); del_gendisk(mddev->gendisk); + } } } EXPORT_SYMBOL_GPL(mddev_unlock); @@ -1820,9 +1852,13 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ } if (sb->pad0 || sb->pad3[0] || - memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1]))) - /* Some padding is non-zero, might be a new feature */ - return -EINVAL; + memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1]))) { + pr_warn("Some padding is non-zero on %pg, might be a new feature\n", + rdev->bdev); + if (check_new_feature) + return -EINVAL; + pr_warn("check_new_feature is disabled, data corruption possible\n"); + } rdev->preferred_minor = 0xffff; rdev->data_offset = le64_to_cpu(sb->data_offset); @@ -1963,6 +1999,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struc mddev->layout = le32_to_cpu(sb->layout); mddev->raid_disks = le32_to_cpu(sb->raid_disks); mddev->dev_sectors = le64_to_cpu(sb->size); + mddev->logical_block_size = le32_to_cpu(sb->logical_block_size); mddev->events = ev1; mddev->bitmap_info.offset = 0; mddev->bitmap_info.space = 0; @@ -2172,6 +2209,7 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) sb->chunksize = cpu_to_le32(mddev->chunk_sectors); sb->level = cpu_to_le32(mddev->level); sb->layout = cpu_to_le32(mddev->layout); + sb->logical_block_size = cpu_to_le32(mddev->logical_block_size); if (test_bit(FailFast, &rdev->flags)) sb->devflags |= FailFast1; else @@ -2750,6 +2788,7 @@ void md_update_sb(struct mddev *mddev, int force_change) if (!md_is_rdwr(mddev)) { if (force_change) set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); + pr_err("%s: can't update sb for read-only array %s\n", __func__, mdname(mddev)); return; } @@ -5134,7 +5173,7 @@ static void stop_sync_thread(struct mddev *mddev, bool locked) * Thread might be blocked waiting for metadata update which will now * never happen */ - md_wakeup_thread_directly(mddev->sync_thread); + md_wakeup_thread_directly(&mddev->sync_thread); if (work_pending(&mddev->sync_work)) flush_work(&mddev->sync_work); @@ -5900,6 +5939,68 @@ static struct md_sysfs_entry md_serialize_policy = __ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show, serialize_policy_store); +static int mddev_set_logical_block_size(struct mddev *mddev, + unsigned int lbs) +{ + int err = 0; + struct queue_limits lim; + + if (queue_logical_block_size(mddev->gendisk->queue) >= lbs) { + pr_err("%s: Cannot set LBS smaller than mddev LBS %u\n", + mdname(mddev), lbs); + return
-EINVAL; + } + + lim = queue_limits_start_update(mddev->gendisk->queue); + lim.logical_block_size = lbs; + pr_info("%s: logical_block_size is changed, data may be lost\n", + mdname(mddev)); + err = queue_limits_commit_update(mddev->gendisk->queue, &lim); + if (err) + return err; + + mddev->logical_block_size = lbs; + /* New lbs will be written to superblock after array is running */ + set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); + return 0; +} + +static ssize_t +lbs_show(struct mddev *mddev, char *page) +{ + return sprintf(page, "%u\n", mddev->logical_block_size); +} + +static ssize_t +lbs_store(struct mddev *mddev, const char *buf, size_t len) +{ + unsigned int lbs; + int err = -EBUSY; + + /* Only 1.x meta supports configurable LBS */ + if (mddev->major_version == 0) + return -EINVAL; + + if (mddev->pers) + return -EBUSY; + + err = kstrtouint(buf, 10, &lbs); + if (err < 0) + return -EINVAL; + + err = mddev_lock(mddev); + if (err) + goto unlock; + + err = mddev_set_logical_block_size(mddev, lbs); + +unlock: + mddev_unlock(mddev); + return err ?: len; +} + +static struct md_sysfs_entry md_logical_block_size = +__ATTR(logical_block_size, 0644, lbs_show, lbs_store); static struct attribute *md_default_attrs[] = { &md_level.attr, @@ -5922,6 +6023,7 @@ static struct attribute *md_default_attrs[] = { &md_consistency_policy.attr, &md_fail_last_dev.attr, &md_serialize_policy.attr, + &md_logical_block_size.attr, NULL, }; @@ -6052,6 +6154,17 @@ int mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim, return -EINVAL; } + /* + * Until RAID adds folio support, the logical_block_size + * must not be larger than the page size. + */ + if (lim->logical_block_size > PAGE_SIZE) { + pr_err("%s: logical_block_size must not be larger than PAGE_SIZE\n", + mdname(mddev)); + return -EINVAL; + } + mddev->logical_block_size = lim->logical_block_size; + return 0; } EXPORT_SYMBOL_GPL(mddev_stack_rdev_limits); @@ -6064,6 +6177,13 @@ int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev) if (mddev_is_dm(mddev)) return 0; + if (queue_logical_block_size(rdev->bdev->bd_disk->queue) > + queue_logical_block_size(mddev->gendisk->queue)) { + pr_err("%s: incompatible logical_block_size, cannot add\n", + mdname(mddev)); + return -EINVAL; + } + lim = queue_limits_start_update(mddev->gendisk->queue); queue_limits_stack_bdev(&lim, rdev->bdev, rdev->data_offset, mddev->gendisk->disk_name); @@ -6384,29 +6504,9 @@ int md_run(struct mddev *mddev) nowait = nowait && bdev_nowait(rdev->bdev); } - if (!bioset_initialized(&mddev->bio_set)) { - err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); - if (err) - return err; - } - if (!bioset_initialized(&mddev->sync_set)) { - err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); - if (err) - goto exit_bio_set; - } - - if (!bioset_initialized(&mddev->io_clone_set)) { - err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE, - offsetof(struct md_io_clone, bio_clone), 0); - if (err) - goto exit_sync_set; - } - pers = get_pers(mddev->level, mddev->clevel); - if (!pers) { - err = -EINVAL; - goto abort; - } + if (!pers) + return -EINVAL; if (mddev->level != pers->head.id) { mddev->level = pers->head.id; mddev->new_level = pers->head.id; @@ -6417,8 +6517,7 @@ int md_run(struct mddev *mddev) pers->start_reshape == NULL) { /* This personality cannot handle reshaping...
*/ put_pers(pers); - err = -EINVAL; - goto abort; + return -EINVAL; } if (pers->sync_request) { @@ -6545,12 +6644,6 @@ bitmap_abort: mddev->private = NULL; put_pers(pers); md_bitmap_destroy(mddev); -abort: - bioset_exit(&mddev->io_clone_set); -exit_sync_set: - bioset_exit(&mddev->sync_set); -exit_bio_set: - bioset_exit(&mddev->bio_set); return err; } EXPORT_SYMBOL_GPL(md_run); @@ -6683,6 +6776,7 @@ static void md_clean(struct mddev *mddev) mddev->chunk_sectors = 0; mddev->ctime = mddev->utime = 0; mddev->layout = 0; + mddev->logical_block_size = 0; mddev->max_disks = 0; mddev->events = 0; mddev->can_decrease_events = 0; @@ -6775,10 +6869,6 @@ static void __md_stop(struct mddev *mddev) mddev->private = NULL; put_pers(pers); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - - bioset_exit(&mddev->bio_set); - bioset_exit(&mddev->sync_set); - bioset_exit(&mddev->io_clone_set); } void md_stop(struct mddev *mddev) @@ -6869,6 +6959,10 @@ static int do_md_stop(struct mddev *mddev, int mode) if (!md_is_rdwr(mddev)) set_disk_ro(disk, 0); + if (mode == 2 && mddev->pers->sync_request && + mddev->to_remove == NULL) + mddev->to_remove = &md_redundancy_group; + __md_stop_writes(mddev); __md_stop(mddev); @@ -8373,22 +8467,21 @@ static int md_thread(void *arg) return 0; } -static void md_wakeup_thread_directly(struct md_thread __rcu *thread) +static void md_wakeup_thread_directly(struct md_thread __rcu **thread) { struct md_thread *t; rcu_read_lock(); - t = rcu_dereference(thread); + t = rcu_dereference(*thread); if (t) wake_up_process(t->tsk); rcu_read_unlock(); } -void md_wakeup_thread(struct md_thread __rcu *thread) +void __md_wakeup_thread(struct md_thread __rcu *thread) { struct md_thread *t; - rcu_read_lock(); t = rcu_dereference(thread); if (t) { pr_debug("md: waking up MD thread %s.\n", t->tsk->comm); @@ -8396,9 +8489,8 @@ void md_wakeup_thread(struct md_thread __rcu *thread) if (wq_has_sleeper(&t->wqueue)) wake_up(&t->wqueue); } - rcu_read_unlock(); } -EXPORT_SYMBOL(md_wakeup_thread); +EXPORT_SYMBOL(__md_wakeup_thread); struct md_thread *md_register_thread(void (*run) (struct md_thread *), struct mddev *mddev, const char *name) @@ -9978,6 +10070,52 @@ static void unregister_sync_thread(struct mddev *mddev) md_reap_sync_thread(mddev); } +static bool md_should_do_recovery(struct mddev *mddev) +{ + /* + * As long as one of the following flags is set, + * recovery work needs to be done or cleaned up. + */ + if (test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || + test_bit(MD_RECOVERY_DONE, &mddev->recovery)) + return true; + + /* + * If no flags are set and the array is read-only, + * there is nothing to do. + */ + if (!md_is_rdwr(mddev)) + return false; + + /* + * MD_SB_CHANGE_PENDING indicates that the array is switching from clean to + * active, and no action is needed for now. + * All other MD_SB_* flags require a superblock update. + */ + if (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) + return true; + + /* + * If the array is not using external metadata and there has been no data + * written for some time, then the array's status needs to be set to + * in_sync. + */ + if (mddev->external == 0 && mddev->safemode == 1) + return true; + + /* + * When the system is about to restart or the process receives a signal, + * the array needs to be synchronized as soon as possible. + * Once the data synchronization is completed, the array status needs to + * change to in_sync.
+ */ + if (mddev->safemode == 2 && !mddev->in_sync && + mddev->resync_offset == MaxSector) + return true; + + return false; +} + /* * This routine is regularly called by all per-raid-array threads to * deal with generic issues like resync and super-block update. @@ -10014,18 +10152,7 @@ void md_check_recovery(struct mddev *mddev) flush_signals(current); } - if (!md_is_rdwr(mddev) && - !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) && - !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) - return; - if ( ! ( - (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) || - test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || - test_bit(MD_RECOVERY_DONE, &mddev->recovery) || - (mddev->external == 0 && mddev->safemode == 1) || - (mddev->safemode == 2 - && !mddev->in_sync && mddev->resync_offset == MaxSector) - )) + if (!md_should_do_recovery(mddev)) return; if (mddev_trylock(mddev)) { @@ -10281,7 +10408,6 @@ static int md_notify_reboot(struct notifier_block *this, unsigned long code, void *x) { struct mddev *mddev; - int need_delay = 0; spin_lock(&all_mddevs_lock); list_for_each_entry(mddev, &all_mddevs, all_mddevs) { @@ -10295,21 +10421,11 @@ static int md_notify_reboot(struct notifier_block *this, mddev->safemode = 2; mddev_unlock(mddev); } - need_delay = 1; spin_lock(&all_mddevs_lock); mddev_put_locked(mddev); } spin_unlock(&all_mddevs_lock); - /* - * certain more exotic SCSI devices are known to be - * volatile wrt too early system reboots. While the - * right place to handle this issue is the given - * driver, we do want to have a safe RAID driver ... - */ - if (need_delay) - msleep(1000); - return NOTIFY_DONE; } @@ -10697,6 +10813,7 @@ module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR); module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR); module_param(create_on_open, bool, S_IRUSR|S_IWUSR); module_param(legacy_async_del_gendisk, bool, 0600); +module_param(check_new_feature, bool, 0600); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MD RAID framework"); diff --git a/drivers/md/md.h b/drivers/md/md.h index 1979c2d4fe89..6985f2829bbd 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -354,6 +354,7 @@ enum mddev_flags { MD_HAS_MULTIPLE_PPLS, MD_NOT_READY, MD_BROKEN, + MD_DO_DELETE, MD_DELETED, }; @@ -432,6 +433,7 @@ struct mddev { sector_t array_sectors; /* exported array size */ int external_size; /* size managed * externally */ + unsigned int logical_block_size; __u64 events; /* If the last 'event' was simply a clean->dirty transition, and * we didn't write it to the spares, then it is safe and simple @@ -882,6 +884,12 @@ struct md_io_clone { #define THREAD_WAKEUP 0 +#define md_wakeup_thread(thread) do { \ + rcu_read_lock(); \ + __md_wakeup_thread(thread); \ + rcu_read_unlock(); \ +} while (0) + static inline void safe_put_page(struct page *p) { if (p) put_page(p); @@ -895,7 +903,7 @@ extern struct md_thread *md_register_thread( struct mddev *mddev, const char *name); extern void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **threadp); -extern void md_wakeup_thread(struct md_thread __rcu *thread); +extern void __md_wakeup_thread(struct md_thread __rcu *thread); extern void md_check_recovery(struct mddev *mddev); extern void md_reap_sync_thread(struct mddev *mddev); extern enum sync_action md_sync_action(struct mddev *mddev); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index e443e478645a..985c377356eb 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -68,7 +68,10 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) 
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index e443e478645a..985c377356eb 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -68,7 +68,10 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 	struct strip_zone *zone;
 	int cnt;
 	struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
-	unsigned blksize = 512;
+	unsigned int blksize = 512;
+
+	if (!mddev_is_dm(mddev))
+		blksize = queue_logical_block_size(mddev->gendisk->queue);

 	*private_conf = ERR_PTR(-ENOMEM);
 	if (!conf)
@@ -84,7 +87,8 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 		sector_div(sectors, mddev->chunk_sectors);
 		rdev1->sectors = sectors * mddev->chunk_sectors;

-		blksize = max(blksize, queue_logical_block_size(
+		if (mddev_is_dm(mddev))
+			blksize = max(blksize, queue_logical_block_size(
 				rdev1->bdev->bd_disk->queue));

 		rdev_for_each(rdev2, mddev) {
@@ -383,6 +387,7 @@ static int raid0_set_limits(struct mddev *mddev)
 	lim.max_hw_sectors = mddev->chunk_sectors;
 	lim.max_write_zeroes_sectors = mddev->chunk_sectors;
 	lim.max_hw_wzeroes_unmap_sectors = mddev->chunk_sectors;
+	lim.logical_block_size = mddev->logical_block_size;
 	lim.io_min = mddev->chunk_sectors << 9;
 	lim.io_opt = lim.io_min * mddev->raid_disks;
 	lim.chunk_sectors = mddev->chunk_sectors;
@@ -405,6 +410,12 @@ static int raid0_run(struct mddev *mddev)
 	if (md_check_no_bitmap(mddev))
 		return -EINVAL;

+	if (!mddev_is_dm(mddev)) {
+		ret = raid0_set_limits(mddev);
+		if (ret)
+			return ret;
+	}
+
 	/* if private is not null, we are here after takeover */
 	if (mddev->private == NULL) {
 		ret = create_strip_zones(mddev, &conf);
@@ -413,11 +424,6 @@ static int raid0_run(struct mddev *mddev)
 		mddev->private = conf;
 	}
 	conf = mddev->private;
-	if (!mddev_is_dm(mddev)) {
-		ret = raid0_set_limits(mddev);
-		if (ret)
-			return ret;
-	}

 	/* calculate array device size */
 	md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 592a40233004..57d50465eed1 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -3213,6 +3213,7 @@ static int raid1_set_limits(struct mddev *mddev)
 	md_init_stacking_limits(&lim);
 	lim.max_write_zeroes_sectors = 0;
 	lim.max_hw_wzeroes_unmap_sectors = 0;
+	lim.logical_block_size = mddev->logical_block_size;
 	lim.features |= BLK_FEAT_ATOMIC_WRITES;
 	err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
 	if (err)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 14dcd5142eb4..84be4cc7e873 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4000,6 +4000,7 @@ static int raid10_set_queue_limits(struct mddev *mddev)
 	md_init_stacking_limits(&lim);
 	lim.max_write_zeroes_sectors = 0;
 	lim.max_hw_wzeroes_unmap_sectors = 0;
+	lim.logical_block_size = mddev->logical_block_size;
 	lim.io_min = mddev->chunk_sectors << 9;
 	lim.chunk_sectors = mddev->chunk_sectors;
 	lim.io_opt = lim.io_min * raid10_nr_stripes(conf);
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index ba768ca7f422..e29e69335c69 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -3104,7 +3104,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 		goto out_mempool;

 	spin_lock_init(&log->tree_lock);
-	INIT_RADIX_TREE(&log->big_stripe_tree, GFP_NOWAIT | __GFP_NOWARN);
+	INIT_RADIX_TREE(&log->big_stripe_tree, GFP_NOWAIT);

 	thread = md_register_thread(r5l_reclaim_thread, log->rdev->mddev,
 				    "reclaim");
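Each personality above now copies mddev->logical_block_size into its queue_limits. The underlying rule, visible in create_strip_zones(), is that a stacked device must advertise a logical block size at least as large as any member's, or an I/O sized for the array could straddle a member's atomic unit. A standalone sketch of that max-stacking rule, with made-up member sizes rather than values queried from real block devices:

/* Sketch of logical-block-size stacking across array members. */
#include <stdio.h>

static unsigned int stack_logical_block_size(const unsigned int *members,
					     int nr_members)
{
	unsigned int lbs = 512;	/* smallest size a member may report */
	int i;

	for (i = 0; i < nr_members; i++)
		if (members[i] > lbs)
			lbs = members[i];	/* take the maximum */
	return lbs;
}

int main(void)
{
	/* e.g. two 512-byte-sector disks and one 4K-native disk */
	unsigned int members[] = { 512, 512, 4096 };

	printf("array logical block size: %u\n",
	       stack_logical_block_size(members, 3));
	return 0;
}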
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 24b32a0c95b4..e57ce3295292 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4956,7 +4956,8 @@ static void handle_stripe(struct stripe_head *sh)
 		goto finish;

 	if (s.handle_bad_blocks ||
-	    test_bit(MD_SB_CHANGE_PENDING, &conf->mddev->sb_flags)) {
+	    (md_is_rdwr(conf->mddev) &&
+	     test_bit(MD_SB_CHANGE_PENDING, &conf->mddev->sb_flags))) {
 		set_bit(STRIPE_HANDLE, &sh->state);
 		goto finish;
 	}
@@ -6768,7 +6769,8 @@ static void raid5d(struct md_thread *thread)
 		int batch_size, released;
 		unsigned int offset;

-		if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
+		if (md_is_rdwr(mddev) &&
+		    test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
 			break;

 		released = release_stripe_list(conf, conf->temp_inactive_list);
@@ -7745,6 +7747,7 @@ static int raid5_set_limits(struct mddev *mddev)
 	stripe = roundup_pow_of_two(data_disks * (mddev->chunk_sectors << 9));

 	md_init_stacking_limits(&lim);
+	lim.logical_block_size = mddev->logical_block_size;
 	lim.io_min = mddev->chunk_sectors << 9;
 	lim.io_opt = lim.io_min * (conf->raid_disks - conf->max_degraded);
 	lim.features |= BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE;
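The two raid5.c hunks above make the "superblock change pending" stall conditional on md_is_rdwr(): a read-only array can never write its superblock, so waiting for MD_SB_CHANGE_PENDING to clear would block stripe handling forever. A compact sketch of the guard; fake_array and its fields are illustrative stand-ins for the kernel's types:

/* Sketch of the deadlock-avoiding guard added in handle_stripe()/raid5d(). */
#include <stdbool.h>
#include <stdio.h>

struct fake_array {
	bool read_write;
	bool sb_change_pending;
};

/* Only defer stripe handling when a superblock write can actually happen. */
static bool must_wait_for_super(const struct fake_array *a)
{
	return a->read_write && a->sb_change_pending;
}

int main(void)
{
	struct fake_array ro = { .read_write = false, .sb_change_pending = true };
	struct fake_array rw = { .read_write = true,  .sb_change_pending = true };

	printf("read-only array waits: %s\n", must_wait_for_super(&ro) ? "yes" : "no");
	printf("read-write array waits: %s\n", must_wait_for_super(&rw) ? "yes" : "no");
	return 0;
}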