21 files changed, 2141 insertions, 134 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 319cfbeb0738..fa307f93fa2e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2237,6 +2237,17 @@ config TEST_DIV64
 
 	  If unsure, say N.
 
+config TEST_IOV_ITER
+	tristate "Test iov_iter operation" if !KUNIT_ALL_TESTS
+	depends on KUNIT
+	default KUNIT_ALL_TESTS
+	help
+	  Enable this to turn on testing of the operation of the I/O iterator
+	  (iov_iter). This test is executed only once during system boot (so
+	  affects only boot time), or at module load time.
+
+	  If unsure, say N.
+
 config KPROBES_SANITY_TEST
 	tristate "Kprobes sanity tests" if !KUNIT_ALL_TESTS
 	depends on DEBUG_KERNEL
diff --git a/lib/Makefile b/lib/Makefile
index 2e08397f6210..740109b6e2c8 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -64,6 +64,7 @@ obj-$(CONFIG_TEST_BITOPS) += test_bitops.o
 CFLAGS_test_bitops.o += -Werror
 obj-$(CONFIG_CPUMASK_KUNIT_TEST) += cpumask_kunit.o
 obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
+obj-$(CONFIG_TEST_IOV_ITER) += kunit_iov_iter.o
 obj-$(CONFIG_HASH_KUNIT_TEST) += test_hash.o
 obj-$(CONFIG_TEST_IDA) += test_ida.o
 obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o
diff --git a/lib/argv_split.c b/lib/argv_split.c
index 1a19a0a93dc1..e28db8e3b58c 100644
--- a/lib/argv_split.c
+++ b/lib/argv_split.c
@@ -28,7 +28,7 @@ static int count_argc(const char *str)
 
 /**
  * argv_free - free an argv
- * @argv - the argument vector to be freed
+ * @argv: the argument vector to be freed
  *
  * Frees an argv and the strings it points to.
  */
@@ -46,7 +46,7 @@ EXPORT_SYMBOL(argv_free);
  * @str: the string to be split
  * @argcp: returned argument count
  *
- * Returns an array of pointers to strings which are split out from
+ * Returns: an array of pointers to strings which are split out from
  * @str.  This is performed by strictly splitting on white-space; no
  * quote processing is performed.  Multiple whitespace characters are
  * considered to be a single argument separator.  The returned array
diff --git a/lib/idr.c b/lib/idr.c
index 7ecdfdb5309e..13f2758c2377 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(idr_alloc);
  * @end: The maximum ID (exclusive).
  * @gfp: Memory allocation flags.
  *
- * Allocates an unused ID in the range specified by @nextid and @end.  If
+ * Allocates an unused ID in the range specified by @start and @end.  If
  * @end is <= 0, it is treated as one larger than %INT_MAX.  This allows
  * callers to use @start + N as @end as long as N is within integer range.
  * The search for an unused ID will start at the last ID allocated and will
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index b31597b0ca20..27234a820eeb 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1654,14 +1654,14 @@ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
 					   size_t *offset0)
 {
 	struct page **p, *page;
-	size_t skip = i->iov_offset, offset;
+	size_t skip = i->iov_offset, offset, size;
 	int k;
 
 	for (;;) {
 		if (i->nr_segs == 0)
 			return 0;
-		maxsize = min(maxsize, i->bvec->bv_len - skip);
-		if (maxsize)
+		size = min(maxsize, i->bvec->bv_len - skip);
+		if (size)
 			break;
 		i->iov_offset = 0;
 		i->nr_segs--;
@@ -1674,16 +1674,16 @@ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
 	offset = skip % PAGE_SIZE;
 	*offset0 = offset;
 
-	maxpages = want_pages_array(pages, maxsize, offset, maxpages);
+	maxpages = want_pages_array(pages, size, offset, maxpages);
 	if (!maxpages)
 		return -ENOMEM;
 	p = *pages;
 	for (k = 0; k < maxpages; k++)
 		p[k] = page + k;
 
-	maxsize = min_t(size_t, maxsize, maxpages * PAGE_SIZE - offset);
-	iov_iter_advance(i, maxsize);
-	return maxsize;
+	size = min_t(size_t, size, maxpages * PAGE_SIZE - offset);
+	iov_iter_advance(i, size);
+	return size;
 }
 
 /*
@@ -1698,14 +1698,14 @@ static ssize_t iov_iter_extract_kvec_pages(struct iov_iter *i,
 {
 	struct page **p, *page;
 	const void *kaddr;
-	size_t skip = i->iov_offset, offset, len;
+	size_t skip = i->iov_offset, offset, len, size;
 	int k;
 
 	for (;;) {
 		if (i->nr_segs == 0)
 			return 0;
-		maxsize = min(maxsize, i->kvec->iov_len - skip);
-		if (maxsize)
+		size = min(maxsize, i->kvec->iov_len - skip);
+		if (size)
 			break;
 		i->iov_offset = 0;
 		i->nr_segs--;
@@ -1717,13 +1717,13 @@ static ssize_t iov_iter_extract_kvec_pages(struct iov_iter *i,
 	offset = (unsigned long)kaddr & ~PAGE_MASK;
 	*offset0 = offset;
 
-	maxpages = want_pages_array(pages, maxsize, offset, maxpages);
+	maxpages = want_pages_array(pages, size, offset, maxpages);
 	if (!maxpages)
 		return -ENOMEM;
 	p = *pages;
 
 	kaddr -= offset;
-	len = offset + maxsize;
+	len = offset + size;
 	for (k = 0; k < maxpages; k++) {
 		size_t seg = min_t(size_t, len, PAGE_SIZE);
 
@@ -1737,9 +1737,9 @@ static ssize_t iov_iter_extract_kvec_pages(struct iov_iter *i,
 		kaddr += PAGE_SIZE;
 	}
 
-	maxsize = min_t(size_t, maxsize, maxpages * PAGE_SIZE - offset);
-	iov_iter_advance(i, maxsize);
-	return maxsize;
+	size = min_t(size_t, size, maxpages * PAGE_SIZE - offset);
+	iov_iter_advance(i, size);
+	return size;
 }
 
 /*
diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c
index 5181aa2e760b..a6348489d45f 100644
--- a/lib/kunit/executor.c
+++ b/lib/kunit/executor.c
@@ -65,7 +65,7 @@ struct kunit_glob_filter {
 };
 
 /* Split "suite_glob.test_glob" into two. Assumes filter_glob is not empty. */
-static void kunit_parse_glob_filter(struct kunit_glob_filter *parsed,
+static int kunit_parse_glob_filter(struct kunit_glob_filter *parsed,
 				    const char *filter_glob)
 {
 	const int len = strlen(filter_glob);
@@ -73,16 +73,28 @@ static void kunit_parse_glob_filter(struct kunit_glob_filter *parsed,
 
 	if (!period) {
 		parsed->suite_glob = kzalloc(len + 1, GFP_KERNEL);
+		if (!parsed->suite_glob)
+			return -ENOMEM;
+
 		parsed->test_glob = NULL;
 		strcpy(parsed->suite_glob, filter_glob);
-		return;
+		return 0;
 	}
 
 	parsed->suite_glob = kzalloc(period - filter_glob + 1, GFP_KERNEL);
+	if (!parsed->suite_glob)
+		return -ENOMEM;
+
 	parsed->test_glob = kzalloc(len - (period - filter_glob) + 1, GFP_KERNEL);
+	if (!parsed->test_glob) {
+		kfree(parsed->suite_glob);
+		return -ENOMEM;
+	}
 
 	strncpy(parsed->suite_glob, filter_glob, period - filter_glob);
 	strncpy(parsed->test_glob, period + 1, len - (period - filter_glob));
+
+	return 0;
 }
 
 /* Create a copy of suite with only tests that match test_glob. */
@@ -152,21 +164,24 @@ kunit_filter_suites(const struct kunit_suite_set *suite_set,
 	}
 	copy_start = copy;
 
-	if (filter_glob)
-		kunit_parse_glob_filter(&parsed_glob, filter_glob);
+	if (filter_glob) {
+		*err = kunit_parse_glob_filter(&parsed_glob, filter_glob);
+		if (*err)
+			goto free_copy;
+	}
 
 	/* Parse attribute filters */
 	if (filters) {
 		filter_count = kunit_get_filter_count(filters);
 		parsed_filters = kcalloc(filter_count, sizeof(*parsed_filters), GFP_KERNEL);
 		if (!parsed_filters) {
-			kfree(copy);
-			return filtered;
+			*err = -ENOMEM;
+			goto free_parsed_glob;
 		}
 		for (j = 0; j < filter_count; j++)
 			parsed_filters[j] = kunit_next_attr_filter(&filters, err);
 		if (*err)
-			goto err;
+			goto free_parsed_filters;
 	}
 
 	for (i = 0; &suite_set->start[i] != suite_set->end; i++) {
@@ -178,7 +193,7 @@ kunit_filter_suites(const struct kunit_suite_set *suite_set,
 					parsed_glob.test_glob);
 			if (IS_ERR(filtered_suite)) {
 				*err = PTR_ERR(filtered_suite);
-				goto err;
+				goto free_parsed_filters;
 			}
 		}
 		if (filter_count > 0 && parsed_filters != NULL) {
@@ -195,10 +210,11 @@ kunit_filter_suites(const struct kunit_suite_set *suite_set,
 				filtered_suite = new_filtered_suite;
 
 				if (*err)
-					goto err;
+					goto free_parsed_filters;
+
 				if (IS_ERR(filtered_suite)) {
 					*err = PTR_ERR(filtered_suite);
-					goto err;
+					goto free_parsed_filters;
 				}
 				if (!filtered_suite)
 					break;
@@ -213,17 +229,19 @@ kunit_filter_suites(const struct kunit_suite_set *suite_set,
 	filtered.start = copy_start;
 	filtered.end = copy;
 
-err:
-	if (*err)
-		kfree(copy);
+free_parsed_filters:
+	if (filter_count)
+		kfree(parsed_filters);
 
+free_parsed_glob:
 	if (filter_glob) {
 		kfree(parsed_glob.suite_glob);
 		kfree(parsed_glob.test_glob);
 	}
 
-	if (filter_count)
-		kfree(parsed_filters);
+free_copy:
+	if (*err)
+		kfree(copy);
 
 	return filtered;
 }
diff --git a/lib/kunit/executor_test.c b/lib/kunit/executor_test.c
index 4084071d0eb5..b4f6f96b2844 100644
--- a/lib/kunit/executor_test.c
+++ b/lib/kunit/executor_test.c
@@ -119,7 +119,7 @@ static void parse_filter_attr_test(struct kunit *test)
 {
 	int j, filter_count;
 	struct kunit_attr_filter *parsed_filters;
-	char *filters = "speed>slow, module!=example";
+	char filters[] = "speed>slow, module!=example", *filter = filters;
 	int err = 0;
 
 	filter_count = kunit_get_filter_count(filters);
@@ -128,7 +128,7 @@ static void parse_filter_attr_test(struct kunit *test)
 	parsed_filters = kunit_kcalloc(test, filter_count, sizeof(*parsed_filters),
 			GFP_KERNEL);
 	for (j = 0; j < filter_count; j++) {
-		parsed_filters[j] = kunit_next_attr_filter(&filters, &err);
+		parsed_filters[j] = kunit_next_attr_filter(&filter, &err);
 		KUNIT_ASSERT_EQ_MSG(test, err, 0, "failed to parse filter '%s'", filters[j]);
 	}
 
@@ -154,6 +154,7 @@ static void filter_attr_test(struct kunit *test)
 		.start = subsuite, .end = &subsuite[2],
 	};
 	struct kunit_suite_set got;
+	char filter[] = "speed>slow";
 	int err = 0;
 
 	subsuite[0] = alloc_fake_suite(test, "normal_suite", dummy_attr_test_cases);
@@ -168,7 +169,7 @@ static void filter_attr_test(struct kunit *test)
 	 * attribute is unset and thus, the filtering is based on the parent attribute
 	 * of slow.
 	 */
-	got = kunit_filter_suites(&suite_set, NULL, "speed>slow", NULL, &err);
+	got = kunit_filter_suites(&suite_set, NULL, filter, NULL, &err);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, got.start);
 	KUNIT_ASSERT_EQ(test, err, 0);
 	kfree_at_end(test, got.start);
@@ -191,12 +192,13 @@ static void filter_attr_empty_test(struct kunit *test)
 		.start = subsuite, .end = &subsuite[2],
 	};
 	struct kunit_suite_set got;
+	char filter[] = "module!=dummy";
 	int err = 0;
 
 	subsuite[0] = alloc_fake_suite(test, "suite1", dummy_attr_test_cases);
 	subsuite[1] = alloc_fake_suite(test, "suite2", dummy_attr_test_cases);
 
-	got = kunit_filter_suites(&suite_set, NULL, "module!=dummy", NULL, &err);
+	got = kunit_filter_suites(&suite_set, NULL, filter, NULL, &err);
 	KUNIT_ASSERT_EQ(test, err, 0);
 	kfree_at_end(test, got.start); /* just in case */
 
@@ -211,12 +213,13 @@ static void filter_attr_skip_test(struct kunit *test)
 		.start = subsuite, .end = &subsuite[1],
 	};
 	struct kunit_suite_set got;
+	char filter[] = "speed>slow";
 	int err = 0;
 
 	subsuite[0] = alloc_fake_suite(test, "suite", dummy_attr_test_cases);
 
 	/* Want: suite(slow, normal), NULL -> suite(slow with SKIP, normal), NULL */
-	got = kunit_filter_suites(&suite_set, NULL, "speed>slow", "skip", &err);
+	got = kunit_filter_suites(&suite_set, NULL, filter, "skip", &err);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, got.start);
 	KUNIT_ASSERT_EQ(test, err, 0);
 	kfree_at_end(test, got.start);
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index 49698a168437..421f13981412 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -784,12 +784,13 @@ static int kunit_module_notify(struct notifier_block *nb, unsigned long val,
 
 	switch (val) {
 	case MODULE_STATE_LIVE:
-		kunit_module_init(mod);
 		break;
 	case MODULE_STATE_GOING:
 		kunit_module_exit(mod);
 		break;
 	case MODULE_STATE_COMING:
+		kunit_module_init(mod);
+		break;
 	case MODULE_STATE_UNFORMED:
 		break;
 	}
diff --git a/lib/kunit_iov_iter.c b/lib/kunit_iov_iter.c
new file mode 100644
index 000000000000..859b67c4d697
--- /dev/null
+++ b/lib/kunit_iov_iter.c
@@ -0,0 +1,777 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* I/O iterator tests.  This can only test kernel-backed iterator types.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/uio.h>
+#include <linux/bvec.h>
+#include <kunit/test.h>
+
+MODULE_DESCRIPTION("iov_iter testing");
+MODULE_AUTHOR("David Howells <dhowells@redhat.com>");
+MODULE_LICENSE("GPL");
+
+struct kvec_test_range {
+	int	from, to;
+};
+
+static const struct kvec_test_range kvec_test_ranges[] = {
+	{ 0x00002, 0x00002 },
+	{ 0x00027, 0x03000 },
+	{ 0x05193, 0x18794 },
+	{ 0x20000, 0x20000 },
+	{ 0x20000, 0x24000 },
+	{ 0x24000, 0x27001 },
+	{ 0x29000, 0xffffb },
+	{ 0xffffd, 0xffffe },
+	{ -1 }
+};
+
+static inline u8 pattern(unsigned long x)
+{
+	return x & 0xff;
+}
+
+static void iov_kunit_unmap(void *data)
+{
+	vunmap(data);
+}
+
+static void *__init iov_kunit_create_buffer(struct kunit *test,
+					    struct page ***ppages,
+					    size_t npages)
+{
+	struct page **pages;
+	unsigned long got;
+	void *buffer;
+
+	pages = kunit_kcalloc(test, npages, sizeof(struct page *), GFP_KERNEL);
+        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pages);
+	*ppages = pages;
+
+	got = alloc_pages_bulk_array(GFP_KERNEL, npages, pages);
+	if (got != npages) {
+		release_pages(pages, got);
+		KUNIT_ASSERT_EQ(test, got, npages);
+	}
+
+	buffer = vmap(pages, npages, VM_MAP | VM_MAP_PUT_PAGES, PAGE_KERNEL);
+        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buffer);
+
+	kunit_add_action_or_reset(test, iov_kunit_unmap, buffer);
+	return buffer;
+}
+
+static void __init iov_kunit_load_kvec(struct kunit *test,
+				       struct iov_iter *iter, int dir,
+				       struct kvec *kvec, unsigned int kvmax,
+				       void *buffer, size_t bufsize,
+				       const struct kvec_test_range *pr)
+{
+	size_t size = 0;
+	int i;
+
+	for (i = 0; i < kvmax; i++, pr++) {
+		if (pr->from < 0)
+			break;
+		KUNIT_ASSERT_GE(test, pr->to, pr->from);
+		KUNIT_ASSERT_LE(test, pr->to, bufsize);
+		kvec[i].iov_base = buffer + pr->from;
+		kvec[i].iov_len = pr->to - pr->from;
+		size += pr->to - pr->from;
+	}
+	KUNIT_ASSERT_LE(test, size, bufsize);
+
+	iov_iter_kvec(iter, dir, kvec, i, size);
+}
+
+/*
+ * Test copying to a ITER_KVEC-type iterator.
+ */
+static void __init iov_kunit_copy_to_kvec(struct kunit *test)
+{
+	const struct kvec_test_range *pr;
+	struct iov_iter iter;
+	struct page **spages, **bpages;
+	struct kvec kvec[8];
+	u8 *scratch, *buffer;
+	size_t bufsize, npages, size, copied;
+	int i, patt;
+
+	bufsize = 0x100000;
+	npages = bufsize / PAGE_SIZE;
+
+	scratch = iov_kunit_create_buffer(test, &spages, npages);
+	for (i = 0; i < bufsize; i++)
+		scratch[i] = pattern(i);
+
+	buffer = iov_kunit_create_buffer(test, &bpages, npages);
+	memset(buffer, 0, bufsize);
+
+	iov_kunit_load_kvec(test, &iter, READ, kvec, ARRAY_SIZE(kvec),
+			    buffer, bufsize, kvec_test_ranges);
+	size = iter.count;
+
+	copied = copy_to_iter(scratch, size, &iter);
+
+	KUNIT_EXPECT_EQ(test, copied, size);
+	KUNIT_EXPECT_EQ(test, iter.count, 0);
+	KUNIT_EXPECT_EQ(test, iter.nr_segs, 0);
+
+	/* Build the expected image in the scratch buffer. */
+	patt = 0;
+	memset(scratch, 0, bufsize);
+	for (pr = kvec_test_ranges; pr->from >= 0; pr++)
+		for (i = pr->from; i < pr->to; i++)
+			scratch[i] = pattern(patt++);
+
+	/* Compare the images */
+	for (i = 0; i < bufsize; i++) {
+		KUNIT_EXPECT_EQ_MSG(test, buffer[i], scratch[i], "at i=%x", i);
+		if (buffer[i] != scratch[i])
+			return;
+	}
+
+	KUNIT_SUCCEED();
+}
+
+/*
+ * Test copying from a ITER_KVEC-type iterator.
+ */
+static void __init iov_kunit_copy_from_kvec(struct kunit *test)
+{
+	const struct kvec_test_range *pr;
+	struct iov_iter iter;
+	struct page **spages, **bpages;
+	struct kvec kvec[8];
+	u8 *scratch, *buffer;
+	size_t bufsize, npages, size, copied;
+	int i, j;
+
+	bufsize = 0x100000;
+	npages = bufsize / PAGE_SIZE;
+
+	buffer = iov_kunit_create_buffer(test, &bpages, npages);
+	for (i = 0; i < bufsize; i++)
+		buffer[i] = pattern(i);
+
+	scratch = iov_kunit_create_buffer(test, &spages, npages);
+	memset(scratch, 0, bufsize);
+
+	iov_kunit_load_kvec(test, &iter, WRITE, kvec, ARRAY_SIZE(kvec),
+			    buffer, bufsize, kvec_test_ranges);
+	size = min(iter.count, bufsize);
+
+	copied = copy_from_iter(scratch, size, &iter);
+
+	KUNIT_EXPECT_EQ(test, copied, size);
+	KUNIT_EXPECT_EQ(test, iter.count, 0);
+	KUNIT_EXPECT_EQ(test, iter.nr_segs, 0);
+
+	/* Build the expected image in the main buffer. */
+	i = 0;
+	memset(buffer, 0, bufsize);
+	for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
+		for (j = pr->from; j < pr->to; j++) {
+			buffer[i++] = pattern(j);
+			if (i >= bufsize)
+				goto stop;
+		}
+	}
+stop:
+
+	/* Compare the images */
+	for (i = 0; i < bufsize; i++) {
+		KUNIT_EXPECT_EQ_MSG(test, scratch[i], buffer[i], "at i=%x", i);
+		if (scratch[i] != buffer[i])
+			return;
+	}
+
+	KUNIT_SUCCEED();
+}
+
+struct bvec_test_range {
+	int	page, from, to;
+};
+
+static const struct bvec_test_range bvec_test_ranges[] = {
+	{ 0, 0x0002, 0x0002 },
+	{ 1, 0x0027, 0x0893 },
+	{ 2, 0x0193, 0x0794 },
+	{ 3, 0x0000, 0x1000 },
+	{ 4, 0x0000, 0x1000 },
+	{ 5, 0x0000, 0x1000 },
+	{ 6, 0x0000, 0x0ffb },
+	{ 6, 0x0ffd, 0x0ffe },
+	{ -1, -1, -1 }
+};
+
+static void __init iov_kunit_load_bvec(struct kunit *test,
+				       struct iov_iter *iter, int dir,
+				       struct bio_vec *bvec, unsigned int bvmax,
+				       struct page **pages, size_t npages,
+				       size_t bufsize,
+				       const struct bvec_test_range *pr)
+{
+	struct page *can_merge = NULL, *page;
+	size_t size = 0;
+	int i;
+
+	for (i = 0; i < bvmax; i++, pr++) {
+		if (pr->from < 0)
+			break;
+		KUNIT_ASSERT_LT(test, pr->page, npages);
+		KUNIT_ASSERT_LT(test, pr->page * PAGE_SIZE, bufsize);
+		KUNIT_ASSERT_GE(test, pr->from, 0);
+		KUNIT_ASSERT_GE(test, pr->to, pr->from);
+		KUNIT_ASSERT_LE(test, pr->to, PAGE_SIZE);
+
+		page = pages[pr->page];
+		if (pr->from == 0 && pr->from != pr->to && page == can_merge) {
+			i--;
+			bvec[i].bv_len += pr->to;
+		} else {
+			bvec_set_page(&bvec[i], page, pr->to - pr->from, pr->from);
+		}
+
+		size += pr->to - pr->from;
+		if ((pr->to & ~PAGE_MASK) == 0)
+			can_merge = page + pr->to / PAGE_SIZE;
+		else
+			can_merge = NULL;
+	}
+
+	iov_iter_bvec(iter, dir, bvec, i, size);
+}
+
+/*
+ * Test copying to a ITER_BVEC-type iterator.
+ */
+static void __init iov_kunit_copy_to_bvec(struct kunit *test)
+{
+	const struct bvec_test_range *pr;
+	struct iov_iter iter;
+	struct bio_vec bvec[8];
+	struct page **spages, **bpages;
+	u8 *scratch, *buffer;
+	size_t bufsize, npages, size, copied;
+	int i, b, patt;
+
+	bufsize = 0x100000;
+	npages = bufsize / PAGE_SIZE;
+
+	scratch = iov_kunit_create_buffer(test, &spages, npages);
+	for (i = 0; i < bufsize; i++)
+		scratch[i] = pattern(i);
+
+	buffer = iov_kunit_create_buffer(test, &bpages, npages);
+	memset(buffer, 0, bufsize);
+
+	iov_kunit_load_bvec(test, &iter, READ, bvec, ARRAY_SIZE(bvec),
+			    bpages, npages, bufsize, bvec_test_ranges);
+	size = iter.count;
+
+	copied = copy_to_iter(scratch, size, &iter);
+
+	KUNIT_EXPECT_EQ(test, copied, size);
+	KUNIT_EXPECT_EQ(test, iter.count, 0);
+	KUNIT_EXPECT_EQ(test, iter.nr_segs, 0);
+
+	/* Build the expected image in the scratch buffer. */
+	b = 0;
+	patt = 0;
+	memset(scratch, 0, bufsize);
+	for (pr = bvec_test_ranges; pr->from >= 0; pr++, b++) {
+		u8 *p = scratch + pr->page * PAGE_SIZE;
+
+		for (i = pr->from; i < pr->to; i++)
+			p[i] = pattern(patt++);
+	}
+
+	/* Compare the images */
+	for (i = 0; i < bufsize; i++) {
+		KUNIT_EXPECT_EQ_MSG(test, buffer[i], scratch[i], "at i=%x", i);
+		if (buffer[i] != scratch[i])
+			return;
+	}
+
+	KUNIT_SUCCEED();
+}
+
+/*
+ * Test copying from a ITER_BVEC-type iterator.
+ */
+static void __init iov_kunit_copy_from_bvec(struct kunit *test)
+{
+	const struct bvec_test_range *pr;
+	struct iov_iter iter;
+	struct bio_vec bvec[8];
+	struct page **spages, **bpages;
+	u8 *scratch, *buffer;
+	size_t bufsize, npages, size, copied;
+	int i, j;
+
+	bufsize = 0x100000;
+	npages = bufsize / PAGE_SIZE;
+
+	buffer = iov_kunit_create_buffer(test, &bpages, npages);
+	for (i = 0; i < bufsize; i++)
+		buffer[i] = pattern(i);
+
+	scratch = iov_kunit_create_buffer(test, &spages, npages);
+	memset(scratch, 0, bufsize);
+
+	iov_kunit_load_bvec(test, &iter, WRITE, bvec, ARRAY_SIZE(bvec),
+			    bpages, npages, bufsize, bvec_test_ranges);
+	size = iter.count;
+
+	copied = copy_from_iter(scratch, size, &iter);
+
+	KUNIT_EXPECT_EQ(test, copied, size);
+	KUNIT_EXPECT_EQ(test, iter.count, 0);
+	KUNIT_EXPECT_EQ(test, iter.nr_segs, 0);
+
+	/* Build the expected image in the main buffer. */
+	i = 0;
+	memset(buffer, 0, bufsize);
+	for (pr = bvec_test_ranges; pr->from >= 0; pr++) {
+		size_t patt = pr->page * PAGE_SIZE;
+
+		for (j = pr->from; j < pr->to; j++) {
+			buffer[i++] = pattern(patt + j);
+			if (i >= bufsize)
+				goto stop;
+		}
+	}
+stop:
+
+	/* Compare the images */
+	for (i = 0; i < bufsize; i++) {
+		KUNIT_EXPECT_EQ_MSG(test, scratch[i], buffer[i], "at i=%x", i);
+		if (scratch[i] != buffer[i])
+			return;
+	}
+
+	KUNIT_SUCCEED();
+}
+
+static void iov_kunit_destroy_xarray(void *data)
+{
+	struct xarray *xarray = data;
+
+	xa_destroy(xarray);
+	kfree(xarray);
+}
+
+static void __init iov_kunit_load_xarray(struct kunit *test,
+					 struct iov_iter *iter, int dir,
+					 struct xarray *xarray,
+					 struct page **pages, size_t npages)
+{
+	size_t size = 0;
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		void *x = xa_store(xarray, i, pages[i], GFP_KERNEL);
+
+		KUNIT_ASSERT_FALSE(test, xa_is_err(x));
+		size += PAGE_SIZE;
+	}
+	iov_iter_xarray(iter, dir, xarray, 0, size);
+}
+
+static struct xarray *iov_kunit_create_xarray(struct kunit *test)
+{
+	struct xarray *xarray;
+
+	xarray = kzalloc(sizeof(struct xarray), GFP_KERNEL);
+	xa_init(xarray);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xarray);
+	kunit_add_action_or_reset(test, iov_kunit_destroy_xarray, xarray);
+	return xarray;
+}
+
+/*
+ * Test copying to a ITER_XARRAY-type iterator.
+ */
+static void __init iov_kunit_copy_to_xarray(struct kunit *test)
+{
+	const struct kvec_test_range *pr;
+	struct iov_iter iter;
+	struct xarray *xarray;
+	struct page **spages, **bpages;
+	u8 *scratch, *buffer;
+	size_t bufsize, npages, size, copied;
+	int i, patt;
+
+	bufsize = 0x100000;
+	npages = bufsize / PAGE_SIZE;
+
+	xarray = iov_kunit_create_xarray(test);
+
+	scratch = iov_kunit_create_buffer(test, &spages, npages);
+	for (i = 0; i < bufsize; i++)
+		scratch[i] = pattern(i);
+
+	buffer = iov_kunit_create_buffer(test, &bpages, npages);
+	memset(buffer, 0, bufsize);
+
+	iov_kunit_load_xarray(test, &iter, READ, xarray, bpages, npages);
+
+	i = 0;
+	for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
+		size = pr->to - pr->from;
+		KUNIT_ASSERT_LE(test, pr->to, bufsize);
+
+		iov_iter_xarray(&iter, READ, xarray, pr->from, size);
+		copied = copy_to_iter(scratch + i, size, &iter);
+
+		KUNIT_EXPECT_EQ(test, copied, size);
+		KUNIT_EXPECT_EQ(test, iter.count, 0);
+		KUNIT_EXPECT_EQ(test, iter.iov_offset, size);
+		i += size;
+	}
+
+	/* Build the expected image in the scratch buffer. */
+	patt = 0;
+	memset(scratch, 0, bufsize);
+	for (pr = kvec_test_ranges; pr->from >= 0; pr++)
+		for (i = pr->from; i < pr->to; i++)
+			scratch[i] = pattern(patt++);
+
+	/* Compare the images */
+	for (i = 0; i < bufsize; i++) {
+		KUNIT_EXPECT_EQ_MSG(test, buffer[i], scratch[i], "at i=%x", i);
+		if (buffer[i] != scratch[i])
+			return;
+	}
+
+	KUNIT_SUCCEED();
+}
+
+/*
+ * Test copying from a ITER_XARRAY-type iterator.
+ */
+static void __init iov_kunit_copy_from_xarray(struct kunit *test)
+{
+	const struct kvec_test_range *pr;
+	struct iov_iter iter;
+	struct xarray *xarray;
+	struct page **spages, **bpages;
+	u8 *scratch, *buffer;
+	size_t bufsize, npages, size, copied;
+	int i, j;
+
+	bufsize = 0x100000;
+	npages = bufsize / PAGE_SIZE;
+
+	xarray = iov_kunit_create_xarray(test);
+
+	buffer = iov_kunit_create_buffer(test, &bpages, npages);
+	for (i = 0; i < bufsize; i++)
+		buffer[i] = pattern(i);
+
+	scratch = iov_kunit_create_buffer(test, &spages, npages);
+	memset(scratch, 0, bufsize);
+
+	iov_kunit_load_xarray(test, &iter, READ, xarray, bpages, npages);
+
+	i = 0;
+	for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
+		size = pr->to - pr->from;
+		KUNIT_ASSERT_LE(test, pr->to, bufsize);
+
+		iov_iter_xarray(&iter, WRITE, xarray, pr->from, size);
+		copied = copy_from_iter(scratch + i, size, &iter);
+
+		KUNIT_EXPECT_EQ(test, copied, size);
+		KUNIT_EXPECT_EQ(test, iter.count, 0);
+		KUNIT_EXPECT_EQ(test, iter.iov_offset, size);
+		i += size;
+	}
+
+	/* Build the expected image in the main buffer. */
+	i = 0;
+	memset(buffer, 0, bufsize);
+	for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
+		for (j = pr->from; j < pr->to; j++) {
+			buffer[i++] = pattern(j);
+			if (i >= bufsize)
+				goto stop;
+		}
+	}
+stop:
+
+	/* Compare the images */
+	for (i = 0; i < bufsize; i++) {
+		KUNIT_EXPECT_EQ_MSG(test, scratch[i], buffer[i], "at i=%x", i);
+		if (scratch[i] != buffer[i])
+			return;
+	}
+
+	KUNIT_SUCCEED();
+}
+
+/*
+ * Test the extraction of ITER_KVEC-type iterators.
+ */
+static void __init iov_kunit_extract_pages_kvec(struct kunit *test)
+{
+	const struct kvec_test_range *pr;
+	struct iov_iter iter;
+	struct page **bpages, *pagelist[8], **pages = pagelist;
+	struct kvec kvec[8];
+	u8 *buffer;
+	ssize_t len;
+	size_t bufsize, size = 0, npages;
+	int i, from;
+
+	bufsize = 0x100000;
+	npages = bufsize / PAGE_SIZE;
+
+	buffer = iov_kunit_create_buffer(test, &bpages, npages);
+
+	iov_kunit_load_kvec(test, &iter, READ, kvec, ARRAY_SIZE(kvec),
+			    buffer, bufsize, kvec_test_ranges);
+	size = iter.count;
+
+	pr = kvec_test_ranges;
+	from = pr->from;
+	do {
+		size_t offset0 = LONG_MAX;
+
+		for (i = 0; i < ARRAY_SIZE(pagelist); i++)
+			pagelist[i] = (void *)(unsigned long)0xaa55aa55aa55aa55ULL;
+
+		len = iov_iter_extract_pages(&iter, &pages, 100 * 1024,
+					     ARRAY_SIZE(pagelist), 0, &offset0);
+		KUNIT_EXPECT_GE(test, len, 0);
+		if (len < 0)
+			break;
+		KUNIT_EXPECT_GE(test, (ssize_t)offset0, 0);
+		KUNIT_EXPECT_LT(test, offset0, PAGE_SIZE);
+		KUNIT_EXPECT_LE(test, len, size);
+		KUNIT_EXPECT_EQ(test, iter.count, size - len);
+		size -= len;
+
+		if (len == 0)
+			break;
+
+		for (i = 0; i < ARRAY_SIZE(pagelist); i++) {
+			struct page *p;
+			ssize_t part = min_t(ssize_t, len, PAGE_SIZE - offset0);
+			int ix;
+
+			KUNIT_ASSERT_GE(test, part, 0);
+			while (from == pr->to) {
+				pr++;
+				from = pr->from;
+				if (from < 0)
+					goto stop;
+			}
+			ix = from / PAGE_SIZE;
+			KUNIT_ASSERT_LT(test, ix, npages);
+			p = bpages[ix];
+			KUNIT_EXPECT_PTR_EQ(test, pagelist[i], p);
+			KUNIT_EXPECT_EQ(test, offset0, from % PAGE_SIZE);
+			from += part;
+			len -= part;
+			KUNIT_ASSERT_GE(test, len, 0);
+			if (len == 0)
+				break;
+			offset0 = 0;
+		}
+
+		if (test->status == KUNIT_FAILURE)
+			break;
+	} while (iov_iter_count(&iter) > 0);
+
+stop:
+	KUNIT_EXPECT_EQ(test, size, 0);
+	KUNIT_EXPECT_EQ(test, iter.count, 0);
+	KUNIT_SUCCEED();
+}
+
+/*
+ * Test the extraction of ITER_BVEC-type iterators.
+ */
+static void __init iov_kunit_extract_pages_bvec(struct kunit *test)
+{
+	const struct bvec_test_range *pr;
+	struct iov_iter iter;
+	struct page **bpages, *pagelist[8], **pages = pagelist;
+	struct bio_vec bvec[8];
+	ssize_t len;
+	size_t bufsize, size = 0, npages;
+	int i, from;
+
+	bufsize = 0x100000;
+	npages = bufsize / PAGE_SIZE;
+
+	iov_kunit_create_buffer(test, &bpages, npages);
+	iov_kunit_load_bvec(test, &iter, READ, bvec, ARRAY_SIZE(bvec),
+			    bpages, npages, bufsize, bvec_test_ranges);
+	size = iter.count;
+
+	pr = bvec_test_ranges;
+	from = pr->from;
+	do {
+		size_t offset0 = LONG_MAX;
+
+		for (i = 0; i < ARRAY_SIZE(pagelist); i++)
+			pagelist[i] = (void *)(unsigned long)0xaa55aa55aa55aa55ULL;
+
+		len = iov_iter_extract_pages(&iter, &pages, 100 * 1024,
+					     ARRAY_SIZE(pagelist), 0, &offset0);
+		KUNIT_EXPECT_GE(test, len, 0);
+		if (len < 0)
+			break;
+		KUNIT_EXPECT_GE(test, (ssize_t)offset0, 0);
+		KUNIT_EXPECT_LT(test, offset0, PAGE_SIZE);
+		KUNIT_EXPECT_LE(test, len, size);
+		KUNIT_EXPECT_EQ(test, iter.count, size - len);
+		size -= len;
+
+		if (len == 0)
+			break;
+
+		for (i = 0; i < ARRAY_SIZE(pagelist); i++) {
+			struct page *p;
+			ssize_t part = min_t(ssize_t, len, PAGE_SIZE - offset0);
+			int ix;
+
+			KUNIT_ASSERT_GE(test, part, 0);
+			while (from == pr->to) {
+				pr++;
+				from = pr->from;
+				if (from < 0)
+					goto stop;
+			}
+			ix = pr->page + from / PAGE_SIZE;
+			KUNIT_ASSERT_LT(test, ix, npages);
+			p = bpages[ix];
+			KUNIT_EXPECT_PTR_EQ(test, pagelist[i], p);
+			KUNIT_EXPECT_EQ(test, offset0, from % PAGE_SIZE);
+			from += part;
+			len -= part;
+			KUNIT_ASSERT_GE(test, len, 0);
+			if (len == 0)
+				break;
+			offset0 = 0;
+		}
+
+		if (test->status == KUNIT_FAILURE)
+			break;
+	} while (iov_iter_count(&iter) > 0);
+
+stop:
+	KUNIT_EXPECT_EQ(test, size, 0);
+	KUNIT_EXPECT_EQ(test, iter.count, 0);
+	KUNIT_SUCCEED();
+}
+
+/*
+ * Test the extraction of ITER_XARRAY-type iterators.
+ */
+static void __init iov_kunit_extract_pages_xarray(struct kunit *test)
+{
+	const struct kvec_test_range *pr;
+	struct iov_iter iter;
+	struct xarray *xarray;
+	struct page **bpages, *pagelist[8], **pages = pagelist;
+	ssize_t len;
+	size_t bufsize, size = 0, npages;
+	int i, from;
+
+	bufsize = 0x100000;
+	npages = bufsize / PAGE_SIZE;
+
+	xarray = iov_kunit_create_xarray(test);
+
+	iov_kunit_create_buffer(test, &bpages, npages);
+	iov_kunit_load_xarray(test, &iter, READ, xarray, bpages, npages);
+
+	for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
+		from = pr->from;
+		size = pr->to - from;
+		KUNIT_ASSERT_LE(test, pr->to, bufsize);
+
+		iov_iter_xarray(&iter, WRITE, xarray, from, size);
+
+		do {
+			size_t offset0 = LONG_MAX;
+
+			for (i = 0; i < ARRAY_SIZE(pagelist); i++)
+				pagelist[i] = (void *)(unsigned long)0xaa55aa55aa55aa55ULL;
+
+			len = iov_iter_extract_pages(&iter, &pages, 100 * 1024,
+						     ARRAY_SIZE(pagelist), 0, &offset0);
+			KUNIT_EXPECT_GE(test, len, 0);
+			if (len < 0)
+				break;
+			KUNIT_EXPECT_LE(test, len, size);
+			KUNIT_EXPECT_EQ(test, iter.count, size - len);
+			if (len == 0)
+				break;
+			size -= len;
+			KUNIT_EXPECT_GE(test, (ssize_t)offset0, 0);
+			KUNIT_EXPECT_LT(test, offset0, PAGE_SIZE);
+
+			for (i = 0; i < ARRAY_SIZE(pagelist); i++) {
+				struct page *p;
+				ssize_t part = min_t(ssize_t, len, PAGE_SIZE - offset0);
+				int ix;
+
+				KUNIT_ASSERT_GE(test, part, 0);
+				ix = from / PAGE_SIZE;
+				KUNIT_ASSERT_LT(test, ix, npages);
+				p = bpages[ix];
+				KUNIT_EXPECT_PTR_EQ(test, pagelist[i], p);
+				KUNIT_EXPECT_EQ(test, offset0, from % PAGE_SIZE);
+				from += part;
+				len -= part;
+				KUNIT_ASSERT_GE(test, len, 0);
+				if (len == 0)
+					break;
+				offset0 = 0;
+			}
+
+			if (test->status == KUNIT_FAILURE)
+				goto stop;
+		} while (iov_iter_count(&iter) > 0);
+
+		KUNIT_EXPECT_EQ(test, size, 0);
+		KUNIT_EXPECT_EQ(test, iter.count, 0);
+		KUNIT_EXPECT_EQ(test, iter.iov_offset, pr->to - pr->from);
+	}
+
+stop:
+	KUNIT_SUCCEED();
+}
+
+static struct kunit_case __refdata iov_kunit_cases[] = {
+	KUNIT_CASE(iov_kunit_copy_to_kvec),
+	KUNIT_CASE(iov_kunit_copy_from_kvec),
+	KUNIT_CASE(iov_kunit_copy_to_bvec),
+	KUNIT_CASE(iov_kunit_copy_from_bvec),
+	KUNIT_CASE(iov_kunit_copy_to_xarray),
+	KUNIT_CASE(iov_kunit_copy_from_xarray),
+	KUNIT_CASE(iov_kunit_extract_pages_kvec),
+	KUNIT_CASE(iov_kunit_extract_pages_bvec),
+	KUNIT_CASE(iov_kunit_extract_pages_xarray),
+	{}
+};
+
+static struct kunit_suite iov_kunit_suite = {
+	.name = "iov_iter",
+	.test_cases = iov_kunit_cases,
+};
+
+kunit_test_suites(&iov_kunit_suite);
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index ee1ff0c59fd7..0e00a84e8e8f 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -256,6 +256,22 @@ bool mas_is_err(struct ma_state *mas)
 	return xa_is_err(mas->node);
 }
 
+static __always_inline bool mas_is_overflow(struct ma_state *mas)
+{
+	if (unlikely(mas->node == MAS_OVERFLOW))
+		return true;
+
+	return false;
+}
+
+static __always_inline bool mas_is_underflow(struct ma_state *mas)
+{
+	if (unlikely(mas->node == MAS_UNDERFLOW))
+		return true;
+
+	return false;
+}
+
 static inline bool mas_searchable(struct ma_state *mas)
 {
 	if (mas_is_none(mas))
@@ -4415,10 +4431,13 @@ no_entry:
  *
  * @mas: The maple state
  * @max: The minimum starting range
+ * @empty: Can be empty
+ * @set_underflow: Set the @mas->node to underflow state on limit.
  *
  * Return: The entry in the previous slot which is possibly NULL
  */
-static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty)
+static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty,
+			   bool set_underflow)
 {
 	void *entry;
 	void __rcu **slots;
@@ -4435,7 +4454,6 @@ retry:
 	if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
 		goto retry;
 
-again:
 	if (mas->min <= min) {
 		pivot = mas_safe_min(mas, pivots, mas->offset);
 
@@ -4443,9 +4461,10 @@ again:
 			goto retry;
 
 		if (pivot <= min)
-			return NULL;
+			goto underflow;
 	}
 
+again:
 	if (likely(mas->offset)) {
 		mas->offset--;
 		mas->last = mas->index - 1;
@@ -4457,7 +4476,7 @@ again:
 		}
 
 		if (mas_is_none(mas))
-			return NULL;
+			goto underflow;
 
 		mas->last = mas->max;
 		node = mas_mn(mas);
@@ -4474,10 +4493,19 @@ again:
 	if (likely(entry))
 		return entry;
 
-	if (!empty)
+	if (!empty) {
+		if (mas->index <= min)
+			goto underflow;
+
 		goto again;
+	}
 
 	return entry;
+
+underflow:
+	if (set_underflow)
+		mas->node = MAS_UNDERFLOW;
+	return NULL;
 }
 
 /*
@@ -4567,10 +4595,13 @@ no_entry:
  * @mas: The maple state
  * @max: The maximum starting range
  * @empty: Can be empty
+ * @set_overflow: Should @mas->node be set to overflow when the limit is
+ * reached.
  *
  * Return: The entry in the next slot which is possibly NULL
  */
-static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty)
+static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty,
+			   bool set_overflow)
 {
 	void __rcu **slots;
 	unsigned long *pivots;
@@ -4589,22 +4620,22 @@ retry:
 	if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
 		goto retry;
 
-again:
 	if (mas->max >= max) {
 		if (likely(mas->offset < data_end))
 			pivot = pivots[mas->offset];
 		else
-			return NULL; /* must be mas->max */
+			goto overflow;
 
 		if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
 			goto retry;
 
 		if (pivot >= max)
-			return NULL;
+			goto overflow;
 	}
 
 	if (likely(mas->offset < data_end)) {
 		mas->index = pivots[mas->offset] + 1;
+again:
 		mas->offset++;
 		if (likely(mas->offset < data_end))
 			mas->last = pivots[mas->offset];
@@ -4616,8 +4647,11 @@ again:
 			goto retry;
 		}
 
-		if (mas_is_none(mas))
+		if (WARN_ON_ONCE(mas_is_none(mas))) {
+			mas->node = MAS_OVERFLOW;
 			return NULL;
+			goto overflow;
+		}
 
 		mas->offset = 0;
 		mas->index = mas->min;
@@ -4636,12 +4670,20 @@ again:
 		return entry;
 
 	if (!empty) {
-		if (!mas->offset)
-			data_end = 2;
+		if (mas->last >= max)
+			goto overflow;
+
+		mas->index = mas->last + 1;
+		/* Node cannot end on NULL, so it's safe to short-cut here */
 		goto again;
 	}
 
 	return entry;
+
+overflow:
+	if (set_overflow)
+		mas->node = MAS_OVERFLOW;
+	return NULL;
 }
 
 /*
@@ -4651,17 +4693,20 @@ again:
  *
  * Set the @mas->node to the next entry and the range_start to
  * the beginning value for the entry.  Does not check beyond @limit.
- * Sets @mas->index and @mas->last to the limit if it is hit.
+ * Sets @mas->index and @mas->last to the range, Does not update @mas->index and
+ * @mas->last on overflow.
  * Restarts on dead nodes.
  *
  * Return: the next entry or %NULL.
  */
 static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit)
 {
-	if (mas->last >= limit)
+	if (mas->last >= limit) {
+		mas->node = MAS_OVERFLOW;
 		return NULL;
+	}
 
-	return mas_next_slot(mas, limit, false);
+	return mas_next_slot(mas, limit, false, true);
 }
 
 /*
@@ -4837,7 +4882,7 @@ void *mas_walk(struct ma_state *mas)
 {
 	void *entry;
 
-	if (mas_is_none(mas) || mas_is_paused(mas) || mas_is_ptr(mas))
+	if (!mas_is_active(mas) || !mas_is_start(mas))
 		mas->node = MAS_START;
 retry:
 	entry = mas_state_walk(mas);
@@ -5294,14 +5339,22 @@ static inline void mte_destroy_walk(struct maple_enode *enode,
 
 static void mas_wr_store_setup(struct ma_wr_state *wr_mas)
 {
-	if (mas_is_start(wr_mas->mas))
-		return;
+	if (!mas_is_active(wr_mas->mas)) {
+		if (mas_is_start(wr_mas->mas))
+			return;
 
-	if (unlikely(mas_is_paused(wr_mas->mas)))
-		goto reset;
+		if (unlikely(mas_is_paused(wr_mas->mas)))
+			goto reset;
 
-	if (unlikely(mas_is_none(wr_mas->mas)))
-		goto reset;
+		if (unlikely(mas_is_none(wr_mas->mas)))
+			goto reset;
+
+		if (unlikely(mas_is_overflow(wr_mas->mas)))
+			goto reset;
+
+		if (unlikely(mas_is_underflow(wr_mas->mas)))
+			goto reset;
+	}
 
 	/*
 	 * A less strict version of mas_is_span_wr() where we allow spanning
@@ -5595,8 +5648,25 @@ static inline bool mas_next_setup(struct ma_state *mas, unsigned long max,
 {
 	bool was_none = mas_is_none(mas);
 
-	if (mas_is_none(mas) || mas_is_paused(mas))
+	if (unlikely(mas->last >= max)) {
+		mas->node = MAS_OVERFLOW;
+		return true;
+	}
+
+	if (mas_is_active(mas))
+		return false;
+
+	if (mas_is_none(mas) || mas_is_paused(mas)) {
+		mas->node = MAS_START;
+	} else if (mas_is_overflow(mas)) {
+		/* Overflowed before, but the max changed */
 		mas->node = MAS_START;
+	} else if (mas_is_underflow(mas)) {
+		mas->node = MAS_START;
+		*entry = mas_walk(mas);
+		if (*entry)
+			return true;
+	}
 
 	if (mas_is_start(mas))
 		*entry = mas_walk(mas); /* Retries on dead nodes handled by mas_walk */
@@ -5615,6 +5685,7 @@ static inline bool mas_next_setup(struct ma_state *mas, unsigned long max,
 
 	if (mas_is_none(mas))
 		return true;
+
 	return false;
 }
 
@@ -5637,7 +5708,7 @@ void *mas_next(struct ma_state *mas, unsigned long max)
 		return entry;
 
 	/* Retries on dead nodes handled by mas_next_slot */
-	return mas_next_slot(mas, max, false);
+	return mas_next_slot(mas, max, false, true);
 }
 EXPORT_SYMBOL_GPL(mas_next);
 
@@ -5660,7 +5731,7 @@ void *mas_next_range(struct ma_state *mas, unsigned long max)
 		return entry;
 
 	/* Retries on dead nodes handled by mas_next_slot */
-	return mas_next_slot(mas, max, true);
+	return mas_next_slot(mas, max, true, true);
 }
 EXPORT_SYMBOL_GPL(mas_next_range);
 
@@ -5691,18 +5762,31 @@ EXPORT_SYMBOL_GPL(mt_next);
 static inline bool mas_prev_setup(struct ma_state *mas, unsigned long min,
 		void **entry)
 {
-	if (mas->index <= min)
-		goto none;
+	if (unlikely(mas->index <= min)) {
+		mas->node = MAS_UNDERFLOW;
+		return true;
+	}
 
-	if (mas_is_none(mas) || mas_is_paused(mas))
+	if (mas_is_active(mas))
+		return false;
+
+	if (mas_is_overflow(mas)) {
 		mas->node = MAS_START;
+		*entry = mas_walk(mas);
+		if (*entry)
+			return true;
+	}
 
-	if (mas_is_start(mas)) {
-		mas_walk(mas);
-		if (!mas->index)
-			goto none;
+	if (mas_is_none(mas) || mas_is_paused(mas)) {
+		mas->node = MAS_START;
+	} else if (mas_is_underflow(mas)) {
+		/* underflowed before but the min changed */
+		mas->node = MAS_START;
 	}
 
+	if (mas_is_start(mas))
+		mas_walk(mas);
+
 	if (unlikely(mas_is_ptr(mas))) {
 		if (!mas->index)
 			goto none;
@@ -5747,7 +5831,7 @@ void *mas_prev(struct ma_state *mas, unsigned long min)
 	if (mas_prev_setup(mas, min, &entry))
 		return entry;
 
-	return mas_prev_slot(mas, min, false);
+	return mas_prev_slot(mas, min, false, true);
 }
 EXPORT_SYMBOL_GPL(mas_prev);
 
@@ -5770,7 +5854,7 @@ void *mas_prev_range(struct ma_state *mas, unsigned long min)
 	if (mas_prev_setup(mas, min, &entry))
 		return entry;
 
-	return mas_prev_slot(mas, min, true);
+	return mas_prev_slot(mas, min, true, true);
 }
 EXPORT_SYMBOL_GPL(mas_prev_range);
 
@@ -5828,24 +5912,35 @@ EXPORT_SYMBOL_GPL(mas_pause);
 static inline bool mas_find_setup(struct ma_state *mas, unsigned long max,
 		void **entry)
 {
-	*entry = NULL;
+	if (mas_is_active(mas)) {
+		if (mas->last < max)
+			return false;
 
-	if (unlikely(mas_is_none(mas))) {
+		return true;
+	}
+
+	if (mas_is_paused(mas)) {
 		if (unlikely(mas->last >= max))
 			return true;
 
-		mas->index = mas->last;
+		mas->index = ++mas->last;
 		mas->node = MAS_START;
-	} else if (unlikely(mas_is_paused(mas))) {
+	} else if (mas_is_none(mas)) {
 		if (unlikely(mas->last >= max))
 			return true;
 
+		mas->index = mas->last;
 		mas->node = MAS_START;
-		mas->index = ++mas->last;
-	} else if (unlikely(mas_is_ptr(mas)))
-		goto ptr_out_of_range;
+	} else if (mas_is_overflow(mas) || mas_is_underflow(mas)) {
+		if (mas->index > max) {
+			mas->node = MAS_OVERFLOW;
+			return true;
+		}
+
+		mas->node = MAS_START;
+	}
 
-	if (unlikely(mas_is_start(mas))) {
+	if (mas_is_start(mas)) {
 		/* First run or continue */
 		if (mas->index > max)
 			return true;
@@ -5895,7 +5990,7 @@ void *mas_find(struct ma_state *mas, unsigned long max)
 		return entry;
 
 	/* Retries on dead nodes handled by mas_next_slot */
-	return mas_next_slot(mas, max, false);
+	return mas_next_slot(mas, max, false, false);
 }
 EXPORT_SYMBOL_GPL(mas_find);
 
@@ -5913,13 +6008,13 @@ EXPORT_SYMBOL_GPL(mas_find);
  */
 void *mas_find_range(struct ma_state *mas, unsigned long max)
 {
-	void *entry;
+	void *entry = NULL;
 
 	if (mas_find_setup(mas, max, &entry))
 		return entry;
 
 	/* Retries on dead nodes handled by mas_next_slot */
-	return mas_next_slot(mas, max, true);
+	return mas_next_slot(mas, max, true, false);
 }
 EXPORT_SYMBOL_GPL(mas_find_range);
 
@@ -5934,26 +6029,36 @@ EXPORT_SYMBOL_GPL(mas_find_range);
 static inline bool mas_find_rev_setup(struct ma_state *mas, unsigned long min,
 		void **entry)
 {
-	*entry = NULL;
-
-	if (unlikely(mas_is_none(mas))) {
-		if (mas->index <= min)
-			goto none;
+	if (mas_is_active(mas)) {
+		if (mas->index > min)
+			return false;
 
-		mas->last = mas->index;
-		mas->node = MAS_START;
+		return true;
 	}
 
-	if (unlikely(mas_is_paused(mas))) {
+	if (mas_is_paused(mas)) {
 		if (unlikely(mas->index <= min)) {
 			mas->node = MAS_NONE;
 			return true;
 		}
 		mas->node = MAS_START;
 		mas->last = --mas->index;
+	} else if (mas_is_none(mas)) {
+		if (mas->index <= min)
+			goto none;
+
+		mas->last = mas->index;
+		mas->node = MAS_START;
+	} else if (mas_is_underflow(mas) || mas_is_overflow(mas)) {
+		if (mas->last <= min) {
+			mas->node = MAS_UNDERFLOW;
+			return true;
+		}
+
+		mas->node = MAS_START;
 	}
 
-	if (unlikely(mas_is_start(mas))) {
+	if (mas_is_start(mas)) {
 		/* First run or continue */
 		if (mas->index < min)
 			return true;
@@ -6004,13 +6109,13 @@ none:
  */
 void *mas_find_rev(struct ma_state *mas, unsigned long min)
 {
-	void *entry;
+	void *entry = NULL;
 
 	if (mas_find_rev_setup(mas, min, &entry))
 		return entry;
 
 	/* Retries on dead nodes handled by mas_prev_slot */
-	return mas_prev_slot(mas, min, false);
+	return mas_prev_slot(mas, min, false, false);
 
 }
 EXPORT_SYMBOL_GPL(mas_find_rev);
@@ -6030,13 +6135,13 @@ EXPORT_SYMBOL_GPL(mas_find_rev);
  */
 void *mas_find_range_rev(struct ma_state *mas, unsigned long min)
 {
-	void *entry;
+	void *entry = NULL;
 
 	if (mas_find_rev_setup(mas, min, &entry))
 		return entry;
 
 	/* Retries on dead nodes handled by mas_prev_slot */
-	return mas_prev_slot(mas, min, true);
+	return mas_prev_slot(mas, min, true, false);
 }
 EXPORT_SYMBOL_GPL(mas_find_range_rev);
 
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 5004463c4f9f..9073430dc865 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -151,48 +151,72 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
 }
 EXPORT_SYMBOL(__percpu_counter_sum);
 
-int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
-			  struct lock_class_key *key)
+int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount,
+			       gfp_t gfp, u32 nr_counters,
+			       struct lock_class_key *key)
 {
 	unsigned long flags __maybe_unused;
-
-	raw_spin_lock_init(&fbc->lock);
-	lockdep_set_class(&fbc->lock, key);
-	fbc->count = amount;
-	fbc->counters = alloc_percpu_gfp(s32, gfp);
-	if (!fbc->counters)
+	size_t counter_size;
+	s32 __percpu *counters;
+	u32 i;
+
+	counter_size = ALIGN(sizeof(*counters), __alignof__(*counters));
+	counters = __alloc_percpu_gfp(nr_counters * counter_size,
+				      __alignof__(*counters), gfp);
+	if (!counters) {
+		fbc[0].counters = NULL;
 		return -ENOMEM;
+	}
 
-	debug_percpu_counter_activate(fbc);
+	for (i = 0; i < nr_counters; i++) {
+		raw_spin_lock_init(&fbc[i].lock);
+		lockdep_set_class(&fbc[i].lock, key);
+#ifdef CONFIG_HOTPLUG_CPU
+		INIT_LIST_HEAD(&fbc[i].list);
+#endif
+		fbc[i].count = amount;
+		fbc[i].counters = (void *)counters + (i * counter_size);
+
+		debug_percpu_counter_activate(&fbc[i]);
+	}
 
 #ifdef CONFIG_HOTPLUG_CPU
-	INIT_LIST_HEAD(&fbc->list);
 	spin_lock_irqsave(&percpu_counters_lock, flags);
-	list_add(&fbc->list, &percpu_counters);
+	for (i = 0; i < nr_counters; i++)
+		list_add(&fbc[i].list, &percpu_counters);
 	spin_unlock_irqrestore(&percpu_counters_lock, flags);
 #endif
 	return 0;
 }
-EXPORT_SYMBOL(__percpu_counter_init);
+EXPORT_SYMBOL(__percpu_counter_init_many);
 
-void percpu_counter_destroy(struct percpu_counter *fbc)
+void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters)
 {
 	unsigned long flags __maybe_unused;
+	u32 i;
+
+	if (WARN_ON_ONCE(!fbc))
+		return;
 
-	if (!fbc->counters)
+	if (!fbc[0].counters)
 		return;
 
-	debug_percpu_counter_deactivate(fbc);
+	for (i = 0; i < nr_counters; i++)
+		debug_percpu_counter_deactivate(&fbc[i]);
 
 #ifdef CONFIG_HOTPLUG_CPU
 	spin_lock_irqsave(&percpu_counters_lock, flags);
-	list_del(&fbc->list);
+	for (i = 0; i < nr_counters; i++)
+		list_del(&fbc[i].list);
 	spin_unlock_irqrestore(&percpu_counters_lock, flags);
 #endif
-	free_percpu(fbc->counters);
-	fbc->counters = NULL;
+
+	free_percpu(fbc[0].counters);
+
+	for (i = 0; i < nr_counters; i++)
+		fbc[i].counters = NULL;
 }
-EXPORT_SYMBOL(percpu_counter_destroy);
+EXPORT_SYMBOL(percpu_counter_destroy_many);
 
 int percpu_counter_batch __read_mostly = 32;
 EXPORT_SYMBOL(percpu_counter_batch);
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 45e17619422b..035b0a4db476 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -9,6 +9,7 @@ raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
                               vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
+raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o recov_loongarch_simd.o
 
 hostprogs	+= mktables
 
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index a22a05c9af8a..0ec534faf019 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -73,6 +73,14 @@ const struct raid6_calls * const raid6_algos[] = {
 	&raid6_neonx2,
 	&raid6_neonx1,
 #endif
+#ifdef CONFIG_LOONGARCH
+#ifdef CONFIG_CPU_HAS_LASX
+	&raid6_lasx,
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+	&raid6_lsx,
+#endif
+#endif
 #if defined(__ia64__)
 	&raid6_intx32,
 	&raid6_intx16,
@@ -104,6 +112,14 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
 #if defined(CONFIG_KERNEL_MODE_NEON)
 	&raid6_recov_neon,
 #endif
+#ifdef CONFIG_LOONGARCH
+#ifdef CONFIG_CPU_HAS_LASX
+	&raid6_recov_lasx,
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+	&raid6_recov_lsx,
+#endif
+#endif
 	&raid6_recov_intx1,
 	NULL
 };
diff --git a/lib/raid6/loongarch.h b/lib/raid6/loongarch.h
new file mode 100644
index 000000000000..acfc33ce7056
--- /dev/null
+++ b/lib/raid6/loongarch.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * raid6/loongarch.h
+ *
+ * Definitions common to LoongArch RAID-6 code only
+ */
+
+#ifndef _LIB_RAID6_LOONGARCH_H
+#define _LIB_RAID6_LOONGARCH_H
+
+#ifdef __KERNEL__
+
+#include <asm/cpu-features.h>
+#include <asm/fpu.h>
+
+#else /* for user-space testing */
+
+#include <sys/auxv.h>
+
+/* have to supply these defines for glibc 2.37- and musl */
+#ifndef HWCAP_LOONGARCH_LSX
+#define HWCAP_LOONGARCH_LSX	(1 << 4)
+#endif
+#ifndef HWCAP_LOONGARCH_LASX
+#define HWCAP_LOONGARCH_LASX	(1 << 5)
+#endif
+
+#define kernel_fpu_begin()
+#define kernel_fpu_end()
+
+#define cpu_has_lsx	(getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LSX)
+#define cpu_has_lasx	(getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LASX)
+
+#endif /* __KERNEL__ */
+
+#endif /* _LIB_RAID6_LOONGARCH_H */
diff --git a/lib/raid6/loongarch_simd.c b/lib/raid6/loongarch_simd.c
new file mode 100644
index 000000000000..aa5d9f924ca3
--- /dev/null
+++ b/lib/raid6/loongarch_simd.c
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RAID6 syndrome calculations in LoongArch SIMD (LSX & LASX)
+ *
+ * Copyright 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Based on the generic RAID-6 code (int.uc):
+ *
+ * Copyright 2002-2004 H. Peter Anvin
+ */
+
+#include <linux/raid/pq.h>
+#include "loongarch.h"
+
+/*
+ * The vector algorithms are currently priority 0, which means the generic
+ * scalar algorithms are not being disabled if vector support is present.
+ * This is like the similar LoongArch RAID5 XOR code, with the main reason
+ * repeated here: it cannot be ruled out at this point of time, that some
+ * future (maybe reduced) models could run the vector algorithms slower than
+ * the scalar ones, maybe for errata or micro-op reasons. It may be
+ * appropriate to revisit this after one or two more uarch generations.
+ */
+
+#ifdef CONFIG_CPU_HAS_LSX
+#define NSIZE 16
+
+static int raid6_has_lsx(void)
+{
+	return cpu_has_lsx;
+}
+
+static void raid6_lsx_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	/*
+	 * $vr0, $vr1, $vr2, $vr3: wp
+	 * $vr4, $vr5, $vr6, $vr7: wq
+	 * $vr8, $vr9, $vr10, $vr11: wd
+	 * $vr12, $vr13, $vr14, $vr15: w2
+	 * $vr16, $vr17, $vr18, $vr19: w1
+	 */
+	for (d = 0; d < bytes; d += NSIZE*4) {
+		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+		asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+		asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+		asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
+		asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
+		asm volatile("vori.b $vr4, $vr0, 0");
+		asm volatile("vori.b $vr5, $vr1, 0");
+		asm volatile("vori.b $vr6, $vr2, 0");
+		asm volatile("vori.b $vr7, $vr3, 0");
+		for (z = z0-1; z >= 0; z--) {
+			/* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+			asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
+			asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
+			asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
+			asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
+			/* wp$$ ^= wd$$; */
+			asm volatile("vxor.v $vr0, $vr0, $vr8");
+			asm volatile("vxor.v $vr1, $vr1, $vr9");
+			asm volatile("vxor.v $vr2, $vr2, $vr10");
+			asm volatile("vxor.v $vr3, $vr3, $vr11");
+			/* w2$$ = MASK(wq$$); */
+			asm volatile("vslti.b $vr12, $vr4, 0");
+			asm volatile("vslti.b $vr13, $vr5, 0");
+			asm volatile("vslti.b $vr14, $vr6, 0");
+			asm volatile("vslti.b $vr15, $vr7, 0");
+			/* w1$$ = SHLBYTE(wq$$); */
+			asm volatile("vslli.b $vr16, $vr4, 1");
+			asm volatile("vslli.b $vr17, $vr5, 1");
+			asm volatile("vslli.b $vr18, $vr6, 1");
+			asm volatile("vslli.b $vr19, $vr7, 1");
+			/* w2$$ &= NBYTES(0x1d); */
+			asm volatile("vandi.b $vr12, $vr12, 0x1d");
+			asm volatile("vandi.b $vr13, $vr13, 0x1d");
+			asm volatile("vandi.b $vr14, $vr14, 0x1d");
+			asm volatile("vandi.b $vr15, $vr15, 0x1d");
+			/* w1$$ ^= w2$$; */
+			asm volatile("vxor.v $vr16, $vr16, $vr12");
+			asm volatile("vxor.v $vr17, $vr17, $vr13");
+			asm volatile("vxor.v $vr18, $vr18, $vr14");
+			asm volatile("vxor.v $vr19, $vr19, $vr15");
+			/* wq$$ = w1$$ ^ wd$$; */
+			asm volatile("vxor.v $vr4, $vr16, $vr8");
+			asm volatile("vxor.v $vr5, $vr17, $vr9");
+			asm volatile("vxor.v $vr6, $vr18, $vr10");
+			asm volatile("vxor.v $vr7, $vr19, $vr11");
+		}
+		/* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
+		asm volatile("vst $vr0, %0" : "=m"(p[d+NSIZE*0]));
+		asm volatile("vst $vr1, %0" : "=m"(p[d+NSIZE*1]));
+		asm volatile("vst $vr2, %0" : "=m"(p[d+NSIZE*2]));
+		asm volatile("vst $vr3, %0" : "=m"(p[d+NSIZE*3]));
+		/* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
+		asm volatile("vst $vr4, %0" : "=m"(q[d+NSIZE*0]));
+		asm volatile("vst $vr5, %0" : "=m"(q[d+NSIZE*1]));
+		asm volatile("vst $vr6, %0" : "=m"(q[d+NSIZE*2]));
+		asm volatile("vst $vr7, %0" : "=m"(q[d+NSIZE*3]));
+	}
+
+	kernel_fpu_end();
+}
+
+static void raid6_lsx_xor_syndrome(int disks, int start, int stop,
+				   size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = stop;		/* P/Q right side optimization */
+	p = dptr[disks-2];	/* XOR parity */
+	q = dptr[disks-1];	/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	/*
+	 * $vr0, $vr1, $vr2, $vr3: wp
+	 * $vr4, $vr5, $vr6, $vr7: wq
+	 * $vr8, $vr9, $vr10, $vr11: wd
+	 * $vr12, $vr13, $vr14, $vr15: w2
+	 * $vr16, $vr17, $vr18, $vr19: w1
+	 */
+	for (d = 0; d < bytes; d += NSIZE*4) {
+		/* P/Q data pages */
+		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+		asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+		asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+		asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
+		asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
+		asm volatile("vori.b $vr4, $vr0, 0");
+		asm volatile("vori.b $vr5, $vr1, 0");
+		asm volatile("vori.b $vr6, $vr2, 0");
+		asm volatile("vori.b $vr7, $vr3, 0");
+		for (z = z0-1; z >= start; z--) {
+			/* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+			asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
+			asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
+			asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
+			asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
+			/* wp$$ ^= wd$$; */
+			asm volatile("vxor.v $vr0, $vr0, $vr8");
+			asm volatile("vxor.v $vr1, $vr1, $vr9");
+			asm volatile("vxor.v $vr2, $vr2, $vr10");
+			asm volatile("vxor.v $vr3, $vr3, $vr11");
+			/* w2$$ = MASK(wq$$); */
+			asm volatile("vslti.b $vr12, $vr4, 0");
+			asm volatile("vslti.b $vr13, $vr5, 0");
+			asm volatile("vslti.b $vr14, $vr6, 0");
+			asm volatile("vslti.b $vr15, $vr7, 0");
+			/* w1$$ = SHLBYTE(wq$$); */
+			asm volatile("vslli.b $vr16, $vr4, 1");
+			asm volatile("vslli.b $vr17, $vr5, 1");
+			asm volatile("vslli.b $vr18, $vr6, 1");
+			asm volatile("vslli.b $vr19, $vr7, 1");
+			/* w2$$ &= NBYTES(0x1d); */
+			asm volatile("vandi.b $vr12, $vr12, 0x1d");
+			asm volatile("vandi.b $vr13, $vr13, 0x1d");
+			asm volatile("vandi.b $vr14, $vr14, 0x1d");
+			asm volatile("vandi.b $vr15, $vr15, 0x1d");
+			/* w1$$ ^= w2$$; */
+			asm volatile("vxor.v $vr16, $vr16, $vr12");
+			asm volatile("vxor.v $vr17, $vr17, $vr13");
+			asm volatile("vxor.v $vr18, $vr18, $vr14");
+			asm volatile("vxor.v $vr19, $vr19, $vr15");
+			/* wq$$ = w1$$ ^ wd$$; */
+			asm volatile("vxor.v $vr4, $vr16, $vr8");
+			asm volatile("vxor.v $vr5, $vr17, $vr9");
+			asm volatile("vxor.v $vr6, $vr18, $vr10");
+			asm volatile("vxor.v $vr7, $vr19, $vr11");
+		}
+
+		/* P/Q left side optimization */
+		for (z = start-1; z >= 0; z--) {
+			/* w2$$ = MASK(wq$$); */
+			asm volatile("vslti.b $vr12, $vr4, 0");
+			asm volatile("vslti.b $vr13, $vr5, 0");
+			asm volatile("vslti.b $vr14, $vr6, 0");
+			asm volatile("vslti.b $vr15, $vr7, 0");
+			/* w1$$ = SHLBYTE(wq$$); */
+			asm volatile("vslli.b $vr16, $vr4, 1");
+			asm volatile("vslli.b $vr17, $vr5, 1");
+			asm volatile("vslli.b $vr18, $vr6, 1");
+			asm volatile("vslli.b $vr19, $vr7, 1");
+			/* w2$$ &= NBYTES(0x1d); */
+			asm volatile("vandi.b $vr12, $vr12, 0x1d");
+			asm volatile("vandi.b $vr13, $vr13, 0x1d");
+			asm volatile("vandi.b $vr14, $vr14, 0x1d");
+			asm volatile("vandi.b $vr15, $vr15, 0x1d");
+			/* wq$$ = w1$$ ^ w2$$; */
+			asm volatile("vxor.v $vr4, $vr16, $vr12");
+			asm volatile("vxor.v $vr5, $vr17, $vr13");
+			asm volatile("vxor.v $vr6, $vr18, $vr14");
+			asm volatile("vxor.v $vr7, $vr19, $vr15");
+		}
+		/*
+		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+		 */
+		asm volatile(
+			"vld $vr20, %0\n\t"
+			"vld $vr21, %1\n\t"
+			"vld $vr22, %2\n\t"
+			"vld $vr23, %3\n\t"
+			"vld $vr24, %4\n\t"
+			"vld $vr25, %5\n\t"
+			"vld $vr26, %6\n\t"
+			"vld $vr27, %7\n\t"
+			"vxor.v $vr20, $vr20, $vr0\n\t"
+			"vxor.v $vr21, $vr21, $vr1\n\t"
+			"vxor.v $vr22, $vr22, $vr2\n\t"
+			"vxor.v $vr23, $vr23, $vr3\n\t"
+			"vxor.v $vr24, $vr24, $vr4\n\t"
+			"vxor.v $vr25, $vr25, $vr5\n\t"
+			"vxor.v $vr26, $vr26, $vr6\n\t"
+			"vxor.v $vr27, $vr27, $vr7\n\t"
+			"vst $vr20, %0\n\t"
+			"vst $vr21, %1\n\t"
+			"vst $vr22, %2\n\t"
+			"vst $vr23, %3\n\t"
+			"vst $vr24, %4\n\t"
+			"vst $vr25, %5\n\t"
+			"vst $vr26, %6\n\t"
+			"vst $vr27, %7\n\t"
+			: "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
+			  "+m"(p[d+NSIZE*2]), "+m"(p[d+NSIZE*3]),
+			  "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]),
+			  "+m"(q[d+NSIZE*2]), "+m"(q[d+NSIZE*3])
+		);
+	}
+
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_lsx = {
+	raid6_lsx_gen_syndrome,
+	raid6_lsx_xor_syndrome,
+	raid6_has_lsx,
+	"lsx",
+	.priority = 0 /* see the comment near the top of the file for reason */
+};
+
+#undef NSIZE
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+#define NSIZE 32
+
+static int raid6_has_lasx(void)
+{
+	return cpu_has_lasx;
+}
+
+static void raid6_lasx_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	/*
+	 * $xr0, $xr1: wp
+	 * $xr2, $xr3: wq
+	 * $xr4, $xr5: wd
+	 * $xr6, $xr7: w2
+	 * $xr8, $xr9: w1
+	 */
+	for (d = 0; d < bytes; d += NSIZE*2) {
+		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+		asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+		asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+		asm volatile("xvori.b $xr2, $xr0, 0");
+		asm volatile("xvori.b $xr3, $xr1, 0");
+		for (z = z0-1; z >= 0; z--) {
+			/* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+			asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
+			asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
+			/* wp$$ ^= wd$$; */
+			asm volatile("xvxor.v $xr0, $xr0, $xr4");
+			asm volatile("xvxor.v $xr1, $xr1, $xr5");
+			/* w2$$ = MASK(wq$$); */
+			asm volatile("xvslti.b $xr6, $xr2, 0");
+			asm volatile("xvslti.b $xr7, $xr3, 0");
+			/* w1$$ = SHLBYTE(wq$$); */
+			asm volatile("xvslli.b $xr8, $xr2, 1");
+			asm volatile("xvslli.b $xr9, $xr3, 1");
+			/* w2$$ &= NBYTES(0x1d); */
+			asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+			asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+			/* w1$$ ^= w2$$; */
+			asm volatile("xvxor.v $xr8, $xr8, $xr6");
+			asm volatile("xvxor.v $xr9, $xr9, $xr7");
+			/* wq$$ = w1$$ ^ wd$$; */
+			asm volatile("xvxor.v $xr2, $xr8, $xr4");
+			asm volatile("xvxor.v $xr3, $xr9, $xr5");
+		}
+		/* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
+		asm volatile("xvst $xr0, %0" : "=m"(p[d+NSIZE*0]));
+		asm volatile("xvst $xr1, %0" : "=m"(p[d+NSIZE*1]));
+		/* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
+		asm volatile("xvst $xr2, %0" : "=m"(q[d+NSIZE*0]));
+		asm volatile("xvst $xr3, %0" : "=m"(q[d+NSIZE*1]));
+	}
+
+	kernel_fpu_end();
+}
+
+static void raid6_lasx_xor_syndrome(int disks, int start, int stop,
+				    size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = stop;		/* P/Q right side optimization */
+	p = dptr[disks-2];	/* XOR parity */
+	q = dptr[disks-1];	/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	/*
+	 * $xr0, $xr1: wp
+	 * $xr2, $xr3: wq
+	 * $xr4, $xr5: wd
+	 * $xr6, $xr7: w2
+	 * $xr8, $xr9: w1
+	 */
+	for (d = 0; d < bytes; d += NSIZE*2) {
+		/* P/Q data pages */
+		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+		asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+		asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+		asm volatile("xvori.b $xr2, $xr0, 0");
+		asm volatile("xvori.b $xr3, $xr1, 0");
+		for (z = z0-1; z >= start; z--) {
+			/* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+			asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
+			asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
+			/* wp$$ ^= wd$$; */
+			asm volatile("xvxor.v $xr0, $xr0, $xr4");
+			asm volatile("xvxor.v $xr1, $xr1, $xr5");
+			/* w2$$ = MASK(wq$$); */
+			asm volatile("xvslti.b $xr6, $xr2, 0");
+			asm volatile("xvslti.b $xr7, $xr3, 0");
+			/* w1$$ = SHLBYTE(wq$$); */
+			asm volatile("xvslli.b $xr8, $xr2, 1");
+			asm volatile("xvslli.b $xr9, $xr3, 1");
+			/* w2$$ &= NBYTES(0x1d); */
+			asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+			asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+			/* w1$$ ^= w2$$; */
+			asm volatile("xvxor.v $xr8, $xr8, $xr6");
+			asm volatile("xvxor.v $xr9, $xr9, $xr7");
+			/* wq$$ = w1$$ ^ wd$$; */
+			asm volatile("xvxor.v $xr2, $xr8, $xr4");
+			asm volatile("xvxor.v $xr3, $xr9, $xr5");
+		}
+
+		/* P/Q left side optimization */
+		for (z = start-1; z >= 0; z--) {
+			/* w2$$ = MASK(wq$$); */
+			asm volatile("xvslti.b $xr6, $xr2, 0");
+			asm volatile("xvslti.b $xr7, $xr3, 0");
+			/* w1$$ = SHLBYTE(wq$$); */
+			asm volatile("xvslli.b $xr8, $xr2, 1");
+			asm volatile("xvslli.b $xr9, $xr3, 1");
+			/* w2$$ &= NBYTES(0x1d); */
+			asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+			asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+			/* wq$$ = w1$$ ^ w2$$; */
+			asm volatile("xvxor.v $xr2, $xr8, $xr6");
+			asm volatile("xvxor.v $xr3, $xr9, $xr7");
+		}
+		/*
+		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+		 */
+		asm volatile(
+			"xvld $xr10, %0\n\t"
+			"xvld $xr11, %1\n\t"
+			"xvld $xr12, %2\n\t"
+			"xvld $xr13, %3\n\t"
+			"xvxor.v $xr10, $xr10, $xr0\n\t"
+			"xvxor.v $xr11, $xr11, $xr1\n\t"
+			"xvxor.v $xr12, $xr12, $xr2\n\t"
+			"xvxor.v $xr13, $xr13, $xr3\n\t"
+			"xvst $xr10, %0\n\t"
+			"xvst $xr11, %1\n\t"
+			"xvst $xr12, %2\n\t"
+			"xvst $xr13, %3\n\t"
+			: "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
+			  "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1])
+		);
+	}
+
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_lasx = {
+	raid6_lasx_gen_syndrome,
+	raid6_lasx_xor_syndrome,
+	raid6_has_lasx,
+	"lasx",
+	.priority = 0 /* see the comment near the top of the file for reason */
+};
+#undef NSIZE
+#endif /* CONFIG_CPU_HAS_LASX */
diff --git a/lib/raid6/recov_loongarch_simd.c b/lib/raid6/recov_loongarch_simd.c
new file mode 100644
index 000000000000..94aeac85e6f7
--- /dev/null
+++ b/lib/raid6/recov_loongarch_simd.c
@@ -0,0 +1,513 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX)
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Originally based on recov_avx2.c and recov_ssse3.c:
+ *
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
+ */
+
+#include <linux/raid/pq.h>
+#include "loongarch.h"
+
+/*
+ * Unlike with the syndrome calculation algorithms, there's no boot-time
+ * selection of recovery algorithms by benchmarking, so we have to specify
+ * the priorities and hope the future cores will all have decent vector
+ * support (i.e. no LASX slower than LSX, or even scalar code).
+ */
+
+#ifdef CONFIG_CPU_HAS_LSX
+static int raid6_has_lsx(void)
+{
+	return cpu_has_lsx;
+}
+
+static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila,
+				  int failb, void **ptrs)
+{
+	u8 *p, *q, *dp, *dq;
+	const u8 *pbmul;	/* P multiplier table for B data */
+	const u8 *qmul;		/* Q multiplier table (for both) */
+
+	p = (u8 *)ptrs[disks - 2];
+	q = (u8 *)ptrs[disks - 1];
+
+	/*
+	 * Compute syndrome with zero for the missing data pages
+	 * Use the dead data pages as temporary storage for
+	 * delta p and delta q
+	 */
+	dp = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks - 2] = dp;
+	dq = (u8 *)ptrs[failb];
+	ptrs[failb] = (void *)raid6_empty_zero_page;
+	ptrs[disks - 1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila] = dp;
+	ptrs[failb] = dq;
+	ptrs[disks - 2] = p;
+	ptrs[disks - 1] = q;
+
+	/* Now, pick the proper data tables */
+	pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
+	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
+
+	kernel_fpu_begin();
+
+	/*
+	 * vr20, vr21: qmul
+	 * vr22, vr23: pbmul
+	 */
+	asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
+	asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
+	asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
+	asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
+
+	while (bytes) {
+		/* vr4 - vr7: Q */
+		asm volatile("vld $vr4, %0" : : "m" (q[0]));
+		asm volatile("vld $vr5, %0" : : "m" (q[16]));
+		asm volatile("vld $vr6, %0" : : "m" (q[32]));
+		asm volatile("vld $vr7, %0" : : "m" (q[48]));
+		/*  vr4 - vr7: Q + Qxy */
+		asm volatile("vld $vr8, %0" : : "m" (dq[0]));
+		asm volatile("vld $vr9, %0" : : "m" (dq[16]));
+		asm volatile("vld $vr10, %0" : : "m" (dq[32]));
+		asm volatile("vld $vr11, %0" : : "m" (dq[48]));
+		asm volatile("vxor.v $vr4, $vr4, $vr8");
+		asm volatile("vxor.v $vr5, $vr5, $vr9");
+		asm volatile("vxor.v $vr6, $vr6, $vr10");
+		asm volatile("vxor.v $vr7, $vr7, $vr11");
+		/* vr0 - vr3: P */
+		asm volatile("vld $vr0, %0" : : "m" (p[0]));
+		asm volatile("vld $vr1, %0" : : "m" (p[16]));
+		asm volatile("vld $vr2, %0" : : "m" (p[32]));
+		asm volatile("vld $vr3, %0" : : "m" (p[48]));
+		/* vr0 - vr3: P + Pxy */
+		asm volatile("vld $vr8, %0" : : "m" (dp[0]));
+		asm volatile("vld $vr9, %0" : : "m" (dp[16]));
+		asm volatile("vld $vr10, %0" : : "m" (dp[32]));
+		asm volatile("vld $vr11, %0" : : "m" (dp[48]));
+		asm volatile("vxor.v $vr0, $vr0, $vr8");
+		asm volatile("vxor.v $vr1, $vr1, $vr9");
+		asm volatile("vxor.v $vr2, $vr2, $vr10");
+		asm volatile("vxor.v $vr3, $vr3, $vr11");
+
+		/* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */
+		asm volatile("vsrli.b $vr8, $vr4, 4");
+		asm volatile("vsrli.b $vr9, $vr5, 4");
+		asm volatile("vsrli.b $vr10, $vr6, 4");
+		asm volatile("vsrli.b $vr11, $vr7, 4");
+		/* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */
+		asm volatile("vandi.b $vr4, $vr4, 0x0f");
+		asm volatile("vandi.b $vr5, $vr5, 0x0f");
+		asm volatile("vandi.b $vr6, $vr6, 0x0f");
+		asm volatile("vandi.b $vr7, $vr7, 0x0f");
+		/* lookup from qmul[0] */
+		asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4");
+		asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5");
+		asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6");
+		asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7");
+		/* lookup from qmul[16] */
+		asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8");
+		asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9");
+		asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10");
+		asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11");
+		/* vr16 - vr19: B(Q + Qxy) */
+		asm volatile("vxor.v $vr16, $vr8, $vr4");
+		asm volatile("vxor.v $vr17, $vr9, $vr5");
+		asm volatile("vxor.v $vr18, $vr10, $vr6");
+		asm volatile("vxor.v $vr19, $vr11, $vr7");
+
+		/* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */
+		asm volatile("vsrli.b $vr4, $vr0, 4");
+		asm volatile("vsrli.b $vr5, $vr1, 4");
+		asm volatile("vsrli.b $vr6, $vr2, 4");
+		asm volatile("vsrli.b $vr7, $vr3, 4");
+		/* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */
+		asm volatile("vandi.b $vr12, $vr0, 0x0f");
+		asm volatile("vandi.b $vr13, $vr1, 0x0f");
+		asm volatile("vandi.b $vr14, $vr2, 0x0f");
+		asm volatile("vandi.b $vr15, $vr3, 0x0f");
+		/* lookup from pbmul[0] */
+		asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12");
+		asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13");
+		asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14");
+		asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15");
+		/* lookup from pbmul[16] */
+		asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4");
+		asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5");
+		asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6");
+		asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7");
+		/* vr4 - vr7: A(P + Pxy) */
+		asm volatile("vxor.v $vr4, $vr4, $vr12");
+		asm volatile("vxor.v $vr5, $vr5, $vr13");
+		asm volatile("vxor.v $vr6, $vr6, $vr14");
+		asm volatile("vxor.v $vr7, $vr7, $vr15");
+
+		/* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */
+		asm volatile("vxor.v $vr4, $vr4, $vr16");
+		asm volatile("vxor.v $vr5, $vr5, $vr17");
+		asm volatile("vxor.v $vr6, $vr6, $vr18");
+		asm volatile("vxor.v $vr7, $vr7, $vr19");
+		asm volatile("vst $vr4, %0" : "=m" (dq[0]));
+		asm volatile("vst $vr5, %0" : "=m" (dq[16]));
+		asm volatile("vst $vr6, %0" : "=m" (dq[32]));
+		asm volatile("vst $vr7, %0" : "=m" (dq[48]));
+
+		/* vr0 - vr3: P + Pxy + Dx = Dy */
+		asm volatile("vxor.v $vr0, $vr0, $vr4");
+		asm volatile("vxor.v $vr1, $vr1, $vr5");
+		asm volatile("vxor.v $vr2, $vr2, $vr6");
+		asm volatile("vxor.v $vr3, $vr3, $vr7");
+		asm volatile("vst $vr0, %0" : "=m" (dp[0]));
+		asm volatile("vst $vr1, %0" : "=m" (dp[16]));
+		asm volatile("vst $vr2, %0" : "=m" (dp[32]));
+		asm volatile("vst $vr3, %0" : "=m" (dp[48]));
+
+		bytes -= 64;
+		p += 64;
+		q += 64;
+		dp += 64;
+		dq += 64;
+	}
+
+	kernel_fpu_end();
+}
+
+static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila,
+				  void **ptrs)
+{
+	u8 *p, *q, *dq;
+	const u8 *qmul;		/* Q multiplier table */
+
+	p = (u8 *)ptrs[disks - 2];
+	q = (u8 *)ptrs[disks - 1];
+
+	/*
+	 * Compute syndrome with zero for the missing data page
+	 * Use the dead data page as temporary storage for delta q
+	 */
+	dq = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks - 1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila] = dq;
+	ptrs[disks - 1] = q;
+
+	/* Now, pick the proper data tables */
+	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+	kernel_fpu_begin();
+
+	/* vr22, vr23: qmul */
+	asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
+	asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
+
+	while (bytes) {
+		/* vr0 - vr3: P + Dx */
+		asm volatile("vld $vr0, %0" : : "m" (p[0]));
+		asm volatile("vld $vr1, %0" : : "m" (p[16]));
+		asm volatile("vld $vr2, %0" : : "m" (p[32]));
+		asm volatile("vld $vr3, %0" : : "m" (p[48]));
+		/* vr4 - vr7: Qx */
+		asm volatile("vld $vr4, %0" : : "m" (dq[0]));
+		asm volatile("vld $vr5, %0" : : "m" (dq[16]));
+		asm volatile("vld $vr6, %0" : : "m" (dq[32]));
+		asm volatile("vld $vr7, %0" : : "m" (dq[48]));
+		/* vr4 - vr7: Q + Qx */
+		asm volatile("vld $vr8, %0" : : "m" (q[0]));
+		asm volatile("vld $vr9, %0" : : "m" (q[16]));
+		asm volatile("vld $vr10, %0" : : "m" (q[32]));
+		asm volatile("vld $vr11, %0" : : "m" (q[48]));
+		asm volatile("vxor.v $vr4, $vr4, $vr8");
+		asm volatile("vxor.v $vr5, $vr5, $vr9");
+		asm volatile("vxor.v $vr6, $vr6, $vr10");
+		asm volatile("vxor.v $vr7, $vr7, $vr11");
+
+		/* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */
+		asm volatile("vsrli.b $vr8, $vr4, 4");
+		asm volatile("vsrli.b $vr9, $vr5, 4");
+		asm volatile("vsrli.b $vr10, $vr6, 4");
+		asm volatile("vsrli.b $vr11, $vr7, 4");
+		/* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */
+		asm volatile("vandi.b $vr4, $vr4, 0x0f");
+		asm volatile("vandi.b $vr5, $vr5, 0x0f");
+		asm volatile("vandi.b $vr6, $vr6, 0x0f");
+		asm volatile("vandi.b $vr7, $vr7, 0x0f");
+		/* lookup from qmul[0] */
+		asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4");
+		asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5");
+		asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6");
+		asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7");
+		/* lookup from qmul[16] */
+		asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8");
+		asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9");
+		asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10");
+		asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11");
+		/* vr4 - vr7: qmul(Q + Qx) = Dx */
+		asm volatile("vxor.v $vr4, $vr4, $vr8");
+		asm volatile("vxor.v $vr5, $vr5, $vr9");
+		asm volatile("vxor.v $vr6, $vr6, $vr10");
+		asm volatile("vxor.v $vr7, $vr7, $vr11");
+		asm volatile("vst $vr4, %0" : "=m" (dq[0]));
+		asm volatile("vst $vr5, %0" : "=m" (dq[16]));
+		asm volatile("vst $vr6, %0" : "=m" (dq[32]));
+		asm volatile("vst $vr7, %0" : "=m" (dq[48]));
+
+		/* vr0 - vr3: P + Dx + Dx = P */
+		asm volatile("vxor.v $vr0, $vr0, $vr4");
+		asm volatile("vxor.v $vr1, $vr1, $vr5");
+		asm volatile("vxor.v $vr2, $vr2, $vr6");
+		asm volatile("vxor.v $vr3, $vr3, $vr7");
+		asm volatile("vst $vr0, %0" : "=m" (p[0]));
+		asm volatile("vst $vr1, %0" : "=m" (p[16]));
+		asm volatile("vst $vr2, %0" : "=m" (p[32]));
+		asm volatile("vst $vr3, %0" : "=m" (p[48]));
+
+		bytes -= 64;
+		p += 64;
+		q += 64;
+		dq += 64;
+	}
+
+	kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_lsx = {
+	.data2 = raid6_2data_recov_lsx,
+	.datap = raid6_datap_recov_lsx,
+	.valid = raid6_has_lsx,
+	.name = "lsx",
+	.priority = 1,
+};
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+static int raid6_has_lasx(void)
+{
+	return cpu_has_lasx;
+}
+
+static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila,
+				   int failb, void **ptrs)
+{
+	u8 *p, *q, *dp, *dq;
+	const u8 *pbmul;	/* P multiplier table for B data */
+	const u8 *qmul;		/* Q multiplier table (for both) */
+
+	p = (u8 *)ptrs[disks - 2];
+	q = (u8 *)ptrs[disks - 1];
+
+	/*
+	 * Compute syndrome with zero for the missing data pages
+	 * Use the dead data pages as temporary storage for
+	 * delta p and delta q
+	 */
+	dp = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks - 2] = dp;
+	dq = (u8 *)ptrs[failb];
+	ptrs[failb] = (void *)raid6_empty_zero_page;
+	ptrs[disks - 1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila] = dp;
+	ptrs[failb] = dq;
+	ptrs[disks - 2] = p;
+	ptrs[disks - 1] = q;
+
+	/* Now, pick the proper data tables */
+	pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
+	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
+
+	kernel_fpu_begin();
+
+	/*
+	 * xr20, xr21: qmul
+	 * xr22, xr23: pbmul
+	 */
+	asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
+	asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
+	asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
+	asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
+	asm volatile("xvreplve0.q $xr20, $xr20");
+	asm volatile("xvreplve0.q $xr21, $xr21");
+	asm volatile("xvreplve0.q $xr22, $xr22");
+	asm volatile("xvreplve0.q $xr23, $xr23");
+
+	while (bytes) {
+		/* xr0, xr1: Q */
+		asm volatile("xvld $xr0, %0" : : "m" (q[0]));
+		asm volatile("xvld $xr1, %0" : : "m" (q[32]));
+		/* xr0, xr1: Q + Qxy */
+		asm volatile("xvld $xr4, %0" : : "m" (dq[0]));
+		asm volatile("xvld $xr5, %0" : : "m" (dq[32]));
+		asm volatile("xvxor.v $xr0, $xr0, $xr4");
+		asm volatile("xvxor.v $xr1, $xr1, $xr5");
+		/* xr2, xr3: P */
+		asm volatile("xvld $xr2, %0" : : "m" (p[0]));
+		asm volatile("xvld $xr3, %0" : : "m" (p[32]));
+		/* xr2, xr3: P + Pxy */
+		asm volatile("xvld $xr4, %0" : : "m" (dp[0]));
+		asm volatile("xvld $xr5, %0" : : "m" (dp[32]));
+		asm volatile("xvxor.v $xr2, $xr2, $xr4");
+		asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+		/* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */
+		asm volatile("xvsrli.b $xr4, $xr0, 4");
+		asm volatile("xvsrli.b $xr5, $xr1, 4");
+		/* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */
+		asm volatile("xvandi.b $xr0, $xr0, 0x0f");
+		asm volatile("xvandi.b $xr1, $xr1, 0x0f");
+		/* lookup from qmul[0] */
+		asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0");
+		asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1");
+		/* lookup from qmul[16] */
+		asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4");
+		asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5");
+		/* xr6, xr7: B(Q + Qxy) */
+		asm volatile("xvxor.v $xr6, $xr4, $xr0");
+		asm volatile("xvxor.v $xr7, $xr5, $xr1");
+
+		/* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */
+		asm volatile("xvsrli.b $xr4, $xr2, 4");
+		asm volatile("xvsrli.b $xr5, $xr3, 4");
+		/* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */
+		asm volatile("xvandi.b $xr0, $xr2, 0x0f");
+		asm volatile("xvandi.b $xr1, $xr3, 0x0f");
+		/* lookup from pbmul[0] */
+		asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0");
+		asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1");
+		/* lookup from pbmul[16] */
+		asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
+		asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
+		/* xr0, xr1: A(P + Pxy) */
+		asm volatile("xvxor.v $xr0, $xr0, $xr4");
+		asm volatile("xvxor.v $xr1, $xr1, $xr5");
+
+		/* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */
+		asm volatile("xvxor.v $xr0, $xr0, $xr6");
+		asm volatile("xvxor.v $xr1, $xr1, $xr7");
+
+		/* xr2, xr3: P + Pxy + Dx = Dy */
+		asm volatile("xvxor.v $xr2, $xr2, $xr0");
+		asm volatile("xvxor.v $xr3, $xr3, $xr1");
+
+		asm volatile("xvst $xr0, %0" : "=m" (dq[0]));
+		asm volatile("xvst $xr1, %0" : "=m" (dq[32]));
+		asm volatile("xvst $xr2, %0" : "=m" (dp[0]));
+		asm volatile("xvst $xr3, %0" : "=m" (dp[32]));
+
+		bytes -= 64;
+		p += 64;
+		q += 64;
+		dp += 64;
+		dq += 64;
+	}
+
+	kernel_fpu_end();
+}
+
+static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila,
+				   void **ptrs)
+{
+	u8 *p, *q, *dq;
+	const u8 *qmul;		/* Q multiplier table */
+
+	p = (u8 *)ptrs[disks - 2];
+	q = (u8 *)ptrs[disks - 1];
+
+	/*
+	 * Compute syndrome with zero for the missing data page
+	 * Use the dead data page as temporary storage for delta q
+	 */
+	dq = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks - 1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila] = dq;
+	ptrs[disks - 1] = q;
+
+	/* Now, pick the proper data tables */
+	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+	kernel_fpu_begin();
+
+	/* xr22, xr23: qmul */
+	asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
+	asm volatile("xvreplve0.q $xr22, $xr22");
+	asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
+	asm volatile("xvreplve0.q $xr23, $xr23");
+
+	while (bytes) {
+		/* xr0, xr1: P + Dx */
+		asm volatile("xvld $xr0, %0" : : "m" (p[0]));
+		asm volatile("xvld $xr1, %0" : : "m" (p[32]));
+		/* xr2, xr3: Qx */
+		asm volatile("xvld $xr2, %0" : : "m" (dq[0]));
+		asm volatile("xvld $xr3, %0" : : "m" (dq[32]));
+		/* xr2, xr3: Q + Qx */
+		asm volatile("xvld $xr4, %0" : : "m" (q[0]));
+		asm volatile("xvld $xr5, %0" : : "m" (q[32]));
+		asm volatile("xvxor.v $xr2, $xr2, $xr4");
+		asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+		/* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */
+		asm volatile("xvsrli.b $xr4, $xr2, 4");
+		asm volatile("xvsrli.b $xr5, $xr3, 4");
+		/* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */
+		asm volatile("xvandi.b $xr2, $xr2, 0x0f");
+		asm volatile("xvandi.b $xr3, $xr3, 0x0f");
+		/* lookup from qmul[0] */
+		asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2");
+		asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3");
+		/* lookup from qmul[16] */
+		asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
+		asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
+		/* xr2, xr3: qmul(Q + Qx) = Dx */
+		asm volatile("xvxor.v $xr2, $xr2, $xr4");
+		asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+		/* xr0, xr1: P + Dx + Dx = P */
+		asm volatile("xvxor.v $xr0, $xr0, $xr2");
+		asm volatile("xvxor.v $xr1, $xr1, $xr3");
+
+		asm volatile("xvst $xr2, %0" : "=m" (dq[0]));
+		asm volatile("xvst $xr3, %0" : "=m" (dq[32]));
+		asm volatile("xvst $xr0, %0" : "=m" (p[0]));
+		asm volatile("xvst $xr1, %0" : "=m" (p[32]));
+
+		bytes -= 64;
+		p += 64;
+		q += 64;
+		dq += 64;
+	}
+
+	kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_lasx = {
+	.data2 = raid6_2data_recov_lasx,
+	.datap = raid6_datap_recov_lasx,
+	.valid = raid6_has_lasx,
+	.name = "lasx",
+	.priority = 2,
+};
+#endif /* CONFIG_CPU_HAS_LASX */
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 1f693ea3b980..2abe0076a636 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -41,6 +41,16 @@ ifeq ($(findstring ppc,$(ARCH)),ppc)
                          gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
 endif
 
+ifeq ($(ARCH),loongarch64)
+        CFLAGS += -I../../../arch/loongarch/include -DCONFIG_LOONGARCH=1
+        CFLAGS += $(shell echo 'vld $$vr0, $$zero, 0' |         \
+                    gcc -c -x assembler - >/dev/null 2>&1 &&    \
+                    rm ./-.o && echo -DCONFIG_CPU_HAS_LSX=1)
+        CFLAGS += $(shell echo 'xvld $$xr0, $$zero, 0' |        \
+                    gcc -c -x assembler - >/dev/null 2>&1 &&    \
+                    rm ./-.o && echo -DCONFIG_CPU_HAS_LASX=1)
+endif
+
 ifeq ($(IS_X86),yes)
         OBJS   += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o
         CFLAGS += -DCONFIG_X86
@@ -54,6 +64,8 @@ else ifeq ($(HAS_ALTIVEC),yes)
         CFLAGS += -DCONFIG_ALTIVEC
         OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
                 vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
+else ifeq ($(ARCH),loongarch64)
+        OBJS += loongarch_simd.o recov_loongarch_simd.o
 endif
 
 .c.o:
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index c65566b4dc66..68b45c82c37a 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -265,7 +265,8 @@ EXPORT_SYMBOL(sg_free_table);
  * @table:	The sg table header to use
  * @nents:	Number of entries in sg list
  * @max_ents:	The maximum number of entries the allocator returns per call
- * @nents_first_chunk: Number of entries int the (preallocated) first
+ * @first_chunk: first SGL if preallocated (may be %NULL)
+ * @nents_first_chunk: Number of entries in the (preallocated) first
  * 	scatterlist chunk, 0 means no such preallocated chunk provided by user
  * @gfp_mask:	GFP allocation mask
  * @alloc_fn:	Allocator to use
@@ -788,6 +789,7 @@ EXPORT_SYMBOL(__sg_page_iter_dma_next);
  * @miter: sg mapping iter to be started
  * @sgl: sg list to iterate over
  * @nents: number of sg entries
+ * @flags: sg iterator flags
  *
  * Description:
  *   Starts mapping iterator @miter.
diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c
index 0674aebd4423..06959165e2f9 100644
--- a/lib/test_maple_tree.c
+++ b/lib/test_maple_tree.c
@@ -2166,7 +2166,7 @@ static noinline void __init next_prev_test(struct maple_tree *mt)
 	MT_BUG_ON(mt, val != NULL);
 	MT_BUG_ON(mt, mas.index != 0);
 	MT_BUG_ON(mt, mas.last != 5);
-	MT_BUG_ON(mt, mas.node != MAS_NONE);
+	MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW);
 
 	mas.index = 0;
 	mas.last = 5;
@@ -2917,6 +2917,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt)
  *		exists	MAS_NONE	active		range
  *		exists	active		active		range
  *		DNE	active		active		set to last range
+ *		ERANGE	active		MAS_OVERFLOW	last range
  *
  * Function	ENTRY	Start		Result		index & last
  * mas_prev()
@@ -2945,6 +2946,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt)
  *		any	MAS_ROOT	MAS_NONE	0
  *		exists	active		active		range
  *		DNE	active		active		last range
+ *		ERANGE	active		MAS_UNDERFLOW	last range
  *
  * Function	ENTRY	Start		Result		index & last
  * mas_find()
@@ -2955,7 +2957,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt)
  *		DNE	MAS_START	MAS_NONE	0
  *		DNE	MAS_PAUSE	MAS_NONE	0
  *		DNE	MAS_ROOT	MAS_NONE	0
- *		DNE	MAS_NONE	MAS_NONE	0
+ *		DNE	MAS_NONE	MAS_NONE	1
  *				if index ==  0
  *		exists	MAS_START	MAS_ROOT	0
  *		exists	MAS_PAUSE	MAS_ROOT	0
@@ -2967,7 +2969,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt)
  *		DNE	MAS_START	active		set to max
  *		exists	MAS_PAUSE	active		range
  *		DNE	MAS_PAUSE	active		set to max
- *		exists	MAS_NONE	active		range
+ *		exists	MAS_NONE	active		range (start at last)
  *		exists	active		active		range
  *		DNE	active		active		last range (max < last)
  *
@@ -2992,7 +2994,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt)
  *		DNE	MAS_START	active		set to min
  *		exists	MAS_PAUSE	active		range
  *		DNE	MAS_PAUSE	active		set to min
- *		exists	MAS_NONE	active		range
+ *		exists	MAS_NONE	active		range (start at index)
  *		exists	active		active		range
  *		DNE	active		active		last range (min > index)
  *
@@ -3039,10 +3041,10 @@ static noinline void __init check_state_handling(struct maple_tree *mt)
 	mtree_store_range(mt, 0, 0, ptr, GFP_KERNEL);
 
 	mas_lock(&mas);
-	/* prev: Start -> none */
+	/* prev: Start -> underflow*/
 	entry = mas_prev(&mas, 0);
 	MT_BUG_ON(mt, entry != NULL);
-	MT_BUG_ON(mt, mas.node != MAS_NONE);
+	MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW);
 
 	/* prev: Start -> root */
 	mas_set(&mas, 10);
@@ -3069,7 +3071,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt)
 	MT_BUG_ON(mt, entry != NULL);
 	MT_BUG_ON(mt, mas.node != MAS_NONE);
 
-	/* next: start -> none */
+	/* next: start -> none*/
 	mas_set(&mas, 10);
 	entry = mas_next(&mas, ULONG_MAX);
 	MT_BUG_ON(mt, mas.index != 1);
@@ -3268,25 +3270,46 @@ static noinline void __init check_state_handling(struct maple_tree *mt)
 	MT_BUG_ON(mt, mas.last != 0x2500);
 	MT_BUG_ON(mt, !mas_active(mas));
 
-	/* next:active -> active out of range*/
+	/* next:active -> active beyond data */
 	entry = mas_next(&mas, 0x2999);
 	MT_BUG_ON(mt, entry != NULL);
 	MT_BUG_ON(mt, mas.index != 0x2501);
 	MT_BUG_ON(mt, mas.last != 0x2fff);
 	MT_BUG_ON(mt, !mas_active(mas));
 
-	/* Continue after out of range*/
+	/* Continue after last range ends after max */
 	entry = mas_next(&mas, ULONG_MAX);
 	MT_BUG_ON(mt, entry != ptr3);
 	MT_BUG_ON(mt, mas.index != 0x3000);
 	MT_BUG_ON(mt, mas.last != 0x3500);
 	MT_BUG_ON(mt, !mas_active(mas));
 
-	/* next:active -> active out of range*/
+	/* next:active -> active continued */
+	entry = mas_next(&mas, ULONG_MAX);
+	MT_BUG_ON(mt, entry != NULL);
+	MT_BUG_ON(mt, mas.index != 0x3501);
+	MT_BUG_ON(mt, mas.last != ULONG_MAX);
+	MT_BUG_ON(mt, !mas_active(mas));
+
+	/* next:active -> overflow  */
 	entry = mas_next(&mas, ULONG_MAX);
 	MT_BUG_ON(mt, entry != NULL);
 	MT_BUG_ON(mt, mas.index != 0x3501);
 	MT_BUG_ON(mt, mas.last != ULONG_MAX);
+	MT_BUG_ON(mt, mas.node != MAS_OVERFLOW);
+
+	/* next:overflow -> overflow  */
+	entry = mas_next(&mas, ULONG_MAX);
+	MT_BUG_ON(mt, entry != NULL);
+	MT_BUG_ON(mt, mas.index != 0x3501);
+	MT_BUG_ON(mt, mas.last != ULONG_MAX);
+	MT_BUG_ON(mt, mas.node != MAS_OVERFLOW);
+
+	/* prev:overflow -> active  */
+	entry = mas_prev(&mas, 0);
+	MT_BUG_ON(mt, entry != ptr3);
+	MT_BUG_ON(mt, mas.index != 0x3000);
+	MT_BUG_ON(mt, mas.last != 0x3500);
 	MT_BUG_ON(mt, !mas_active(mas));
 
 	/* next: none -> active, skip value at location */
@@ -3307,11 +3330,46 @@ static noinline void __init check_state_handling(struct maple_tree *mt)
 	MT_BUG_ON(mt, mas.last != 0x1500);
 	MT_BUG_ON(mt, !mas_active(mas));
 
-	/* prev:active -> active out of range*/
+	/* prev:active -> active spanning end range */
+	entry = mas_prev(&mas, 0x0100);
+	MT_BUG_ON(mt, entry != NULL);
+	MT_BUG_ON(mt, mas.index != 0);
+	MT_BUG_ON(mt, mas.last != 0x0FFF);
+	MT_BUG_ON(mt, !mas_active(mas));
+
+	/* prev:active -> underflow */
+	entry = mas_prev(&mas, 0);
+	MT_BUG_ON(mt, entry != NULL);
+	MT_BUG_ON(mt, mas.index != 0);
+	MT_BUG_ON(mt, mas.last != 0x0FFF);
+	MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW);
+
+	/* prev:underflow -> underflow */
 	entry = mas_prev(&mas, 0);
 	MT_BUG_ON(mt, entry != NULL);
 	MT_BUG_ON(mt, mas.index != 0);
 	MT_BUG_ON(mt, mas.last != 0x0FFF);
+	MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW);
+
+	/* next:underflow -> active */
+	entry = mas_next(&mas, ULONG_MAX);
+	MT_BUG_ON(mt, entry != ptr);
+	MT_BUG_ON(mt, mas.index != 0x1000);
+	MT_BUG_ON(mt, mas.last != 0x1500);
+	MT_BUG_ON(mt, !mas_active(mas));
+
+	/* prev:first value -> underflow */
+	entry = mas_prev(&mas, 0x1000);
+	MT_BUG_ON(mt, entry != NULL);
+	MT_BUG_ON(mt, mas.index != 0x1000);
+	MT_BUG_ON(mt, mas.last != 0x1500);
+	MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW);
+
+	/* find:underflow -> first value */
+	entry = mas_find(&mas, ULONG_MAX);
+	MT_BUG_ON(mt, entry != ptr);
+	MT_BUG_ON(mt, mas.index != 0x1000);
+	MT_BUG_ON(mt, mas.last != 0x1500);
 	MT_BUG_ON(mt, !mas_active(mas));
 
 	/* prev: pause ->active */
@@ -3325,14 +3383,14 @@ static noinline void __init check_state_handling(struct maple_tree *mt)
 	MT_BUG_ON(mt, mas.last != 0x2500);
 	MT_BUG_ON(mt, !mas_active(mas));
 
-	/* prev:active -> active out of range*/
+	/* prev:active -> active spanning min */
 	entry = mas_prev(&mas, 0x1600);
 	MT_BUG_ON(mt, entry != NULL);
 	MT_BUG_ON(mt, mas.index != 0x1501);
 	MT_BUG_ON(mt, mas.last != 0x1FFF);
 	MT_BUG_ON(mt, !mas_active(mas));
 
-	/* prev: active ->active, continue*/
+	/* prev: active ->active, continue */
 	entry = mas_prev(&mas, 0);
 	MT_BUG_ON(mt, entry != ptr);
 	MT_BUG_ON(mt, mas.index != 0x1000);
@@ -3379,7 +3437,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt)
 	MT_BUG_ON(mt, mas.last != 0x2FFF);
 	MT_BUG_ON(mt, !mas_active(mas));
 
-	/* find: none ->active */
+	/* find: overflow ->active */
 	entry = mas_find(&mas, 0x5000);
 	MT_BUG_ON(mt, entry != ptr3);
 	MT_BUG_ON(mt, mas.index != 0x3000);
@@ -3778,7 +3836,6 @@ static int __init maple_tree_seed(void)
 	check_empty_area_fill(&tree);
 	mtree_destroy(&tree);
 
-
 	mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
 	check_state_handling(&tree);
 	mtree_destroy(&tree);
diff --git a/lib/test_scanf.c b/lib/test_scanf.c
index b620cf7de503..a2707af2951a 100644
--- a/lib/test_scanf.c
+++ b/lib/test_scanf.c
@@ -606,7 +606,7 @@ static void __init numbers_slice(void)
 #define test_number_prefix(T, str, scan_fmt, expect0, expect1, n_args, fn)	\
 do {										\
 	const T expect[2] = { expect0, expect1 };				\
-	T result[2] = {~expect[0], ~expect[1]};					\
+	T result[2] = { (T)~expect[0], (T)~expect[1] };				\
 										\
 	_test(fn, &expect, str, scan_fmt, n_args, &result[0], &result[1]);	\
 } while (0)
diff --git a/lib/xarray.c b/lib/xarray.c
index 2071a3718f4e..39f07bfc4dcc 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -206,7 +206,7 @@ static void *xas_descend(struct xa_state *xas, struct xa_node *node)
 	void *entry = xa_entry(xas->xa, node, offset);
 
 	xas->xa_node = node;
-	if (xa_is_sibling(entry)) {
+	while (xa_is_sibling(entry)) {
 		offset = xa_to_sibling(entry);
 		entry = xa_entry(xas->xa, node, offset);
 		if (node->shift && xa_is_node(entry))
@@ -1802,6 +1802,9 @@ EXPORT_SYMBOL(xa_get_order);
  * stores the index into the @id pointer, then stores the entry at
  * that index.  A concurrent lookup will not see an uninitialised @id.
  *
+ * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
+ * in xa_init_flags().
+ *
  * Context: Any context.  Expects xa_lock to be held on entry.  May
  * release and reacquire xa_lock if @gfp flags permit.
  * Return: 0 on success, -ENOMEM if memory could not be allocated or
@@ -1850,6 +1853,9 @@ EXPORT_SYMBOL(__xa_alloc);
  * The search for an empty entry will start at @next and will wrap
  * around if necessary.
  *
+ * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
+ * in xa_init_flags().
+ *
  * Context: Any context.  Expects xa_lock to be held on entry.  May
  * release and reacquire xa_lock if @gfp flags permit.
  * Return: 0 if the allocation succeeded without wrapping.  1 if the