diff options
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/Kconfig | 3 | ||||
| -rw-r--r-- | lib/Kconfig.debug | 9 | ||||
| -rw-r--r-- | lib/Makefile | 2 | ||||
| -rw-r--r-- | lib/cache_maint.c | 138 | ||||
| -rw-r--r-- | lib/hweight.c | 4 | ||||
| -rw-r--r-- | lib/raid6/recov_rvv.c | 7 | ||||
| -rw-r--r-- | lib/raid6/rvv.c | 299 | ||||
| -rw-r--r-- | lib/raid6/rvv.h | 17 | ||||
| -rw-r--r-- | lib/raid6/test/Makefile | 8 |
9 files changed, 327 insertions, 160 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index e629449dd2a3..2923924bea78 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -542,6 +542,9 @@ config MEMREGION config ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION bool +config GENERIC_CPU_CACHE_MAINTENANCE + bool + config ARCH_HAS_MEMREMAP_COMPAT_ALIGN bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 742b23ef0d8b..c2654075377e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -688,7 +688,7 @@ choice help This selects the default access restrictions for debugfs. It can be overridden with kernel command line option - debugfs=[on,no-mount,off]. The restrictions apply for API access + debugfs=[on,off]. The restrictions apply for API access and filesystem registration. config DEBUG_FS_ALLOW_ALL @@ -697,13 +697,6 @@ config DEBUG_FS_ALLOW_ALL No restrictions apply. Both API and filesystem registration is on. This is the normal default operation. -config DEBUG_FS_DISALLOW_MOUNT - bool "Do not register debugfs as filesystem" - help - The API is open but filesystem is not loaded. Clients can still do - their work and read with debug tools that do not need - debugfs filesystem. - config DEBUG_FS_ALLOW_NONE bool "No access" help diff --git a/lib/Makefile b/lib/Makefile index 1ab2c4be3b66..aaf677cf4527 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -127,6 +127,8 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o +obj-$(CONFIG_GENERIC_CPU_CACHE_MAINTENANCE) += cache_maint.o + lib-y += logic_pio.o lib-$(CONFIG_INDIRECT_IOMEM) += logic_iomem.o diff --git a/lib/cache_maint.c b/lib/cache_maint.c new file mode 100644 index 000000000000..9256a9ffc34c --- /dev/null +++ b/lib/cache_maint.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generic support for Memory System Cache Maintenance operations. + * + * Coherency maintenance drivers register with this simple framework that will + * iterate over each registered instance to first kick off invalidation and + * then to wait until it is complete. + * + * If no implementations are registered yet cpu_cache_has_invalidate_memregion() + * will return false. If this runs concurrently with unregistration then a + * race exists but this is no worse than the case where the operations instance + * responsible for a given memory region has not yet registered. + */ +#include <linux/cache_coherency.h> +#include <linux/cleanup.h> +#include <linux/container_of.h> +#include <linux/export.h> +#include <linux/kref.h> +#include <linux/list.h> +#include <linux/memregion.h> +#include <linux/module.h> +#include <linux/rwsem.h> +#include <linux/slab.h> + +static LIST_HEAD(cache_ops_instance_list); +static DECLARE_RWSEM(cache_ops_instance_list_lock); + +static void __cache_coherency_ops_instance_free(struct kref *kref) +{ + struct cache_coherency_ops_inst *cci = + container_of(kref, struct cache_coherency_ops_inst, kref); + kfree(cci); +} + +void cache_coherency_ops_instance_put(struct cache_coherency_ops_inst *cci) +{ + kref_put(&cci->kref, __cache_coherency_ops_instance_free); +} +EXPORT_SYMBOL_GPL(cache_coherency_ops_instance_put); + +static int cache_inval_one(struct cache_coherency_ops_inst *cci, void *data) +{ + if (!cci->ops) + return -EINVAL; + + return cci->ops->wbinv(cci, data); +} + +static int cache_inval_done_one(struct cache_coherency_ops_inst *cci) +{ + if (!cci->ops) + return -EINVAL; + + if (!cci->ops->done) + return 0; + + return cci->ops->done(cci); +} + +static int cache_invalidate_memregion(phys_addr_t addr, size_t size) +{ + int ret; + struct cache_coherency_ops_inst *cci; + struct cc_inval_params params = { + .addr = addr, + .size = size, + }; + + guard(rwsem_read)(&cache_ops_instance_list_lock); + list_for_each_entry(cci, &cache_ops_instance_list, node) { + ret = cache_inval_one(cci, ¶ms); + if (ret) + return ret; + } + list_for_each_entry(cci, &cache_ops_instance_list, node) { + ret = cache_inval_done_one(cci); + if (ret) + return ret; + } + + return 0; +} + +struct cache_coherency_ops_inst * +_cache_coherency_ops_instance_alloc(const struct cache_coherency_ops *ops, + size_t size) +{ + struct cache_coherency_ops_inst *cci; + + if (!ops || !ops->wbinv) + return NULL; + + cci = kzalloc(size, GFP_KERNEL); + if (!cci) + return NULL; + + cci->ops = ops; + INIT_LIST_HEAD(&cci->node); + kref_init(&cci->kref); + + return cci; +} +EXPORT_SYMBOL_NS_GPL(_cache_coherency_ops_instance_alloc, "CACHE_COHERENCY"); + +int cache_coherency_ops_instance_register(struct cache_coherency_ops_inst *cci) +{ + guard(rwsem_write)(&cache_ops_instance_list_lock); + list_add(&cci->node, &cache_ops_instance_list); + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cache_coherency_ops_instance_register, "CACHE_COHERENCY"); + +void cache_coherency_ops_instance_unregister(struct cache_coherency_ops_inst *cci) +{ + guard(rwsem_write)(&cache_ops_instance_list_lock); + list_del(&cci->node); +} +EXPORT_SYMBOL_NS_GPL(cache_coherency_ops_instance_unregister, "CACHE_COHERENCY"); + +int cpu_cache_invalidate_memregion(phys_addr_t start, size_t len) +{ + return cache_invalidate_memregion(start, len); +} +EXPORT_SYMBOL_NS_GPL(cpu_cache_invalidate_memregion, "DEVMEM"); + +/* + * Used for optimization / debug purposes only as removal can race + * + * Machines that do not support invalidation, e.g. VMs, will not have any + * operations instance to register and so this will always return false. + */ +bool cpu_cache_has_invalidate_memregion(void) +{ + guard(rwsem_read)(&cache_ops_instance_list_lock); + return !list_empty(&cache_ops_instance_list); +} +EXPORT_SYMBOL_NS_GPL(cpu_cache_has_invalidate_memregion, "DEVMEM"); diff --git a/lib/hweight.c b/lib/hweight.c index c94586b62551..0dfcafc3fd39 100644 --- a/lib/hweight.c +++ b/lib/hweight.c @@ -4,8 +4,8 @@ #include <asm/types.h> /** - * hweightN - returns the hamming weight of a N-bit word - * @x: the word to weigh + * DOC: __sw_hweightN - returns the hamming weight of a N-bit word + * @w: the word to weigh * * The Hamming Weight of a number is the total number of bits set in it. */ diff --git a/lib/raid6/recov_rvv.c b/lib/raid6/recov_rvv.c index 5f779719c3d3..40c393206b6a 100644 --- a/lib/raid6/recov_rvv.c +++ b/lib/raid6/recov_rvv.c @@ -4,13 +4,8 @@ * Author: Chunyan Zhang <zhangchunyan@iscas.ac.cn> */ -#include <asm/vector.h> #include <linux/raid/pq.h> - -static int rvv_has_vector(void) -{ - return has_vector(); -} +#include "rvv.h" static void __raid6_2data_recov_rvv(int bytes, u8 *p, u8 *q, u8 *dp, u8 *dq, const u8 *pbmul, diff --git a/lib/raid6/rvv.c b/lib/raid6/rvv.c index 89da5fc247aa..75c9dafedb28 100644 --- a/lib/raid6/rvv.c +++ b/lib/raid6/rvv.c @@ -9,22 +9,17 @@ * Copyright 2002-2004 H. Peter Anvin */ -#include <asm/vector.h> -#include <linux/raid/pq.h> #include "rvv.h" -#define NSIZE (riscv_v_vsize / 32) /* NSIZE = vlenb */ - -static int rvv_has_vector(void) -{ - return has_vector(); -} +#ifdef __riscv_vector +#error "This code must be built without compiler support for vector" +#endif static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long vl, d; + unsigned long vl, d, nsize; int z, z0; z0 = disks - 3; /* Highest data disk */ @@ -38,8 +33,10 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** : "=&r" (vl) ); + nsize = vl; + /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ - for (d = 0; d < bytes; d += NSIZE * 1) { + for (d = 0; d < bytes; d += nsize * 1) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile (".option push\n" ".option arch,+v\n" @@ -47,7 +44,7 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** "vmv.v.v v1, v0\n" ".option pop\n" : : - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]) + [wp0]"r"(&dptr[z0][d + 0 * nsize]) ); for (z = z0 - 1 ; z >= 0 ; z--) { @@ -71,7 +68,7 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** "vxor.vv v0, v0, v2\n" ".option pop\n" : : - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), + [wd0]"r"(&dptr[z][d + 0 * nsize]), [x1d]"r"(0x1d) ); } @@ -86,8 +83,8 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** "vse8.v v1, (%[wq0])\n" ".option pop\n" : : - [wp0]"r"(&p[d + NSIZE * 0]), - [wq0]"r"(&q[d + NSIZE * 0]) + [wp0]"r"(&p[d + nsize * 0]), + [wq0]"r"(&q[d + nsize * 0]) ); } } @@ -97,7 +94,7 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long vl, d; + unsigned long vl, d, nsize; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -111,8 +108,10 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, : "=&r" (vl) ); + nsize = vl; + /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ - for (d = 0 ; d < bytes ; d += NSIZE * 1) { + for (d = 0 ; d < bytes ; d += nsize * 1) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile (".option push\n" ".option arch,+v\n" @@ -120,7 +119,7 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, "vmv.v.v v1, v0\n" ".option pop\n" : : - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]) + [wp0]"r"(&dptr[z0][d + 0 * nsize]) ); /* P/Q data pages */ @@ -145,7 +144,7 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, "vxor.vv v0, v0, v2\n" ".option pop\n" : : - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), + [wd0]"r"(&dptr[z][d + 0 * nsize]), [x1d]"r"(0x1d) ); } @@ -185,8 +184,8 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, "vse8.v v3, (%[wq0])\n" ".option pop\n" : : - [wp0]"r"(&p[d + NSIZE * 0]), - [wq0]"r"(&q[d + NSIZE * 0]) + [wp0]"r"(&p[d + nsize * 0]), + [wq0]"r"(&q[d + nsize * 0]) ); } } @@ -195,7 +194,7 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long vl, d; + unsigned long vl, d, nsize; int z, z0; z0 = disks - 3; /* Highest data disk */ @@ -209,11 +208,13 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** : "=&r" (vl) ); + nsize = vl; + /* * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 */ - for (d = 0; d < bytes; d += NSIZE * 2) { + for (d = 0; d < bytes; d += nsize * 2) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile (".option push\n" ".option arch,+v\n" @@ -223,8 +224,8 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** "vmv.v.v v5, v4\n" ".option pop\n" : : - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]) + [wp0]"r"(&dptr[z0][d + 0 * nsize]), + [wp1]"r"(&dptr[z0][d + 1 * nsize]) ); for (z = z0 - 1; z >= 0; z--) { @@ -256,8 +257,8 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** "vxor.vv v4, v4, v6\n" ".option pop\n" : : - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), - [wd1]"r"(&dptr[z][d + 1 * NSIZE]), + [wd0]"r"(&dptr[z][d + 0 * nsize]), + [wd1]"r"(&dptr[z][d + 1 * nsize]), [x1d]"r"(0x1d) ); } @@ -274,10 +275,10 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** "vse8.v v5, (%[wq1])\n" ".option pop\n" : : - [wp0]"r"(&p[d + NSIZE * 0]), - [wq0]"r"(&q[d + NSIZE * 0]), - [wp1]"r"(&p[d + NSIZE * 1]), - [wq1]"r"(&q[d + NSIZE * 1]) + [wp0]"r"(&p[d + nsize * 0]), + [wq0]"r"(&q[d + nsize * 0]), + [wp1]"r"(&p[d + nsize * 1]), + [wq1]"r"(&q[d + nsize * 1]) ); } } @@ -287,7 +288,7 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long vl, d; + unsigned long vl, d, nsize; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -301,11 +302,13 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, : "=&r" (vl) ); + nsize = vl; + /* * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 */ - for (d = 0; d < bytes; d += NSIZE * 2) { + for (d = 0; d < bytes; d += nsize * 2) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile (".option push\n" ".option arch,+v\n" @@ -315,8 +318,8 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, "vmv.v.v v5, v4\n" ".option pop\n" : : - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]) + [wp0]"r"(&dptr[z0][d + 0 * nsize]), + [wp1]"r"(&dptr[z0][d + 1 * nsize]) ); /* P/Q data pages */ @@ -349,8 +352,8 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, "vxor.vv v4, v4, v6\n" ".option pop\n" : : - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), - [wd1]"r"(&dptr[z][d + 1 * NSIZE]), + [wd0]"r"(&dptr[z][d + 0 * nsize]), + [wd1]"r"(&dptr[z][d + 1 * nsize]), [x1d]"r"(0x1d) ); } @@ -403,10 +406,10 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, "vse8.v v7, (%[wq1])\n" ".option pop\n" : : - [wp0]"r"(&p[d + NSIZE * 0]), - [wq0]"r"(&q[d + NSIZE * 0]), - [wp1]"r"(&p[d + NSIZE * 1]), - [wq1]"r"(&q[d + NSIZE * 1]) + [wp0]"r"(&p[d + nsize * 0]), + [wq0]"r"(&q[d + nsize * 0]), + [wp1]"r"(&p[d + nsize * 1]), + [wq1]"r"(&q[d + nsize * 1]) ); } } @@ -415,7 +418,7 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long vl, d; + unsigned long vl, d, nsize; int z, z0; z0 = disks - 3; /* Highest data disk */ @@ -429,13 +432,15 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** : "=&r" (vl) ); + nsize = vl; + /* * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13 */ - for (d = 0; d < bytes; d += NSIZE * 4) { + for (d = 0; d < bytes; d += nsize * 4) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile (".option push\n" ".option arch,+v\n" @@ -449,10 +454,10 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** "vmv.v.v v13, v12\n" ".option pop\n" : : - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), - [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), - [wp3]"r"(&dptr[z0][d + 3 * NSIZE]) + [wp0]"r"(&dptr[z0][d + 0 * nsize]), + [wp1]"r"(&dptr[z0][d + 1 * nsize]), + [wp2]"r"(&dptr[z0][d + 2 * nsize]), + [wp3]"r"(&dptr[z0][d + 3 * nsize]) ); for (z = z0 - 1; z >= 0; z--) { @@ -500,10 +505,10 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** "vxor.vv v12, v12, v14\n" ".option pop\n" : : - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), - [wd1]"r"(&dptr[z][d + 1 * NSIZE]), - [wd2]"r"(&dptr[z][d + 2 * NSIZE]), - [wd3]"r"(&dptr[z][d + 3 * NSIZE]), + [wd0]"r"(&dptr[z][d + 0 * nsize]), + [wd1]"r"(&dptr[z][d + 1 * nsize]), + [wd2]"r"(&dptr[z][d + 2 * nsize]), + [wd3]"r"(&dptr[z][d + 3 * nsize]), [x1d]"r"(0x1d) ); } @@ -524,14 +529,14 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** "vse8.v v13, (%[wq3])\n" ".option pop\n" : : - [wp0]"r"(&p[d + NSIZE * 0]), - [wq0]"r"(&q[d + NSIZE * 0]), - [wp1]"r"(&p[d + NSIZE * 1]), - [wq1]"r"(&q[d + NSIZE * 1]), - [wp2]"r"(&p[d + NSIZE * 2]), - [wq2]"r"(&q[d + NSIZE * 2]), - [wp3]"r"(&p[d + NSIZE * 3]), - [wq3]"r"(&q[d + NSIZE * 3]) + [wp0]"r"(&p[d + nsize * 0]), + [wq0]"r"(&q[d + nsize * 0]), + [wp1]"r"(&p[d + nsize * 1]), + [wq1]"r"(&q[d + nsize * 1]), + [wp2]"r"(&p[d + nsize * 2]), + [wq2]"r"(&q[d + nsize * 2]), + [wp3]"r"(&p[d + nsize * 3]), + [wq3]"r"(&q[d + nsize * 3]) ); } } @@ -541,7 +546,7 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long vl, d; + unsigned long vl, d, nsize; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -555,13 +560,15 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, : "=&r" (vl) ); + nsize = vl; + /* * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13 */ - for (d = 0; d < bytes; d += NSIZE * 4) { + for (d = 0; d < bytes; d += nsize * 4) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile (".option push\n" ".option arch,+v\n" @@ -575,10 +582,10 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, "vmv.v.v v13, v12\n" ".option pop\n" : : - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), - [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), - [wp3]"r"(&dptr[z0][d + 3 * NSIZE]) + [wp0]"r"(&dptr[z0][d + 0 * nsize]), + [wp1]"r"(&dptr[z0][d + 1 * nsize]), + [wp2]"r"(&dptr[z0][d + 2 * nsize]), + [wp3]"r"(&dptr[z0][d + 3 * nsize]) ); /* P/Q data pages */ @@ -627,10 +634,10 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, "vxor.vv v12, v12, v14\n" ".option pop\n" : : - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), - [wd1]"r"(&dptr[z][d + 1 * NSIZE]), - [wd2]"r"(&dptr[z][d + 2 * NSIZE]), - [wd3]"r"(&dptr[z][d + 3 * NSIZE]), + [wd0]"r"(&dptr[z][d + 0 * nsize]), + [wd1]"r"(&dptr[z][d + 1 * nsize]), + [wd2]"r"(&dptr[z][d + 2 * nsize]), + [wd3]"r"(&dptr[z][d + 3 * nsize]), [x1d]"r"(0x1d) ); } @@ -709,14 +716,14 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, "vse8.v v15, (%[wq3])\n" ".option pop\n" : : - [wp0]"r"(&p[d + NSIZE * 0]), - [wq0]"r"(&q[d + NSIZE * 0]), - [wp1]"r"(&p[d + NSIZE * 1]), - [wq1]"r"(&q[d + NSIZE * 1]), - [wp2]"r"(&p[d + NSIZE * 2]), - [wq2]"r"(&q[d + NSIZE * 2]), - [wp3]"r"(&p[d + NSIZE * 3]), - [wq3]"r"(&q[d + NSIZE * 3]) + [wp0]"r"(&p[d + nsize * 0]), + [wq0]"r"(&q[d + nsize * 0]), + [wp1]"r"(&p[d + nsize * 1]), + [wq1]"r"(&q[d + nsize * 1]), + [wp2]"r"(&p[d + nsize * 2]), + [wq2]"r"(&q[d + nsize * 2]), + [wp3]"r"(&p[d + nsize * 3]), + [wq3]"r"(&q[d + nsize * 3]) ); } } @@ -725,7 +732,7 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void ** { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long vl, d; + unsigned long vl, d, nsize; int z, z0; z0 = disks - 3; /* Highest data disk */ @@ -739,6 +746,8 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void ** : "=&r" (vl) ); + nsize = vl; + /* * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 @@ -749,7 +758,7 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void ** * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17 */ - for (d = 0; d < bytes; d += NSIZE * 8) { + for (d = 0; d < bytes; d += nsize * 8) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile (".option push\n" ".option arch,+v\n" @@ -771,14 +780,14 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void ** "vmv.v.v v29, v28\n" ".option pop\n" : : - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), - [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), - [wp3]"r"(&dptr[z0][d + 3 * NSIZE]), - [wp4]"r"(&dptr[z0][d + 4 * NSIZE]), - [wp5]"r"(&dptr[z0][d + 5 * NSIZE]), - [wp6]"r"(&dptr[z0][d + 6 * NSIZE]), - [wp7]"r"(&dptr[z0][d + 7 * NSIZE]) + [wp0]"r"(&dptr[z0][d + 0 * nsize]), + [wp1]"r"(&dptr[z0][d + 1 * nsize]), + [wp2]"r"(&dptr[z0][d + 2 * nsize]), + [wp3]"r"(&dptr[z0][d + 3 * nsize]), + [wp4]"r"(&dptr[z0][d + 4 * nsize]), + [wp5]"r"(&dptr[z0][d + 5 * nsize]), + [wp6]"r"(&dptr[z0][d + 6 * nsize]), + [wp7]"r"(&dptr[z0][d + 7 * nsize]) ); for (z = z0 - 1; z >= 0; z--) { @@ -858,14 +867,14 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void ** "vxor.vv v28, v28, v30\n" ".option pop\n" : : - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), - [wd1]"r"(&dptr[z][d + 1 * NSIZE]), - [wd2]"r"(&dptr[z][d + 2 * NSIZE]), - [wd3]"r"(&dptr[z][d + 3 * NSIZE]), - [wd4]"r"(&dptr[z][d + 4 * NSIZE]), - [wd5]"r"(&dptr[z][d + 5 * NSIZE]), - [wd6]"r"(&dptr[z][d + 6 * NSIZE]), - [wd7]"r"(&dptr[z][d + 7 * NSIZE]), + [wd0]"r"(&dptr[z][d + 0 * nsize]), + [wd1]"r"(&dptr[z][d + 1 * nsize]), + [wd2]"r"(&dptr[z][d + 2 * nsize]), + [wd3]"r"(&dptr[z][d + 3 * nsize]), + [wd4]"r"(&dptr[z][d + 4 * nsize]), + [wd5]"r"(&dptr[z][d + 5 * nsize]), + [wd6]"r"(&dptr[z][d + 6 * nsize]), + [wd7]"r"(&dptr[z][d + 7 * nsize]), [x1d]"r"(0x1d) ); } @@ -894,22 +903,22 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void ** "vse8.v v29, (%[wq7])\n" ".option pop\n" : : - [wp0]"r"(&p[d + NSIZE * 0]), - [wq0]"r"(&q[d + NSIZE * 0]), - [wp1]"r"(&p[d + NSIZE * 1]), - [wq1]"r"(&q[d + NSIZE * 1]), - [wp2]"r"(&p[d + NSIZE * 2]), - [wq2]"r"(&q[d + NSIZE * 2]), - [wp3]"r"(&p[d + NSIZE * 3]), - [wq3]"r"(&q[d + NSIZE * 3]), - [wp4]"r"(&p[d + NSIZE * 4]), - [wq4]"r"(&q[d + NSIZE * 4]), - [wp5]"r"(&p[d + NSIZE * 5]), - [wq5]"r"(&q[d + NSIZE * 5]), - [wp6]"r"(&p[d + NSIZE * 6]), - [wq6]"r"(&q[d + NSIZE * 6]), - [wp7]"r"(&p[d + NSIZE * 7]), - [wq7]"r"(&q[d + NSIZE * 7]) + [wp0]"r"(&p[d + nsize * 0]), + [wq0]"r"(&q[d + nsize * 0]), + [wp1]"r"(&p[d + nsize * 1]), + [wq1]"r"(&q[d + nsize * 1]), + [wp2]"r"(&p[d + nsize * 2]), + [wq2]"r"(&q[d + nsize * 2]), + [wp3]"r"(&p[d + nsize * 3]), + [wq3]"r"(&q[d + nsize * 3]), + [wp4]"r"(&p[d + nsize * 4]), + [wq4]"r"(&q[d + nsize * 4]), + [wp5]"r"(&p[d + nsize * 5]), + [wq5]"r"(&q[d + nsize * 5]), + [wp6]"r"(&p[d + nsize * 6]), + [wq6]"r"(&q[d + nsize * 6]), + [wp7]"r"(&p[d + nsize * 7]), + [wq7]"r"(&q[d + nsize * 7]) ); } } @@ -919,7 +928,7 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long vl, d; + unsigned long vl, d, nsize; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -933,6 +942,8 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, : "=&r" (vl) ); + nsize = vl; + /* * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 @@ -943,7 +954,7 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17 */ - for (d = 0; d < bytes; d += NSIZE * 8) { + for (d = 0; d < bytes; d += nsize * 8) { /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ asm volatile (".option push\n" ".option arch,+v\n" @@ -965,14 +976,14 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, "vmv.v.v v29, v28\n" ".option pop\n" : : - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), - [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), - [wp3]"r"(&dptr[z0][d + 3 * NSIZE]), - [wp4]"r"(&dptr[z0][d + 4 * NSIZE]), - [wp5]"r"(&dptr[z0][d + 5 * NSIZE]), - [wp6]"r"(&dptr[z0][d + 6 * NSIZE]), - [wp7]"r"(&dptr[z0][d + 7 * NSIZE]) + [wp0]"r"(&dptr[z0][d + 0 * nsize]), + [wp1]"r"(&dptr[z0][d + 1 * nsize]), + [wp2]"r"(&dptr[z0][d + 2 * nsize]), + [wp3]"r"(&dptr[z0][d + 3 * nsize]), + [wp4]"r"(&dptr[z0][d + 4 * nsize]), + [wp5]"r"(&dptr[z0][d + 5 * nsize]), + [wp6]"r"(&dptr[z0][d + 6 * nsize]), + [wp7]"r"(&dptr[z0][d + 7 * nsize]) ); /* P/Q data pages */ @@ -1053,14 +1064,14 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, "vxor.vv v28, v28, v30\n" ".option pop\n" : : - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), - [wd1]"r"(&dptr[z][d + 1 * NSIZE]), - [wd2]"r"(&dptr[z][d + 2 * NSIZE]), - [wd3]"r"(&dptr[z][d + 3 * NSIZE]), - [wd4]"r"(&dptr[z][d + 4 * NSIZE]), - [wd5]"r"(&dptr[z][d + 5 * NSIZE]), - [wd6]"r"(&dptr[z][d + 6 * NSIZE]), - [wd7]"r"(&dptr[z][d + 7 * NSIZE]), + [wd0]"r"(&dptr[z][d + 0 * nsize]), + [wd1]"r"(&dptr[z][d + 1 * nsize]), + [wd2]"r"(&dptr[z][d + 2 * nsize]), + [wd3]"r"(&dptr[z][d + 3 * nsize]), + [wd4]"r"(&dptr[z][d + 4 * nsize]), + [wd5]"r"(&dptr[z][d + 5 * nsize]), + [wd6]"r"(&dptr[z][d + 6 * nsize]), + [wd7]"r"(&dptr[z][d + 7 * nsize]), [x1d]"r"(0x1d) ); } @@ -1191,22 +1202,22 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, "vse8.v v31, (%[wq7])\n" ".option pop\n" : : - [wp0]"r"(&p[d + NSIZE * 0]), - [wq0]"r"(&q[d + NSIZE * 0]), - [wp1]"r"(&p[d + NSIZE * 1]), - [wq1]"r"(&q[d + NSIZE * 1]), - [wp2]"r"(&p[d + NSIZE * 2]), - [wq2]"r"(&q[d + NSIZE * 2]), - [wp3]"r"(&p[d + NSIZE * 3]), - [wq3]"r"(&q[d + NSIZE * 3]), - [wp4]"r"(&p[d + NSIZE * 4]), - [wq4]"r"(&q[d + NSIZE * 4]), - [wp5]"r"(&p[d + NSIZE * 5]), - [wq5]"r"(&q[d + NSIZE * 5]), - [wp6]"r"(&p[d + NSIZE * 6]), - [wq6]"r"(&q[d + NSIZE * 6]), - [wp7]"r"(&p[d + NSIZE * 7]), - [wq7]"r"(&q[d + NSIZE * 7]) + [wp0]"r"(&p[d + nsize * 0]), + [wq0]"r"(&q[d + nsize * 0]), + [wp1]"r"(&p[d + nsize * 1]), + [wq1]"r"(&q[d + nsize * 1]), + [wp2]"r"(&p[d + nsize * 2]), + [wq2]"r"(&q[d + nsize * 2]), + [wp3]"r"(&p[d + nsize * 3]), + [wq3]"r"(&q[d + nsize * 3]), + [wp4]"r"(&p[d + nsize * 4]), + [wq4]"r"(&q[d + nsize * 4]), + [wp5]"r"(&p[d + nsize * 5]), + [wq5]"r"(&q[d + nsize * 5]), + [wp6]"r"(&p[d + nsize * 6]), + [wq6]"r"(&q[d + nsize * 6]), + [wp7]"r"(&p[d + nsize * 7]), + [wq7]"r"(&q[d + nsize * 7]) ); } } diff --git a/lib/raid6/rvv.h b/lib/raid6/rvv.h index 94044a1b707b..6d0708a2c8a4 100644 --- a/lib/raid6/rvv.h +++ b/lib/raid6/rvv.h @@ -7,6 +7,23 @@ * Definitions for RISC-V RAID-6 code */ +#ifdef __KERNEL__ +#include <asm/vector.h> +#else +#define kernel_vector_begin() +#define kernel_vector_end() +#include <sys/auxv.h> +#include <asm/hwcap.h> +#define has_vector() (getauxval(AT_HWCAP) & COMPAT_HWCAP_ISA_V) +#endif + +#include <linux/raid/pq.h> + +static int rvv_has_vector(void) +{ + return has_vector(); +} + #define RAID6_RVV_WRAPPER(_n) \ static void raid6_rvv ## _n ## _gen_syndrome(int disks, \ size_t bytes, void **ptrs) \ diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index 8f2dd2210ba8..09bbe2b14cce 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -35,6 +35,11 @@ ifeq ($(ARCH),aarch64) HAS_NEON = yes endif +ifeq ($(findstring riscv,$(ARCH)),riscv) + CFLAGS += -I../../../arch/riscv/include -DCONFIG_RISCV=1 + HAS_RVV = yes +endif + ifeq ($(findstring ppc,$(ARCH)),ppc) CFLAGS += -I../../../arch/powerpc/include HAS_ALTIVEC := $(shell printf '$(pound)include <altivec.h>\nvector int a;\n' |\ @@ -63,6 +68,9 @@ else ifeq ($(HAS_ALTIVEC),yes) vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o else ifeq ($(ARCH),loongarch64) OBJS += loongarch_simd.o recov_loongarch_simd.o +else ifeq ($(HAS_RVV),yes) + OBJS += rvv.o recov_rvv.o + CFLAGS += -DCONFIG_RISCV_ISA_V=1 endif .c.o: |
