diff options
author | Chunyan Zhang <zhangchunyan@iscas.ac.cn> | 2025-07-18 15:27:08 +0800 |
---|---|---|
committer | Paul Walmsley <pjw@kernel.org> | 2025-09-16 16:43:27 -0600 |
commit | 2dfb75cd5695fa9db2ad90d1339330eda7a0239d (patch) | |
tree | b99fefd45b7edbff99e9fb806770f881f61c86e1 /lib | |
parent | f8a03516a530cc36bc9015c84ba7540ee3e8d7bd (diff) |
raid6: riscv: replace one load with a move to speed up the caculation
Since wp$$==wq$$, it doesn't need to load the same data twice, use move
instruction to replace one of the loads to let the program run faster.
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
Link: https://lore.kernel.org/r/20250718072711.3865118-3-zhangchunyan@iscas.ac.cn
Signed-off-by: Paul Walmsley <pjw@kernel.org>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/raid6/rvv.c | 60 |
1 files changed, 30 insertions, 30 deletions
diff --git a/lib/raid6/rvv.c b/lib/raid6/rvv.c index b193ea176d5d..89da5fc247aa 100644 --- a/lib/raid6/rvv.c +++ b/lib/raid6/rvv.c @@ -44,7 +44,7 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" - "vle8.v v1, (%[wp0])\n" + "vmv.v.v v1, v0\n" ".option pop\n" : : [wp0]"r"(&dptr[z0][d + 0 * NSIZE]) @@ -117,7 +117,7 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" - "vle8.v v1, (%[wp0])\n" + "vmv.v.v v1, v0\n" ".option pop\n" : : [wp0]"r"(&dptr[z0][d + 0 * NSIZE]) @@ -218,9 +218,9 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" - "vle8.v v1, (%[wp0])\n" + "vmv.v.v v1, v0\n" "vle8.v v4, (%[wp1])\n" - "vle8.v v5, (%[wp1])\n" + "vmv.v.v v5, v4\n" ".option pop\n" : : [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), @@ -310,9 +310,9 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" - "vle8.v v1, (%[wp0])\n" + "vmv.v.v v1, v0\n" "vle8.v v4, (%[wp1])\n" - "vle8.v v5, (%[wp1])\n" + "vmv.v.v v5, v4\n" ".option pop\n" : : [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), @@ -440,13 +440,13 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" - "vle8.v v1, (%[wp0])\n" + "vmv.v.v v1, v0\n" "vle8.v v4, (%[wp1])\n" - "vle8.v v5, (%[wp1])\n" + "vmv.v.v v5, v4\n" "vle8.v v8, (%[wp2])\n" - "vle8.v v9, (%[wp2])\n" + "vmv.v.v v9, v8\n" "vle8.v v12, (%[wp3])\n" - "vle8.v v13, (%[wp3])\n" + "vmv.v.v v13, v12\n" ".option pop\n" : : [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), @@ -566,13 +566,13 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" - "vle8.v v1, (%[wp0])\n" + "vmv.v.v v1, v0\n" "vle8.v v4, (%[wp1])\n" - "vle8.v v5, (%[wp1])\n" + "vmv.v.v v5, v4\n" "vle8.v v8, (%[wp2])\n" - "vle8.v v9, (%[wp2])\n" + "vmv.v.v v9, v8\n" "vle8.v v12, (%[wp3])\n" - "vle8.v v13, (%[wp3])\n" + "vmv.v.v v13, v12\n" ".option pop\n" : : [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), @@ -754,21 +754,21 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" - "vle8.v v1, (%[wp0])\n" + "vmv.v.v v1, v0\n" "vle8.v v4, (%[wp1])\n" - "vle8.v v5, (%[wp1])\n" + "vmv.v.v v5, v4\n" "vle8.v v8, (%[wp2])\n" - "vle8.v v9, (%[wp2])\n" + "vmv.v.v v9, v8\n" "vle8.v v12, (%[wp3])\n" - "vle8.v v13, (%[wp3])\n" + "vmv.v.v v13, v12\n" "vle8.v v16, (%[wp4])\n" - "vle8.v v17, (%[wp4])\n" + "vmv.v.v v17, v16\n" "vle8.v v20, (%[wp5])\n" - "vle8.v v21, (%[wp5])\n" + "vmv.v.v v21, v20\n" "vle8.v v24, (%[wp6])\n" - "vle8.v v25, (%[wp6])\n" + "vmv.v.v v25, v24\n" "vle8.v v28, (%[wp7])\n" - "vle8.v v29, (%[wp7])\n" + "vmv.v.v v29, v28\n" ".option pop\n" : : [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), @@ -948,21 +948,21 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" - "vle8.v v1, (%[wp0])\n" + "vmv.v.v v1, v0\n" "vle8.v v4, (%[wp1])\n" - "vle8.v v5, (%[wp1])\n" + "vmv.v.v v5, v4\n" "vle8.v v8, (%[wp2])\n" - "vle8.v v9, (%[wp2])\n" + "vmv.v.v v9, v8\n" "vle8.v v12, (%[wp3])\n" - "vle8.v v13, (%[wp3])\n" + "vmv.v.v v13, v12\n" "vle8.v v16, (%[wp4])\n" - "vle8.v v17, (%[wp4])\n" + "vmv.v.v v17, v16\n" "vle8.v v20, (%[wp5])\n" - "vle8.v v21, (%[wp5])\n" + "vmv.v.v v21, v20\n" "vle8.v v24, (%[wp6])\n" - "vle8.v v25, (%[wp6])\n" + "vmv.v.v v25, v24\n" "vle8.v v28, (%[wp7])\n" - "vle8.v v29, (%[wp7])\n" + "vmv.v.v v29, v28\n" ".option pop\n" : : [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), |