Diffstat (limited to 'arch/riscv/lib')
-rw-r--r-- arch/riscv/lib/Makefile | 6
-rw-r--r-- arch/riscv/lib/crc-clmul-consts.h | 122
-rw-r--r-- arch/riscv/lib/crc-clmul-template.h | 265
-rw-r--r-- arch/riscv/lib/crc-clmul.h | 23
-rw-r--r-- arch/riscv/lib/crc-t10dif.c | 24
-rw-r--r-- arch/riscv/lib/crc16_msb.c | 18
-rw-r--r-- arch/riscv/lib/crc32-riscv.c | 311
-rw-r--r-- arch/riscv/lib/crc32.c | 53
-rw-r--r-- arch/riscv/lib/crc32_lsb.c | 18
-rw-r--r-- arch/riscv/lib/crc32_msb.c | 18
-rw-r--r-- arch/riscv/lib/crc64.c | 34
-rw-r--r-- arch/riscv/lib/crc64_lsb.c | 18
-rw-r--r-- arch/riscv/lib/crc64_msb.c | 18
-rw-r--r-- arch/riscv/lib/crypto/Kconfig | 16
-rw-r--r-- arch/riscv/lib/crypto/Makefile | 7
-rw-r--r-- arch/riscv/lib/crypto/chacha-riscv64-glue.c | 75
-rw-r--r-- arch/riscv/lib/crypto/chacha-riscv64-zvkb.S | 297
-rw-r--r-- arch/riscv/lib/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S | 225
-rw-r--r-- arch/riscv/lib/crypto/sha256.c | 67
-rw-r--r-- arch/riscv/lib/csum.c | 21
-rw-r--r-- arch/riscv/lib/strcmp.S | 5
-rw-r--r-- arch/riscv/lib/strlen.S | 5
-rw-r--r-- arch/riscv/lib/strncmp.S | 5
23 files changed, 1316 insertions, 335 deletions
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 79368a895fee..0baec92d2f55 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
+obj-y += crypto/
lib-y += delay.o
lib-y += memcpy.o
lib-y += memset.o
@@ -16,6 +17,11 @@ lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o
+crc32-riscv-y := crc32.o crc32_msb.o crc32_lsb.o
+obj-$(CONFIG_CRC64_ARCH) += crc64-riscv.o
+crc64-riscv-y := crc64.o crc64_msb.o crc64_lsb.o
+obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-riscv.o
+crc-t10dif-riscv-y := crc-t10dif.o crc16_msb.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RISCV_ISA_V) += xor.o
lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
diff --git a/arch/riscv/lib/crc-clmul-consts.h b/arch/riscv/lib/crc-clmul-consts.h
new file mode 100644
index 000000000000..8d73449235ef
--- /dev/null
+++ b/arch/riscv/lib/crc-clmul-consts.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * CRC constants generated by:
+ *
+ * ./scripts/gen-crc-consts.py riscv_clmul crc16_msb_0x8bb7,crc32_msb_0x04c11db7,crc32_lsb_0xedb88320,crc32_lsb_0x82f63b78,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5
+ *
+ * Do not edit manually.
+ */
+
+struct crc_clmul_consts {
+ unsigned long fold_across_2_longs_const_hi;
+ unsigned long fold_across_2_longs_const_lo;
+ unsigned long barrett_reduction_const_1;
+ unsigned long barrett_reduction_const_2;
+};
+
+/*
+ * Constants generated for most-significant-bit-first CRC-16 using
+ * G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
+ */
+static const struct crc_clmul_consts crc16_msb_0x8bb7_consts __maybe_unused = {
+#ifdef CONFIG_64BIT
+ .fold_across_2_longs_const_hi = 0x0000000000001faa, /* x^192 mod G */
+ .fold_across_2_longs_const_lo = 0x000000000000a010, /* x^128 mod G */
+ .barrett_reduction_const_1 = 0xfb2d2bfc0e99d245, /* floor(x^79 / G) */
+ .barrett_reduction_const_2 = 0x0000000000008bb7, /* G - x^16 */
+#else
+ .fold_across_2_longs_const_hi = 0x00005890, /* x^96 mod G */
+ .fold_across_2_longs_const_lo = 0x0000f249, /* x^64 mod G */
+ .barrett_reduction_const_1 = 0xfb2d2bfc, /* floor(x^47 / G) */
+ .barrett_reduction_const_2 = 0x00008bb7, /* G - x^16 */
+#endif
+};
+
+/*
+ * Constants generated for most-significant-bit-first CRC-32 using
+ * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
+ * x^5 + x^4 + x^2 + x^1 + x^0
+ */
+static const struct crc_clmul_consts crc32_msb_0x04c11db7_consts __maybe_unused = {
+#ifdef CONFIG_64BIT
+ .fold_across_2_longs_const_hi = 0x00000000c5b9cd4c, /* x^192 mod G */
+ .fold_across_2_longs_const_lo = 0x00000000e8a45605, /* x^128 mod G */
+ .barrett_reduction_const_1 = 0x826880efa40da72d, /* floor(x^95 / G) */
+ .barrett_reduction_const_2 = 0x0000000004c11db7, /* G - x^32 */
+#else
+ .fold_across_2_longs_const_hi = 0xf200aa66, /* x^96 mod G */
+ .fold_across_2_longs_const_lo = 0x490d678d, /* x^64 mod G */
+ .barrett_reduction_const_1 = 0x826880ef, /* floor(x^63 / G) */
+ .barrett_reduction_const_2 = 0x04c11db7, /* G - x^32 */
+#endif
+};
+
+/*
+ * Constants generated for least-significant-bit-first CRC-32 using
+ * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
+ * x^5 + x^4 + x^2 + x^1 + x^0
+ */
+static const struct crc_clmul_consts crc32_lsb_0xedb88320_consts __maybe_unused = {
+#ifdef CONFIG_64BIT
+ .fold_across_2_longs_const_hi = 0x65673b4600000000, /* x^191 mod G */
+ .fold_across_2_longs_const_lo = 0x9ba54c6f00000000, /* x^127 mod G */
+ .barrett_reduction_const_1 = 0xb4e5b025f7011641, /* floor(x^95 / G) */
+ .barrett_reduction_const_2 = 0x00000000edb88320, /* (G - x^32) * x^32 */
+#else
+ .fold_across_2_longs_const_hi = 0xccaa009e, /* x^95 mod G */
+ .fold_across_2_longs_const_lo = 0xb8bc6765, /* x^63 mod G */
+ .barrett_reduction_const_1 = 0xf7011641, /* floor(x^63 / G) */
+ .barrett_reduction_const_2 = 0xedb88320, /* (G - x^32) * x^0 */
+#endif
+};
+
+/*
+ * Constants generated for least-significant-bit-first CRC-32 using
+ * G(x) = x^32 + x^28 + x^27 + x^26 + x^25 + x^23 + x^22 + x^20 + x^19 + x^18 +
+ * x^14 + x^13 + x^11 + x^10 + x^9 + x^8 + x^6 + x^0
+ */
+static const struct crc_clmul_consts crc32_lsb_0x82f63b78_consts __maybe_unused = {
+#ifdef CONFIG_64BIT
+ .fold_across_2_longs_const_hi = 0x3743f7bd00000000, /* x^191 mod G */
+ .fold_across_2_longs_const_lo = 0x3171d43000000000, /* x^127 mod G */
+ .barrett_reduction_const_1 = 0x4869ec38dea713f1, /* floor(x^95 / G) */
+ .barrett_reduction_const_2 = 0x0000000082f63b78, /* (G - x^32) * x^32 */
+#else
+ .fold_across_2_longs_const_hi = 0x493c7d27, /* x^95 mod G */
+ .fold_across_2_longs_const_lo = 0xdd45aab8, /* x^63 mod G */
+ .barrett_reduction_const_1 = 0xdea713f1, /* floor(x^63 / G) */
+ .barrett_reduction_const_2 = 0x82f63b78, /* (G - x^32) * x^0 */
+#endif
+};
+
+/*
+ * Constants generated for most-significant-bit-first CRC-64 using
+ * G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
+ * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
+ * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
+ * x^7 + x^4 + x^1 + x^0
+ */
+#ifdef CONFIG_64BIT
+static const struct crc_clmul_consts crc64_msb_0x42f0e1eba9ea3693_consts __maybe_unused = {
+ .fold_across_2_longs_const_hi = 0x4eb938a7d257740e, /* x^192 mod G */
+ .fold_across_2_longs_const_lo = 0x05f5c3c7eb52fab6, /* x^128 mod G */
+ .barrett_reduction_const_1 = 0xabc694e836627c39, /* floor(x^127 / G) */
+ .barrett_reduction_const_2 = 0x42f0e1eba9ea3693, /* G - x^64 */
+};
+#endif
+
+/*
+ * Constants generated for least-significant-bit-first CRC-64 using
+ * G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 +
+ * x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 +
+ * x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 +
+ * x^4 + x^3 + x^0
+ */
+#ifdef CONFIG_64BIT
+static const struct crc_clmul_consts crc64_lsb_0x9a6c9329ac4bc9b5_consts __maybe_unused = {
+ .fold_across_2_longs_const_hi = 0xeadc41fd2ba3d420, /* x^191 mod G */
+ .fold_across_2_longs_const_lo = 0x21e9761e252621ac, /* x^127 mod G */
+ .barrett_reduction_const_1 = 0x27ecfa329aef9f77, /* floor(x^127 / G) */
+ .barrett_reduction_const_2 = 0x9a6c9329ac4bc9b5, /* (G - x^64) * x^0 */
+};
+#endif
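
As a sanity check on the table above, the msb-first "x^k mod G" fold constants can be recomputed independently with a few lines of userspace C. This is a sketch for illustration only; it is not part of the patch, and scripts/gen-crc-consts.py remains the authoritative generator. Here g_low is the generator polynomial with its x^n term dropped, exactly as spelled in the comments above.

#include <stdint.h>
#include <stdio.h>

/* x^k mod G for an msb-first CRC of degree n, valid for k >= n and n <= 64. */
static uint64_t xpow_mod_g(unsigned int k, uint64_t g_low, unsigned int n)
{
	uint64_t mask = (n == 64) ? ~0ULL : (1ULL << n) - 1;
	uint64_t r = g_low;	/* x^n mod G == G - x^n */

	for (unsigned int i = n; i < k; i++) {
		uint64_t carry = (r >> (n - 1)) & 1;

		r = (r << 1) & mask;	/* multiply by x */
		if (carry)
			r ^= g_low;	/* reduce the x^n term */
	}
	return r;
}

int main(void)
{
	/* Should print 0xa010 and 0x1faa, matching the CRC-16 entries above. */
	printf("0x%llx 0x%llx\n",
	       (unsigned long long)xpow_mod_g(128, 0x8bb7, 16),
	       (unsigned long long)xpow_mod_g(192, 0x8bb7, 16));
	return 0;
}
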
diff --git a/arch/riscv/lib/crc-clmul-template.h b/arch/riscv/lib/crc-clmul-template.h
new file mode 100644
index 000000000000..77187e7f1762
--- /dev/null
+++ b/arch/riscv/lib/crc-clmul-template.h
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright 2025 Google LLC */
+
+/*
+ * This file is a "template" that generates a CRC function optimized using the
+ * RISC-V Zbc (scalar carryless multiplication) extension. The includer of this
+ * file must define the following parameters to specify the type of CRC:
+ *
+ * crc_t: the data type of the CRC, e.g. u32 for a 32-bit CRC
+ * LSB_CRC: 0 for a msb (most-significant-bit) first CRC, i.e. natural
+ * mapping between bits and polynomial coefficients
+ * 1 for a lsb (least-significant-bit) first CRC, i.e. reflected
+ * mapping between bits and polynomial coefficients
+ */
+
+#include <asm/byteorder.h>
+#include <linux/minmax.h>
+
+#define CRC_BITS (8 * sizeof(crc_t)) /* a.k.a. 'n' */
+
+static inline unsigned long clmul(unsigned long a, unsigned long b)
+{
+ unsigned long res;
+
+ asm(".option push\n"
+ ".option arch,+zbc\n"
+ "clmul %0, %1, %2\n"
+ ".option pop\n"
+ : "=r" (res) : "r" (a), "r" (b));
+ return res;
+}
+
+static inline unsigned long clmulh(unsigned long a, unsigned long b)
+{
+ unsigned long res;
+
+ asm(".option push\n"
+ ".option arch,+zbc\n"
+ "clmulh %0, %1, %2\n"
+ ".option pop\n"
+ : "=r" (res) : "r" (a), "r" (b));
+ return res;
+}
+
+static inline unsigned long clmulr(unsigned long a, unsigned long b)
+{
+ unsigned long res;
+
+ asm(".option push\n"
+ ".option arch,+zbc\n"
+ "clmulr %0, %1, %2\n"
+ ".option pop\n"
+ : "=r" (res) : "r" (a), "r" (b));
+ return res;
+}
+
+/*
+ * crc_load_long() loads one "unsigned long" of aligned data bytes, producing a
+ * polynomial whose bit order matches the CRC's bit order.
+ */
+#ifdef CONFIG_64BIT
+# if LSB_CRC
+# define crc_load_long(x) le64_to_cpup(x)
+# else
+# define crc_load_long(x) be64_to_cpup(x)
+# endif
+#else
+# if LSB_CRC
+# define crc_load_long(x) le32_to_cpup(x)
+# else
+# define crc_load_long(x) be32_to_cpup(x)
+# endif
+#endif
+
+/* XOR @crc into the end of @msgpoly that represents the high-order terms. */
+static inline unsigned long
+crc_clmul_prep(crc_t crc, unsigned long msgpoly)
+{
+#if LSB_CRC
+ return msgpoly ^ crc;
+#else
+ return msgpoly ^ ((unsigned long)crc << (BITS_PER_LONG - CRC_BITS));
+#endif
+}
+
+/*
+ * Multiply the long-sized @msgpoly by x^n (a.k.a. x^CRC_BITS) and reduce it
+ * modulo the generator polynomial G. This gives the CRC of @msgpoly.
+ */
+static inline crc_t
+crc_clmul_long(unsigned long msgpoly, const struct crc_clmul_consts *consts)
+{
+ unsigned long tmp;
+
+ /*
+ * First step of Barrett reduction with integrated multiplication by
+ * x^n: calculate floor((msgpoly * x^n) / G). This is the value by
+ * which G needs to be multiplied to cancel out the x^n and higher terms
+ * of msgpoly * x^n. Do it using the following formula:
+ *
+ * msb-first:
+ * floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G)) / x^(BITS_PER_LONG-1))
+ * lsb-first:
+ * floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G) * x) / x^BITS_PER_LONG)
+ *
+ * barrett_reduction_const_1 contains floor(x^(BITS_PER_LONG-1+n) / G),
+ * which fits a long exactly. Using any lower power of x there would
+ * not carry enough precision through the calculation, while using any
+ * higher power of x would require extra instructions to handle a wider
+ * multiplication. In the msb-first case, using this power of x results
+ * in needing a floored division by x^(BITS_PER_LONG-1), which matches
+ * what clmulr produces. In the lsb-first case, a factor of x gets
+ * implicitly introduced by each carryless multiplication (shown as
+ * '* x' above), and the floored division instead needs to be by
+ * x^BITS_PER_LONG which matches what clmul produces.
+ */
+#if LSB_CRC
+ tmp = clmul(msgpoly, consts->barrett_reduction_const_1);
+#else
+ tmp = clmulr(msgpoly, consts->barrett_reduction_const_1);
+#endif
+
+ /*
+ * Second step of Barrett reduction:
+ *
+ * crc := (msgpoly * x^n) + (G * floor((msgpoly * x^n) / G))
+ *
+ * This reduces (msgpoly * x^n) modulo G by adding the appropriate
+ * multiple of G to it. The result uses only the x^0..x^(n-1) terms.
+ * HOWEVER, since the unreduced value (msgpoly * x^n) is zero in those
+ * terms in the first place, it is more efficient to do the equivalent:
+ *
+ * crc := ((G - x^n) * floor((msgpoly * x^n) / G)) mod x^n
+ *
+ * In the lsb-first case further modify it to the following which avoids
+ * a shift, as the crc ends up in the physically low n bits from clmulr:
+ *
+ * product := ((G - x^n) * x^(BITS_PER_LONG - n)) * floor((msgpoly * x^n) / G) * x
+ * crc := floor(product / x^(BITS_PER_LONG + 1 - n)) mod x^n
+ *
+ * barrett_reduction_const_2 contains the constant multiplier (G - x^n)
+ * or (G - x^n) * x^(BITS_PER_LONG - n) from the formulas above. The
+ * cast of the result to crc_t is essential, as it applies the mod x^n!
+ */
+#if LSB_CRC
+ return clmulr(tmp, consts->barrett_reduction_const_2);
+#else
+ return clmul(tmp, consts->barrett_reduction_const_2);
+#endif
+}
+
+/* Update @crc with the data from @msgpoly. */
+static inline crc_t
+crc_clmul_update_long(crc_t crc, unsigned long msgpoly,
+ const struct crc_clmul_consts *consts)
+{
+ return crc_clmul_long(crc_clmul_prep(crc, msgpoly), consts);
+}
+
+/* Update @crc with 1 <= @len < sizeof(unsigned long) bytes of data. */
+static inline crc_t
+crc_clmul_update_partial(crc_t crc, const u8 *p, size_t len,
+ const struct crc_clmul_consts *consts)
+{
+ unsigned long msgpoly;
+ size_t i;
+
+#if LSB_CRC
+ msgpoly = (unsigned long)p[0] << (BITS_PER_LONG - 8);
+ for (i = 1; i < len; i++)
+ msgpoly = (msgpoly >> 8) ^ ((unsigned long)p[i] << (BITS_PER_LONG - 8));
+#else
+ msgpoly = p[0];
+ for (i = 1; i < len; i++)
+ msgpoly = (msgpoly << 8) ^ p[i];
+#endif
+
+ if (len >= sizeof(crc_t)) {
+ #if LSB_CRC
+ msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len);
+ #else
+ msgpoly ^= (unsigned long)crc << (8*len - CRC_BITS);
+ #endif
+ return crc_clmul_long(msgpoly, consts);
+ }
+#if LSB_CRC
+ msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len);
+ return crc_clmul_long(msgpoly, consts) ^ (crc >> (8*len));
+#else
+ msgpoly ^= crc >> (CRC_BITS - 8*len);
+ return crc_clmul_long(msgpoly, consts) ^ (crc << (8*len));
+#endif
+}
+
+static inline crc_t
+crc_clmul(crc_t crc, const void *p, size_t len,
+ const struct crc_clmul_consts *consts)
+{
+ size_t align;
+
+ /* This implementation assumes that the CRC fits in an unsigned long. */
+ BUILD_BUG_ON(sizeof(crc_t) > sizeof(unsigned long));
+
+ /* If the buffer is not long-aligned, align it. */
+ align = (unsigned long)p % sizeof(unsigned long);
+ if (align && len) {
+ align = min(sizeof(unsigned long) - align, len);
+ crc = crc_clmul_update_partial(crc, p, align, consts);
+ p += align;
+ len -= align;
+ }
+
+ if (len >= 4 * sizeof(unsigned long)) {
+ unsigned long m0, m1;
+
+ m0 = crc_clmul_prep(crc, crc_load_long(p));
+ m1 = crc_load_long(p + sizeof(unsigned long));
+ p += 2 * sizeof(unsigned long);
+ len -= 2 * sizeof(unsigned long);
+ /*
+ * Main loop. Each iteration starts with a message polynomial
+ * (x^BITS_PER_LONG)*m0 + m1, then logically extends it by two
+ * more longs of data to form x^(3*BITS_PER_LONG)*m0 +
+ * x^(2*BITS_PER_LONG)*m1 + x^BITS_PER_LONG*m2 + m3, then
+ * "folds" that back into a congruent (modulo G) value that uses
+ * just m0 and m1 again. This is done by multiplying m0 by the
+ * precomputed constant (x^(3*BITS_PER_LONG) mod G) and m1 by
+ * the precomputed constant (x^(2*BITS_PER_LONG) mod G), then
+ * adding the results to m2 and m3 as appropriate. Each such
+ * multiplication produces a result twice the length of a long,
+ * which in RISC-V is two instructions clmul and clmulh.
+ *
+ * This could be changed to fold across more than 2 longs at a
+ * time if there is a CPU that can take advantage of it.
+ */
+ do {
+ unsigned long p0, p1, p2, p3;
+
+ p0 = clmulh(m0, consts->fold_across_2_longs_const_hi);
+ p1 = clmul(m0, consts->fold_across_2_longs_const_hi);
+ p2 = clmulh(m1, consts->fold_across_2_longs_const_lo);
+ p3 = clmul(m1, consts->fold_across_2_longs_const_lo);
+ m0 = (LSB_CRC ? p1 ^ p3 : p0 ^ p2) ^ crc_load_long(p);
+ m1 = (LSB_CRC ? p0 ^ p2 : p1 ^ p3) ^
+ crc_load_long(p + sizeof(unsigned long));
+
+ p += 2 * sizeof(unsigned long);
+ len -= 2 * sizeof(unsigned long);
+ } while (len >= 2 * sizeof(unsigned long));
+
+ crc = crc_clmul_long(m0, consts);
+ crc = crc_clmul_update_long(crc, m1, consts);
+ }
+
+ while (len >= sizeof(unsigned long)) {
+ crc = crc_clmul_update_long(crc, crc_load_long(p), consts);
+ p += sizeof(unsigned long);
+ len -= sizeof(unsigned long);
+ }
+
+ if (len)
+ crc = crc_clmul_update_partial(crc, p, len, consts);
+
+ return crc;
+}
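
The folding and Barrett-reduction arithmetic above is the part most worth cross-checking. A minimal reference that an includer of this template could keep next to crc_clmul() for testing is a plain bit-at-a-time CRC in the same crc_t/LSB_CRC parameterization. The sketch below is illustrative only and assumes a hypothetical REF_POLY macro giving the generator in the matching bit order (for example 0x8bb7 for the msb-first CRC-16, or 0xedb88320 for the lsb-first CRC-32).

/* Bit-at-a-time reference CRC, for cross-checking crc_clmul() in tests. */
static inline crc_t crc_ref(crc_t crc, const u8 *p, size_t len)
{
	while (len--) {
		int i;
#if LSB_CRC
		crc ^= *p++;
		for (i = 0; i < 8; i++)
			crc = (crc >> 1) ^ ((crc & 1) ? REF_POLY : 0);
#else
		crc ^= (crc_t)*p++ << (CRC_BITS - 8);
		for (i = 0; i < 8; i++)
			crc = (crc << 1) ^
			      (((crc >> (CRC_BITS - 1)) & 1) ? REF_POLY : 0);
#endif
	}
	return crc;
}
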
diff --git a/arch/riscv/lib/crc-clmul.h b/arch/riscv/lib/crc-clmul.h
new file mode 100644
index 000000000000..dd1736245815
--- /dev/null
+++ b/arch/riscv/lib/crc-clmul.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright 2025 Google LLC */
+
+#ifndef _RISCV_CRC_CLMUL_H
+#define _RISCV_CRC_CLMUL_H
+
+#include <linux/types.h>
+#include "crc-clmul-consts.h"
+
+u16 crc16_msb_clmul(u16 crc, const void *p, size_t len,
+ const struct crc_clmul_consts *consts);
+u32 crc32_msb_clmul(u32 crc, const void *p, size_t len,
+ const struct crc_clmul_consts *consts);
+u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len,
+ const struct crc_clmul_consts *consts);
+#ifdef CONFIG_64BIT
+u64 crc64_msb_clmul(u64 crc, const void *p, size_t len,
+ const struct crc_clmul_consts *consts);
+u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len,
+ const struct crc_clmul_consts *consts);
+#endif
+
+#endif /* _RISCV_CRC_CLMUL_H */
diff --git a/arch/riscv/lib/crc-t10dif.c b/arch/riscv/lib/crc-t10dif.c
new file mode 100644
index 000000000000..e6b0051ccd86
--- /dev/null
+++ b/arch/riscv/lib/crc-t10dif.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RISC-V optimized CRC-T10DIF function
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
+#include <linux/crc-t10dif.h>
+#include <linux/module.h>
+
+#include "crc-clmul.h"
+
+u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len)
+{
+ if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
+ return crc16_msb_clmul(crc, p, len, &crc16_msb_0x8bb7_consts);
+ return crc_t10dif_generic(crc, p, len);
+}
+EXPORT_SYMBOL(crc_t10dif_arch);
+
+MODULE_DESCRIPTION("RISC-V optimized CRC-T10DIF function");
+MODULE_LICENSE("GPL");
diff --git a/arch/riscv/lib/crc16_msb.c b/arch/riscv/lib/crc16_msb.c
new file mode 100644
index 000000000000..554d295e95f5
--- /dev/null
+++ b/arch/riscv/lib/crc16_msb.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RISC-V optimized most-significant-bit-first CRC16
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include "crc-clmul.h"
+
+typedef u16 crc_t;
+#define LSB_CRC 0
+#include "crc-clmul-template.h"
+
+u16 crc16_msb_clmul(u16 crc, const void *p, size_t len,
+ const struct crc_clmul_consts *consts)
+{
+ return crc_clmul(crc, p, len, consts);
+}
diff --git a/arch/riscv/lib/crc32-riscv.c b/arch/riscv/lib/crc32-riscv.c
deleted file mode 100644
index 53d56ab422c7..000000000000
--- a/arch/riscv/lib/crc32-riscv.c
+++ /dev/null
@@ -1,311 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Accelerated CRC32 implementation with Zbc extension.
- *
- * Copyright (C) 2024 Intel Corporation
- */
-
-#include <asm/hwcap.h>
-#include <asm/alternative-macros.h>
-#include <asm/byteorder.h>
-
-#include <linux/types.h>
-#include <linux/minmax.h>
-#include <linux/crc32poly.h>
-#include <linux/crc32.h>
-#include <linux/byteorder/generic.h>
-#include <linux/module.h>
-
-/*
- * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for
- * better understanding of how this math works.
- *
- * let "+" denotes polynomial add (XOR)
- * let "-" denotes polynomial sub (XOR)
- * let "*" denotes polynomial multiplication
- * let "/" denotes polynomial floor division
- * let "S" denotes source data, XLEN bit wide
- * let "P" denotes CRC32 polynomial
- * let "T" denotes 2^(XLEN+32)
- * let "QT" denotes quotient of T/P, with the bit for 2^XLEN being implicit
- *
- * crc32(S, P)
- * => S * (2^32) - S * (2^32) / P * P
- * => lowest 32 bits of: S * (2^32) / P * P
- * => lowest 32 bits of: S * (2^32) * (T / P) / T * P
- * => lowest 32 bits of: S * (2^32) * quotient / T * P
- * => lowest 32 bits of: S * quotient / 2^XLEN * P
- * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P
- * => clmul_low_part(clmul_high_part(S, QT) + S, P)
- *
- * In terms of below implementations, the BE case is more intuitive, since the
- * higher order bit sits at more significant position.
- */
-
-#if __riscv_xlen == 64
-/* Slide by XLEN bits per iteration */
-# define STEP_ORDER 3
-
-/* Each below polynomial quotient has an implicit bit for 2^XLEN */
-
-/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */
-# define CRC32_POLY_QT_LE 0x5a72d812fb808b20
-
-/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */
-# define CRC32C_POLY_QT_LE 0xa434f61c6f5389f8
-
-/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format, it should be
- * the same as the bit-reversed version of CRC32_POLY_QT_LE
- */
-# define CRC32_POLY_QT_BE 0x04d101df481b4e5a
-
-static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr)
-{
- return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr);
-}
-
-static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
-{
- u32 crc;
-
- /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
- asm volatile (".option push\n"
- ".option arch,+zbc\n"
- "clmul %0, %1, %2\n"
- "slli %0, %0, 1\n"
- "xor %0, %0, %1\n"
- "clmulr %0, %0, %3\n"
- "srli %0, %0, 32\n"
- ".option pop\n"
- : "=&r" (crc)
- : "r" (s),
- "r" (poly_qt),
- "r" ((u64)poly << 32)
- :);
- return crc;
-}
-
-static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr)
-{
- return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr);
-}
-
-#elif __riscv_xlen == 32
-# define STEP_ORDER 2
-/* Each quotient should match the upper half of its analog in RV64 */
-# define CRC32_POLY_QT_LE 0xfb808b20
-# define CRC32C_POLY_QT_LE 0x6f5389f8
-# define CRC32_POLY_QT_BE 0x04d101df
-
-static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr)
-{
- return crc ^ (__force u32)__cpu_to_le32(*ptr);
-}
-
-static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
-{
- u32 crc;
-
- /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
- asm volatile (".option push\n"
- ".option arch,+zbc\n"
- "clmul %0, %1, %2\n"
- "slli %0, %0, 1\n"
- "xor %0, %0, %1\n"
- "clmulr %0, %0, %3\n"
- ".option pop\n"
- : "=&r" (crc)
- : "r" (s),
- "r" (poly_qt),
- "r" (poly)
- :);
- return crc;
-}
-
-static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr)
-{
- return crc ^ (__force u32)__cpu_to_be32(*ptr);
-}
-
-#else
-# error "Unexpected __riscv_xlen"
-#endif
-
-static inline u32 crc32_be_zbc(unsigned long s)
-{
- u32 crc;
-
- asm volatile (".option push\n"
- ".option arch,+zbc\n"
- "clmulh %0, %1, %2\n"
- "xor %0, %0, %1\n"
- "clmul %0, %0, %3\n"
- ".option pop\n"
- : "=&r" (crc)
- : "r" (s),
- "r" (CRC32_POLY_QT_BE),
- "r" (CRC32_POLY_BE)
- :);
- return crc;
-}
-
-#define STEP (1 << STEP_ORDER)
-#define OFFSET_MASK (STEP - 1)
-
-typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len);
-
-static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p,
- size_t len, u32 poly,
- unsigned long poly_qt)
-{
- size_t bits = len * 8;
- unsigned long s = 0;
- u32 crc_low = 0;
-
- for (int i = 0; i < len; i++)
- s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8);
-
- s ^= (unsigned long)crc << (__riscv_xlen - bits);
- if (__riscv_xlen == 32 || len < sizeof(u32))
- crc_low = crc >> bits;
-
- crc = crc32_le_zbc(s, poly, poly_qt);
- crc ^= crc_low;
-
- return crc;
-}
-
-static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
- size_t len, u32 poly,
- unsigned long poly_qt,
- fallback crc_fb)
-{
- size_t offset, head_len, tail_len;
- unsigned long const *p_ul;
- unsigned long s;
-
- asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
- RISCV_ISA_EXT_ZBC, 1)
- : : : : legacy);
-
- /* Handle the unaligned head. */
- offset = (unsigned long)p & OFFSET_MASK;
- if (offset && len) {
- head_len = min(STEP - offset, len);
- crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt);
- p += head_len;
- len -= head_len;
- }
-
- tail_len = len & OFFSET_MASK;
- len = len >> STEP_ORDER;
- p_ul = (unsigned long const *)p;
-
- for (int i = 0; i < len; i++) {
- s = crc32_le_prep(crc, p_ul);
- crc = crc32_le_zbc(s, poly, poly_qt);
- p_ul++;
- }
-
- /* Handle the tail bytes. */
- p = (unsigned char const *)p_ul;
- if (tail_len)
- crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt);
-
- return crc;
-
-legacy:
- return crc_fb(crc, p, len);
-}
-
-u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
-{
- return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
- crc32_le_base);
-}
-EXPORT_SYMBOL(crc32_le_arch);
-
-u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
-{
- return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
- CRC32C_POLY_QT_LE, crc32c_le_base);
-}
-EXPORT_SYMBOL(crc32c_le_arch);
-
-static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
- size_t len)
-{
- size_t bits = len * 8;
- unsigned long s = 0;
- u32 crc_low = 0;
-
- s = 0;
- for (int i = 0; i < len; i++)
- s = *p++ | (s << 8);
-
- if (__riscv_xlen == 32 || len < sizeof(u32)) {
- s ^= crc >> (32 - bits);
- crc_low = crc << bits;
- } else {
- s ^= (unsigned long)crc << (bits - 32);
- }
-
- crc = crc32_be_zbc(s);
- crc ^= crc_low;
-
- return crc;
-}
-
-u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
-{
- size_t offset, head_len, tail_len;
- unsigned long const *p_ul;
- unsigned long s;
-
- asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
- RISCV_ISA_EXT_ZBC, 1)
- : : : : legacy);
-
- /* Handle the unaligned head. */
- offset = (unsigned long)p & OFFSET_MASK;
- if (offset && len) {
- head_len = min(STEP - offset, len);
- crc = crc32_be_unaligned(crc, p, head_len);
- p += head_len;
- len -= head_len;
- }
-
- tail_len = len & OFFSET_MASK;
- len = len >> STEP_ORDER;
- p_ul = (unsigned long const *)p;
-
- for (int i = 0; i < len; i++) {
- s = crc32_be_prep(crc, p_ul);
- crc = crc32_be_zbc(s);
- p_ul++;
- }
-
- /* Handle the tail bytes. */
- p = (unsigned char const *)p_ul;
- if (tail_len)
- crc = crc32_be_unaligned(crc, p, tail_len);
-
- return crc;
-
-legacy:
- return crc32_be_base(crc, p, len);
-}
-EXPORT_SYMBOL(crc32_be_arch);
-
-u32 crc32_optimizations(void)
-{
- if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
- return CRC32_LE_OPTIMIZATION |
- CRC32_BE_OPTIMIZATION |
- CRC32C_OPTIMIZATION;
- return 0;
-}
-EXPORT_SYMBOL(crc32_optimizations);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension");
diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32.c
new file mode 100644
index 000000000000..a3188b7d9c40
--- /dev/null
+++ b/arch/riscv/lib/crc32.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RISC-V optimized CRC32 functions
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
+#include <linux/crc32.h>
+#include <linux/module.h>
+
+#include "crc-clmul.h"
+
+u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
+{
+ if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
+ return crc32_lsb_clmul(crc, p, len,
+ &crc32_lsb_0xedb88320_consts);
+ return crc32_le_base(crc, p, len);
+}
+EXPORT_SYMBOL(crc32_le_arch);
+
+u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
+{
+ if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
+ return crc32_msb_clmul(crc, p, len,
+ &crc32_msb_0x04c11db7_consts);
+ return crc32_be_base(crc, p, len);
+}
+EXPORT_SYMBOL(crc32_be_arch);
+
+u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
+{
+ if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
+ return crc32_lsb_clmul(crc, p, len,
+ &crc32_lsb_0x82f63b78_consts);
+ return crc32c_base(crc, p, len);
+}
+EXPORT_SYMBOL(crc32c_arch);
+
+u32 crc32_optimizations(void)
+{
+ if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
+ return CRC32_LE_OPTIMIZATION |
+ CRC32_BE_OPTIMIZATION |
+ CRC32C_OPTIMIZATION;
+ return 0;
+}
+EXPORT_SYMBOL(crc32_optimizations);
+
+MODULE_DESCRIPTION("RISC-V optimized CRC32 functions");
+MODULE_LICENSE("GPL");
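
Kernel code does not normally call these _arch entry points directly; when CONFIG_CRC32_ARCH is enabled, the inline helpers in <linux/crc32.h> dispatch to them. A minimal caller sketch (illustrative only; the helper name is made up):

#include <linux/crc32.h>

/* Conventional CRC-32 with the usual init/final inversion. */
static u32 example_crc32(const void *buf, size_t len)
{
	return ~crc32_le(~0, buf, len);
}
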
diff --git a/arch/riscv/lib/crc32_lsb.c b/arch/riscv/lib/crc32_lsb.c
new file mode 100644
index 000000000000..72fd67e7470c
--- /dev/null
+++ b/arch/riscv/lib/crc32_lsb.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RISC-V optimized least-significant-bit-first CRC32
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include "crc-clmul.h"
+
+typedef u32 crc_t;
+#define LSB_CRC 1
+#include "crc-clmul-template.h"
+
+u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len,
+ const struct crc_clmul_consts *consts)
+{
+ return crc_clmul(crc, p, len, consts);
+}
diff --git a/arch/riscv/lib/crc32_msb.c b/arch/riscv/lib/crc32_msb.c
new file mode 100644
index 000000000000..fdbeaccc369f
--- /dev/null
+++ b/arch/riscv/lib/crc32_msb.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RISC-V optimized most-significant-bit-first CRC32
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include "crc-clmul.h"
+
+typedef u32 crc_t;
+#define LSB_CRC 0
+#include "crc-clmul-template.h"
+
+u32 crc32_msb_clmul(u32 crc, const void *p, size_t len,
+ const struct crc_clmul_consts *consts)
+{
+ return crc_clmul(crc, p, len, consts);
+}
diff --git a/arch/riscv/lib/crc64.c b/arch/riscv/lib/crc64.c
new file mode 100644
index 000000000000..f0015a27836a
--- /dev/null
+++ b/arch/riscv/lib/crc64.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RISC-V optimized CRC64 functions
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
+#include <linux/crc64.h>
+#include <linux/module.h>
+
+#include "crc-clmul.h"
+
+u64 crc64_be_arch(u64 crc, const u8 *p, size_t len)
+{
+ if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
+ return crc64_msb_clmul(crc, p, len,
+ &crc64_msb_0x42f0e1eba9ea3693_consts);
+ return crc64_be_generic(crc, p, len);
+}
+EXPORT_SYMBOL(crc64_be_arch);
+
+u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len)
+{
+ if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
+ return crc64_lsb_clmul(crc, p, len,
+ &crc64_lsb_0x9a6c9329ac4bc9b5_consts);
+ return crc64_nvme_generic(crc, p, len);
+}
+EXPORT_SYMBOL(crc64_nvme_arch);
+
+MODULE_DESCRIPTION("RISC-V optimized CRC64 functions");
+MODULE_LICENSE("GPL");
diff --git a/arch/riscv/lib/crc64_lsb.c b/arch/riscv/lib/crc64_lsb.c
new file mode 100644
index 000000000000..c5371bb85d90
--- /dev/null
+++ b/arch/riscv/lib/crc64_lsb.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RISC-V optimized least-significant-bit-first CRC64
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include "crc-clmul.h"
+
+typedef u64 crc_t;
+#define LSB_CRC 1
+#include "crc-clmul-template.h"
+
+u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len,
+ const struct crc_clmul_consts *consts)
+{
+ return crc_clmul(crc, p, len, consts);
+}
diff --git a/arch/riscv/lib/crc64_msb.c b/arch/riscv/lib/crc64_msb.c
new file mode 100644
index 000000000000..1925d1dbe225
--- /dev/null
+++ b/arch/riscv/lib/crc64_msb.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RISC-V optimized most-significant-bit-first CRC64
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include "crc-clmul.h"
+
+typedef u64 crc_t;
+#define LSB_CRC 0
+#include "crc-clmul-template.h"
+
+u64 crc64_msb_clmul(u64 crc, const void *p, size_t len,
+ const struct crc_clmul_consts *consts)
+{
+ return crc_clmul(crc, p, len, consts);
+}
diff --git a/arch/riscv/lib/crypto/Kconfig b/arch/riscv/lib/crypto/Kconfig
new file mode 100644
index 000000000000..47c99ea97ce2
--- /dev/null
+++ b/arch/riscv/lib/crypto/Kconfig
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config CRYPTO_CHACHA_RISCV64
+ tristate
+ depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+ default CRYPTO_LIB_CHACHA
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+ select CRYPTO_LIB_CHACHA_GENERIC
+
+config CRYPTO_SHA256_RISCV64
+ tristate
+ depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+ default CRYPTO_LIB_SHA256
+ select CRYPTO_ARCH_HAVE_LIB_SHA256
+ select CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD
+ select CRYPTO_LIB_SHA256_GENERIC
diff --git a/arch/riscv/lib/crypto/Makefile b/arch/riscv/lib/crypto/Makefile
new file mode 100644
index 000000000000..b7cb877a2c07
--- /dev/null
+++ b/arch/riscv/lib/crypto/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o
+chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
+
+obj-$(CONFIG_CRYPTO_SHA256_RISCV64) += sha256-riscv64.o
+sha256-riscv64-y := sha256.o sha256-riscv64-zvknha_or_zvknhb-zvkb.o
diff --git a/arch/riscv/lib/crypto/chacha-riscv64-glue.c b/arch/riscv/lib/crypto/chacha-riscv64-glue.c
new file mode 100644
index 000000000000..8c3f11d79be3
--- /dev/null
+++ b/arch/riscv/lib/crypto/chacha-riscv64-glue.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ChaCha stream cipher (RISC-V optimized)
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/chacha.h>
+#include <crypto/internal/simd.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_zvkb);
+
+asmlinkage void chacha_zvkb(struct chacha_state *state, const u8 *in, u8 *out,
+ size_t nblocks, int nrounds);
+
+void hchacha_block_arch(const struct chacha_state *state,
+ u32 out[HCHACHA_OUT_WORDS], int nrounds)
+{
+ hchacha_block_generic(state, out, nrounds);
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ u8 block_buffer[CHACHA_BLOCK_SIZE];
+ unsigned int full_blocks = bytes / CHACHA_BLOCK_SIZE;
+ unsigned int tail_bytes = bytes % CHACHA_BLOCK_SIZE;
+
+ if (!static_branch_likely(&use_zvkb) || !crypto_simd_usable())
+ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+ kernel_vector_begin();
+ if (full_blocks) {
+ chacha_zvkb(state, src, dst, full_blocks, nrounds);
+ src += full_blocks * CHACHA_BLOCK_SIZE;
+ dst += full_blocks * CHACHA_BLOCK_SIZE;
+ }
+ if (tail_bytes) {
+ memcpy(block_buffer, src, tail_bytes);
+ chacha_zvkb(state, block_buffer, block_buffer, 1, nrounds);
+ memcpy(dst, block_buffer, tail_bytes);
+ }
+ kernel_vector_end();
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+bool chacha_is_arch_optimized(void)
+{
+ return static_key_enabled(&use_zvkb);
+}
+EXPORT_SYMBOL(chacha_is_arch_optimized);
+
+static int __init riscv64_chacha_mod_init(void)
+{
+ if (riscv_isa_extension_available(NULL, ZVKB) &&
+ riscv_vector_vlen() >= 128)
+ static_branch_enable(&use_zvkb);
+ return 0;
+}
+subsys_initcall(riscv64_chacha_mod_init);
+
+static void __exit riscv64_chacha_mod_exit(void)
+{
+}
+module_exit(riscv64_chacha_mod_exit);
+
+MODULE_DESCRIPTION("ChaCha stream cipher (RISC-V optimized)");
+MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>");
+MODULE_LICENSE("GPL");
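
A quick worked example of the split in chacha_crypt_arch() above: with CHACHA_BLOCK_SIZE = 64, a request of bytes = 100 gives full_blocks = 1 and tail_bytes = 36, so the first 64 bytes are processed directly from src to dst by chacha_zvkb() and the remaining 36 bytes are bounced through the 64-byte stack buffer. This keeps the assembly routine operating on whole blocks only, and each call passes it a nonzero block count as it requires.
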
diff --git a/arch/riscv/lib/crypto/chacha-riscv64-zvkb.S b/arch/riscv/lib/crypto/chacha-riscv64-zvkb.S
new file mode 100644
index 000000000000..b777d0b4e379
--- /dev/null
+++ b/arch/riscv/lib/crypto/chacha-riscv64-zvkb.S
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
+
+#include <linux/linkage.h>
+
+.text
+.option arch, +zvkb
+
+#define STATEP a0
+#define INP a1
+#define OUTP a2
+#define NBLOCKS a3
+#define NROUNDS a4
+
+#define CONSTS0 a5
+#define CONSTS1 a6
+#define CONSTS2 a7
+#define CONSTS3 t0
+#define TMP t1
+#define VL t2
+#define STRIDE t3
+#define ROUND_CTR t4
+#define KEY0 s0
+#define KEY1 s1
+#define KEY2 s2
+#define KEY3 s3
+#define KEY4 s4
+#define KEY5 s5
+#define KEY6 s6
+#define KEY7 s7
+#define COUNTER s8
+#define NONCE0 s9
+#define NONCE1 s10
+#define NONCE2 s11
+
+.macro chacha_round a0, b0, c0, d0, a1, b1, c1, d1, \
+ a2, b2, c2, d2, a3, b3, c3, d3
+ // a += b; d ^= a; d = rol(d, 16);
+ vadd.vv \a0, \a0, \b0
+ vadd.vv \a1, \a1, \b1
+ vadd.vv \a2, \a2, \b2
+ vadd.vv \a3, \a3, \b3
+ vxor.vv \d0, \d0, \a0
+ vxor.vv \d1, \d1, \a1
+ vxor.vv \d2, \d2, \a2
+ vxor.vv \d3, \d3, \a3
+ vror.vi \d0, \d0, 32 - 16
+ vror.vi \d1, \d1, 32 - 16
+ vror.vi \d2, \d2, 32 - 16
+ vror.vi \d3, \d3, 32 - 16
+
+ // c += d; b ^= c; b = rol(b, 12);
+ vadd.vv \c0, \c0, \d0
+ vadd.vv \c1, \c1, \d1
+ vadd.vv \c2, \c2, \d2
+ vadd.vv \c3, \c3, \d3
+ vxor.vv \b0, \b0, \c0
+ vxor.vv \b1, \b1, \c1
+ vxor.vv \b2, \b2, \c2
+ vxor.vv \b3, \b3, \c3
+ vror.vi \b0, \b0, 32 - 12
+ vror.vi \b1, \b1, 32 - 12
+ vror.vi \b2, \b2, 32 - 12
+ vror.vi \b3, \b3, 32 - 12
+
+ // a += b; d ^= a; d = rol(d, 8);
+ vadd.vv \a0, \a0, \b0
+ vadd.vv \a1, \a1, \b1
+ vadd.vv \a2, \a2, \b2
+ vadd.vv \a3, \a3, \b3
+ vxor.vv \d0, \d0, \a0
+ vxor.vv \d1, \d1, \a1
+ vxor.vv \d2, \d2, \a2
+ vxor.vv \d3, \d3, \a3
+ vror.vi \d0, \d0, 32 - 8
+ vror.vi \d1, \d1, 32 - 8
+ vror.vi \d2, \d2, 32 - 8
+ vror.vi \d3, \d3, 32 - 8
+
+ // c += d; b ^= c; b = rol(b, 7);
+ vadd.vv \c0, \c0, \d0
+ vadd.vv \c1, \c1, \d1
+ vadd.vv \c2, \c2, \d2
+ vadd.vv \c3, \c3, \d3
+ vxor.vv \b0, \b0, \c0
+ vxor.vv \b1, \b1, \c1
+ vxor.vv \b2, \b2, \c2
+ vxor.vv \b3, \b3, \c3
+ vror.vi \b0, \b0, 32 - 7
+ vror.vi \b1, \b1, 32 - 7
+ vror.vi \b2, \b2, 32 - 7
+ vror.vi \b3, \b3, 32 - 7
+.endm
+
+// void chacha_zvkb(struct chacha_state *state, const u8 *in, u8 *out,
+// size_t nblocks, int nrounds);
+//
+// |nblocks| is the number of 64-byte blocks to process, and must be nonzero.
+//
+// |state| gives the ChaCha state matrix, including the 32-bit counter in
+// state->x[12] following the RFC7539 convention; note that this differs from
+// the original ChaCha specification, which uses a 64-bit counter in state->x[12..13].
+// The updated 32-bit counter is written back to state->x[12] before returning.
+SYM_FUNC_START(chacha_zvkb)
+ addi sp, sp, -96
+ sd s0, 0(sp)
+ sd s1, 8(sp)
+ sd s2, 16(sp)
+ sd s3, 24(sp)
+ sd s4, 32(sp)
+ sd s5, 40(sp)
+ sd s6, 48(sp)
+ sd s7, 56(sp)
+ sd s8, 64(sp)
+ sd s9, 72(sp)
+ sd s10, 80(sp)
+ sd s11, 88(sp)
+
+ li STRIDE, 64
+
+ // Set up the initial state matrix in scalar registers.
+ lw CONSTS0, 0(STATEP)
+ lw CONSTS1, 4(STATEP)
+ lw CONSTS2, 8(STATEP)
+ lw CONSTS3, 12(STATEP)
+ lw KEY0, 16(STATEP)
+ lw KEY1, 20(STATEP)
+ lw KEY2, 24(STATEP)
+ lw KEY3, 28(STATEP)
+ lw KEY4, 32(STATEP)
+ lw KEY5, 36(STATEP)
+ lw KEY6, 40(STATEP)
+ lw KEY7, 44(STATEP)
+ lw COUNTER, 48(STATEP)
+ lw NONCE0, 52(STATEP)
+ lw NONCE1, 56(STATEP)
+ lw NONCE2, 60(STATEP)
+
+.Lblock_loop:
+ // Set vl to the number of blocks to process in this iteration.
+ vsetvli VL, NBLOCKS, e32, m1, ta, ma
+
+ // Set up the initial state matrix for the next VL blocks in v0-v15.
+ // v{i} holds the i'th 32-bit word of the state matrix for all blocks.
+ // Note that only the counter word, at index 12, differs across blocks.
+ vmv.v.x v0, CONSTS0
+ vmv.v.x v1, CONSTS1
+ vmv.v.x v2, CONSTS2
+ vmv.v.x v3, CONSTS3
+ vmv.v.x v4, KEY0
+ vmv.v.x v5, KEY1
+ vmv.v.x v6, KEY2
+ vmv.v.x v7, KEY3
+ vmv.v.x v8, KEY4
+ vmv.v.x v9, KEY5
+ vmv.v.x v10, KEY6
+ vmv.v.x v11, KEY7
+ vid.v v12
+ vadd.vx v12, v12, COUNTER
+ vmv.v.x v13, NONCE0
+ vmv.v.x v14, NONCE1
+ vmv.v.x v15, NONCE2
+
+ // Load the first half of the input data for each block into v16-v23.
+ // v{16+i} holds the i'th 32-bit word for all blocks.
+ vlsseg8e32.v v16, (INP), STRIDE
+
+ mv ROUND_CTR, NROUNDS
+.Lnext_doubleround:
+ addi ROUND_CTR, ROUND_CTR, -2
+ // column round
+ chacha_round v0, v4, v8, v12, v1, v5, v9, v13, \
+ v2, v6, v10, v14, v3, v7, v11, v15
+ // diagonal round
+ chacha_round v0, v5, v10, v15, v1, v6, v11, v12, \
+ v2, v7, v8, v13, v3, v4, v9, v14
+ bnez ROUND_CTR, .Lnext_doubleround
+
+ // Load the second half of the input data for each block into v24-v31.
+ // v{24+i} holds the {8+i}'th 32-bit word for all blocks.
+ addi TMP, INP, 32
+ vlsseg8e32.v v24, (TMP), STRIDE
+
+ // Finalize the first half of the keystream for each block.
+ vadd.vx v0, v0, CONSTS0
+ vadd.vx v1, v1, CONSTS1
+ vadd.vx v2, v2, CONSTS2
+ vadd.vx v3, v3, CONSTS3
+ vadd.vx v4, v4, KEY0
+ vadd.vx v5, v5, KEY1
+ vadd.vx v6, v6, KEY2
+ vadd.vx v7, v7, KEY3
+
+ // Encrypt/decrypt the first half of the data for each block.
+ vxor.vv v16, v16, v0
+ vxor.vv v17, v17, v1
+ vxor.vv v18, v18, v2
+ vxor.vv v19, v19, v3
+ vxor.vv v20, v20, v4
+ vxor.vv v21, v21, v5
+ vxor.vv v22, v22, v6
+ vxor.vv v23, v23, v7
+
+ // Store the first half of the output data for each block.
+ vssseg8e32.v v16, (OUTP), STRIDE
+
+ // Finalize the second half of the keystream for each block.
+ vadd.vx v8, v8, KEY4
+ vadd.vx v9, v9, KEY5
+ vadd.vx v10, v10, KEY6
+ vadd.vx v11, v11, KEY7
+ vid.v v0
+ vadd.vx v12, v12, COUNTER
+ vadd.vx v13, v13, NONCE0
+ vadd.vx v14, v14, NONCE1
+ vadd.vx v15, v15, NONCE2
+ vadd.vv v12, v12, v0
+
+ // Encrypt/decrypt the second half of the data for each block.
+ vxor.vv v24, v24, v8
+ vxor.vv v25, v25, v9
+ vxor.vv v26, v26, v10
+ vxor.vv v27, v27, v11
+ vxor.vv v28, v28, v12
+ vxor.vv v29, v29, v13
+ vxor.vv v30, v30, v14
+ vxor.vv v31, v31, v15
+
+ // Store the second half of the output data for each block.
+ addi TMP, OUTP, 32
+ vssseg8e32.v v24, (TMP), STRIDE
+
+ // Update the counter, the remaining number of blocks, and the input and
+ // output pointers according to the number of blocks processed (VL).
+ add COUNTER, COUNTER, VL
+ sub NBLOCKS, NBLOCKS, VL
+ slli TMP, VL, 6
+ add OUTP, OUTP, TMP
+ add INP, INP, TMP
+ bnez NBLOCKS, .Lblock_loop
+
+ sw COUNTER, 48(STATEP)
+ ld s0, 0(sp)
+ ld s1, 8(sp)
+ ld s2, 16(sp)
+ ld s3, 24(sp)
+ ld s4, 32(sp)
+ ld s5, 40(sp)
+ ld s6, 48(sp)
+ ld s7, 56(sp)
+ ld s8, 64(sp)
+ ld s9, 72(sp)
+ ld s10, 80(sp)
+ ld s11, 88(sp)
+ addi sp, sp, 96
+ ret
+SYM_FUNC_END(chacha_zvkb)
diff --git a/arch/riscv/lib/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S b/arch/riscv/lib/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S
new file mode 100644
index 000000000000..fad501ad0617
--- /dev/null
+++ b/arch/riscv/lib/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector SHA-2 Secure Hash extension ('Zvknha' or 'Zvknhb')
+// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
+
+#include <linux/linkage.h>
+
+.text
+.option arch, +zvknha, +zvkb
+
+#define STATEP a0
+#define DATA a1
+#define NUM_BLOCKS a2
+
+#define STATEP_C a3
+
+#define MASK v0
+#define INDICES v1
+#define W0 v2
+#define W1 v3
+#define W2 v4
+#define W3 v5
+#define VTMP v6
+#define FEBA v7
+#define HGDC v8
+#define K0 v10
+#define K1 v11
+#define K2 v12
+#define K3 v13
+#define K4 v14
+#define K5 v15
+#define K6 v16
+#define K7 v17
+#define K8 v18
+#define K9 v19
+#define K10 v20
+#define K11 v21
+#define K12 v22
+#define K13 v23
+#define K14 v24
+#define K15 v25
+#define PREV_FEBA v26
+#define PREV_HGDC v27
+
+// Do 4 rounds of SHA-256. w0 contains the current 4 message schedule words.
+//
+// If not all the message schedule words have been computed yet, then this also
+// computes 4 more message schedule words. w1-w3 contain the next 3 groups of 4
+// message schedule words; this macro computes the group after w3 and writes it
+// to w0. This means that the next (w0, w1, w2, w3) is the current (w1, w2, w3,
+// w0), so the caller must cycle through the registers accordingly.
+.macro sha256_4rounds last, k, w0, w1, w2, w3
+ vadd.vv VTMP, \k, \w0
+ vsha2cl.vv HGDC, FEBA, VTMP
+ vsha2ch.vv FEBA, HGDC, VTMP
+.if !\last
+ vmerge.vvm VTMP, \w2, \w1, MASK
+ vsha2ms.vv \w0, VTMP, \w3
+.endif
+.endm
+
+.macro sha256_16rounds last, k0, k1, k2, k3
+ sha256_4rounds \last, \k0, W0, W1, W2, W3
+ sha256_4rounds \last, \k1, W1, W2, W3, W0
+ sha256_4rounds \last, \k2, W2, W3, W0, W1
+ sha256_4rounds \last, \k3, W3, W0, W1, W2
+.endm
+
+// void sha256_transform_zvknha_or_zvknhb_zvkb(u32 state[SHA256_STATE_WORDS],
+// const u8 *data, size_t nblocks);
+SYM_FUNC_START(sha256_transform_zvknha_or_zvknhb_zvkb)
+
+ // Load the round constants into K0-K15.
+ vsetivli zero, 4, e32, m1, ta, ma
+ la t0, K256
+ vle32.v K0, (t0)
+ addi t0, t0, 16
+ vle32.v K1, (t0)
+ addi t0, t0, 16
+ vle32.v K2, (t0)
+ addi t0, t0, 16
+ vle32.v K3, (t0)
+ addi t0, t0, 16
+ vle32.v K4, (t0)
+ addi t0, t0, 16
+ vle32.v K5, (t0)
+ addi t0, t0, 16
+ vle32.v K6, (t0)
+ addi t0, t0, 16
+ vle32.v K7, (t0)
+ addi t0, t0, 16
+ vle32.v K8, (t0)
+ addi t0, t0, 16
+ vle32.v K9, (t0)
+ addi t0, t0, 16
+ vle32.v K10, (t0)
+ addi t0, t0, 16
+ vle32.v K11, (t0)
+ addi t0, t0, 16
+ vle32.v K12, (t0)
+ addi t0, t0, 16
+ vle32.v K13, (t0)
+ addi t0, t0, 16
+ vle32.v K14, (t0)
+ addi t0, t0, 16
+ vle32.v K15, (t0)
+
+ // Setup mask for the vmerge to replace the first word (idx==0) in
+ // message scheduling. There are 4 words, so an 8-bit mask suffices.
+ vsetivli zero, 1, e8, m1, ta, ma
+ vmv.v.i MASK, 0x01
+
+ // Load the state. The state is stored as {a,b,c,d,e,f,g,h}, but we
+ // need {f,e,b,a},{h,g,d,c}. The dst vtype is e32m1 and the index vtype
+ // is e8mf4. We use index-load with the i8 indices {20, 16, 4, 0},
+ // loaded using the 32-bit little endian value 0x00041014.
+ li t0, 0x00041014
+ vsetivli zero, 1, e32, m1, ta, ma
+ vmv.v.x INDICES, t0
+ addi STATEP_C, STATEP, 8
+ vsetivli zero, 4, e32, m1, ta, ma
+ vluxei8.v FEBA, (STATEP), INDICES
+ vluxei8.v HGDC, (STATEP_C), INDICES
+
+.Lnext_block:
+ addi NUM_BLOCKS, NUM_BLOCKS, -1
+
+ // Save the previous state, as it's needed later.
+ vmv.v.v PREV_FEBA, FEBA
+ vmv.v.v PREV_HGDC, HGDC
+
+ // Load the next 512-bit message block and endian-swap each 32-bit word.
+ vle32.v W0, (DATA)
+ vrev8.v W0, W0
+ addi DATA, DATA, 16
+ vle32.v W1, (DATA)
+ vrev8.v W1, W1
+ addi DATA, DATA, 16
+ vle32.v W2, (DATA)
+ vrev8.v W2, W2
+ addi DATA, DATA, 16
+ vle32.v W3, (DATA)
+ vrev8.v W3, W3
+ addi DATA, DATA, 16
+
+ // Do the 64 rounds of SHA-256.
+ sha256_16rounds 0, K0, K1, K2, K3
+ sha256_16rounds 0, K4, K5, K6, K7
+ sha256_16rounds 0, K8, K9, K10, K11
+ sha256_16rounds 1, K12, K13, K14, K15
+
+ // Add the previous state.
+ vadd.vv FEBA, FEBA, PREV_FEBA
+ vadd.vv HGDC, HGDC, PREV_HGDC
+
+ // Repeat if more blocks remain.
+ bnez NUM_BLOCKS, .Lnext_block
+
+ // Store the new state and return.
+ vsuxei8.v FEBA, (STATEP), INDICES
+ vsuxei8.v HGDC, (STATEP_C), INDICES
+ ret
+SYM_FUNC_END(sha256_transform_zvknha_or_zvknhb_zvkb)
+
+.section ".rodata"
+.p2align 2
+.type K256, @object
+K256:
+ .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+ .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+ .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+ .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+ .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+ .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+ .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+ .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+ .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+ .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+ .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+ .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+ .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+ .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+ .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+ .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+.size K256, . - K256
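
To unpack the index constant used in the state load/store above: 0x00041014 stored little-endian yields the i8 indices {0x14, 0x10, 0x04, 0x00}, i.e. byte offsets {20, 16, 4, 0} into the {a,b,c,d,e,f,g,h} state array, which select the words f, e, b, a for FEBA. Applying the same indices with the base at STATEP + 8 shifts every offset by 8 bytes and selects h, g, d, c for HGDC.
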
diff --git a/arch/riscv/lib/crypto/sha256.c b/arch/riscv/lib/crypto/sha256.c
new file mode 100644
index 000000000000..71808397dff4
--- /dev/null
+++ b/arch/riscv/lib/crypto/sha256.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SHA-256 (RISC-V accelerated)
+ *
+ * Copyright (C) 2022 VRULL GmbH
+ * Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/vector.h>
+#include <crypto/internal/sha2.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+asmlinkage void sha256_transform_zvknha_or_zvknhb_zvkb(
+ u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_extensions);
+
+void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks)
+{
+ if (static_branch_likely(&have_extensions)) {
+ kernel_vector_begin();
+ sha256_transform_zvknha_or_zvknhb_zvkb(state, data, nblocks);
+ kernel_vector_end();
+ } else {
+ sha256_blocks_generic(state, data, nblocks);
+ }
+}
+EXPORT_SYMBOL_GPL(sha256_blocks_simd);
+
+void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+ const u8 *data, size_t nblocks)
+{
+ sha256_blocks_generic(state, data, nblocks);
+}
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
+
+bool sha256_is_arch_optimized(void)
+{
+ return static_key_enabled(&have_extensions);
+}
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
+
+static int __init riscv64_sha256_mod_init(void)
+{
+ /* Both zvknha and zvknhb provide the SHA-256 instructions. */
+ if ((riscv_isa_extension_available(NULL, ZVKNHA) ||
+ riscv_isa_extension_available(NULL, ZVKNHB)) &&
+ riscv_isa_extension_available(NULL, ZVKB) &&
+ riscv_vector_vlen() >= 128)
+ static_branch_enable(&have_extensions);
+ return 0;
+}
+subsys_initcall(riscv64_sha256_mod_init);
+
+static void __exit riscv64_sha256_mod_exit(void)
+{
+}
+module_exit(riscv64_sha256_mod_exit);
+
+MODULE_DESCRIPTION("SHA-256 (RISC-V accelerated)");
+MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
+MODULE_LICENSE("GPL");
diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c
index 7fb12c59e571..9408f50ca59a 100644
--- a/arch/riscv/lib/csum.c
+++ b/arch/riscv/lib/csum.c
@@ -40,12 +40,7 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
uproto = (__force unsigned int)htonl(proto);
sum += uproto;
- /*
- * Zbb support saves 4 instructions, so not worth checking without
- * alternatives if supported
- */
- if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
- IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) {
unsigned long fold_temp;
/*
@@ -157,12 +152,7 @@ do_csum_with_alignment(const unsigned char *buff, int len)
csum = do_csum_common(ptr, end, data);
#ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT
- /*
- * Zbb support saves 6 instructions, so not worth checking without
- * alternatives if supported
- */
- if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
- IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) {
unsigned long fold_temp;
/*
@@ -244,12 +234,7 @@ do_csum_no_alignment(const unsigned char *buff, int len)
end = (const unsigned long *)(buff + len);
csum = do_csum_common(ptr, end, data);
- /*
- * Zbb support saves 6 instructions, so not worth checking without
- * alternatives if supported
- */
- if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
- IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB)) {
unsigned long fold_temp;
/*
diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S
index 57a5c0066231..65027e742af1 100644
--- a/arch/riscv/lib/strcmp.S
+++ b/arch/riscv/lib/strcmp.S
@@ -8,7 +8,8 @@
/* int strcmp(const char *cs, const char *ct) */
SYM_FUNC_START(strcmp)
- ALTERNATIVE("nop", "j strcmp_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB)
+ __ALTERNATIVE_CFG("nop", "j strcmp_zbb", 0, RISCV_ISA_EXT_ZBB,
+ IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB))
/*
* Returns
@@ -43,7 +44,7 @@ SYM_FUNC_START(strcmp)
* The code was published as part of the bitmanip manual
* in Appendix A.
*/
-#ifdef CONFIG_RISCV_ISA_ZBB
+#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
strcmp_zbb:
.option push
diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S
index 962983b73251..eb4d2b7ed22b 100644
--- a/arch/riscv/lib/strlen.S
+++ b/arch/riscv/lib/strlen.S
@@ -8,7 +8,8 @@
/* int strlen(const char *s) */
SYM_FUNC_START(strlen)
- ALTERNATIVE("nop", "j strlen_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB)
+ __ALTERNATIVE_CFG("nop", "j strlen_zbb", 0, RISCV_ISA_EXT_ZBB,
+ IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB))
/*
* Returns
@@ -33,7 +34,7 @@ SYM_FUNC_START(strlen)
/*
* Variant of strlen using the ZBB extension if available
*/
-#ifdef CONFIG_RISCV_ISA_ZBB
+#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
strlen_zbb:
#ifdef CONFIG_CPU_BIG_ENDIAN
diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S
index 7b2d0ff9ed6c..062000c468c8 100644
--- a/arch/riscv/lib/strncmp.S
+++ b/arch/riscv/lib/strncmp.S
@@ -8,7 +8,8 @@
/* int strncmp(const char *cs, const char *ct, size_t count) */
SYM_FUNC_START(strncmp)
- ALTERNATIVE("nop", "j strncmp_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB)
+ __ALTERNATIVE_CFG("nop", "j strncmp_zbb", 0, RISCV_ISA_EXT_ZBB,
+ IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB))
/*
* Returns
@@ -46,7 +47,7 @@ SYM_FUNC_START(strncmp)
/*
* Variant of strncmp using the ZBB extension if available
*/
-#ifdef CONFIG_RISCV_ISA_ZBB
+#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
strncmp_zbb:
.option push