Diffstat (limited to 'arch/s390/lib')
-rw-r--r--  arch/s390/lib/Makefile                                            3
-rw-r--r--  arch/s390/lib/crc32.c (renamed from arch/s390/lib/crc32-glue.c)   21
-rw-r--r--  arch/s390/lib/crypto/Kconfig                                      13
-rw-r--r--  arch/s390/lib/crypto/Makefile                                      6
-rw-r--r--  arch/s390/lib/crypto/chacha-glue.c                                56
-rw-r--r--  arch/s390/lib/crypto/chacha-s390.S                               908
-rw-r--r--  arch/s390/lib/crypto/chacha-s390.h                                14
-rw-r--r--  arch/s390/lib/crypto/sha256.c                                     47
-rw-r--r--  arch/s390/lib/string.c                                            47
-rw-r--r--  arch/s390/lib/uaccess.c                                            5
10 files changed, 1052 insertions, 68 deletions
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 14bbfe50033c..cd35cdbfa871 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -3,6 +3,7 @@
 # Makefile for s390-specific library files..
 #
 
+obj-y += crypto/
 lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o
 lib-y += csum-partial.o
 obj-y += mem.o xor.o
@@ -26,4 +27,4 @@ lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o
 
 obj-$(CONFIG_CRC32_ARCH) += crc32-s390.o
-crc32-s390-y := crc32-glue.o crc32le-vx.o crc32be-vx.o
+crc32-s390-y := crc32.o crc32le-vx.o crc32be-vx.o
diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32.c
index 124214a27340..3c4b344417c1 100644
--- a/arch/s390/lib/crc32-glue.c
+++ b/arch/s390/lib/crc32.c
@@ -18,8 +18,6 @@
 #define VX_ALIGNMENT		16L
 #define VX_ALIGN_MASK		(VX_ALIGNMENT - 1)
 
-static DEFINE_STATIC_KEY_FALSE(have_vxrs);
-
 /*
  * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension
  *
@@ -34,8 +32,7 @@ static DEFINE_STATIC_KEY_FALSE(have_vxrs);
	unsigned long prealign, aligned, remaining;			\
	DECLARE_KERNEL_FPU_ONSTACK16(vxstate);				\
									\
-	if (datalen < VX_MIN_LEN + VX_ALIGN_MASK ||			\
-	    !static_branch_likely(&have_vxrs))				\
+	if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || !cpu_has_vx())	\
		return ___crc32_sw(crc, data, datalen);			\
									\
	if ((unsigned long)data & VX_ALIGN_MASK) {			\
@@ -64,25 +61,13 @@ DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base)
 DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base)
 DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base)
 
-static int __init crc32_s390_init(void)
-{
-	if (cpu_have_feature(S390_CPU_FEATURE_VXRS))
-		static_branch_enable(&have_vxrs);
-	return 0;
-}
-arch_initcall(crc32_s390_init);
-
-static void __exit crc32_s390_exit(void)
-{
-}
-module_exit(crc32_s390_exit);
-
 u32 crc32_optimizations(void)
 {
-	if (static_key_enabled(&have_vxrs))
+	if (cpu_has_vx()) {
 		return CRC32_LE_OPTIMIZATION |
 		       CRC32_BE_OPTIMIZATION |
 		       CRC32C_OPTIMIZATION;
+	}
 	return 0;
 }
 EXPORT_SYMBOL(crc32_optimizations);
diff --git a/arch/s390/lib/crypto/Kconfig b/arch/s390/lib/crypto/Kconfig
new file mode 100644
index 000000000000..e3f855ef4393
--- /dev/null
+++ b/arch/s390/lib/crypto/Kconfig
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config CRYPTO_CHACHA_S390
+	tristate
+	default CRYPTO_LIB_CHACHA
+	select CRYPTO_LIB_CHACHA_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_CHACHA
+
+config CRYPTO_SHA256_S390
+	tristate
+	default CRYPTO_LIB_SHA256
+	select CRYPTO_ARCH_HAVE_LIB_SHA256
+	select CRYPTO_LIB_SHA256_GENERIC
diff --git a/arch/s390/lib/crypto/Makefile b/arch/s390/lib/crypto/Makefile
new file mode 100644
index 000000000000..920197967f46
--- /dev/null
+++ b/arch/s390/lib/crypto/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o
+chacha_s390-y := chacha-glue.o chacha-s390.o
+
+obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256.o
diff --git a/arch/s390/lib/crypto/chacha-glue.c b/arch/s390/lib/crypto/chacha-glue.c
new file mode 100644
index 000000000000..f95ba3483bbc
--- /dev/null
+++ b/arch/s390/lib/crypto/chacha-glue.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ChaCha stream cipher (s390 optimized)
+ *
+ * Copyright IBM Corp. 2021
2021 + */ + +#define KMSG_COMPONENT "chacha_s390" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <crypto/chacha.h> +#include <linux/cpufeature.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sizes.h> +#include <asm/fpu.h> +#include "chacha-s390.h" + +void hchacha_block_arch(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) +{ + /* TODO: implement hchacha_block_arch() in assembly */ + hchacha_block_generic(state, out, nrounds); +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + /* s390 chacha20 implementation has 20 rounds hard-coded, + * it cannot handle a block of data or less, but otherwise + * it can handle data of arbitrary size + */ + if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !cpu_has_vx()) { + chacha_crypt_generic(state, dst, src, bytes, nrounds); + } else { + DECLARE_KERNEL_FPU_ONSTACK32(vxstate); + + kernel_fpu_begin(&vxstate, KERNEL_VXR); + chacha20_vx(dst, src, bytes, &state->x[4], &state->x[12]); + kernel_fpu_end(&vxstate, KERNEL_VXR); + + state->x[12] += round_up(bytes, CHACHA_BLOCK_SIZE) / + CHACHA_BLOCK_SIZE; + } +} +EXPORT_SYMBOL(chacha_crypt_arch); + +bool chacha_is_arch_optimized(void) +{ + return cpu_has_vx(); +} +EXPORT_SYMBOL(chacha_is_arch_optimized); + +MODULE_DESCRIPTION("ChaCha stream cipher (s390 optimized)"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/s390/lib/crypto/chacha-s390.S b/arch/s390/lib/crypto/chacha-s390.S new file mode 100644 index 000000000000..63f3102678c0 --- /dev/null +++ b/arch/s390/lib/crypto/chacha-s390.S @@ -0,0 +1,908 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Original implementation written by Andy Polyakov, @dot-asm. + * This is an adaptation of the original code for kernel use. + * + * Copyright (C) 2006-2019 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved. 
+ */ + +#include <linux/linkage.h> +#include <asm/nospec-insn.h> +#include <asm/fpu-insn.h> + +#define SP %r15 +#define FRAME (16 * 8 + 4 * 8) + + .data + .balign 32 + +SYM_DATA_START_LOCAL(sigma) + .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral + .long 1,0,0,0 + .long 2,0,0,0 + .long 3,0,0,0 + .long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap + + .long 0,1,2,3 + .long 0x61707865,0x61707865,0x61707865,0x61707865 # smashed sigma + .long 0x3320646e,0x3320646e,0x3320646e,0x3320646e + .long 0x79622d32,0x79622d32,0x79622d32,0x79622d32 + .long 0x6b206574,0x6b206574,0x6b206574,0x6b206574 +SYM_DATA_END(sigma) + + .previous + + GEN_BR_THUNK %r14 + + .text + +############################################################################# +# void chacha20_vx_4x(u8 *out, counst u8 *inp, size_t len, +# counst u32 *key, const u32 *counter) + +#define OUT %r2 +#define INP %r3 +#define LEN %r4 +#define KEY %r5 +#define COUNTER %r6 + +#define BEPERM %v31 +#define CTR %v26 + +#define K0 %v16 +#define K1 %v17 +#define K2 %v18 +#define K3 %v19 + +#define XA0 %v0 +#define XA1 %v1 +#define XA2 %v2 +#define XA3 %v3 + +#define XB0 %v4 +#define XB1 %v5 +#define XB2 %v6 +#define XB3 %v7 + +#define XC0 %v8 +#define XC1 %v9 +#define XC2 %v10 +#define XC3 %v11 + +#define XD0 %v12 +#define XD1 %v13 +#define XD2 %v14 +#define XD3 %v15 + +#define XT0 %v27 +#define XT1 %v28 +#define XT2 %v29 +#define XT3 %v30 + +SYM_FUNC_START(chacha20_vx_4x) + stmg %r6,%r7,6*8(SP) + + larl %r7,sigma + lhi %r0,10 + lhi %r1,0 + + VL K0,0,,%r7 # load sigma + VL K1,0,,KEY # load key + VL K2,16,,KEY + VL K3,0,,COUNTER # load counter + + VL BEPERM,0x40,,%r7 + VL CTR,0x50,,%r7 + + VLM XA0,XA3,0x60,%r7,4 # load [smashed] sigma + + VREPF XB0,K1,0 # smash the key + VREPF XB1,K1,1 + VREPF XB2,K1,2 + VREPF XB3,K1,3 + + VREPF XD0,K3,0 + VREPF XD1,K3,1 + VREPF XD2,K3,2 + VREPF XD3,K3,3 + VAF XD0,XD0,CTR + + VREPF XC0,K2,0 + VREPF XC1,K2,1 + VREPF XC2,K2,2 + VREPF XC3,K2,3 + +.Loop_4x: + VAF XA0,XA0,XB0 + VX XD0,XD0,XA0 + VERLLF XD0,XD0,16 + + VAF XA1,XA1,XB1 + VX XD1,XD1,XA1 + VERLLF XD1,XD1,16 + + VAF XA2,XA2,XB2 + VX XD2,XD2,XA2 + VERLLF XD2,XD2,16 + + VAF XA3,XA3,XB3 + VX XD3,XD3,XA3 + VERLLF XD3,XD3,16 + + VAF XC0,XC0,XD0 + VX XB0,XB0,XC0 + VERLLF XB0,XB0,12 + + VAF XC1,XC1,XD1 + VX XB1,XB1,XC1 + VERLLF XB1,XB1,12 + + VAF XC2,XC2,XD2 + VX XB2,XB2,XC2 + VERLLF XB2,XB2,12 + + VAF XC3,XC3,XD3 + VX XB3,XB3,XC3 + VERLLF XB3,XB3,12 + + VAF XA0,XA0,XB0 + VX XD0,XD0,XA0 + VERLLF XD0,XD0,8 + + VAF XA1,XA1,XB1 + VX XD1,XD1,XA1 + VERLLF XD1,XD1,8 + + VAF XA2,XA2,XB2 + VX XD2,XD2,XA2 + VERLLF XD2,XD2,8 + + VAF XA3,XA3,XB3 + VX XD3,XD3,XA3 + VERLLF XD3,XD3,8 + + VAF XC0,XC0,XD0 + VX XB0,XB0,XC0 + VERLLF XB0,XB0,7 + + VAF XC1,XC1,XD1 + VX XB1,XB1,XC1 + VERLLF XB1,XB1,7 + + VAF XC2,XC2,XD2 + VX XB2,XB2,XC2 + VERLLF XB2,XB2,7 + + VAF XC3,XC3,XD3 + VX XB3,XB3,XC3 + VERLLF XB3,XB3,7 + + VAF XA0,XA0,XB1 + VX XD3,XD3,XA0 + VERLLF XD3,XD3,16 + + VAF XA1,XA1,XB2 + VX XD0,XD0,XA1 + VERLLF XD0,XD0,16 + + VAF XA2,XA2,XB3 + VX XD1,XD1,XA2 + VERLLF XD1,XD1,16 + + VAF XA3,XA3,XB0 + VX XD2,XD2,XA3 + VERLLF XD2,XD2,16 + + VAF XC2,XC2,XD3 + VX XB1,XB1,XC2 + VERLLF XB1,XB1,12 + + VAF XC3,XC3,XD0 + VX XB2,XB2,XC3 + VERLLF XB2,XB2,12 + + VAF XC0,XC0,XD1 + VX XB3,XB3,XC0 + VERLLF XB3,XB3,12 + + VAF XC1,XC1,XD2 + VX XB0,XB0,XC1 + VERLLF XB0,XB0,12 + + VAF XA0,XA0,XB1 + VX XD3,XD3,XA0 + VERLLF XD3,XD3,8 + + VAF XA1,XA1,XB2 + VX XD0,XD0,XA1 + VERLLF XD0,XD0,8 + + VAF XA2,XA2,XB3 + VX XD1,XD1,XA2 + VERLLF XD1,XD1,8 + + VAF XA3,XA3,XB0 + VX XD2,XD2,XA3 + 
VERLLF XD2,XD2,8 + + VAF XC2,XC2,XD3 + VX XB1,XB1,XC2 + VERLLF XB1,XB1,7 + + VAF XC3,XC3,XD0 + VX XB2,XB2,XC3 + VERLLF XB2,XB2,7 + + VAF XC0,XC0,XD1 + VX XB3,XB3,XC0 + VERLLF XB3,XB3,7 + + VAF XC1,XC1,XD2 + VX XB0,XB0,XC1 + VERLLF XB0,XB0,7 + brct %r0,.Loop_4x + + VAF XD0,XD0,CTR + + VMRHF XT0,XA0,XA1 # transpose data + VMRHF XT1,XA2,XA3 + VMRLF XT2,XA0,XA1 + VMRLF XT3,XA2,XA3 + VPDI XA0,XT0,XT1,0b0000 + VPDI XA1,XT0,XT1,0b0101 + VPDI XA2,XT2,XT3,0b0000 + VPDI XA3,XT2,XT3,0b0101 + + VMRHF XT0,XB0,XB1 + VMRHF XT1,XB2,XB3 + VMRLF XT2,XB0,XB1 + VMRLF XT3,XB2,XB3 + VPDI XB0,XT0,XT1,0b0000 + VPDI XB1,XT0,XT1,0b0101 + VPDI XB2,XT2,XT3,0b0000 + VPDI XB3,XT2,XT3,0b0101 + + VMRHF XT0,XC0,XC1 + VMRHF XT1,XC2,XC3 + VMRLF XT2,XC0,XC1 + VMRLF XT3,XC2,XC3 + VPDI XC0,XT0,XT1,0b0000 + VPDI XC1,XT0,XT1,0b0101 + VPDI XC2,XT2,XT3,0b0000 + VPDI XC3,XT2,XT3,0b0101 + + VMRHF XT0,XD0,XD1 + VMRHF XT1,XD2,XD3 + VMRLF XT2,XD0,XD1 + VMRLF XT3,XD2,XD3 + VPDI XD0,XT0,XT1,0b0000 + VPDI XD1,XT0,XT1,0b0101 + VPDI XD2,XT2,XT3,0b0000 + VPDI XD3,XT2,XT3,0b0101 + + VAF XA0,XA0,K0 + VAF XB0,XB0,K1 + VAF XC0,XC0,K2 + VAF XD0,XD0,K3 + + VPERM XA0,XA0,XA0,BEPERM + VPERM XB0,XB0,XB0,BEPERM + VPERM XC0,XC0,XC0,BEPERM + VPERM XD0,XD0,XD0,BEPERM + + VLM XT0,XT3,0,INP,0 + + VX XT0,XT0,XA0 + VX XT1,XT1,XB0 + VX XT2,XT2,XC0 + VX XT3,XT3,XD0 + + VSTM XT0,XT3,0,OUT,0 + + la INP,0x40(INP) + la OUT,0x40(OUT) + aghi LEN,-0x40 + + VAF XA0,XA1,K0 + VAF XB0,XB1,K1 + VAF XC0,XC1,K2 + VAF XD0,XD1,K3 + + VPERM XA0,XA0,XA0,BEPERM + VPERM XB0,XB0,XB0,BEPERM + VPERM XC0,XC0,XC0,BEPERM + VPERM XD0,XD0,XD0,BEPERM + + clgfi LEN,0x40 + jl .Ltail_4x + + VLM XT0,XT3,0,INP,0 + + VX XT0,XT0,XA0 + VX XT1,XT1,XB0 + VX XT2,XT2,XC0 + VX XT3,XT3,XD0 + + VSTM XT0,XT3,0,OUT,0 + + la INP,0x40(INP) + la OUT,0x40(OUT) + aghi LEN,-0x40 + je .Ldone_4x + + VAF XA0,XA2,K0 + VAF XB0,XB2,K1 + VAF XC0,XC2,K2 + VAF XD0,XD2,K3 + + VPERM XA0,XA0,XA0,BEPERM + VPERM XB0,XB0,XB0,BEPERM + VPERM XC0,XC0,XC0,BEPERM + VPERM XD0,XD0,XD0,BEPERM + + clgfi LEN,0x40 + jl .Ltail_4x + + VLM XT0,XT3,0,INP,0 + + VX XT0,XT0,XA0 + VX XT1,XT1,XB0 + VX XT2,XT2,XC0 + VX XT3,XT3,XD0 + + VSTM XT0,XT3,0,OUT,0 + + la INP,0x40(INP) + la OUT,0x40(OUT) + aghi LEN,-0x40 + je .Ldone_4x + + VAF XA0,XA3,K0 + VAF XB0,XB3,K1 + VAF XC0,XC3,K2 + VAF XD0,XD3,K3 + + VPERM XA0,XA0,XA0,BEPERM + VPERM XB0,XB0,XB0,BEPERM + VPERM XC0,XC0,XC0,BEPERM + VPERM XD0,XD0,XD0,BEPERM + + clgfi LEN,0x40 + jl .Ltail_4x + + VLM XT0,XT3,0,INP,0 + + VX XT0,XT0,XA0 + VX XT1,XT1,XB0 + VX XT2,XT2,XC0 + VX XT3,XT3,XD0 + + VSTM XT0,XT3,0,OUT,0 + +.Ldone_4x: + lmg %r6,%r7,6*8(SP) + BR_EX %r14 + +.Ltail_4x: + VLR XT0,XC0 + VLR XT1,XD0 + + VST XA0,8*8+0x00,,SP + VST XB0,8*8+0x10,,SP + VST XT0,8*8+0x20,,SP + VST XT1,8*8+0x30,,SP + + lghi %r1,0 + +.Loop_tail_4x: + llgc %r5,0(%r1,INP) + llgc %r6,8*8(%r1,SP) + xr %r6,%r5 + stc %r6,0(%r1,OUT) + la %r1,1(%r1) + brct LEN,.Loop_tail_4x + + lmg %r6,%r7,6*8(SP) + BR_EX %r14 +SYM_FUNC_END(chacha20_vx_4x) + +#undef OUT +#undef INP +#undef LEN +#undef KEY +#undef COUNTER + +#undef BEPERM + +#undef K0 +#undef K1 +#undef K2 +#undef K3 + + +############################################################################# +# void chacha20_vx(u8 *out, counst u8 *inp, size_t len, +# counst u32 *key, const u32 *counter) + +#define OUT %r2 +#define INP %r3 +#define LEN %r4 +#define KEY %r5 +#define COUNTER %r6 + +#define BEPERM %v31 + +#define K0 %v27 +#define K1 %v24 +#define K2 %v25 +#define K3 %v26 + +#define A0 %v0 +#define B0 %v1 +#define C0 %v2 +#define D0 %v3 + +#define A1 %v4 +#define B1 %v5 +#define C1 %v6 
+#define D1 %v7 + +#define A2 %v8 +#define B2 %v9 +#define C2 %v10 +#define D2 %v11 + +#define A3 %v12 +#define B3 %v13 +#define C3 %v14 +#define D3 %v15 + +#define A4 %v16 +#define B4 %v17 +#define C4 %v18 +#define D4 %v19 + +#define A5 %v20 +#define B5 %v21 +#define C5 %v22 +#define D5 %v23 + +#define T0 %v27 +#define T1 %v28 +#define T2 %v29 +#define T3 %v30 + +SYM_FUNC_START(chacha20_vx) + clgfi LEN,256 + jle chacha20_vx_4x + stmg %r6,%r7,6*8(SP) + + lghi %r1,-FRAME + lgr %r0,SP + la SP,0(%r1,SP) + stg %r0,0(SP) # back-chain + + larl %r7,sigma + lhi %r0,10 + + VLM K1,K2,0,KEY,0 # load key + VL K3,0,,COUNTER # load counter + + VLM K0,BEPERM,0,%r7,4 # load sigma, increments, ... + +.Loop_outer_vx: + VLR A0,K0 + VLR B0,K1 + VLR A1,K0 + VLR B1,K1 + VLR A2,K0 + VLR B2,K1 + VLR A3,K0 + VLR B3,K1 + VLR A4,K0 + VLR B4,K1 + VLR A5,K0 + VLR B5,K1 + + VLR D0,K3 + VAF D1,K3,T1 # K[3]+1 + VAF D2,K3,T2 # K[3]+2 + VAF D3,K3,T3 # K[3]+3 + VAF D4,D2,T2 # K[3]+4 + VAF D5,D2,T3 # K[3]+5 + + VLR C0,K2 + VLR C1,K2 + VLR C2,K2 + VLR C3,K2 + VLR C4,K2 + VLR C5,K2 + + VLR T1,D1 + VLR T2,D2 + VLR T3,D3 + +.Loop_vx: + VAF A0,A0,B0 + VAF A1,A1,B1 + VAF A2,A2,B2 + VAF A3,A3,B3 + VAF A4,A4,B4 + VAF A5,A5,B5 + VX D0,D0,A0 + VX D1,D1,A1 + VX D2,D2,A2 + VX D3,D3,A3 + VX D4,D4,A4 + VX D5,D5,A5 + VERLLF D0,D0,16 + VERLLF D1,D1,16 + VERLLF D2,D2,16 + VERLLF D3,D3,16 + VERLLF D4,D4,16 + VERLLF D5,D5,16 + + VAF C0,C0,D0 + VAF C1,C1,D1 + VAF C2,C2,D2 + VAF C3,C3,D3 + VAF C4,C4,D4 + VAF C5,C5,D5 + VX B0,B0,C0 + VX B1,B1,C1 + VX B2,B2,C2 + VX B3,B3,C3 + VX B4,B4,C4 + VX B5,B5,C5 + VERLLF B0,B0,12 + VERLLF B1,B1,12 + VERLLF B2,B2,12 + VERLLF B3,B3,12 + VERLLF B4,B4,12 + VERLLF B5,B5,12 + + VAF A0,A0,B0 + VAF A1,A1,B1 + VAF A2,A2,B2 + VAF A3,A3,B3 + VAF A4,A4,B4 + VAF A5,A5,B5 + VX D0,D0,A0 + VX D1,D1,A1 + VX D2,D2,A2 + VX D3,D3,A3 + VX D4,D4,A4 + VX D5,D5,A5 + VERLLF D0,D0,8 + VERLLF D1,D1,8 + VERLLF D2,D2,8 + VERLLF D3,D3,8 + VERLLF D4,D4,8 + VERLLF D5,D5,8 + + VAF C0,C0,D0 + VAF C1,C1,D1 + VAF C2,C2,D2 + VAF C3,C3,D3 + VAF C4,C4,D4 + VAF C5,C5,D5 + VX B0,B0,C0 + VX B1,B1,C1 + VX B2,B2,C2 + VX B3,B3,C3 + VX B4,B4,C4 + VX B5,B5,C5 + VERLLF B0,B0,7 + VERLLF B1,B1,7 + VERLLF B2,B2,7 + VERLLF B3,B3,7 + VERLLF B4,B4,7 + VERLLF B5,B5,7 + + VSLDB C0,C0,C0,8 + VSLDB C1,C1,C1,8 + VSLDB C2,C2,C2,8 + VSLDB C3,C3,C3,8 + VSLDB C4,C4,C4,8 + VSLDB C5,C5,C5,8 + VSLDB B0,B0,B0,4 + VSLDB B1,B1,B1,4 + VSLDB B2,B2,B2,4 + VSLDB B3,B3,B3,4 + VSLDB B4,B4,B4,4 + VSLDB B5,B5,B5,4 + VSLDB D0,D0,D0,12 + VSLDB D1,D1,D1,12 + VSLDB D2,D2,D2,12 + VSLDB D3,D3,D3,12 + VSLDB D4,D4,D4,12 + VSLDB D5,D5,D5,12 + + VAF A0,A0,B0 + VAF A1,A1,B1 + VAF A2,A2,B2 + VAF A3,A3,B3 + VAF A4,A4,B4 + VAF A5,A5,B5 + VX D0,D0,A0 + VX D1,D1,A1 + VX D2,D2,A2 + VX D3,D3,A3 + VX D4,D4,A4 + VX D5,D5,A5 + VERLLF D0,D0,16 + VERLLF D1,D1,16 + VERLLF D2,D2,16 + VERLLF D3,D3,16 + VERLLF D4,D4,16 + VERLLF D5,D5,16 + + VAF C0,C0,D0 + VAF C1,C1,D1 + VAF C2,C2,D2 + VAF C3,C3,D3 + VAF C4,C4,D4 + VAF C5,C5,D5 + VX B0,B0,C0 + VX B1,B1,C1 + VX B2,B2,C2 + VX B3,B3,C3 + VX B4,B4,C4 + VX B5,B5,C5 + VERLLF B0,B0,12 + VERLLF B1,B1,12 + VERLLF B2,B2,12 + VERLLF B3,B3,12 + VERLLF B4,B4,12 + VERLLF B5,B5,12 + + VAF A0,A0,B0 + VAF A1,A1,B1 + VAF A2,A2,B2 + VAF A3,A3,B3 + VAF A4,A4,B4 + VAF A5,A5,B5 + VX D0,D0,A0 + VX D1,D1,A1 + VX D2,D2,A2 + VX D3,D3,A3 + VX D4,D4,A4 + VX D5,D5,A5 + VERLLF D0,D0,8 + VERLLF D1,D1,8 + VERLLF D2,D2,8 + VERLLF D3,D3,8 + VERLLF D4,D4,8 + VERLLF D5,D5,8 + + VAF C0,C0,D0 + VAF C1,C1,D1 + VAF C2,C2,D2 + VAF C3,C3,D3 + VAF C4,C4,D4 + VAF C5,C5,D5 + VX B0,B0,C0 + VX B1,B1,C1 + VX 
B2,B2,C2 + VX B3,B3,C3 + VX B4,B4,C4 + VX B5,B5,C5 + VERLLF B0,B0,7 + VERLLF B1,B1,7 + VERLLF B2,B2,7 + VERLLF B3,B3,7 + VERLLF B4,B4,7 + VERLLF B5,B5,7 + + VSLDB C0,C0,C0,8 + VSLDB C1,C1,C1,8 + VSLDB C2,C2,C2,8 + VSLDB C3,C3,C3,8 + VSLDB C4,C4,C4,8 + VSLDB C5,C5,C5,8 + VSLDB B0,B0,B0,12 + VSLDB B1,B1,B1,12 + VSLDB B2,B2,B2,12 + VSLDB B3,B3,B3,12 + VSLDB B4,B4,B4,12 + VSLDB B5,B5,B5,12 + VSLDB D0,D0,D0,4 + VSLDB D1,D1,D1,4 + VSLDB D2,D2,D2,4 + VSLDB D3,D3,D3,4 + VSLDB D4,D4,D4,4 + VSLDB D5,D5,D5,4 + brct %r0,.Loop_vx + + VAF A0,A0,K0 + VAF B0,B0,K1 + VAF C0,C0,K2 + VAF D0,D0,K3 + VAF A1,A1,K0 + VAF D1,D1,T1 # +K[3]+1 + + VPERM A0,A0,A0,BEPERM + VPERM B0,B0,B0,BEPERM + VPERM C0,C0,C0,BEPERM + VPERM D0,D0,D0,BEPERM + + clgfi LEN,0x40 + jl .Ltail_vx + + VAF D2,D2,T2 # +K[3]+2 + VAF D3,D3,T3 # +K[3]+3 + VLM T0,T3,0,INP,0 + + VX A0,A0,T0 + VX B0,B0,T1 + VX C0,C0,T2 + VX D0,D0,T3 + + VLM K0,T3,0,%r7,4 # re-load sigma and increments + + VSTM A0,D0,0,OUT,0 + + la INP,0x40(INP) + la OUT,0x40(OUT) + aghi LEN,-0x40 + je .Ldone_vx + + VAF B1,B1,K1 + VAF C1,C1,K2 + + VPERM A0,A1,A1,BEPERM + VPERM B0,B1,B1,BEPERM + VPERM C0,C1,C1,BEPERM + VPERM D0,D1,D1,BEPERM + + clgfi LEN,0x40 + jl .Ltail_vx + + VLM A1,D1,0,INP,0 + + VX A0,A0,A1 + VX B0,B0,B1 + VX C0,C0,C1 + VX D0,D0,D1 + + VSTM A0,D0,0,OUT,0 + + la INP,0x40(INP) + la OUT,0x40(OUT) + aghi LEN,-0x40 + je .Ldone_vx + + VAF A2,A2,K0 + VAF B2,B2,K1 + VAF C2,C2,K2 + + VPERM A0,A2,A2,BEPERM + VPERM B0,B2,B2,BEPERM + VPERM C0,C2,C2,BEPERM + VPERM D0,D2,D2,BEPERM + + clgfi LEN,0x40 + jl .Ltail_vx + + VLM A1,D1,0,INP,0 + + VX A0,A0,A1 + VX B0,B0,B1 + VX C0,C0,C1 + VX D0,D0,D1 + + VSTM A0,D0,0,OUT,0 + + la INP,0x40(INP) + la OUT,0x40(OUT) + aghi LEN,-0x40 + je .Ldone_vx + + VAF A3,A3,K0 + VAF B3,B3,K1 + VAF C3,C3,K2 + VAF D2,K3,T3 # K[3]+3 + + VPERM A0,A3,A3,BEPERM + VPERM B0,B3,B3,BEPERM + VPERM C0,C3,C3,BEPERM + VPERM D0,D3,D3,BEPERM + + clgfi LEN,0x40 + jl .Ltail_vx + + VAF D3,D2,T1 # K[3]+4 + VLM A1,D1,0,INP,0 + + VX A0,A0,A1 + VX B0,B0,B1 + VX C0,C0,C1 + VX D0,D0,D1 + + VSTM A0,D0,0,OUT,0 + + la INP,0x40(INP) + la OUT,0x40(OUT) + aghi LEN,-0x40 + je .Ldone_vx + + VAF A4,A4,K0 + VAF B4,B4,K1 + VAF C4,C4,K2 + VAF D4,D4,D3 # +K[3]+4 + VAF D3,D3,T1 # K[3]+5 + VAF K3,D2,T3 # K[3]+=6 + + VPERM A0,A4,A4,BEPERM + VPERM B0,B4,B4,BEPERM + VPERM C0,C4,C4,BEPERM + VPERM D0,D4,D4,BEPERM + + clgfi LEN,0x40 + jl .Ltail_vx + + VLM A1,D1,0,INP,0 + + VX A0,A0,A1 + VX B0,B0,B1 + VX C0,C0,C1 + VX D0,D0,D1 + + VSTM A0,D0,0,OUT,0 + + la INP,0x40(INP) + la OUT,0x40(OUT) + aghi LEN,-0x40 + je .Ldone_vx + + VAF A5,A5,K0 + VAF B5,B5,K1 + VAF C5,C5,K2 + VAF D5,D5,D3 # +K[3]+5 + + VPERM A0,A5,A5,BEPERM + VPERM B0,B5,B5,BEPERM + VPERM C0,C5,C5,BEPERM + VPERM D0,D5,D5,BEPERM + + clgfi LEN,0x40 + jl .Ltail_vx + + VLM A1,D1,0,INP,0 + + VX A0,A0,A1 + VX B0,B0,B1 + VX C0,C0,C1 + VX D0,D0,D1 + + VSTM A0,D0,0,OUT,0 + + la INP,0x40(INP) + la OUT,0x40(OUT) + lhi %r0,10 + aghi LEN,-0x40 + jne .Loop_outer_vx + +.Ldone_vx: + lmg %r6,%r7,FRAME+6*8(SP) + la SP,FRAME(SP) + BR_EX %r14 + +.Ltail_vx: + VSTM A0,D0,8*8,SP,3 + lghi %r1,0 + +.Loop_tail_vx: + llgc %r5,0(%r1,INP) + llgc %r6,8*8(%r1,SP) + xr %r6,%r5 + stc %r6,0(%r1,OUT) + la %r1,1(%r1) + brct LEN,.Loop_tail_vx + + lmg %r6,%r7,FRAME+6*8(SP) + la SP,FRAME(SP) + BR_EX %r14 +SYM_FUNC_END(chacha20_vx) + +.previous diff --git a/arch/s390/lib/crypto/chacha-s390.h b/arch/s390/lib/crypto/chacha-s390.h new file mode 100644 index 000000000000..733744ce30f5 --- /dev/null +++ b/arch/s390/lib/crypto/chacha-s390.h @@ -0,0 +1,14 @@ +/* 
SPDX-License-Identifier: GPL-2.0 */
+/*
+ * s390 ChaCha stream cipher.
+ *
+ * Copyright IBM Corp. 2021
+ */
+
+#ifndef _CHACHA_S390_H
+#define _CHACHA_S390_H
+
+void chacha20_vx(u8 *out, const u8 *inp, size_t len, const u32 *key,
+		 const u32 *counter);
+
+#endif /* _CHACHA_S390_H */
diff --git a/arch/s390/lib/crypto/sha256.c b/arch/s390/lib/crypto/sha256.c
new file mode 100644
index 000000000000..7dfe120fafab
--- /dev/null
+++ b/arch/s390/lib/crypto/sha256.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SHA-256 optimized using the CP Assist for Cryptographic Functions (CPACF)
+ *
+ * Copyright 2025 Google LLC
+ */
+#include <asm/cpacf.h>
+#include <crypto/internal/sha2.h>
+#include <linux/cpufeature.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_cpacf_sha256);
+
+void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+			const u8 *data, size_t nblocks)
+{
+	if (static_branch_likely(&have_cpacf_sha256))
+		cpacf_kimd(CPACF_KIMD_SHA_256, state, data,
+			   nblocks * SHA256_BLOCK_SIZE);
+	else
+		sha256_blocks_generic(state, data, nblocks);
+}
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
+
+bool sha256_is_arch_optimized(void)
+{
+	return static_key_enabled(&have_cpacf_sha256);
+}
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
+
+static int __init sha256_s390_mod_init(void)
+{
+	if (cpu_have_feature(S390_CPU_FEATURE_MSA) &&
+	    cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
+		static_branch_enable(&have_cpacf_sha256);
+	return 0;
+}
+subsys_initcall(sha256_s390_mod_init);
+
+static void __exit sha256_s390_mod_exit(void)
+{
+}
+module_exit(sha256_s390_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-256 using the CP Assist for Cryptographic Functions (CPACF)");
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index 373fa1f01937..099de76e8b1a 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -78,50 +78,6 @@ EXPORT_SYMBOL(strnlen);
 #endif
 
 /**
- * strcpy - Copy a %NUL terminated string
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- *
- * returns a pointer to @dest
- */
-#ifdef __HAVE_ARCH_STRCPY
-char *strcpy(char *dest, const char *src)
-{
-	char *ret = dest;
-
-	asm volatile(
-		"	lghi	0,0\n"
-		"0:	mvst	%[dest],%[src]\n"
-		"	jo	0b\n"
-		: [dest] "+&a" (dest), [src] "+&a" (src)
-		:
-		: "cc", "memory", "0");
-	return ret;
-}
-EXPORT_SYMBOL(strcpy);
-#endif
-
-/**
- * strncpy - Copy a length-limited, %NUL-terminated string
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- * @n: The maximum number of bytes to copy
- *
- * The result is not %NUL-terminated if the source exceeds
- * @n bytes.
- */
-#ifdef __HAVE_ARCH_STRNCPY
-char *strncpy(char *dest, const char *src, size_t n)
-{
-	size_t len = __strnend(src, n) - src;
-	memset(dest + len, 0, n - len);
-	memcpy(dest, src, len);
-	return dest;
-}
-EXPORT_SYMBOL(strncpy);
-#endif
-
-/**
  * strcat - Append one %NUL-terminated string to another
  * @dest: The string to be appended to
  * @src: The string to append to it
@@ -181,9 +137,6 @@ EXPORT_SYMBOL(strlcat);
  * @n: The maximum numbers of bytes to copy
  *
  * returns a pointer to @dest
- *
- * Note that in contrast to strncpy, strncat ensures the result is
- * terminated.
  */
 #ifdef __HAVE_ARCH_STRNCAT
 char *strncat(char *dest, const char *src, size_t n)
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index cec20db88479..fa7d98fa1320 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -17,17 +17,18 @@
 #ifdef CONFIG_DEBUG_ENTRY
 void debug_user_asce(int exit)
 {
+	struct lowcore *lc = get_lowcore();
 	struct ctlreg cr1, cr7;
 
 	local_ctl_store(1, &cr1);
 	local_ctl_store(7, &cr7);
-	if (cr1.val == get_lowcore()->kernel_asce.val && cr7.val == get_lowcore()->user_asce.val)
+	if (cr1.val == lc->user_asce.val && cr7.val == lc->user_asce.val)
 		return;
 	panic("incorrect ASCE on kernel %s\n"
 	      "cr1: %016lx cr7: %016lx\n"
 	      "kernel: %016lx user: %016lx\n",
 	      exit ? "exit" : "entry", cr1.val, cr7.val,
-	      get_lowcore()->kernel_asce.val, get_lowcore()->user_asce.val);
+	      lc->kernel_asce.val, lc->user_asce.val);
 }
 #endif /*CONFIG_DEBUG_ENTRY */
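
The chacha-glue.c hunk above adds only the architecture backend; in-kernel users reach it through the <crypto/chacha.h> library wrappers, which dispatch to chacha_crypt_arch() when CRYPTO_ARCH_HAVE_LIB_CHACHA is selected. A minimal caller sketch, assuming the chacha_init()/chacha_crypt() helpers keep their usual signatures; the wrapper function below is illustrative and not part of the patch:

#include <crypto/chacha.h>
#include <linux/string.h>

/* Illustrative only: encrypt @len bytes with a 256-bit key and 16-byte IV. */
static void chacha20_encrypt_example(u8 *dst, const u8 *src, unsigned int len,
                                     const u32 *key, const u8 *iv)
{
        struct chacha_state state;

        /* Load constants, the 8 key words, and the counter/nonce words. */
        chacha_init(&state, key, iv);
        /* 20 rounds: on s390 this may take the chacha20_vx() path. */
        chacha_crypt(&state, dst, src, len, 20);
        memzero_explicit(&state, sizeof(state));
}

As the glue code shows, the vector path is only taken for requests larger than one 64-byte block, with exactly 20 rounds and VX available; everything else falls back to chacha_crypt_generic().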
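
Similarly, sha256_blocks_arch() is only the block-compression backend; callers normally go through the SHA-256 library interface in <crypto/sha2.h>. A minimal sketch, assuming the one-shot sha256() helper keeps its (data, len, out) signature; the function below is illustrative and not part of the patch:

#include <crypto/sha2.h>

/* Illustrative only: one-shot digest of @len bytes into @digest. */
static void sha256_digest_example(const u8 *data, unsigned int len,
                                  u8 digest[SHA256_DIGEST_SIZE])
{
        /*
         * The library buffers partial blocks and hands full 64-byte blocks
         * to sha256_blocks_arch(); with MSA and the CPACF KIMD-SHA-256
         * function installed that becomes a cpacf_kimd() call, otherwise
         * sha256_blocks_generic() runs.
         */
        sha256(data, len, digest);
}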