author | Nick Terrell <terrelln@fb.com> | 2022-10-21 16:00:35 -0700
---|---|---
committer | Nick Terrell <terrelln@fb.com> | 2022-10-21 16:00:35 -0700
commit | 14e77332e74603efab8347c89d3cda447c3b97c9 (patch)
tree | b7b8a48f4f75590266a763c52e072dda32b228ae /lib/crypto
parent | 88a309465b3f05a100c3b81966982c0f9f5d23a6 (diff)
parent | 1d61754caa8c69f566504e63c8b3f3a2df0954c8 (diff)
Merge branch 'main' into zstd-next
Diffstat (limited to 'lib/crypto')
-rw-r--r-- | lib/crypto/Kconfig | 11
-rw-r--r-- | lib/crypto/Makefile | 9
-rw-r--r-- | lib/crypto/blake2s-selftest.c | 41
-rw-r--r-- | lib/crypto/blake2s.c | 37
-rw-r--r-- | lib/crypto/memneq.c | 173
-rw-r--r-- | lib/crypto/sha1.c | 140
-rw-r--r-- | lib/crypto/sm4.c | 176
-rw-r--r-- | lib/crypto/utils.c | 88
8 files changed, 485 insertions, 190 deletions
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index e8e525650cf2..7e9683e9f5c6 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -2,6 +2,9 @@
 
 menu "Crypto library routines"
 
+config CRYPTO_LIB_UTILS
+	tristate
+
 config CRYPTO_LIB_AES
 	tristate
 
@@ -33,7 +36,7 @@ config CRYPTO_ARCH_HAVE_LIB_CHACHA
 
 config CRYPTO_LIB_CHACHA_GENERIC
 	tristate
-	select XOR_BLOCKS
+	select CRYPTO_LIB_UTILS
 	help
 	  This symbol can be depended upon by arch implementations of the
 	  ChaCha library interface that require the generic code as a
@@ -43,7 +46,6 @@ config CRYPTO_LIB_CHACHA_GENERIC
 
 config CRYPTO_LIB_CHACHA
 	tristate "ChaCha library interface"
-	depends on CRYPTO
 	depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA
 	select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n
 	help
@@ -71,6 +73,7 @@ config CRYPTO_LIB_CURVE25519
 	tristate "Curve25519 scalar multiplication library"
 	depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519
 	select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n
+	select CRYPTO_LIB_UTILS
 	help
 	  Enable the Curve25519 library interface. This interface may be
 	  fulfilled by either the generic implementation or an arch-specific
@@ -120,10 +123,10 @@ config CRYPTO_LIB_CHACHA20POLY1305
 	select CRYPTO_LIB_POLY1305
 	select CRYPTO_ALGAPI
 
-config CRYPTO_LIB_SHA256
+config CRYPTO_LIB_SHA1
 	tristate
 
-config CRYPTO_LIB_SM4
+config CRYPTO_LIB_SHA256
 	tristate
 
 endmenu
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index ed43a41f2dcc..c852f067ab06 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -1,5 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
+obj-$(CONFIG_CRYPTO_LIB_UTILS)	+= libcryptoutils.o
+libcryptoutils-y	:= memneq.o utils.o
+
 # chacha is used by the /dev/random driver which is always builtin
 obj-y	+= chacha.o
 obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC)	+= libchacha.o
@@ -34,12 +37,12 @@ libpoly1305-y	:= poly1305-donna32.o
 libpoly1305-$(CONFIG_ARCH_SUPPORTS_INT128)	:= poly1305-donna64.o
 libpoly1305-y	+= poly1305.o
 
+obj-$(CONFIG_CRYPTO_LIB_SHA1)	+= libsha1.o
+libsha1-y	:= sha1.o
+
 obj-$(CONFIG_CRYPTO_LIB_SHA256)	+= libsha256.o
 libsha256-y	:= sha256.o
 
-obj-$(CONFIG_CRYPTO_LIB_SM4)	+= libsm4.o
-libsm4-y	:= sm4.o
-
 ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y)
 libblake2s-y	+= blake2s-selftest.o
 libchacha20poly1305-y	+= chacha20poly1305-selftest.o
diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c
index 409e4b728770..7d77dea15587 100644
--- a/lib/crypto/blake2s-selftest.c
+++ b/lib/crypto/blake2s-selftest.c
@@ -4,6 +4,8 @@
  */
 
 #include <crypto/internal/blake2s.h>
+#include <linux/kernel.h>
+#include <linux/random.h>
 #include <linux/string.h>
 
 /*
@@ -587,5 +589,44 @@ bool __init blake2s_selftest(void)
 		}
 	}
 
+	for (i = 0; i < 32; ++i) {
+		enum { TEST_ALIGNMENT = 16 };
+		u8 unaligned_block[BLAKE2S_BLOCK_SIZE + TEST_ALIGNMENT - 1]
+					__aligned(TEST_ALIGNMENT);
+		u8 blocks[BLAKE2S_BLOCK_SIZE * 2];
+		struct blake2s_state state1, state2;
+
+		get_random_bytes(blocks, sizeof(blocks));
+		get_random_bytes(&state, sizeof(state));
+
+#if defined(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) && \
+    defined(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S)
+		memcpy(&state1, &state, sizeof(state1));
+		memcpy(&state2, &state, sizeof(state2));
+		blake2s_compress(&state1, blocks, 2, BLAKE2S_BLOCK_SIZE);
+		blake2s_compress_generic(&state2, blocks, 2, BLAKE2S_BLOCK_SIZE);
+		if (memcmp(&state1, &state2, sizeof(state1))) {
+			pr_err("blake2s random compress self-test %d: FAIL\n",
+			       i + 1);
+			success = false;
+		}
+#endif
+
+		memcpy(&state1, &state, sizeof(state1));
+		blake2s_compress(&state1, blocks, 1, BLAKE2S_BLOCK_SIZE);
+		for (l = 1; l < TEST_ALIGNMENT; ++l) {
+			memcpy(unaligned_block + l, blocks,
+			       BLAKE2S_BLOCK_SIZE);
+			memcpy(&state2, &state, sizeof(state2));
+			blake2s_compress(&state2, unaligned_block + l, 1,
+					 BLAKE2S_BLOCK_SIZE);
+			if (memcmp(&state1, &state2, sizeof(state1))) {
+				pr_err("blake2s random compress align %d self-test %d: FAIL\n",
+				       l, i + 1);
+				success = false;
+			}
+		}
+	}
+
 	return success;
 }
diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c
index 9364f79937b8..98e688c6d891 100644
--- a/lib/crypto/blake2s.c
+++ b/lib/crypto/blake2s.c
@@ -16,16 +16,44 @@
 #include <linux/init.h>
 #include <linux/bug.h>
 
+static inline void blake2s_set_lastblock(struct blake2s_state *state)
+{
+	state->f[0] = -1;
+}
+
 void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
 {
-	__blake2s_update(state, in, inlen, blake2s_compress);
+	const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
+
+	if (unlikely(!inlen))
+		return;
+	if (inlen > fill) {
+		memcpy(state->buf + state->buflen, in, fill);
+		blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
+		state->buflen = 0;
+		in += fill;
+		inlen -= fill;
+	}
+	if (inlen > BLAKE2S_BLOCK_SIZE) {
+		const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
+		blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
+		in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+		inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+	}
+	memcpy(state->buf + state->buflen, in, inlen);
+	state->buflen += inlen;
 }
 EXPORT_SYMBOL(blake2s_update);
 
 void blake2s_final(struct blake2s_state *state, u8 *out)
 {
 	WARN_ON(IS_ENABLED(DEBUG) && !out);
-	__blake2s_final(state, out, blake2s_compress);
+	blake2s_set_lastblock(state);
+	memset(state->buf + state->buflen, 0,
+	       BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
+	blake2s_compress(state, state->buf, 1, state->buflen);
+	cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
+	memcpy(out, state->h, state->outlen);
 	memzero_explicit(state, sizeof(*state));
 }
 EXPORT_SYMBOL(blake2s_final);
@@ -38,12 +66,7 @@ static int __init blake2s_mod_init(void)
 	return 0;
 }
 
-static void __exit blake2s_mod_exit(void)
-{
-}
-
 module_init(blake2s_mod_init);
-module_exit(blake2s_mod_exit);
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("BLAKE2s hash function");
 MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
diff --git a/lib/crypto/memneq.c b/lib/crypto/memneq.c
new file mode 100644
index 000000000000..243d8677cc51
--- /dev/null
+++ b/lib/crypto/memneq.c
@@ -0,0 +1,173 @@
+/*
+ * Constant-time equality testing of memory regions.
+ *
+ * Authors:
+ *
+ *   James Yonan <james@openvpn.net>
+ *   Daniel Borkmann <dborkman@redhat.com>
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of OpenVPN Technologies nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <linux/module.h>
+
+/* Generic path for arbitrary size */
+static inline unsigned long
+__crypto_memneq_generic(const void *a, const void *b, size_t size)
+{
+	unsigned long neq = 0;
+
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+	while (size >= sizeof(unsigned long)) {
+		neq |= get_unaligned((unsigned long *)a) ^
+		       get_unaligned((unsigned long *)b);
+		OPTIMIZER_HIDE_VAR(neq);
+		a += sizeof(unsigned long);
+		b += sizeof(unsigned long);
+		size -= sizeof(unsigned long);
+	}
+#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
+	while (size > 0) {
+		neq |= *(unsigned char *)a ^ *(unsigned char *)b;
+		OPTIMIZER_HIDE_VAR(neq);
+		a += 1;
+		b += 1;
+		size -= 1;
+	}
+	return neq;
+}
+
+/* Loop-free fast-path for frequently used 16-byte size */
+static inline unsigned long __crypto_memneq_16(const void *a, const void *b)
+{
+	unsigned long neq = 0;
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (sizeof(unsigned long) == 8) {
+		neq |= get_unaligned((unsigned long *)a) ^
+		       get_unaligned((unsigned long *)b);
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= get_unaligned((unsigned long *)(a + 8)) ^
+		       get_unaligned((unsigned long *)(b + 8));
+		OPTIMIZER_HIDE_VAR(neq);
+	} else if (sizeof(unsigned int) == 4) {
+		neq |= get_unaligned((unsigned int *)a) ^
+		       get_unaligned((unsigned int *)b);
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= get_unaligned((unsigned int *)(a + 4)) ^
+		       get_unaligned((unsigned int *)(b + 4));
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= get_unaligned((unsigned int *)(a + 8)) ^
+		       get_unaligned((unsigned int *)(b + 8));
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= get_unaligned((unsigned int *)(a + 12)) ^
+		       get_unaligned((unsigned int *)(b + 12));
+		OPTIMIZER_HIDE_VAR(neq);
+	} else
+#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
+	{
+		neq |= *(unsigned char *)(a)    ^ *(unsigned char *)(b);
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+1)  ^ *(unsigned char *)(b+1);
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+2)  ^ *(unsigned char *)(b+2);
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+3)  ^ *(unsigned char *)(b+3);
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+4)  ^ *(unsigned char *)(b+4);
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+5)  ^ *(unsigned char *)(b+5);
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+6)  ^ *(unsigned char *)(b+6);
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+7)  ^ *(unsigned char *)(b+7);
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+8)  ^ *(unsigned char *)(b+8);
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+9)  ^ *(unsigned char *)(b+9);
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+10) ^ *(unsigned char *)(b+10);
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+11) ^ *(unsigned char *)(b+11);
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+12) ^ *(unsigned char *)(b+12);
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+13) ^ *(unsigned char *)(b+13);
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+14) ^ *(unsigned char *)(b+14);
+		OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+15) ^ *(unsigned char *)(b+15);
+		OPTIMIZER_HIDE_VAR(neq);
+	}
+
+	return neq;
+}
+
+/* Compare two areas of memory without leaking timing information,
+ * and with special optimizations for common sizes.  Users should
+ * not call this function directly, but should instead use
+ * crypto_memneq defined in crypto/algapi.h.
+ */
+noinline unsigned long __crypto_memneq(const void *a, const void *b,
+				       size_t size)
+{
+	switch (size) {
+	case 16:
+		return __crypto_memneq_16(a, b);
+	default:
+		return __crypto_memneq_generic(a, b, size);
+	}
+}
+EXPORT_SYMBOL(__crypto_memneq);
diff --git a/lib/crypto/sha1.c b/lib/crypto/sha1.c
new file mode 100644
index 000000000000..1aebe7be9401
--- /dev/null
+++ b/lib/crypto/sha1.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * SHA1 routine optimized to do word accesses rather than byte accesses,
+ * and to avoid unnecessary copies into the context array.
+ *
+ * This was based on the git SHA1 implementation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/string.h>
+#include <crypto/sha1.h>
+#include <asm/unaligned.h>
+
+/*
+ * If you have 32 registers or more, the compiler can (and should)
+ * try to change the array[] accesses into registers. However, on
+ * machines with less than ~25 registers, that won't really work,
+ * and at least gcc will make an unholy mess of it.
+ *
+ * So to avoid that mess which just slows things down, we force
+ * the stores to memory to actually happen (we might be better off
+ * with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as
+ * suggested by Artur Skawina - that will also make gcc unable to
+ * try to do the silly "optimize away loads" part because it won't
+ * see what the value will be).
+ *
+ * Ben Herrenschmidt reports that on PPC, the C version comes close
+ * to the optimized asm with this (ie on PPC you don't want that
+ * 'volatile', since there are lots of registers).
+ *
+ * On ARM we get the best code generation by forcing a full memory barrier
+ * between each SHA_ROUND, otherwise gcc happily get wild with spilling and
+ * the stack frame size simply explode and performance goes down the drain.
+ */
+
+#ifdef CONFIG_X86
+  #define setW(x, val) (*(volatile __u32 *)&W(x) = (val))
+#elif defined(CONFIG_ARM)
+  #define setW(x, val) do { W(x) = (val); __asm__("":::"memory"); } while (0)
+#else
+  #define setW(x, val) (W(x) = (val))
+#endif
+
+/* This "rolls" over the 512-bit array */
+#define W(x) (array[(x)&15])
+
+/*
+ * Where do we get the source from? The first 16 iterations get it from
+ * the input data, the next mix it from the 512-bit array.
+ */
+#define SHA_SRC(t) get_unaligned_be32((__u32 *)data + t)
+#define SHA_MIX(t) rol32(W(t+13) ^ W(t+8) ^ W(t+2) ^ W(t), 1)
+
+#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \
+	__u32 TEMP = input(t); setW(t, TEMP); \
+	E += TEMP + rol32(A,5) + (fn) + (constant); \
+	B = ror32(B, 2); \
+	TEMP = E; E = D; D = C; C = B; B = A; A = TEMP; } while (0)
+
+#define T_0_15(t, A, B, C, D, E)  SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
+#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
+#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1, A, B, C, D, E )
+#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc, A, B, C, D, E )
+#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0xca62c1d6, A, B, C, D, E )
+
+/**
+ * sha1_transform - single block SHA1 transform (deprecated)
+ *
+ * @digest: 160 bit digest to update
+ * @data:   512 bits of data to hash
+ * @array:  16 words of workspace (see note)
+ *
+ * This function executes SHA-1's internal compression function.  It updates the
+ * 160-bit internal state (@digest) with a single 512-bit data block (@data).
+ *
+ * Don't use this function.  SHA-1 is no longer considered secure.  And even if
+ * you do have to use SHA-1, this isn't the correct way to hash something with
+ * SHA-1 as this doesn't handle padding and finalization.
+ *
+ * Note: If the hash is security sensitive, the caller should be sure
+ * to clear the workspace. This is left to the caller to avoid
+ * unnecessary clears between chained hashing operations.
+ */
+void sha1_transform(__u32 *digest, const char *data, __u32 *array)
+{
+	__u32 A, B, C, D, E;
+	unsigned int i = 0;
+
+	A = digest[0];
+	B = digest[1];
+	C = digest[2];
+	D = digest[3];
+	E = digest[4];
+
+	/* Round 1 - iterations 0-16 take their input from 'data' */
+	for (; i < 16; ++i)
+		T_0_15(i, A, B, C, D, E);
+
+	/* Round 1 - tail. Input from 512-bit mixing array */
+	for (; i < 20; ++i)
+		T_16_19(i, A, B, C, D, E);
+
+	/* Round 2 */
+	for (; i < 40; ++i)
+		T_20_39(i, A, B, C, D, E);
+
+	/* Round 3 */
+	for (; i < 60; ++i)
+		T_40_59(i, A, B, C, D, E);
+
+	/* Round 4 */
+	for (; i < 80; ++i)
+		T_60_79(i, A, B, C, D, E);
+
+	digest[0] += A;
+	digest[1] += B;
+	digest[2] += C;
+	digest[3] += D;
+	digest[4] += E;
+}
+EXPORT_SYMBOL(sha1_transform);
+
+/**
+ * sha1_init - initialize the vectors for a SHA1 digest
+ * @buf: vector to initialize
+ */
+void sha1_init(__u32 *buf)
+{
+	buf[0] = 0x67452301;
+	buf[1] = 0xefcdab89;
+	buf[2] = 0x98badcfe;
+	buf[3] = 0x10325476;
+	buf[4] = 0xc3d2e1f0;
+}
+EXPORT_SYMBOL(sha1_init);
+
+MODULE_LICENSE("GPL");
diff --git a/lib/crypto/sm4.c b/lib/crypto/sm4.c
deleted file mode 100644
index 284e62576d0c..000000000000
--- a/lib/crypto/sm4.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * SM4, as specified in
- * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
- *
- * Copyright (C) 2018 ARM Limited or its affiliates.
- * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
- */
-
-#include <linux/module.h>
-#include <asm/unaligned.h>
-#include <crypto/sm4.h>
-
-static const u32 fk[4] = {
-	0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
-};
-
-static const u32 ____cacheline_aligned ck[32] = {
-	0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
-	0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
-	0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
-	0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
-	0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
-	0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
-	0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
-	0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
-};
-
-static const u8 ____cacheline_aligned sbox[256] = {
-	0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
-	0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
-	0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
-	0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
-	0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
-	0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
-	0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
-	0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
-	0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
-	0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
-	0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
-	0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
-	0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
-	0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
-	0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
-	0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
-	0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
-	0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
-	0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
-	0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
-	0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
-	0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
-	0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
-	0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
-	0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
-	0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
-	0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
-	0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
-	0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
-	0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
-	0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
-	0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
-};
-
-static inline u32 sm4_t_non_lin_sub(u32 x)
-{
-	u32 out;
-
-	out  = (u32)sbox[x & 0xff];
-	out |= (u32)sbox[(x >> 8) & 0xff] << 8;
-	out |= (u32)sbox[(x >> 16) & 0xff] << 16;
-	out |= (u32)sbox[(x >> 24) & 0xff] << 24;
-
-	return out;
-}
-
-static inline u32 sm4_key_lin_sub(u32 x)
-{
-	return x ^ rol32(x, 13) ^ rol32(x, 23);
-}
-
-static inline u32 sm4_enc_lin_sub(u32 x)
-{
-	return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24);
-}
-
-static inline u32 sm4_key_sub(u32 x)
-{
-	return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
-}
-
-static inline u32 sm4_enc_sub(u32 x)
-{
-	return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
-}
-
-static inline u32 sm4_round(u32 x0, u32 x1, u32 x2, u32 x3, u32 rk)
-{
-	return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk);
-}
-
-
-/**
- * sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016
- * @ctx:	The location where the computed key will be stored.
- * @in_key:	The supplied key.
- * @key_len:	The length of the supplied key.
- *
- * Returns 0 on success. The function fails only if an invalid key size (or
- * pointer) is supplied.
- */
-int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key,
-		  unsigned int key_len)
-{
-	u32 rk[4];
-	const u32 *key = (u32 *)in_key;
-	int i;
-
-	if (key_len != SM4_KEY_SIZE)
-		return -EINVAL;
-
-	rk[0] = get_unaligned_be32(&key[0]) ^ fk[0];
-	rk[1] = get_unaligned_be32(&key[1]) ^ fk[1];
-	rk[2] = get_unaligned_be32(&key[2]) ^ fk[2];
-	rk[3] = get_unaligned_be32(&key[3]) ^ fk[3];
-
-	for (i = 0; i < 32; i += 4) {
-		rk[0] ^= sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]);
-		rk[1] ^= sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]);
-		rk[2] ^= sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]);
-		rk[3] ^= sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]);
-
-		ctx->rkey_enc[i + 0] = rk[0];
-		ctx->rkey_enc[i + 1] = rk[1];
-		ctx->rkey_enc[i + 2] = rk[2];
-		ctx->rkey_enc[i + 3] = rk[3];
-		ctx->rkey_dec[31 - 0 - i] = rk[0];
-		ctx->rkey_dec[31 - 1 - i] = rk[1];
-		ctx->rkey_dec[31 - 2 - i] = rk[2];
-		ctx->rkey_dec[31 - 3 - i] = rk[3];
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(sm4_expandkey);
-
-/**
- * sm4_crypt_block - Encrypt or decrypt a single SM4 block
- * @rk:		The rkey_enc for encrypt or rkey_dec for decrypt
- * @out:	Buffer to store output data
- * @in: 	Buffer containing the input data
- */
-void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in)
-{
-	u32 x[4], i;
-
-	x[0] = get_unaligned_be32(in + 0 * 4);
-	x[1] = get_unaligned_be32(in + 1 * 4);
-	x[2] = get_unaligned_be32(in + 2 * 4);
-	x[3] = get_unaligned_be32(in + 3 * 4);
-
-	for (i = 0; i < 32; i += 4) {
-		x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[i + 0]);
-		x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[i + 1]);
-		x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[i + 2]);
-		x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[i + 3]);
-	}
-
-	put_unaligned_be32(x[3 - 0], out + 0 * 4);
-	put_unaligned_be32(x[3 - 1], out + 1 * 4);
-	put_unaligned_be32(x[3 - 2], out + 2 * 4);
-	put_unaligned_be32(x[3 - 3], out + 3 * 4);
-}
-EXPORT_SYMBOL_GPL(sm4_crypt_block);
-
-MODULE_DESCRIPTION("Generic SM4 library");
-MODULE_LICENSE("GPL v2");
diff --git a/lib/crypto/utils.c b/lib/crypto/utils.c
new file mode 100644
index 000000000000..53230ab1b195
--- /dev/null
+++ b/lib/crypto/utils.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Crypto library utility functions
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <linux/module.h>
+
+/*
+ * XOR @len bytes from @src1 and @src2 together, writing the result to @dst
+ * (which may alias one of the sources).  Don't call this directly; call
+ * crypto_xor() or crypto_xor_cpy() instead.
+ */
+void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
+{
+	int relalign = 0;
+
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+		int size = sizeof(unsigned long);
+		int d = (((unsigned long)dst ^ (unsigned long)src1) |
+			 ((unsigned long)dst ^ (unsigned long)src2)) &
+			(size - 1);
+
+		relalign = d ? 1 << __ffs(d) : size;
+
+		/*
+		 * If we care about alignment, process as many bytes as
+		 * needed to advance dst and src to values whose alignments
+		 * equal their relative alignment. This will allow us to
+		 * process the remainder of the input using optimal strides.
+		 */
+		while (((unsigned long)dst & (relalign - 1)) && len > 0) {
+			*dst++ = *src1++ ^ *src2++;
+			len--;
+		}
+	}
+
+	while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
+		if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+			u64 l = get_unaligned((u64 *)src1) ^
+				get_unaligned((u64 *)src2);
+			put_unaligned(l, (u64 *)dst);
+		} else {
+			*(u64 *)dst = *(u64 *)src1 ^ *(u64 *)src2;
+		}
+		dst += 8;
+		src1 += 8;
+		src2 += 8;
+		len -= 8;
+	}
+
+	while (len >= 4 && !(relalign & 3)) {
+		if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+			u32 l = get_unaligned((u32 *)src1) ^
+				get_unaligned((u32 *)src2);
+			put_unaligned(l, (u32 *)dst);
+		} else {
+			*(u32 *)dst = *(u32 *)src1 ^ *(u32 *)src2;
+		}
+		dst += 4;
+		src1 += 4;
+		src2 += 4;
+		len -= 4;
+	}
+
+	while (len >= 2 && !(relalign & 1)) {
+		if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+			u16 l = get_unaligned((u16 *)src1) ^
+				get_unaligned((u16 *)src2);
+			put_unaligned(l, (u16 *)dst);
+		} else {
+			*(u16 *)dst = *(u16 *)src1 ^ *(u16 *)src2;
+		}
+		dst += 2;
+		src1 += 2;
+		src2 += 2;
+		len -= 2;
+	}
+
+	while (len--)
+		*dst++ = *src1++ ^ *src2++;
+}
+EXPORT_SYMBOL_GPL(__crypto_xor);
+
+MODULE_LICENSE("GPL");