diff options
Diffstat (limited to 'arch/arm/lib/crypto/chacha-glue.c')
-rw-r--r-- | arch/arm/lib/crypto/chacha-glue.c | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/arch/arm/lib/crypto/chacha-glue.c b/arch/arm/lib/crypto/chacha-glue.c new file mode 100644 index 000000000000..88ec96415283 --- /dev/null +++ b/arch/arm/lib/crypto/chacha-glue.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ChaCha and HChaCha functions (ARM optimized) + * + * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2015 Martin Willi + */ + +#include <crypto/chacha.h> +#include <crypto/internal/simd.h> +#include <linux/jump_label.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/cputype.h> +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/simd.h> + +asmlinkage void chacha_block_xor_neon(const struct chacha_state *state, + u8 *dst, const u8 *src, int nrounds); +asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state, + u8 *dst, const u8 *src, + int nrounds, unsigned int nbytes); +asmlinkage void hchacha_block_arm(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); +asmlinkage void hchacha_block_neon(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); + +asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, + const struct chacha_state *state, int nrounds); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon); + +static inline bool neon_usable(void) +{ + return static_branch_likely(&use_neon) && crypto_simd_usable(); +} + +static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + u8 buf[CHACHA_BLOCK_SIZE]; + + while (bytes > CHACHA_BLOCK_SIZE) { + unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U); + + chacha_4block_xor_neon(state, dst, src, nrounds, l); + bytes -= l; + src += l; + dst += l; + state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); + } + if (bytes) { + const u8 *s = src; + u8 *d = dst; + + if (bytes != CHACHA_BLOCK_SIZE) + s = d = memcpy(buf, src, bytes); + chacha_block_xor_neon(state, d, s, nrounds); + if (d != dst) + memcpy(dst, buf, bytes); + state->x[12]++; + } +} + +void hchacha_block_arch(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) +{ + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) { + hchacha_block_arm(state, out, nrounds); + } else { + kernel_neon_begin(); + hchacha_block_neon(state, out, nrounds); + kernel_neon_end(); + } +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() || + bytes <= CHACHA_BLOCK_SIZE) { + chacha_doarm(dst, src, bytes, state, nrounds); + state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE); + return; + } + + do { + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); + + kernel_neon_begin(); + chacha_doneon(state, dst, src, todo, nrounds); + kernel_neon_end(); + + bytes -= todo; + src += todo; + dst += todo; + } while (bytes); +} +EXPORT_SYMBOL(chacha_crypt_arch); + +bool chacha_is_arch_optimized(void) +{ + /* We always can use at least the ARM scalar implementation. */ + return true; +} +EXPORT_SYMBOL(chacha_is_arch_optimized); + +static int __init chacha_arm_mod_init(void) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { + switch (read_cpuid_part()) { + case ARM_CPU_PART_CORTEX_A7: + case ARM_CPU_PART_CORTEX_A5: + /* + * The Cortex-A7 and Cortex-A5 do not perform well with + * the NEON implementation but do incredibly with the + * scalar one and use less power. + */ + break; + default: + static_branch_enable(&use_neon); + } + } + return 0; +} +subsys_initcall(chacha_arm_mod_init); + +static void __exit chacha_arm_mod_exit(void) +{ +} +module_exit(chacha_arm_mod_exit); + +MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM optimized)"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); |