diff options
Diffstat (limited to 'arch/mips/crypto')
-rw-r--r-- | arch/mips/crypto/Kconfig | 33 | ||||
-rw-r--r-- | arch/mips/crypto/Makefile | 17 | ||||
-rw-r--r-- | arch/mips/crypto/chacha-core.S | 497 | ||||
-rw-r--r-- | arch/mips/crypto/chacha-glue.c | 146 | ||||
-rw-r--r-- | arch/mips/crypto/poly1305-glue.c | 192 | ||||
-rw-r--r-- | arch/mips/crypto/poly1305-mips.pl | 1273 |
6 files changed, 0 insertions, 2158 deletions
diff --git a/arch/mips/crypto/Kconfig b/arch/mips/crypto/Kconfig index 545fc0e12422..6bf073ae7613 100644 --- a/arch/mips/crypto/Kconfig +++ b/arch/mips/crypto/Kconfig @@ -2,17 +2,6 @@ menu "Accelerated Cryptographic Algorithms for CPU (mips)" -config CRYPTO_POLY1305_MIPS - tristate - depends on MIPS - select CRYPTO_HASH - select CRYPTO_ARCH_HAVE_LIB_POLY1305 - default CRYPTO_LIB_POLY1305_INTERNAL - help - Poly1305 authenticator algorithm (RFC7539) - - Architecture: mips - config CRYPTO_MD5_OCTEON tristate "Digests: MD5 (OCTEON)" depends on CPU_CAVIUM_OCTEON @@ -33,16 +22,6 @@ config CRYPTO_SHA1_OCTEON Architecture: mips OCTEON -config CRYPTO_SHA256_OCTEON - tristate "Hash functions: SHA-224 and SHA-256 (OCTEON)" - depends on CPU_CAVIUM_OCTEON - select CRYPTO_SHA256 - select CRYPTO_HASH - help - SHA-224 and SHA-256 secure hash algorithms (FIPS 180) - - Architecture: mips OCTEON using crypto instructions, when available - config CRYPTO_SHA512_OCTEON tristate "Hash functions: SHA-384 and SHA-512 (OCTEON)" depends on CPU_CAVIUM_OCTEON @@ -53,16 +32,4 @@ config CRYPTO_SHA512_OCTEON Architecture: mips OCTEON using crypto instructions, when available -config CRYPTO_CHACHA_MIPS - tristate - depends on CPU_MIPS32_R2 - select CRYPTO_SKCIPHER - select CRYPTO_ARCH_HAVE_LIB_CHACHA - default CRYPTO_LIB_CHACHA_INTERNAL - help - Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12 - stream cipher algorithms - - Architecture: MIPS32r2 - endmenu diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile index fddc88281412..5adb631a69c1 100644 --- a/arch/mips/crypto/Makefile +++ b/arch/mips/crypto/Makefile @@ -3,20 +3,3 @@ # Makefile for MIPS crypto files.. # -obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o -chacha-mips-y := chacha-core.o chacha-glue.o -AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots - -obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o -poly1305-mips-y := poly1305-core.o poly1305-glue.o - -perlasm-flavour-$(CONFIG_32BIT) := o32 -perlasm-flavour-$(CONFIG_64BIT) := 64 - -quiet_cmd_perlasm = PERLASM $@ - cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@) - -$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE - $(call if_changed,perlasm) - -targets += poly1305-core.S diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S deleted file mode 100644 index 5755f69cfe00..000000000000 --- a/arch/mips/crypto/chacha-core.S +++ /dev/null @@ -1,497 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -/* - * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved. - * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. - */ - -#define MASK_U32 0x3c -#define CHACHA20_BLOCK_SIZE 64 -#define STACK_SIZE 32 - -#define X0 $t0 -#define X1 $t1 -#define X2 $t2 -#define X3 $t3 -#define X4 $t4 -#define X5 $t5 -#define X6 $t6 -#define X7 $t7 -#define X8 $t8 -#define X9 $t9 -#define X10 $v1 -#define X11 $s6 -#define X12 $s5 -#define X13 $s4 -#define X14 $s3 -#define X15 $s2 -/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */ -#define T0 $s1 -#define T1 $s0 -#define T(n) T ## n -#define X(n) X ## n - -/* Input arguments */ -#define STATE $a0 -#define OUT $a1 -#define IN $a2 -#define BYTES $a3 - -/* Output argument */ -/* NONCE[0] is kept in a register and not in memory. - * We don't want to touch original value in memory. - * Must be incremented every loop iteration. - */ -#define NONCE_0 $v0 - -/* SAVED_X and SAVED_CA are set in the jump table. - * Use regs which are overwritten on exit else we don't leak clear data. - * They are used to handling the last bytes which are not multiple of 4. - */ -#define SAVED_X X15 -#define SAVED_CA $s7 - -#define IS_UNALIGNED $s7 - -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#define MSB 0 -#define LSB 3 -#define ROTx rotl -#define ROTR(n) rotr n, 24 -#define CPU_TO_LE32(n) \ - wsbh n; \ - rotr n, 16; -#else -#define MSB 3 -#define LSB 0 -#define ROTx rotr -#define CPU_TO_LE32(n) -#define ROTR(n) -#endif - -#define FOR_EACH_WORD(x) \ - x( 0); \ - x( 1); \ - x( 2); \ - x( 3); \ - x( 4); \ - x( 5); \ - x( 6); \ - x( 7); \ - x( 8); \ - x( 9); \ - x(10); \ - x(11); \ - x(12); \ - x(13); \ - x(14); \ - x(15); - -#define FOR_EACH_WORD_REV(x) \ - x(15); \ - x(14); \ - x(13); \ - x(12); \ - x(11); \ - x(10); \ - x( 9); \ - x( 8); \ - x( 7); \ - x( 6); \ - x( 5); \ - x( 4); \ - x( 3); \ - x( 2); \ - x( 1); \ - x( 0); - -#define PLUS_ONE_0 1 -#define PLUS_ONE_1 2 -#define PLUS_ONE_2 3 -#define PLUS_ONE_3 4 -#define PLUS_ONE_4 5 -#define PLUS_ONE_5 6 -#define PLUS_ONE_6 7 -#define PLUS_ONE_7 8 -#define PLUS_ONE_8 9 -#define PLUS_ONE_9 10 -#define PLUS_ONE_10 11 -#define PLUS_ONE_11 12 -#define PLUS_ONE_12 13 -#define PLUS_ONE_13 14 -#define PLUS_ONE_14 15 -#define PLUS_ONE_15 16 -#define PLUS_ONE(x) PLUS_ONE_ ## x -#define _CONCAT3(a,b,c) a ## b ## c -#define CONCAT3(a,b,c) _CONCAT3(a,b,c) - -#define STORE_UNALIGNED(x) \ -CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ - .if (x != 12); \ - lw T0, (x*4)(STATE); \ - .endif; \ - lwl T1, (x*4)+MSB ## (IN); \ - lwr T1, (x*4)+LSB ## (IN); \ - .if (x == 12); \ - addu X ## x, NONCE_0; \ - .else; \ - addu X ## x, T0; \ - .endif; \ - CPU_TO_LE32(X ## x); \ - xor X ## x, T1; \ - swl X ## x, (x*4)+MSB ## (OUT); \ - swr X ## x, (x*4)+LSB ## (OUT); - -#define STORE_ALIGNED(x) \ -CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ - .if (x != 12); \ - lw T0, (x*4)(STATE); \ - .endif; \ - lw T1, (x*4) ## (IN); \ - .if (x == 12); \ - addu X ## x, NONCE_0; \ - .else; \ - addu X ## x, T0; \ - .endif; \ - CPU_TO_LE32(X ## x); \ - xor X ## x, T1; \ - sw X ## x, (x*4) ## (OUT); - -/* Jump table macro. - * Used for setup and handling the last bytes, which are not multiple of 4. - * X15 is free to store Xn - * Every jumptable entry must be equal in size. - */ -#define JMPTBL_ALIGNED(x) \ -.Lchacha_mips_jmptbl_aligned_ ## x: ; \ - .set noreorder; \ - b .Lchacha_mips_xor_aligned_ ## x ## _b; \ - .if (x == 12); \ - addu SAVED_X, X ## x, NONCE_0; \ - .else; \ - addu SAVED_X, X ## x, SAVED_CA; \ - .endif; \ - .set reorder - -#define JMPTBL_UNALIGNED(x) \ -.Lchacha_mips_jmptbl_unaligned_ ## x: ; \ - .set noreorder; \ - b .Lchacha_mips_xor_unaligned_ ## x ## _b; \ - .if (x == 12); \ - addu SAVED_X, X ## x, NONCE_0; \ - .else; \ - addu SAVED_X, X ## x, SAVED_CA; \ - .endif; \ - .set reorder - -#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \ - addu X(A), X(K); \ - addu X(B), X(L); \ - addu X(C), X(M); \ - addu X(D), X(N); \ - xor X(V), X(A); \ - xor X(W), X(B); \ - xor X(Y), X(C); \ - xor X(Z), X(D); \ - rotl X(V), S; \ - rotl X(W), S; \ - rotl X(Y), S; \ - rotl X(Z), S; - -.text -.set reorder -.set noat -.globl chacha_crypt_arch -.ent chacha_crypt_arch -chacha_crypt_arch: - .frame $sp, STACK_SIZE, $ra - - /* Load number of rounds */ - lw $at, 16($sp) - - addiu $sp, -STACK_SIZE - - /* Return bytes = 0. */ - beqz BYTES, .Lchacha_mips_end - - lw NONCE_0, 48(STATE) - - /* Save s0-s7 */ - sw $s0, 0($sp) - sw $s1, 4($sp) - sw $s2, 8($sp) - sw $s3, 12($sp) - sw $s4, 16($sp) - sw $s5, 20($sp) - sw $s6, 24($sp) - sw $s7, 28($sp) - - /* Test IN or OUT is unaligned. - * IS_UNALIGNED = ( IN | OUT ) & 0x00000003 - */ - or IS_UNALIGNED, IN, OUT - andi IS_UNALIGNED, 0x3 - - b .Lchacha_rounds_start - -.align 4 -.Loop_chacha_rounds: - addiu IN, CHACHA20_BLOCK_SIZE - addiu OUT, CHACHA20_BLOCK_SIZE - addiu NONCE_0, 1 - -.Lchacha_rounds_start: - lw X0, 0(STATE) - lw X1, 4(STATE) - lw X2, 8(STATE) - lw X3, 12(STATE) - - lw X4, 16(STATE) - lw X5, 20(STATE) - lw X6, 24(STATE) - lw X7, 28(STATE) - lw X8, 32(STATE) - lw X9, 36(STATE) - lw X10, 40(STATE) - lw X11, 44(STATE) - - move X12, NONCE_0 - lw X13, 52(STATE) - lw X14, 56(STATE) - lw X15, 60(STATE) - -.Loop_chacha_xor_rounds: - addiu $at, -2 - AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); - AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); - AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); - AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); - AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); - AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); - AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); - AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); - bnez $at, .Loop_chacha_xor_rounds - - addiu BYTES, -(CHACHA20_BLOCK_SIZE) - - /* Is data src/dst unaligned? Jump */ - bnez IS_UNALIGNED, .Loop_chacha_unaligned - - /* Set number rounds here to fill delayslot. */ - lw $at, (STACK_SIZE+16)($sp) - - /* BYTES < 0, it has no full block. */ - bltz BYTES, .Lchacha_mips_no_full_block_aligned - - FOR_EACH_WORD_REV(STORE_ALIGNED) - - /* BYTES > 0? Loop again. */ - bgtz BYTES, .Loop_chacha_rounds - - /* Place this here to fill delay slot */ - addiu NONCE_0, 1 - - /* BYTES < 0? Handle last bytes */ - bltz BYTES, .Lchacha_mips_xor_bytes - -.Lchacha_mips_xor_done: - /* Restore used registers */ - lw $s0, 0($sp) - lw $s1, 4($sp) - lw $s2, 8($sp) - lw $s3, 12($sp) - lw $s4, 16($sp) - lw $s5, 20($sp) - lw $s6, 24($sp) - lw $s7, 28($sp) - - /* Write NONCE_0 back to right location in state */ - sw NONCE_0, 48(STATE) - -.Lchacha_mips_end: - addiu $sp, STACK_SIZE - jr $ra - -.Lchacha_mips_no_full_block_aligned: - /* Restore the offset on BYTES */ - addiu BYTES, CHACHA20_BLOCK_SIZE - - /* Get number of full WORDS */ - andi $at, BYTES, MASK_U32 - - /* Load upper half of jump table addr */ - lui T0, %hi(.Lchacha_mips_jmptbl_aligned_0) - - /* Calculate lower half jump table offset */ - ins T0, $at, 1, 6 - - /* Add offset to STATE */ - addu T1, STATE, $at - - /* Add lower half jump table addr */ - addiu T0, %lo(.Lchacha_mips_jmptbl_aligned_0) - - /* Read value from STATE */ - lw SAVED_CA, 0(T1) - - /* Store remaining bytecounter as negative value */ - subu BYTES, $at, BYTES - - jr T0 - - /* Jump table */ - FOR_EACH_WORD(JMPTBL_ALIGNED) - - -.Loop_chacha_unaligned: - /* Set number rounds here to fill delayslot. */ - lw $at, (STACK_SIZE+16)($sp) - - /* BYTES > 0, it has no full block. */ - bltz BYTES, .Lchacha_mips_no_full_block_unaligned - - FOR_EACH_WORD_REV(STORE_UNALIGNED) - - /* BYTES > 0? Loop again. */ - bgtz BYTES, .Loop_chacha_rounds - - /* Write NONCE_0 back to right location in state */ - sw NONCE_0, 48(STATE) - - .set noreorder - /* Fall through to byte handling */ - bgez BYTES, .Lchacha_mips_xor_done -.Lchacha_mips_xor_unaligned_0_b: -.Lchacha_mips_xor_aligned_0_b: - /* Place this here to fill delay slot */ - addiu NONCE_0, 1 - .set reorder - -.Lchacha_mips_xor_bytes: - addu IN, $at - addu OUT, $at - /* First byte */ - lbu T1, 0(IN) - addiu $at, BYTES, 1 - CPU_TO_LE32(SAVED_X) - ROTR(SAVED_X) - xor T1, SAVED_X - sb T1, 0(OUT) - beqz $at, .Lchacha_mips_xor_done - /* Second byte */ - lbu T1, 1(IN) - addiu $at, BYTES, 2 - ROTx SAVED_X, 8 - xor T1, SAVED_X - sb T1, 1(OUT) - beqz $at, .Lchacha_mips_xor_done - /* Third byte */ - lbu T1, 2(IN) - ROTx SAVED_X, 8 - xor T1, SAVED_X - sb T1, 2(OUT) - b .Lchacha_mips_xor_done - -.Lchacha_mips_no_full_block_unaligned: - /* Restore the offset on BYTES */ - addiu BYTES, CHACHA20_BLOCK_SIZE - - /* Get number of full WORDS */ - andi $at, BYTES, MASK_U32 - - /* Load upper half of jump table addr */ - lui T0, %hi(.Lchacha_mips_jmptbl_unaligned_0) - - /* Calculate lower half jump table offset */ - ins T0, $at, 1, 6 - - /* Add offset to STATE */ - addu T1, STATE, $at - - /* Add lower half jump table addr */ - addiu T0, %lo(.Lchacha_mips_jmptbl_unaligned_0) - - /* Read value from STATE */ - lw SAVED_CA, 0(T1) - - /* Store remaining bytecounter as negative value */ - subu BYTES, $at, BYTES - - jr T0 - - /* Jump table */ - FOR_EACH_WORD(JMPTBL_UNALIGNED) -.end chacha_crypt_arch -.set at - -/* Input arguments - * STATE $a0 - * OUT $a1 - * NROUND $a2 - */ - -#undef X12 -#undef X13 -#undef X14 -#undef X15 - -#define X12 $a3 -#define X13 $at -#define X14 $v0 -#define X15 STATE - -.set noat -.globl hchacha_block_arch -.ent hchacha_block_arch -hchacha_block_arch: - .frame $sp, STACK_SIZE, $ra - - addiu $sp, -STACK_SIZE - - /* Save X11(s6) */ - sw X11, 0($sp) - - lw X0, 0(STATE) - lw X1, 4(STATE) - lw X2, 8(STATE) - lw X3, 12(STATE) - lw X4, 16(STATE) - lw X5, 20(STATE) - lw X6, 24(STATE) - lw X7, 28(STATE) - lw X8, 32(STATE) - lw X9, 36(STATE) - lw X10, 40(STATE) - lw X11, 44(STATE) - lw X12, 48(STATE) - lw X13, 52(STATE) - lw X14, 56(STATE) - lw X15, 60(STATE) - -.Loop_hchacha_xor_rounds: - addiu $a2, -2 - AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); - AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); - AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); - AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); - AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); - AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); - AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); - AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); - bnez $a2, .Loop_hchacha_xor_rounds - - /* Restore used register */ - lw X11, 0($sp) - - sw X0, 0(OUT) - sw X1, 4(OUT) - sw X2, 8(OUT) - sw X3, 12(OUT) - sw X12, 16(OUT) - sw X13, 20(OUT) - sw X14, 24(OUT) - sw X15, 28(OUT) - - addiu $sp, STACK_SIZE - jr $ra -.end hchacha_block_arch -.set at diff --git a/arch/mips/crypto/chacha-glue.c b/arch/mips/crypto/chacha-glue.c deleted file mode 100644 index f6fc2e1079a1..000000000000 --- a/arch/mips/crypto/chacha-glue.c +++ /dev/null @@ -1,146 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * MIPS accelerated ChaCha and XChaCha stream ciphers, - * including ChaCha20 (RFC7539) - * - * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org> - */ - -#include <asm/byteorder.h> -#include <crypto/algapi.h> -#include <crypto/internal/chacha.h> -#include <crypto/internal/skcipher.h> -#include <linux/kernel.h> -#include <linux/module.h> - -asmlinkage void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds); -EXPORT_SYMBOL(chacha_crypt_arch); - -asmlinkage void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds); -EXPORT_SYMBOL(hchacha_block_arch); - -static int chacha_mips_stream_xor(struct skcipher_request *req, - const struct chacha_ctx *ctx, const u8 *iv) -{ - struct skcipher_walk walk; - u32 state[16]; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - chacha_init(state, ctx->key, iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); - - chacha_crypt(state, walk.dst.virt.addr, walk.src.virt.addr, - nbytes, ctx->nrounds); - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - - return err; -} - -static int chacha_mips(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - - return chacha_mips_stream_xor(req, ctx, req->iv); -} - -static int xchacha_mips(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - struct chacha_ctx subctx; - u32 state[16]; - u8 real_iv[16]; - - chacha_init(state, ctx->key, req->iv); - - hchacha_block(state, subctx.key, ctx->nrounds); - subctx.nrounds = ctx->nrounds; - - memcpy(&real_iv[0], req->iv + 24, 8); - memcpy(&real_iv[8], req->iv + 16, 8); - return chacha_mips_stream_xor(req, &subctx, real_iv); -} - -static struct skcipher_alg algs[] = { - { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-mips", - .base.cra_priority = 200, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = CHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .setkey = chacha20_setkey, - .encrypt = chacha_mips, - .decrypt = chacha_mips, - }, { - .base.cra_name = "xchacha20", - .base.cra_driver_name = "xchacha20-mips", - .base.cra_priority = 200, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .setkey = chacha20_setkey, - .encrypt = xchacha_mips, - .decrypt = xchacha_mips, - }, { - .base.cra_name = "xchacha12", - .base.cra_driver_name = "xchacha12-mips", - .base.cra_priority = 200, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .setkey = chacha12_setkey, - .encrypt = xchacha_mips, - .decrypt = xchacha_mips, - } -}; - -static int __init chacha_simd_mod_init(void) -{ - return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ? - crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0; -} - -static void __exit chacha_simd_mod_fini(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) - crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); -} - -module_init(chacha_simd_mod_init); -module_exit(chacha_simd_mod_fini); - -MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (MIPS accelerated)"); -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("chacha20"); -MODULE_ALIAS_CRYPTO("chacha20-mips"); -MODULE_ALIAS_CRYPTO("xchacha20"); -MODULE_ALIAS_CRYPTO("xchacha20-mips"); -MODULE_ALIAS_CRYPTO("xchacha12"); -MODULE_ALIAS_CRYPTO("xchacha12-mips"); diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c deleted file mode 100644 index c03ad0bbe69c..000000000000 --- a/arch/mips/crypto/poly1305-glue.c +++ /dev/null @@ -1,192 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS - * - * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> - */ - -#include <linux/unaligned.h> -#include <crypto/algapi.h> -#include <crypto/internal/hash.h> -#include <crypto/internal/poly1305.h> -#include <linux/cpufeature.h> -#include <linux/crypto.h> -#include <linux/module.h> - -asmlinkage void poly1305_init_mips(void *state, const u8 *key); -asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_emit_mips(void *state, u8 *digest, const u32 *nonce); - -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) -{ - poly1305_init_mips(&dctx->h, key); - dctx->s[0] = get_unaligned_le32(key + 16); - dctx->s[1] = get_unaligned_le32(key + 20); - dctx->s[2] = get_unaligned_le32(key + 24); - dctx->s[3] = get_unaligned_le32(key + 28); - dctx->buflen = 0; -} -EXPORT_SYMBOL(poly1305_init_arch); - -static int mips_poly1305_init(struct shash_desc *desc) -{ - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - - dctx->buflen = 0; - dctx->rset = 0; - dctx->sset = false; - - return 0; -} - -static void mips_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, - u32 len, u32 hibit) -{ - if (unlikely(!dctx->sset)) { - if (!dctx->rset) { - poly1305_init_mips(&dctx->h, src); - src += POLY1305_BLOCK_SIZE; - len -= POLY1305_BLOCK_SIZE; - dctx->rset = 1; - } - if (len >= POLY1305_BLOCK_SIZE) { - dctx->s[0] = get_unaligned_le32(src + 0); - dctx->s[1] = get_unaligned_le32(src + 4); - dctx->s[2] = get_unaligned_le32(src + 8); - dctx->s[3] = get_unaligned_le32(src + 12); - src += POLY1305_BLOCK_SIZE; - len -= POLY1305_BLOCK_SIZE; - dctx->sset = true; - } - if (len < POLY1305_BLOCK_SIZE) - return; - } - - len &= ~(POLY1305_BLOCK_SIZE - 1); - - poly1305_blocks_mips(&dctx->h, src, len, hibit); -} - -static int mips_poly1305_update(struct shash_desc *desc, const u8 *src, - unsigned int len) -{ - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - - if (unlikely(dctx->buflen)) { - u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); - - memcpy(dctx->buf + dctx->buflen, src, bytes); - src += bytes; - len -= bytes; - dctx->buflen += bytes; - - if (dctx->buflen == POLY1305_BLOCK_SIZE) { - mips_poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 1); - dctx->buflen = 0; - } - } - - if (likely(len >= POLY1305_BLOCK_SIZE)) { - mips_poly1305_blocks(dctx, src, len, 1); - src += round_down(len, POLY1305_BLOCK_SIZE); - len %= POLY1305_BLOCK_SIZE; - } - - if (unlikely(len)) { - dctx->buflen = len; - memcpy(dctx->buf, src, len); - } - return 0; -} - -void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, - unsigned int nbytes) -{ - if (unlikely(dctx->buflen)) { - u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); - - memcpy(dctx->buf + dctx->buflen, src, bytes); - src += bytes; - nbytes -= bytes; - dctx->buflen += bytes; - - if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_blocks_mips(&dctx->h, dctx->buf, - POLY1305_BLOCK_SIZE, 1); - dctx->buflen = 0; - } - } - - if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { - unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); - - poly1305_blocks_mips(&dctx->h, src, len, 1); - src += len; - nbytes %= POLY1305_BLOCK_SIZE; - } - - if (unlikely(nbytes)) { - dctx->buflen = nbytes; - memcpy(dctx->buf, src, nbytes); - } -} -EXPORT_SYMBOL(poly1305_update_arch); - -void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) -{ - if (unlikely(dctx->buflen)) { - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, - POLY1305_BLOCK_SIZE - dctx->buflen); - poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); - } - - poly1305_emit_mips(&dctx->h, dst, dctx->s); - *dctx = (struct poly1305_desc_ctx){}; -} -EXPORT_SYMBOL(poly1305_final_arch); - -static int mips_poly1305_final(struct shash_desc *desc, u8 *dst) -{ - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - - if (unlikely(!dctx->sset)) - return -ENOKEY; - - poly1305_final_arch(dctx, dst); - return 0; -} - -static struct shash_alg mips_poly1305_alg = { - .init = mips_poly1305_init, - .update = mips_poly1305_update, - .final = mips_poly1305_final, - .digestsize = POLY1305_DIGEST_SIZE, - .descsize = sizeof(struct poly1305_desc_ctx), - - .base.cra_name = "poly1305", - .base.cra_driver_name = "poly1305-mips", - .base.cra_priority = 200, - .base.cra_blocksize = POLY1305_BLOCK_SIZE, - .base.cra_module = THIS_MODULE, -}; - -static int __init mips_poly1305_mod_init(void) -{ - return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? - crypto_register_shash(&mips_poly1305_alg) : 0; -} - -static void __exit mips_poly1305_mod_exit(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) - crypto_unregister_shash(&mips_poly1305_alg); -} - -module_init(mips_poly1305_mod_init); -module_exit(mips_poly1305_mod_exit); - -MODULE_DESCRIPTION("Poly1305 transform (MIPS accelerated"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("poly1305"); -MODULE_ALIAS_CRYPTO("poly1305-mips"); diff --git a/arch/mips/crypto/poly1305-mips.pl b/arch/mips/crypto/poly1305-mips.pl deleted file mode 100644 index b05bab884ed2..000000000000 --- a/arch/mips/crypto/poly1305-mips.pl +++ /dev/null @@ -1,1273 +0,0 @@ -#!/usr/bin/env perl -# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause -# -# ==================================================================== -# Written by Andy Polyakov, @dot-asm, originally for the OpenSSL -# project. -# ==================================================================== - -# Poly1305 hash for MIPS. -# -# May 2016 -# -# Numbers are cycles per processed byte with poly1305_blocks alone. -# -# IALU/gcc -# R1x000 ~5.5/+130% (big-endian) -# Octeon II 2.50/+70% (little-endian) -# -# March 2019 -# -# Add 32-bit code path. -# -# October 2019 -# -# Modulo-scheduling reduction allows to omit dependency chain at the -# end of inner loop and improve performance. Also optimize MIPS32R2 -# code path for MIPS 1004K core. Per René von Dorst's suggestions. -# -# IALU/gcc -# R1x000 ~9.8/? (big-endian) -# Octeon II 3.65/+140% (little-endian) -# MT7621/1004K 4.75/? (little-endian) -# -###################################################################### -# There is a number of MIPS ABI in use, O32 and N32/64 are most -# widely used. Then there is a new contender: NUBI. It appears that if -# one picks the latter, it's possible to arrange code in ABI neutral -# manner. Therefore let's stick to NUBI register layout: -# -($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); -($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); -($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); -($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); -# -# The return value is placed in $a0. Following coding rules facilitate -# interoperability: -# -# - never ever touch $tp, "thread pointer", former $gp [o32 can be -# excluded from the rule, because it's specified volatile]; -# - copy return value to $t0, former $v0 [or to $a0 if you're adapting -# old code]; -# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; -# -# For reference here is register layout for N32/64 MIPS ABIs: -# -# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); -# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); -# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); -# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); -# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); -# -# <appro@openssl.org> -# -###################################################################### - -$flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64 - -$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0; - -if ($flavour =~ /64|n32/i) {{{ -###################################################################### -# 64-bit code path -# - -my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); -my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1); - -$code.=<<___; -#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\ - defined(_MIPS_ARCH_MIPS64R6)) \\ - && !defined(_MIPS_ARCH_MIPS64R2) -# define _MIPS_ARCH_MIPS64R2 -#endif - -#if defined(_MIPS_ARCH_MIPS64R6) -# define dmultu(rs,rt) -# define mflo(rd,rs,rt) dmulu rd,rs,rt -# define mfhi(rd,rs,rt) dmuhu rd,rs,rt -#else -# define dmultu(rs,rt) dmultu rs,rt -# define mflo(rd,rs,rt) mflo rd -# define mfhi(rd,rs,rt) mfhi rd -#endif - -#ifdef __KERNEL__ -# define poly1305_init poly1305_init_mips -# define poly1305_blocks poly1305_blocks_mips -# define poly1305_emit poly1305_emit_mips -#endif - -#if defined(__MIPSEB__) && !defined(MIPSEB) -# define MIPSEB -#endif - -#ifdef MIPSEB -# define MSB 0 -# define LSB 7 -#else -# define MSB 7 -# define LSB 0 -#endif - -.text -.set noat -.set noreorder - -.align 5 -.globl poly1305_init -.ent poly1305_init -poly1305_init: - .frame $sp,0,$ra - .set reorder - - sd $zero,0($ctx) - sd $zero,8($ctx) - sd $zero,16($ctx) - - beqz $inp,.Lno_key - -#if defined(_MIPS_ARCH_MIPS64R6) - andi $tmp0,$inp,7 # $inp % 8 - dsubu $inp,$inp,$tmp0 # align $inp - sll $tmp0,$tmp0,3 # byte to bit offset - ld $in0,0($inp) - ld $in1,8($inp) - beqz $tmp0,.Laligned_key - ld $tmp2,16($inp) - - subu $tmp1,$zero,$tmp0 -# ifdef MIPSEB - dsllv $in0,$in0,$tmp0 - dsrlv $tmp3,$in1,$tmp1 - dsllv $in1,$in1,$tmp0 - dsrlv $tmp2,$tmp2,$tmp1 -# else - dsrlv $in0,$in0,$tmp0 - dsllv $tmp3,$in1,$tmp1 - dsrlv $in1,$in1,$tmp0 - dsllv $tmp2,$tmp2,$tmp1 -# endif - or $in0,$in0,$tmp3 - or $in1,$in1,$tmp2 -.Laligned_key: -#else - ldl $in0,0+MSB($inp) - ldl $in1,8+MSB($inp) - ldr $in0,0+LSB($inp) - ldr $in1,8+LSB($inp) -#endif -#ifdef MIPSEB -# if defined(_MIPS_ARCH_MIPS64R2) - dsbh $in0,$in0 # byte swap - dsbh $in1,$in1 - dshd $in0,$in0 - dshd $in1,$in1 -# else - ori $tmp0,$zero,0xFF - dsll $tmp2,$tmp0,32 - or $tmp0,$tmp2 # 0x000000FF000000FF - - and $tmp1,$in0,$tmp0 # byte swap - and $tmp3,$in1,$tmp0 - dsrl $tmp2,$in0,24 - dsrl $tmp4,$in1,24 - dsll $tmp1,24 - dsll $tmp3,24 - and $tmp2,$tmp0 - and $tmp4,$tmp0 - dsll $tmp0,8 # 0x0000FF000000FF00 - or $tmp1,$tmp2 - or $tmp3,$tmp4 - and $tmp2,$in0,$tmp0 - and $tmp4,$in1,$tmp0 - dsrl $in0,8 - dsrl $in1,8 - dsll $tmp2,8 - dsll $tmp4,8 - and $in0,$tmp0 - and $in1,$tmp0 - or $tmp1,$tmp2 - or $tmp3,$tmp4 - or $in0,$tmp1 - or $in1,$tmp3 - dsrl $tmp1,$in0,32 - dsrl $tmp3,$in1,32 - dsll $in0,32 - dsll $in1,32 - or $in0,$tmp1 - or $in1,$tmp3 -# endif -#endif - li $tmp0,1 - dsll $tmp0,32 # 0x0000000100000000 - daddiu $tmp0,-63 # 0x00000000ffffffc1 - dsll $tmp0,28 # 0x0ffffffc10000000 - daddiu $tmp0,-1 # 0x0ffffffc0fffffff - - and $in0,$tmp0 - daddiu $tmp0,-3 # 0x0ffffffc0ffffffc - and $in1,$tmp0 - - sd $in0,24($ctx) - dsrl $tmp0,$in1,2 - sd $in1,32($ctx) - daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2) - sd $tmp0,40($ctx) - -.Lno_key: - li $v0,0 # return 0 - jr $ra -.end poly1305_init -___ -{ -my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000"; - -my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) = - ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2); -my ($shr,$shl) = ($s6,$s7); # used on R6 - -$code.=<<___; -.align 5 -.globl poly1305_blocks -.ent poly1305_blocks -poly1305_blocks: - .set noreorder - dsrl $len,4 # number of complete blocks - bnez $len,poly1305_blocks_internal - nop - jr $ra - nop -.end poly1305_blocks - -.align 5 -.ent poly1305_blocks_internal -poly1305_blocks_internal: - .set noreorder -#if defined(_MIPS_ARCH_MIPS64R6) - .frame $sp,8*8,$ra - .mask $SAVED_REGS_MASK|0x000c0000,-8 - dsubu $sp,8*8 - sd $s7,56($sp) - sd $s6,48($sp) -#else - .frame $sp,6*8,$ra - .mask $SAVED_REGS_MASK,-8 - dsubu $sp,6*8 -#endif - sd $s5,40($sp) - sd $s4,32($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue - sd $s3,24($sp) - sd $s2,16($sp) - sd $s1,8($sp) - sd $s0,0($sp) -___ -$code.=<<___; - .set reorder - -#if defined(_MIPS_ARCH_MIPS64R6) - andi $shr,$inp,7 - dsubu $inp,$inp,$shr # align $inp - sll $shr,$shr,3 # byte to bit offset - subu $shl,$zero,$shr -#endif - - ld $h0,0($ctx) # load hash value - ld $h1,8($ctx) - ld $h2,16($ctx) - - ld $r0,24($ctx) # load key - ld $r1,32($ctx) - ld $rs1,40($ctx) - - dsll $len,4 - daddu $len,$inp # end of buffer - b .Loop - -.align 4 -.Loop: -#if defined(_MIPS_ARCH_MIPS64R6) - ld $in0,0($inp) # load input - ld $in1,8($inp) - beqz $shr,.Laligned_inp - - ld $tmp2,16($inp) -# ifdef MIPSEB - dsllv $in0,$in0,$shr - dsrlv $tmp3,$in1,$shl - dsllv $in1,$in1,$shr - dsrlv $tmp2,$tmp2,$shl -# else - dsrlv $in0,$in0,$shr - dsllv $tmp3,$in1,$shl - dsrlv $in1,$in1,$shr - dsllv $tmp2,$tmp2,$shl -# endif - or $in0,$in0,$tmp3 - or $in1,$in1,$tmp2 -.Laligned_inp: -#else - ldl $in0,0+MSB($inp) # load input - ldl $in1,8+MSB($inp) - ldr $in0,0+LSB($inp) - ldr $in1,8+LSB($inp) -#endif - daddiu $inp,16 -#ifdef MIPSEB -# if defined(_MIPS_ARCH_MIPS64R2) - dsbh $in0,$in0 # byte swap - dsbh $in1,$in1 - dshd $in0,$in0 - dshd $in1,$in1 -# else - ori $tmp0,$zero,0xFF - dsll $tmp2,$tmp0,32 - or $tmp0,$tmp2 # 0x000000FF000000FF - - and $tmp1,$in0,$tmp0 # byte swap - and $tmp3,$in1,$tmp0 - dsrl $tmp2,$in0,24 - dsrl $tmp4,$in1,24 - dsll $tmp1,24 - dsll $tmp3,24 - and $tmp2,$tmp0 - and $tmp4,$tmp0 - dsll $tmp0,8 # 0x0000FF000000FF00 - or $tmp1,$tmp2 - or $tmp3,$tmp4 - and $tmp2,$in0,$tmp0 - and $tmp4,$in1,$tmp0 - dsrl $in0,8 - dsrl $in1,8 - dsll $tmp2,8 - dsll $tmp4,8 - and $in0,$tmp0 - and $in1,$tmp0 - or $tmp1,$tmp2 - or $tmp3,$tmp4 - or $in0,$tmp1 - or $in1,$tmp3 - dsrl $tmp1,$in0,32 - dsrl $tmp3,$in1,32 - dsll $in0,32 - dsll $in1,32 - or $in0,$tmp1 - or $in1,$tmp3 -# endif -#endif - dsrl $tmp1,$h2,2 # modulo-scheduled reduction - andi $h2,$h2,3 - dsll $tmp0,$tmp1,2 - - daddu $d0,$h0,$in0 # accumulate input - daddu $tmp1,$tmp0 - sltu $tmp0,$d0,$h0 - daddu $d0,$d0,$tmp1 # ... and residue - sltu $tmp1,$d0,$tmp1 - daddu $d1,$h1,$in1 - daddu $tmp0,$tmp1 - sltu $tmp1,$d1,$h1 - daddu $d1,$tmp0 - - dmultu ($r0,$d0) # h0*r0 - daddu $d2,$h2,$padbit - sltu $tmp0,$d1,$tmp0 - mflo ($h0,$r0,$d0) - mfhi ($h1,$r0,$d0) - - dmultu ($rs1,$d1) # h1*5*r1 - daddu $d2,$tmp1 - daddu $d2,$tmp0 - mflo ($tmp0,$rs1,$d1) - mfhi ($tmp1,$rs1,$d1) - - dmultu ($r1,$d0) # h0*r1 - mflo ($tmp2,$r1,$d0) - mfhi ($h2,$r1,$d0) - daddu $h0,$tmp0 - daddu $h1,$tmp1 - sltu $tmp0,$h0,$tmp0 - - dmultu ($r0,$d1) # h1*r0 - daddu $h1,$tmp0 - daddu $h1,$tmp2 - mflo ($tmp0,$r0,$d1) - mfhi ($tmp1,$r0,$d1) - - dmultu ($rs1,$d2) # h2*5*r1 - sltu $tmp2,$h1,$tmp2 - daddu $h2,$tmp2 - mflo ($tmp2,$rs1,$d2) - - dmultu ($r0,$d2) # h2*r0 - daddu $h1,$tmp0 - daddu $h2,$tmp1 - mflo ($tmp3,$r0,$d2) - sltu $tmp0,$h1,$tmp0 - daddu $h2,$tmp0 - - daddu $h1,$tmp2 - sltu $tmp2,$h1,$tmp2 - daddu $h2,$tmp2 - daddu $h2,$tmp3 - - bne $inp,$len,.Loop - - sd $h0,0($ctx) # store hash value - sd $h1,8($ctx) - sd $h2,16($ctx) - - .set noreorder -#if defined(_MIPS_ARCH_MIPS64R6) - ld $s7,56($sp) - ld $s6,48($sp) -#endif - ld $s5,40($sp) # epilogue - ld $s4,32($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue - ld $s3,24($sp) - ld $s2,16($sp) - ld $s1,8($sp) - ld $s0,0($sp) -___ -$code.=<<___; - jr $ra -#if defined(_MIPS_ARCH_MIPS64R6) - daddu $sp,8*8 -#else - daddu $sp,6*8 -#endif -.end poly1305_blocks_internal -___ -} -{ -my ($ctx,$mac,$nonce) = ($a0,$a1,$a2); - -$code.=<<___; -.align 5 -.globl poly1305_emit -.ent poly1305_emit -poly1305_emit: - .frame $sp,0,$ra - .set reorder - - ld $tmp2,16($ctx) - ld $tmp0,0($ctx) - ld $tmp1,8($ctx) - - li $in0,-4 # final reduction - dsrl $in1,$tmp2,2 - and $in0,$tmp2 - andi $tmp2,$tmp2,3 - daddu $in0,$in1 - - daddu $tmp0,$tmp0,$in0 - sltu $in1,$tmp0,$in0 - daddiu $in0,$tmp0,5 # compare to modulus - daddu $tmp1,$tmp1,$in1 - sltiu $tmp3,$in0,5 - sltu $tmp4,$tmp1,$in1 - daddu $in1,$tmp1,$tmp3 - daddu $tmp2,$tmp2,$tmp4 - sltu $tmp3,$in1,$tmp3 - daddu $tmp2,$tmp2,$tmp3 - - dsrl $tmp2,2 # see if it carried/borrowed - dsubu $tmp2,$zero,$tmp2 - - xor $in0,$tmp0 - xor $in1,$tmp1 - and $in0,$tmp2 - and $in1,$tmp2 - xor $in0,$tmp0 - xor $in1,$tmp1 - - lwu $tmp0,0($nonce) # load nonce - lwu $tmp1,4($nonce) - lwu $tmp2,8($nonce) - lwu $tmp3,12($nonce) - dsll $tmp1,32 - dsll $tmp3,32 - or $tmp0,$tmp1 - or $tmp2,$tmp3 - - daddu $in0,$tmp0 # accumulate nonce - daddu $in1,$tmp2 - sltu $tmp0,$in0,$tmp0 - daddu $in1,$tmp0 - - dsrl $tmp0,$in0,8 # write mac value - dsrl $tmp1,$in0,16 - dsrl $tmp2,$in0,24 - sb $in0,0($mac) - dsrl $tmp3,$in0,32 - sb $tmp0,1($mac) - dsrl $tmp0,$in0,40 - sb $tmp1,2($mac) - dsrl $tmp1,$in0,48 - sb $tmp2,3($mac) - dsrl $tmp2,$in0,56 - sb $tmp3,4($mac) - dsrl $tmp3,$in1,8 - sb $tmp0,5($mac) - dsrl $tmp0,$in1,16 - sb $tmp1,6($mac) - dsrl $tmp1,$in1,24 - sb $tmp2,7($mac) - - sb $in1,8($mac) - dsrl $tmp2,$in1,32 - sb $tmp3,9($mac) - dsrl $tmp3,$in1,40 - sb $tmp0,10($mac) - dsrl $tmp0,$in1,48 - sb $tmp1,11($mac) - dsrl $tmp1,$in1,56 - sb $tmp2,12($mac) - sb $tmp3,13($mac) - sb $tmp0,14($mac) - sb $tmp1,15($mac) - - jr $ra -.end poly1305_emit -.rdata -.asciiz "Poly1305 for MIPS64, CRYPTOGAMS by \@dot-asm" -.align 2 -___ -} -}}} else {{{ -###################################################################### -# 32-bit code path -# - -my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); -my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) = - ($a4,$a5,$a6,$a7,$at,$t0,$t1,$t2); - -$code.=<<___; -#if (defined(_MIPS_ARCH_MIPS32R3) || defined(_MIPS_ARCH_MIPS32R5) || \\ - defined(_MIPS_ARCH_MIPS32R6)) \\ - && !defined(_MIPS_ARCH_MIPS32R2) -# define _MIPS_ARCH_MIPS32R2 -#endif - -#if defined(_MIPS_ARCH_MIPS32R6) -# define multu(rs,rt) -# define mflo(rd,rs,rt) mulu rd,rs,rt -# define mfhi(rd,rs,rt) muhu rd,rs,rt -#else -# define multu(rs,rt) multu rs,rt -# define mflo(rd,rs,rt) mflo rd -# define mfhi(rd,rs,rt) mfhi rd -#endif - -#ifdef __KERNEL__ -# define poly1305_init poly1305_init_mips -# define poly1305_blocks poly1305_blocks_mips -# define poly1305_emit poly1305_emit_mips -#endif - -#if defined(__MIPSEB__) && !defined(MIPSEB) -# define MIPSEB -#endif - -#ifdef MIPSEB -# define MSB 0 -# define LSB 3 -#else -# define MSB 3 -# define LSB 0 -#endif - -.text -.set noat -.set noreorder - -.align 5 -.globl poly1305_init -.ent poly1305_init -poly1305_init: - .frame $sp,0,$ra - .set reorder - - sw $zero,0($ctx) - sw $zero,4($ctx) - sw $zero,8($ctx) - sw $zero,12($ctx) - sw $zero,16($ctx) - - beqz $inp,.Lno_key - -#if defined(_MIPS_ARCH_MIPS32R6) - andi $tmp0,$inp,3 # $inp % 4 - subu $inp,$inp,$tmp0 # align $inp - sll $tmp0,$tmp0,3 # byte to bit offset - lw $in0,0($inp) - lw $in1,4($inp) - lw $in2,8($inp) - lw $in3,12($inp) - beqz $tmp0,.Laligned_key - - lw $tmp2,16($inp) - subu $tmp1,$zero,$tmp0 -# ifdef MIPSEB - sllv $in0,$in0,$tmp0 - srlv $tmp3,$in1,$tmp1 - sllv $in1,$in1,$tmp0 - or $in0,$in0,$tmp3 - srlv $tmp3,$in2,$tmp1 - sllv $in2,$in2,$tmp0 - or $in1,$in1,$tmp3 - srlv $tmp3,$in3,$tmp1 - sllv $in3,$in3,$tmp0 - or $in2,$in2,$tmp3 - srlv $tmp2,$tmp2,$tmp1 - or $in3,$in3,$tmp2 -# else - srlv $in0,$in0,$tmp0 - sllv $tmp3,$in1,$tmp1 - srlv $in1,$in1,$tmp0 - or $in0,$in0,$tmp3 - sllv $tmp3,$in2,$tmp1 - srlv $in2,$in2,$tmp0 - or $in1,$in1,$tmp3 - sllv $tmp3,$in3,$tmp1 - srlv $in3,$in3,$tmp0 - or $in2,$in2,$tmp3 - sllv $tmp2,$tmp2,$tmp1 - or $in3,$in3,$tmp2 -# endif -.Laligned_key: -#else - lwl $in0,0+MSB($inp) - lwl $in1,4+MSB($inp) - lwl $in2,8+MSB($inp) - lwl $in3,12+MSB($inp) - lwr $in0,0+LSB($inp) - lwr $in1,4+LSB($inp) - lwr $in2,8+LSB($inp) - lwr $in3,12+LSB($inp) -#endif -#ifdef MIPSEB -# if defined(_MIPS_ARCH_MIPS32R2) - wsbh $in0,$in0 # byte swap - wsbh $in1,$in1 - wsbh $in2,$in2 - wsbh $in3,$in3 - rotr $in0,$in0,16 - rotr $in1,$in1,16 - rotr $in2,$in2,16 - rotr $in3,$in3,16 -# else - srl $tmp0,$in0,24 # byte swap - srl $tmp1,$in0,8 - andi $tmp2,$in0,0xFF00 - sll $in0,$in0,24 - andi $tmp1,0xFF00 - sll $tmp2,$tmp2,8 - or $in0,$tmp0 - srl $tmp0,$in1,24 - or $tmp1,$tmp2 - srl $tmp2,$in1,8 - or $in0,$tmp1 - andi $tmp1,$in1,0xFF00 - sll $in1,$in1,24 - andi $tmp2,0xFF00 - sll $tmp1,$tmp1,8 - or $in1,$tmp0 - srl $tmp0,$in2,24 - or $tmp2,$tmp1 - srl $tmp1,$in2,8 - or $in1,$tmp2 - andi $tmp2,$in2,0xFF00 - sll $in2,$in2,24 - andi $tmp1,0xFF00 - sll $tmp2,$tmp2,8 - or $in2,$tmp0 - srl $tmp0,$in3,24 - or $tmp1,$tmp2 - srl $tmp2,$in3,8 - or $in2,$tmp1 - andi $tmp1,$in3,0xFF00 - sll $in3,$in3,24 - andi $tmp2,0xFF00 - sll $tmp1,$tmp1,8 - or $in3,$tmp0 - or $tmp2,$tmp1 - or $in3,$tmp2 -# endif -#endif - lui $tmp0,0x0fff - ori $tmp0,0xffff # 0x0fffffff - and $in0,$in0,$tmp0 - subu $tmp0,3 # 0x0ffffffc - and $in1,$in1,$tmp0 - and $in2,$in2,$tmp0 - and $in3,$in3,$tmp0 - - sw $in0,20($ctx) - sw $in1,24($ctx) - sw $in2,28($ctx) - sw $in3,32($ctx) - - srl $tmp1,$in1,2 - srl $tmp2,$in2,2 - srl $tmp3,$in3,2 - addu $in1,$in1,$tmp1 # s1 = r1 + (r1 >> 2) - addu $in2,$in2,$tmp2 - addu $in3,$in3,$tmp3 - sw $in1,36($ctx) - sw $in2,40($ctx) - sw $in3,44($ctx) -.Lno_key: - li $v0,0 - jr $ra -.end poly1305_init -___ -{ -my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x00fff000" : "0x00ff0000"; - -my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) = - ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $s9,$s10,$s11); -my ($d0,$d1,$d2,$d3) = - ($a4,$a5,$a6,$a7); -my $shr = $t2; # used on R6 -my $one = $t2; # used on R2 - -$code.=<<___; -.globl poly1305_blocks -.align 5 -.ent poly1305_blocks -poly1305_blocks: - .frame $sp,16*4,$ra - .mask $SAVED_REGS_MASK,-4 - .set noreorder - subu $sp, $sp,4*12 - sw $s11,4*11($sp) - sw $s10,4*10($sp) - sw $s9, 4*9($sp) - sw $s8, 4*8($sp) - sw $s7, 4*7($sp) - sw $s6, 4*6($sp) - sw $s5, 4*5($sp) - sw $s4, 4*4($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue - sw $s3, 4*3($sp) - sw $s2, 4*2($sp) - sw $s1, 4*1($sp) - sw $s0, 4*0($sp) -___ -$code.=<<___; - .set reorder - - srl $len,4 # number of complete blocks - li $one,1 - beqz $len,.Labort - -#if defined(_MIPS_ARCH_MIPS32R6) - andi $shr,$inp,3 - subu $inp,$inp,$shr # align $inp - sll $shr,$shr,3 # byte to bit offset -#endif - - lw $h0,0($ctx) # load hash value - lw $h1,4($ctx) - lw $h2,8($ctx) - lw $h3,12($ctx) - lw $h4,16($ctx) - - lw $r0,20($ctx) # load key - lw $r1,24($ctx) - lw $r2,28($ctx) - lw $r3,32($ctx) - lw $rs1,36($ctx) - lw $rs2,40($ctx) - lw $rs3,44($ctx) - - sll $len,4 - addu $len,$len,$inp # end of buffer - b .Loop - -.align 4 -.Loop: -#if defined(_MIPS_ARCH_MIPS32R6) - lw $d0,0($inp) # load input - lw $d1,4($inp) - lw $d2,8($inp) - lw $d3,12($inp) - beqz $shr,.Laligned_inp - - lw $t0,16($inp) - subu $t1,$zero,$shr -# ifdef MIPSEB - sllv $d0,$d0,$shr - srlv $at,$d1,$t1 - sllv $d1,$d1,$shr - or $d0,$d0,$at - srlv $at,$d2,$t1 - sllv $d2,$d2,$shr - or $d1,$d1,$at - srlv $at,$d3,$t1 - sllv $d3,$d3,$shr - or $d2,$d2,$at - srlv $t0,$t0,$t1 - or $d3,$d3,$t0 -# else - srlv $d0,$d0,$shr - sllv $at,$d1,$t1 - srlv $d1,$d1,$shr - or $d0,$d0,$at - sllv $at,$d2,$t1 - srlv $d2,$d2,$shr - or $d1,$d1,$at - sllv $at,$d3,$t1 - srlv $d3,$d3,$shr - or $d2,$d2,$at - sllv $t0,$t0,$t1 - or $d3,$d3,$t0 -# endif -.Laligned_inp: -#else - lwl $d0,0+MSB($inp) # load input - lwl $d1,4+MSB($inp) - lwl $d2,8+MSB($inp) - lwl $d3,12+MSB($inp) - lwr $d0,0+LSB($inp) - lwr $d1,4+LSB($inp) - lwr $d2,8+LSB($inp) - lwr $d3,12+LSB($inp) -#endif -#ifdef MIPSEB -# if defined(_MIPS_ARCH_MIPS32R2) - wsbh $d0,$d0 # byte swap - wsbh $d1,$d1 - wsbh $d2,$d2 - wsbh $d3,$d3 - rotr $d0,$d0,16 - rotr $d1,$d1,16 - rotr $d2,$d2,16 - rotr $d3,$d3,16 -# else - srl $at,$d0,24 # byte swap - srl $t0,$d0,8 - andi $t1,$d0,0xFF00 - sll $d0,$d0,24 - andi $t0,0xFF00 - sll $t1,$t1,8 - or $d0,$at - srl $at,$d1,24 - or $t0,$t1 - srl $t1,$d1,8 - or $d0,$t0 - andi $t0,$d1,0xFF00 - sll $d1,$d1,24 - andi $t1,0xFF00 - sll $t0,$t0,8 - or $d1,$at - srl $at,$d2,24 - or $t1,$t0 - srl $t0,$d2,8 - or $d1,$t1 - andi $t1,$d2,0xFF00 - sll $d2,$d2,24 - andi $t0,0xFF00 - sll $t1,$t1,8 - or $d2,$at - srl $at,$d3,24 - or $t0,$t1 - srl $t1,$d3,8 - or $d2,$t0 - andi $t0,$d3,0xFF00 - sll $d3,$d3,24 - andi $t1,0xFF00 - sll $t0,$t0,8 - or $d3,$at - or $t1,$t0 - or $d3,$t1 -# endif -#endif - srl $t0,$h4,2 # modulo-scheduled reduction - andi $h4,$h4,3 - sll $at,$t0,2 - - addu $d0,$d0,$h0 # accumulate input - addu $t0,$t0,$at - sltu $h0,$d0,$h0 - addu $d0,$d0,$t0 # ... and residue - sltu $at,$d0,$t0 - - addu $d1,$d1,$h1 - addu $h0,$h0,$at # carry - sltu $h1,$d1,$h1 - addu $d1,$d1,$h0 - sltu $h0,$d1,$h0 - - addu $d2,$d2,$h2 - addu $h1,$h1,$h0 # carry - sltu $h2,$d2,$h2 - addu $d2,$d2,$h1 - sltu $h1,$d2,$h1 - - addu $d3,$d3,$h3 - addu $h2,$h2,$h1 # carry - sltu $h3,$d3,$h3 - addu $d3,$d3,$h2 - -#if defined(_MIPS_ARCH_MIPS32R2) && !defined(_MIPS_ARCH_MIPS32R6) - multu $r0,$d0 # d0*r0 - sltu $h2,$d3,$h2 - maddu $rs3,$d1 # d1*s3 - addu $h3,$h3,$h2 # carry - maddu $rs2,$d2 # d2*s2 - addu $h4,$h4,$padbit - maddu $rs1,$d3 # d3*s1 - addu $h4,$h4,$h3 - mfhi $at - mflo $h0 - - multu $r1,$d0 # d0*r1 - maddu $r0,$d1 # d1*r0 - maddu $rs3,$d2 # d2*s3 - maddu $rs2,$d3 # d3*s2 - maddu $rs1,$h4 # h4*s1 - maddu $at,$one # hi*1 - mfhi $at - mflo $h1 - - multu $r2,$d0 # d0*r2 - maddu $r1,$d1 # d1*r1 - maddu $r0,$d2 # d2*r0 - maddu $rs3,$d3 # d3*s3 - maddu $rs2,$h4 # h4*s2 - maddu $at,$one # hi*1 - mfhi $at - mflo $h2 - - mul $t0,$r0,$h4 # h4*r0 - - multu $r3,$d0 # d0*r3 - maddu $r2,$d1 # d1*r2 - maddu $r1,$d2 # d2*r1 - maddu $r0,$d3 # d3*r0 - maddu $rs3,$h4 # h4*s3 - maddu $at,$one # hi*1 - mfhi $at - mflo $h3 - - addiu $inp,$inp,16 - - addu $h4,$t0,$at -#else - multu ($r0,$d0) # d0*r0 - mflo ($h0,$r0,$d0) - mfhi ($h1,$r0,$d0) - - sltu $h2,$d3,$h2 - addu $h3,$h3,$h2 # carry - - multu ($rs3,$d1) # d1*s3 - mflo ($at,$rs3,$d1) - mfhi ($t0,$rs3,$d1) - - addu $h4,$h4,$padbit - addiu $inp,$inp,16 - addu $h4,$h4,$h3 - - multu ($rs2,$d2) # d2*s2 - mflo ($a3,$rs2,$d2) - mfhi ($t1,$rs2,$d2) - addu $h0,$h0,$at - addu $h1,$h1,$t0 - multu ($rs1,$d3) # d3*s1 - sltu $at,$h0,$at - addu $h1,$h1,$at - - mflo ($at,$rs1,$d3) - mfhi ($t0,$rs1,$d3) - addu $h0,$h0,$a3 - addu $h1,$h1,$t1 - multu ($r1,$d0) # d0*r1 - sltu $a3,$h0,$a3 - addu $h1,$h1,$a3 - - - mflo ($a3,$r1,$d0) - mfhi ($h2,$r1,$d0) - addu $h0,$h0,$at - addu $h1,$h1,$t0 - multu ($r0,$d1) # d1*r0 - sltu $at,$h0,$at - addu $h1,$h1,$at - - mflo ($at,$r0,$d1) - mfhi ($t0,$r0,$d1) - addu $h1,$h1,$a3 - sltu $a3,$h1,$a3 - multu ($rs3,$d2) # d2*s3 - addu $h2,$h2,$a3 - - mflo ($a3,$rs3,$d2) - mfhi ($t1,$rs3,$d2) - addu $h1,$h1,$at - addu $h2,$h2,$t0 - multu ($rs2,$d3) # d3*s2 - sltu $at,$h1,$at - addu $h2,$h2,$at - - mflo ($at,$rs2,$d3) - mfhi ($t0,$rs2,$d3) - addu $h1,$h1,$a3 - addu $h2,$h2,$t1 - multu ($rs1,$h4) # h4*s1 - sltu $a3,$h1,$a3 - addu $h2,$h2,$a3 - - mflo ($a3,$rs1,$h4) - addu $h1,$h1,$at - addu $h2,$h2,$t0 - multu ($r2,$d0) # d0*r2 - sltu $at,$h1,$at - addu $h2,$h2,$at - - - mflo ($at,$r2,$d0) - mfhi ($h3,$r2,$d0) - addu $h1,$h1,$a3 - sltu $a3,$h1,$a3 - multu ($r1,$d1) # d1*r1 - addu $h2,$h2,$a3 - - mflo ($a3,$r1,$d1) - mfhi ($t1,$r1,$d1) - addu $h2,$h2,$at - sltu $at,$h2,$at - multu ($r0,$d2) # d2*r0 - addu $h3,$h3,$at - - mflo ($at,$r0,$d2) - mfhi ($t0,$r0,$d2) - addu $h2,$h2,$a3 - addu $h3,$h3,$t1 - multu ($rs3,$d3) # d3*s3 - sltu $a3,$h2,$a3 - addu $h3,$h3,$a3 - - mflo ($a3,$rs3,$d3) - mfhi ($t1,$rs3,$d3) - addu $h2,$h2,$at - addu $h3,$h3,$t0 - multu ($rs2,$h4) # h4*s2 - sltu $at,$h2,$at - addu $h3,$h3,$at - - mflo ($at,$rs2,$h4) - addu $h2,$h2,$a3 - addu $h3,$h3,$t1 - multu ($r3,$d0) # d0*r3 - sltu $a3,$h2,$a3 - addu $h3,$h3,$a3 - - - mflo ($a3,$r3,$d0) - mfhi ($t1,$r3,$d0) - addu $h2,$h2,$at - sltu $at,$h2,$at - multu ($r2,$d1) # d1*r2 - addu $h3,$h3,$at - - mflo ($at,$r2,$d1) - mfhi ($t0,$r2,$d1) - addu $h3,$h3,$a3 - sltu $a3,$h3,$a3 - multu ($r0,$d3) # d3*r0 - addu $t1,$t1,$a3 - - mflo ($a3,$r0,$d3) - mfhi ($d3,$r0,$d3) - addu $h3,$h3,$at - addu $t1,$t1,$t0 - multu ($r1,$d2) # d2*r1 - sltu $at,$h3,$at - addu $t1,$t1,$at - - mflo ($at,$r1,$d2) - mfhi ($t0,$r1,$d2) - addu $h3,$h3,$a3 - addu $t1,$t1,$d3 - multu ($rs3,$h4) # h4*s3 - sltu $a3,$h3,$a3 - addu $t1,$t1,$a3 - - mflo ($a3,$rs3,$h4) - addu $h3,$h3,$at - addu $t1,$t1,$t0 - multu ($r0,$h4) # h4*r0 - sltu $at,$h3,$at - addu $t1,$t1,$at - - - mflo ($h4,$r0,$h4) - addu $h3,$h3,$a3 - sltu $a3,$h3,$a3 - addu $t1,$t1,$a3 - addu $h4,$h4,$t1 - - li $padbit,1 # if we loop, padbit is 1 -#endif - bne $inp,$len,.Loop - - sw $h0,0($ctx) # store hash value - sw $h1,4($ctx) - sw $h2,8($ctx) - sw $h3,12($ctx) - sw $h4,16($ctx) - - .set noreorder -.Labort: - lw $s11,4*11($sp) - lw $s10,4*10($sp) - lw $s9, 4*9($sp) - lw $s8, 4*8($sp) - lw $s7, 4*7($sp) - lw $s6, 4*6($sp) - lw $s5, 4*5($sp) - lw $s4, 4*4($sp) -___ -$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue - lw $s3, 4*3($sp) - lw $s2, 4*2($sp) - lw $s1, 4*1($sp) - lw $s0, 4*0($sp) -___ -$code.=<<___; - jr $ra - addu $sp,$sp,4*12 -.end poly1305_blocks -___ -} -{ -my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3); - -$code.=<<___; -.align 5 -.globl poly1305_emit -.ent poly1305_emit -poly1305_emit: - .frame $sp,0,$ra - .set reorder - - lw $tmp4,16($ctx) - lw $tmp0,0($ctx) - lw $tmp1,4($ctx) - lw $tmp2,8($ctx) - lw $tmp3,12($ctx) - - li $in0,-4 # final reduction - srl $ctx,$tmp4,2 - and $in0,$in0,$tmp4 - andi $tmp4,$tmp4,3 - addu $ctx,$ctx,$in0 - - addu $tmp0,$tmp0,$ctx - sltu $ctx,$tmp0,$ctx - addiu $in0,$tmp0,5 # compare to modulus - addu $tmp1,$tmp1,$ctx - sltiu $in1,$in0,5 - sltu $ctx,$tmp1,$ctx - addu $in1,$in1,$tmp1 - addu $tmp2,$tmp2,$ctx - sltu $in2,$in1,$tmp1 - sltu $ctx,$tmp2,$ctx - addu $in2,$in2,$tmp2 - addu $tmp3,$tmp3,$ctx - sltu $in3,$in2,$tmp2 - sltu $ctx,$tmp3,$ctx - addu $in3,$in3,$tmp3 - addu $tmp4,$tmp4,$ctx - sltu $ctx,$in3,$tmp3 - addu $ctx,$tmp4 - - srl $ctx,2 # see if it carried/borrowed - subu $ctx,$zero,$ctx - - xor $in0,$tmp0 - xor $in1,$tmp1 - xor $in2,$tmp2 - xor $in3,$tmp3 - and $in0,$ctx - and $in1,$ctx - and $in2,$ctx - and $in3,$ctx - xor $in0,$tmp0 - xor $in1,$tmp1 - xor $in2,$tmp2 - xor $in3,$tmp3 - - lw $tmp0,0($nonce) # load nonce - lw $tmp1,4($nonce) - lw $tmp2,8($nonce) - lw $tmp3,12($nonce) - - addu $in0,$tmp0 # accumulate nonce - sltu $ctx,$in0,$tmp0 - - addu $in1,$tmp1 - sltu $tmp1,$in1,$tmp1 - addu $in1,$ctx - sltu $ctx,$in1,$ctx - addu $ctx,$tmp1 - - addu $in2,$tmp2 - sltu $tmp2,$in2,$tmp2 - addu $in2,$ctx - sltu $ctx,$in2,$ctx - addu $ctx,$tmp2 - - addu $in3,$tmp3 - addu $in3,$ctx - - srl $tmp0,$in0,8 # write mac value - srl $tmp1,$in0,16 - srl $tmp2,$in0,24 - sb $in0, 0($mac) - sb $tmp0,1($mac) - srl $tmp0,$in1,8 - sb $tmp1,2($mac) - srl $tmp1,$in1,16 - sb $tmp2,3($mac) - srl $tmp2,$in1,24 - sb $in1, 4($mac) - sb $tmp0,5($mac) - srl $tmp0,$in2,8 - sb $tmp1,6($mac) - srl $tmp1,$in2,16 - sb $tmp2,7($mac) - srl $tmp2,$in2,24 - sb $in2, 8($mac) - sb $tmp0,9($mac) - srl $tmp0,$in3,8 - sb $tmp1,10($mac) - srl $tmp1,$in3,16 - sb $tmp2,11($mac) - srl $tmp2,$in3,24 - sb $in3, 12($mac) - sb $tmp0,13($mac) - sb $tmp1,14($mac) - sb $tmp2,15($mac) - - jr $ra -.end poly1305_emit -.rdata -.asciiz "Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm" -.align 2 -___ -} -}}} - -$output=pop and open STDOUT,">$output"; -print $code; -close STDOUT; |