diff options
Diffstat (limited to 'arch/mips/crypto/chacha-core.S')
-rw-r--r-- | arch/mips/crypto/chacha-core.S | 497 |
1 files changed, 0 insertions, 497 deletions
diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S deleted file mode 100644 index 5755f69cfe00..000000000000 --- a/arch/mips/crypto/chacha-core.S +++ /dev/null @@ -1,497 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -/* - * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved. - * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. - */ - -#define MASK_U32 0x3c -#define CHACHA20_BLOCK_SIZE 64 -#define STACK_SIZE 32 - -#define X0 $t0 -#define X1 $t1 -#define X2 $t2 -#define X3 $t3 -#define X4 $t4 -#define X5 $t5 -#define X6 $t6 -#define X7 $t7 -#define X8 $t8 -#define X9 $t9 -#define X10 $v1 -#define X11 $s6 -#define X12 $s5 -#define X13 $s4 -#define X14 $s3 -#define X15 $s2 -/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */ -#define T0 $s1 -#define T1 $s0 -#define T(n) T ## n -#define X(n) X ## n - -/* Input arguments */ -#define STATE $a0 -#define OUT $a1 -#define IN $a2 -#define BYTES $a3 - -/* Output argument */ -/* NONCE[0] is kept in a register and not in memory. - * We don't want to touch original value in memory. - * Must be incremented every loop iteration. - */ -#define NONCE_0 $v0 - -/* SAVED_X and SAVED_CA are set in the jump table. - * Use regs which are overwritten on exit else we don't leak clear data. - * They are used to handling the last bytes which are not multiple of 4. - */ -#define SAVED_X X15 -#define SAVED_CA $s7 - -#define IS_UNALIGNED $s7 - -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#define MSB 0 -#define LSB 3 -#define ROTx rotl -#define ROTR(n) rotr n, 24 -#define CPU_TO_LE32(n) \ - wsbh n; \ - rotr n, 16; -#else -#define MSB 3 -#define LSB 0 -#define ROTx rotr -#define CPU_TO_LE32(n) -#define ROTR(n) -#endif - -#define FOR_EACH_WORD(x) \ - x( 0); \ - x( 1); \ - x( 2); \ - x( 3); \ - x( 4); \ - x( 5); \ - x( 6); \ - x( 7); \ - x( 8); \ - x( 9); \ - x(10); \ - x(11); \ - x(12); \ - x(13); \ - x(14); \ - x(15); - -#define FOR_EACH_WORD_REV(x) \ - x(15); \ - x(14); \ - x(13); \ - x(12); \ - x(11); \ - x(10); \ - x( 9); \ - x( 8); \ - x( 7); \ - x( 6); \ - x( 5); \ - x( 4); \ - x( 3); \ - x( 2); \ - x( 1); \ - x( 0); - -#define PLUS_ONE_0 1 -#define PLUS_ONE_1 2 -#define PLUS_ONE_2 3 -#define PLUS_ONE_3 4 -#define PLUS_ONE_4 5 -#define PLUS_ONE_5 6 -#define PLUS_ONE_6 7 -#define PLUS_ONE_7 8 -#define PLUS_ONE_8 9 -#define PLUS_ONE_9 10 -#define PLUS_ONE_10 11 -#define PLUS_ONE_11 12 -#define PLUS_ONE_12 13 -#define PLUS_ONE_13 14 -#define PLUS_ONE_14 15 -#define PLUS_ONE_15 16 -#define PLUS_ONE(x) PLUS_ONE_ ## x -#define _CONCAT3(a,b,c) a ## b ## c -#define CONCAT3(a,b,c) _CONCAT3(a,b,c) - -#define STORE_UNALIGNED(x) \ -CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ - .if (x != 12); \ - lw T0, (x*4)(STATE); \ - .endif; \ - lwl T1, (x*4)+MSB ## (IN); \ - lwr T1, (x*4)+LSB ## (IN); \ - .if (x == 12); \ - addu X ## x, NONCE_0; \ - .else; \ - addu X ## x, T0; \ - .endif; \ - CPU_TO_LE32(X ## x); \ - xor X ## x, T1; \ - swl X ## x, (x*4)+MSB ## (OUT); \ - swr X ## x, (x*4)+LSB ## (OUT); - -#define STORE_ALIGNED(x) \ -CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ - .if (x != 12); \ - lw T0, (x*4)(STATE); \ - .endif; \ - lw T1, (x*4) ## (IN); \ - .if (x == 12); \ - addu X ## x, NONCE_0; \ - .else; \ - addu X ## x, T0; \ - .endif; \ - CPU_TO_LE32(X ## x); \ - xor X ## x, T1; \ - sw X ## x, (x*4) ## (OUT); - -/* Jump table macro. - * Used for setup and handling the last bytes, which are not multiple of 4. - * X15 is free to store Xn - * Every jumptable entry must be equal in size. - */ -#define JMPTBL_ALIGNED(x) \ -.Lchacha_mips_jmptbl_aligned_ ## x: ; \ - .set noreorder; \ - b .Lchacha_mips_xor_aligned_ ## x ## _b; \ - .if (x == 12); \ - addu SAVED_X, X ## x, NONCE_0; \ - .else; \ - addu SAVED_X, X ## x, SAVED_CA; \ - .endif; \ - .set reorder - -#define JMPTBL_UNALIGNED(x) \ -.Lchacha_mips_jmptbl_unaligned_ ## x: ; \ - .set noreorder; \ - b .Lchacha_mips_xor_unaligned_ ## x ## _b; \ - .if (x == 12); \ - addu SAVED_X, X ## x, NONCE_0; \ - .else; \ - addu SAVED_X, X ## x, SAVED_CA; \ - .endif; \ - .set reorder - -#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \ - addu X(A), X(K); \ - addu X(B), X(L); \ - addu X(C), X(M); \ - addu X(D), X(N); \ - xor X(V), X(A); \ - xor X(W), X(B); \ - xor X(Y), X(C); \ - xor X(Z), X(D); \ - rotl X(V), S; \ - rotl X(W), S; \ - rotl X(Y), S; \ - rotl X(Z), S; - -.text -.set reorder -.set noat -.globl chacha_crypt_arch -.ent chacha_crypt_arch -chacha_crypt_arch: - .frame $sp, STACK_SIZE, $ra - - /* Load number of rounds */ - lw $at, 16($sp) - - addiu $sp, -STACK_SIZE - - /* Return bytes = 0. */ - beqz BYTES, .Lchacha_mips_end - - lw NONCE_0, 48(STATE) - - /* Save s0-s7 */ - sw $s0, 0($sp) - sw $s1, 4($sp) - sw $s2, 8($sp) - sw $s3, 12($sp) - sw $s4, 16($sp) - sw $s5, 20($sp) - sw $s6, 24($sp) - sw $s7, 28($sp) - - /* Test IN or OUT is unaligned. - * IS_UNALIGNED = ( IN | OUT ) & 0x00000003 - */ - or IS_UNALIGNED, IN, OUT - andi IS_UNALIGNED, 0x3 - - b .Lchacha_rounds_start - -.align 4 -.Loop_chacha_rounds: - addiu IN, CHACHA20_BLOCK_SIZE - addiu OUT, CHACHA20_BLOCK_SIZE - addiu NONCE_0, 1 - -.Lchacha_rounds_start: - lw X0, 0(STATE) - lw X1, 4(STATE) - lw X2, 8(STATE) - lw X3, 12(STATE) - - lw X4, 16(STATE) - lw X5, 20(STATE) - lw X6, 24(STATE) - lw X7, 28(STATE) - lw X8, 32(STATE) - lw X9, 36(STATE) - lw X10, 40(STATE) - lw X11, 44(STATE) - - move X12, NONCE_0 - lw X13, 52(STATE) - lw X14, 56(STATE) - lw X15, 60(STATE) - -.Loop_chacha_xor_rounds: - addiu $at, -2 - AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); - AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); - AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); - AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); - AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); - AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); - AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); - AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); - bnez $at, .Loop_chacha_xor_rounds - - addiu BYTES, -(CHACHA20_BLOCK_SIZE) - - /* Is data src/dst unaligned? Jump */ - bnez IS_UNALIGNED, .Loop_chacha_unaligned - - /* Set number rounds here to fill delayslot. */ - lw $at, (STACK_SIZE+16)($sp) - - /* BYTES < 0, it has no full block. */ - bltz BYTES, .Lchacha_mips_no_full_block_aligned - - FOR_EACH_WORD_REV(STORE_ALIGNED) - - /* BYTES > 0? Loop again. */ - bgtz BYTES, .Loop_chacha_rounds - - /* Place this here to fill delay slot */ - addiu NONCE_0, 1 - - /* BYTES < 0? Handle last bytes */ - bltz BYTES, .Lchacha_mips_xor_bytes - -.Lchacha_mips_xor_done: - /* Restore used registers */ - lw $s0, 0($sp) - lw $s1, 4($sp) - lw $s2, 8($sp) - lw $s3, 12($sp) - lw $s4, 16($sp) - lw $s5, 20($sp) - lw $s6, 24($sp) - lw $s7, 28($sp) - - /* Write NONCE_0 back to right location in state */ - sw NONCE_0, 48(STATE) - -.Lchacha_mips_end: - addiu $sp, STACK_SIZE - jr $ra - -.Lchacha_mips_no_full_block_aligned: - /* Restore the offset on BYTES */ - addiu BYTES, CHACHA20_BLOCK_SIZE - - /* Get number of full WORDS */ - andi $at, BYTES, MASK_U32 - - /* Load upper half of jump table addr */ - lui T0, %hi(.Lchacha_mips_jmptbl_aligned_0) - - /* Calculate lower half jump table offset */ - ins T0, $at, 1, 6 - - /* Add offset to STATE */ - addu T1, STATE, $at - - /* Add lower half jump table addr */ - addiu T0, %lo(.Lchacha_mips_jmptbl_aligned_0) - - /* Read value from STATE */ - lw SAVED_CA, 0(T1) - - /* Store remaining bytecounter as negative value */ - subu BYTES, $at, BYTES - - jr T0 - - /* Jump table */ - FOR_EACH_WORD(JMPTBL_ALIGNED) - - -.Loop_chacha_unaligned: - /* Set number rounds here to fill delayslot. */ - lw $at, (STACK_SIZE+16)($sp) - - /* BYTES > 0, it has no full block. */ - bltz BYTES, .Lchacha_mips_no_full_block_unaligned - - FOR_EACH_WORD_REV(STORE_UNALIGNED) - - /* BYTES > 0? Loop again. */ - bgtz BYTES, .Loop_chacha_rounds - - /* Write NONCE_0 back to right location in state */ - sw NONCE_0, 48(STATE) - - .set noreorder - /* Fall through to byte handling */ - bgez BYTES, .Lchacha_mips_xor_done -.Lchacha_mips_xor_unaligned_0_b: -.Lchacha_mips_xor_aligned_0_b: - /* Place this here to fill delay slot */ - addiu NONCE_0, 1 - .set reorder - -.Lchacha_mips_xor_bytes: - addu IN, $at - addu OUT, $at - /* First byte */ - lbu T1, 0(IN) - addiu $at, BYTES, 1 - CPU_TO_LE32(SAVED_X) - ROTR(SAVED_X) - xor T1, SAVED_X - sb T1, 0(OUT) - beqz $at, .Lchacha_mips_xor_done - /* Second byte */ - lbu T1, 1(IN) - addiu $at, BYTES, 2 - ROTx SAVED_X, 8 - xor T1, SAVED_X - sb T1, 1(OUT) - beqz $at, .Lchacha_mips_xor_done - /* Third byte */ - lbu T1, 2(IN) - ROTx SAVED_X, 8 - xor T1, SAVED_X - sb T1, 2(OUT) - b .Lchacha_mips_xor_done - -.Lchacha_mips_no_full_block_unaligned: - /* Restore the offset on BYTES */ - addiu BYTES, CHACHA20_BLOCK_SIZE - - /* Get number of full WORDS */ - andi $at, BYTES, MASK_U32 - - /* Load upper half of jump table addr */ - lui T0, %hi(.Lchacha_mips_jmptbl_unaligned_0) - - /* Calculate lower half jump table offset */ - ins T0, $at, 1, 6 - - /* Add offset to STATE */ - addu T1, STATE, $at - - /* Add lower half jump table addr */ - addiu T0, %lo(.Lchacha_mips_jmptbl_unaligned_0) - - /* Read value from STATE */ - lw SAVED_CA, 0(T1) - - /* Store remaining bytecounter as negative value */ - subu BYTES, $at, BYTES - - jr T0 - - /* Jump table */ - FOR_EACH_WORD(JMPTBL_UNALIGNED) -.end chacha_crypt_arch -.set at - -/* Input arguments - * STATE $a0 - * OUT $a1 - * NROUND $a2 - */ - -#undef X12 -#undef X13 -#undef X14 -#undef X15 - -#define X12 $a3 -#define X13 $at -#define X14 $v0 -#define X15 STATE - -.set noat -.globl hchacha_block_arch -.ent hchacha_block_arch -hchacha_block_arch: - .frame $sp, STACK_SIZE, $ra - - addiu $sp, -STACK_SIZE - - /* Save X11(s6) */ - sw X11, 0($sp) - - lw X0, 0(STATE) - lw X1, 4(STATE) - lw X2, 8(STATE) - lw X3, 12(STATE) - lw X4, 16(STATE) - lw X5, 20(STATE) - lw X6, 24(STATE) - lw X7, 28(STATE) - lw X8, 32(STATE) - lw X9, 36(STATE) - lw X10, 40(STATE) - lw X11, 44(STATE) - lw X12, 48(STATE) - lw X13, 52(STATE) - lw X14, 56(STATE) - lw X15, 60(STATE) - -.Loop_hchacha_xor_rounds: - addiu $a2, -2 - AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); - AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); - AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); - AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); - AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); - AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); - AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); - AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); - bnez $a2, .Loop_hchacha_xor_rounds - - /* Restore used register */ - lw X11, 0($sp) - - sw X0, 0(OUT) - sw X1, 4(OUT) - sw X2, 8(OUT) - sw X3, 12(OUT) - sw X12, 16(OUT) - sw X13, 20(OUT) - sw X14, 24(OUT) - sw X15, 28(OUT) - - addiu $sp, STACK_SIZE - jr $ra -.end hchacha_block_arch -.set at |