summaryrefslogtreecommitdiff
path: root/arch/mips/crypto/chacha-core.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/mips/crypto/chacha-core.S')
-rw-r--r--arch/mips/crypto/chacha-core.S497
1 files changed, 0 insertions, 497 deletions
diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S
deleted file mode 100644
index 5755f69cfe00..000000000000
--- a/arch/mips/crypto/chacha-core.S
+++ /dev/null
@@ -1,497 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR MIT */
-/*
- * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved.
- * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
- */
-
-#define MASK_U32 0x3c
-#define CHACHA20_BLOCK_SIZE 64
-#define STACK_SIZE 32
-
-#define X0 $t0
-#define X1 $t1
-#define X2 $t2
-#define X3 $t3
-#define X4 $t4
-#define X5 $t5
-#define X6 $t6
-#define X7 $t7
-#define X8 $t8
-#define X9 $t9
-#define X10 $v1
-#define X11 $s6
-#define X12 $s5
-#define X13 $s4
-#define X14 $s3
-#define X15 $s2
-/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */
-#define T0 $s1
-#define T1 $s0
-#define T(n) T ## n
-#define X(n) X ## n
-
-/* Input arguments */
-#define STATE $a0
-#define OUT $a1
-#define IN $a2
-#define BYTES $a3
-
-/* Output argument */
-/* NONCE[0] is kept in a register and not in memory.
- * We don't want to touch original value in memory.
- * Must be incremented every loop iteration.
- */
-#define NONCE_0 $v0
-
-/* SAVED_X and SAVED_CA are set in the jump table.
- * Use regs which are overwritten on exit else we don't leak clear data.
- * They are used to handling the last bytes which are not multiple of 4.
- */
-#define SAVED_X X15
-#define SAVED_CA $s7
-
-#define IS_UNALIGNED $s7
-
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-#define MSB 0
-#define LSB 3
-#define ROTx rotl
-#define ROTR(n) rotr n, 24
-#define CPU_TO_LE32(n) \
- wsbh n; \
- rotr n, 16;
-#else
-#define MSB 3
-#define LSB 0
-#define ROTx rotr
-#define CPU_TO_LE32(n)
-#define ROTR(n)
-#endif
-
-#define FOR_EACH_WORD(x) \
- x( 0); \
- x( 1); \
- x( 2); \
- x( 3); \
- x( 4); \
- x( 5); \
- x( 6); \
- x( 7); \
- x( 8); \
- x( 9); \
- x(10); \
- x(11); \
- x(12); \
- x(13); \
- x(14); \
- x(15);
-
-#define FOR_EACH_WORD_REV(x) \
- x(15); \
- x(14); \
- x(13); \
- x(12); \
- x(11); \
- x(10); \
- x( 9); \
- x( 8); \
- x( 7); \
- x( 6); \
- x( 5); \
- x( 4); \
- x( 3); \
- x( 2); \
- x( 1); \
- x( 0);
-
-#define PLUS_ONE_0 1
-#define PLUS_ONE_1 2
-#define PLUS_ONE_2 3
-#define PLUS_ONE_3 4
-#define PLUS_ONE_4 5
-#define PLUS_ONE_5 6
-#define PLUS_ONE_6 7
-#define PLUS_ONE_7 8
-#define PLUS_ONE_8 9
-#define PLUS_ONE_9 10
-#define PLUS_ONE_10 11
-#define PLUS_ONE_11 12
-#define PLUS_ONE_12 13
-#define PLUS_ONE_13 14
-#define PLUS_ONE_14 15
-#define PLUS_ONE_15 16
-#define PLUS_ONE(x) PLUS_ONE_ ## x
-#define _CONCAT3(a,b,c) a ## b ## c
-#define CONCAT3(a,b,c) _CONCAT3(a,b,c)
-
-#define STORE_UNALIGNED(x) \
-CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
- .if (x != 12); \
- lw T0, (x*4)(STATE); \
- .endif; \
- lwl T1, (x*4)+MSB ## (IN); \
- lwr T1, (x*4)+LSB ## (IN); \
- .if (x == 12); \
- addu X ## x, NONCE_0; \
- .else; \
- addu X ## x, T0; \
- .endif; \
- CPU_TO_LE32(X ## x); \
- xor X ## x, T1; \
- swl X ## x, (x*4)+MSB ## (OUT); \
- swr X ## x, (x*4)+LSB ## (OUT);
-
-#define STORE_ALIGNED(x) \
-CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
- .if (x != 12); \
- lw T0, (x*4)(STATE); \
- .endif; \
- lw T1, (x*4) ## (IN); \
- .if (x == 12); \
- addu X ## x, NONCE_0; \
- .else; \
- addu X ## x, T0; \
- .endif; \
- CPU_TO_LE32(X ## x); \
- xor X ## x, T1; \
- sw X ## x, (x*4) ## (OUT);
-
-/* Jump table macro.
- * Used for setup and handling the last bytes, which are not multiple of 4.
- * X15 is free to store Xn
- * Every jumptable entry must be equal in size.
- */
-#define JMPTBL_ALIGNED(x) \
-.Lchacha_mips_jmptbl_aligned_ ## x: ; \
- .set noreorder; \
- b .Lchacha_mips_xor_aligned_ ## x ## _b; \
- .if (x == 12); \
- addu SAVED_X, X ## x, NONCE_0; \
- .else; \
- addu SAVED_X, X ## x, SAVED_CA; \
- .endif; \
- .set reorder
-
-#define JMPTBL_UNALIGNED(x) \
-.Lchacha_mips_jmptbl_unaligned_ ## x: ; \
- .set noreorder; \
- b .Lchacha_mips_xor_unaligned_ ## x ## _b; \
- .if (x == 12); \
- addu SAVED_X, X ## x, NONCE_0; \
- .else; \
- addu SAVED_X, X ## x, SAVED_CA; \
- .endif; \
- .set reorder
-
-#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \
- addu X(A), X(K); \
- addu X(B), X(L); \
- addu X(C), X(M); \
- addu X(D), X(N); \
- xor X(V), X(A); \
- xor X(W), X(B); \
- xor X(Y), X(C); \
- xor X(Z), X(D); \
- rotl X(V), S; \
- rotl X(W), S; \
- rotl X(Y), S; \
- rotl X(Z), S;
-
-.text
-.set reorder
-.set noat
-.globl chacha_crypt_arch
-.ent chacha_crypt_arch
-chacha_crypt_arch:
- .frame $sp, STACK_SIZE, $ra
-
- /* Load number of rounds */
- lw $at, 16($sp)
-
- addiu $sp, -STACK_SIZE
-
- /* Return bytes = 0. */
- beqz BYTES, .Lchacha_mips_end
-
- lw NONCE_0, 48(STATE)
-
- /* Save s0-s7 */
- sw $s0, 0($sp)
- sw $s1, 4($sp)
- sw $s2, 8($sp)
- sw $s3, 12($sp)
- sw $s4, 16($sp)
- sw $s5, 20($sp)
- sw $s6, 24($sp)
- sw $s7, 28($sp)
-
- /* Test IN or OUT is unaligned.
- * IS_UNALIGNED = ( IN | OUT ) & 0x00000003
- */
- or IS_UNALIGNED, IN, OUT
- andi IS_UNALIGNED, 0x3
-
- b .Lchacha_rounds_start
-
-.align 4
-.Loop_chacha_rounds:
- addiu IN, CHACHA20_BLOCK_SIZE
- addiu OUT, CHACHA20_BLOCK_SIZE
- addiu NONCE_0, 1
-
-.Lchacha_rounds_start:
- lw X0, 0(STATE)
- lw X1, 4(STATE)
- lw X2, 8(STATE)
- lw X3, 12(STATE)
-
- lw X4, 16(STATE)
- lw X5, 20(STATE)
- lw X6, 24(STATE)
- lw X7, 28(STATE)
- lw X8, 32(STATE)
- lw X9, 36(STATE)
- lw X10, 40(STATE)
- lw X11, 44(STATE)
-
- move X12, NONCE_0
- lw X13, 52(STATE)
- lw X14, 56(STATE)
- lw X15, 60(STATE)
-
-.Loop_chacha_xor_rounds:
- addiu $at, -2
- AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
- AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
- AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8);
- AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7);
- AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16);
- AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
- AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
- AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
- bnez $at, .Loop_chacha_xor_rounds
-
- addiu BYTES, -(CHACHA20_BLOCK_SIZE)
-
- /* Is data src/dst unaligned? Jump */
- bnez IS_UNALIGNED, .Loop_chacha_unaligned
-
- /* Set number rounds here to fill delayslot. */
- lw $at, (STACK_SIZE+16)($sp)
-
- /* BYTES < 0, it has no full block. */
- bltz BYTES, .Lchacha_mips_no_full_block_aligned
-
- FOR_EACH_WORD_REV(STORE_ALIGNED)
-
- /* BYTES > 0? Loop again. */
- bgtz BYTES, .Loop_chacha_rounds
-
- /* Place this here to fill delay slot */
- addiu NONCE_0, 1
-
- /* BYTES < 0? Handle last bytes */
- bltz BYTES, .Lchacha_mips_xor_bytes
-
-.Lchacha_mips_xor_done:
- /* Restore used registers */
- lw $s0, 0($sp)
- lw $s1, 4($sp)
- lw $s2, 8($sp)
- lw $s3, 12($sp)
- lw $s4, 16($sp)
- lw $s5, 20($sp)
- lw $s6, 24($sp)
- lw $s7, 28($sp)
-
- /* Write NONCE_0 back to right location in state */
- sw NONCE_0, 48(STATE)
-
-.Lchacha_mips_end:
- addiu $sp, STACK_SIZE
- jr $ra
-
-.Lchacha_mips_no_full_block_aligned:
- /* Restore the offset on BYTES */
- addiu BYTES, CHACHA20_BLOCK_SIZE
-
- /* Get number of full WORDS */
- andi $at, BYTES, MASK_U32
-
- /* Load upper half of jump table addr */
- lui T0, %hi(.Lchacha_mips_jmptbl_aligned_0)
-
- /* Calculate lower half jump table offset */
- ins T0, $at, 1, 6
-
- /* Add offset to STATE */
- addu T1, STATE, $at
-
- /* Add lower half jump table addr */
- addiu T0, %lo(.Lchacha_mips_jmptbl_aligned_0)
-
- /* Read value from STATE */
- lw SAVED_CA, 0(T1)
-
- /* Store remaining bytecounter as negative value */
- subu BYTES, $at, BYTES
-
- jr T0
-
- /* Jump table */
- FOR_EACH_WORD(JMPTBL_ALIGNED)
-
-
-.Loop_chacha_unaligned:
- /* Set number rounds here to fill delayslot. */
- lw $at, (STACK_SIZE+16)($sp)
-
- /* BYTES > 0, it has no full block. */
- bltz BYTES, .Lchacha_mips_no_full_block_unaligned
-
- FOR_EACH_WORD_REV(STORE_UNALIGNED)
-
- /* BYTES > 0? Loop again. */
- bgtz BYTES, .Loop_chacha_rounds
-
- /* Write NONCE_0 back to right location in state */
- sw NONCE_0, 48(STATE)
-
- .set noreorder
- /* Fall through to byte handling */
- bgez BYTES, .Lchacha_mips_xor_done
-.Lchacha_mips_xor_unaligned_0_b:
-.Lchacha_mips_xor_aligned_0_b:
- /* Place this here to fill delay slot */
- addiu NONCE_0, 1
- .set reorder
-
-.Lchacha_mips_xor_bytes:
- addu IN, $at
- addu OUT, $at
- /* First byte */
- lbu T1, 0(IN)
- addiu $at, BYTES, 1
- CPU_TO_LE32(SAVED_X)
- ROTR(SAVED_X)
- xor T1, SAVED_X
- sb T1, 0(OUT)
- beqz $at, .Lchacha_mips_xor_done
- /* Second byte */
- lbu T1, 1(IN)
- addiu $at, BYTES, 2
- ROTx SAVED_X, 8
- xor T1, SAVED_X
- sb T1, 1(OUT)
- beqz $at, .Lchacha_mips_xor_done
- /* Third byte */
- lbu T1, 2(IN)
- ROTx SAVED_X, 8
- xor T1, SAVED_X
- sb T1, 2(OUT)
- b .Lchacha_mips_xor_done
-
-.Lchacha_mips_no_full_block_unaligned:
- /* Restore the offset on BYTES */
- addiu BYTES, CHACHA20_BLOCK_SIZE
-
- /* Get number of full WORDS */
- andi $at, BYTES, MASK_U32
-
- /* Load upper half of jump table addr */
- lui T0, %hi(.Lchacha_mips_jmptbl_unaligned_0)
-
- /* Calculate lower half jump table offset */
- ins T0, $at, 1, 6
-
- /* Add offset to STATE */
- addu T1, STATE, $at
-
- /* Add lower half jump table addr */
- addiu T0, %lo(.Lchacha_mips_jmptbl_unaligned_0)
-
- /* Read value from STATE */
- lw SAVED_CA, 0(T1)
-
- /* Store remaining bytecounter as negative value */
- subu BYTES, $at, BYTES
-
- jr T0
-
- /* Jump table */
- FOR_EACH_WORD(JMPTBL_UNALIGNED)
-.end chacha_crypt_arch
-.set at
-
-/* Input arguments
- * STATE $a0
- * OUT $a1
- * NROUND $a2
- */
-
-#undef X12
-#undef X13
-#undef X14
-#undef X15
-
-#define X12 $a3
-#define X13 $at
-#define X14 $v0
-#define X15 STATE
-
-.set noat
-.globl hchacha_block_arch
-.ent hchacha_block_arch
-hchacha_block_arch:
- .frame $sp, STACK_SIZE, $ra
-
- addiu $sp, -STACK_SIZE
-
- /* Save X11(s6) */
- sw X11, 0($sp)
-
- lw X0, 0(STATE)
- lw X1, 4(STATE)
- lw X2, 8(STATE)
- lw X3, 12(STATE)
- lw X4, 16(STATE)
- lw X5, 20(STATE)
- lw X6, 24(STATE)
- lw X7, 28(STATE)
- lw X8, 32(STATE)
- lw X9, 36(STATE)
- lw X10, 40(STATE)
- lw X11, 44(STATE)
- lw X12, 48(STATE)
- lw X13, 52(STATE)
- lw X14, 56(STATE)
- lw X15, 60(STATE)
-
-.Loop_hchacha_xor_rounds:
- addiu $a2, -2
- AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
- AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
- AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8);
- AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7);
- AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16);
- AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
- AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
- AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
- bnez $a2, .Loop_hchacha_xor_rounds
-
- /* Restore used register */
- lw X11, 0($sp)
-
- sw X0, 0(OUT)
- sw X1, 4(OUT)
- sw X2, 8(OUT)
- sw X3, 12(OUT)
- sw X12, 16(OUT)
- sw X13, 20(OUT)
- sw X14, 24(OUT)
- sw X15, 28(OUT)
-
- addiu $sp, STACK_SIZE
- jr $ra
-.end hchacha_block_arch
-.set at