summaryrefslogtreecommitdiff
path: root/arch/riscv/kernel/vdso/vgetrandom-chacha.S
blob: 5f0dad8f2373e04b17a9f8efb8028ce2fcc0de87 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
 *
 * Based on arch/loongarch/vdso/vgetrandom-chacha.S.
 */

#include <asm/asm.h>
#include <linux/linkage.h>

.text

/*
 * ROTRI rd, rs, imm
 *
 * Rotate the low 32 bits of rs right by imm bits and put the result in rd.
 * There is no single rotate instruction used here; the rotate is assembled
 * from a left shift by (32 - imm), a right shift by imm, and an OR.
 *
 * Clobbers: t0 (holds the left-shifted half).  Neither rd nor rs may be t0.
 * rd may be the same register as rs (every use in this file does that via
 * OP_4REG): the left-shifted half is parked in t0 before rd is overwritten,
 * so the order of the two shifts must not be swapped.
 * imm is an assemble-time constant; it has to be in 1..31 so that both
 * shift amounts are encodable.
 */
.macro	ROTRI	rd rs imm
	slliw	t0, \rs, 32 - \imm
	srliw	\rd, \rs, \imm
	or	\rd, \rd, t0
.endm

/*
 * OP_4REG op, d0, d1, d2, d3, s0, s1, s2, s3
 *
 * Apply "op" lane by lane to four destination/source pairs, expanding to
 * "op dN, dN, sN" for N = 0..3 in that order.  "op" can be any instruction
 * or macro taking (dst, src1, src2) operands; when it is ROTRI, s0..s3 are
 * the rotate amounts rather than registers.  The expansion is purely
 * textual, so any register clobbered by "op" (t0 for ROTRI) is clobbered
 * here as well.
 */
.macro	OP_4REG	op d0 d1 d2 d3 s0 s1 s2 s3
	\op	\d0, \d0, \s0
	\op	\d1, \d1, \s1
	\op	\d2, \d2, \s2
	\op	\d3, \d3, \s3
.endm

/*
 * __arch_chacha20_blocks_nostack - write ChaCha20 blocks to a buffer while
 * keeping all cipher state in registers.
 *
 *	a0: output bytes
 *	a1: 32-byte key input
 *	a2: 8-byte counter input/output
 *	a3: number of 64-byte blocks to write to output
 *
 * Each block is built from the "expand 32-byte k" constants, the key, the
 * 64-bit counter in state[12,13] and zeros in state[14,15]; it is permuted
 * by 10 iterations of an odd + even round pair, added to its input and
 * stored.  The counter is incremented once per block and written back to
 * (a2) at the end.
 *
 * a3 must be non-zero: the block loop decrements and tests it only after
 * a block has been written.
 *
 * RV64 only (uses ld/sd, the *w arithmetic forms and a 32-bit srli).
 * Besides the arguments this uses a4-a7 and t0-t6 as temporaries.  s0-s11
 * are used for state[0..11]; the caller's values are saved on the stack on
 * entry and restored before returning.  No key- or state-derived value is
 * ever stored to the stack, and the temporaries that held such values are
 * zeroed before returning.
 */
SYM_FUNC_START(__arch_chacha20_blocks_nostack)

#define output		a0
#define key		a1
#define counter		a2
#define nblocks		a3
#define i		a4
#define state0		s0
#define state1		s1
#define state2		s2
#define state3		s3
#define state4		s4
#define state5		s5
#define state6		s6
#define state7		s7
#define state8		s8
#define state9		s9
#define state10		s10
#define state11		s11
#define state12		a5
#define state13		a6
#define state14		a7
#define state15		t1
#define cnt		t2
#define copy0		t3
#define copy1		t4
#define copy2		t5
#define copy3		t6

/* Packs to be used with OP_4REG */
#define line0		state0, state1, state2, state3
#define line1		state4, state5, state6, state7
#define line2		state8, state9, state10, state11
#define line3		state12, state13, state14, state15

/* Rows 1-3 rotated by 1, 2 and 3 lanes, so OP_4REG works on diagonals */
#define line1_perm	state5, state6, state7, state4
#define line2_perm	state10, state11, state8, state9
#define line3_perm	state15, state12, state13, state14

#define copy		copy0, copy1, copy2, copy3

/* Rotate-right amounts for ROTRI, one per lane */
#define _16		16, 16, 16, 16
#define _20		20, 20, 20, 20
#define _24		24, 24, 24, 24
#define _25		25, 25, 25, 25

	/*
	 * The ABI requires s0-s11 to be saved by the callee.
	 * This does not violate the stack-less requirement: no sensitive data
	 * is spilled onto the stack, only the caller's register values.
	 */
	addi		sp, sp, -12*SZREG
	REG_S		s0,         (sp)
	REG_S		s1,    SZREG(sp)
	REG_S		s2,  2*SZREG(sp)
	REG_S		s3,  3*SZREG(sp)
	REG_S		s4,  4*SZREG(sp)
	REG_S		s5,  5*SZREG(sp)
	REG_S		s6,  6*SZREG(sp)
	REG_S		s7,  7*SZREG(sp)
	REG_S		s8,  8*SZREG(sp)
	REG_S		s9,  9*SZREG(sp)
	REG_S		s10, 10*SZREG(sp)
	REG_S		s11, 11*SZREG(sp)

	/* cnt = 64-bit block counter, kept in a register across all blocks */
	ld		cnt, (counter)

	/* copy[0,1,2,3] = "expand 32-byte k", loaded once and reused */
	li		copy0, 0x61707865
	li		copy1, 0x3320646e
	li		copy2, 0x79622d32
	li		copy3, 0x6b206574

.Lblock:
	/* state[0,1,2,3] = "expand 32-byte k" */
	mv		state0, copy0
	mv		state1, copy1
	mv		state2, copy2
	mv		state3, copy3

	/* state[4,5,..,11] = key */
	lw		state4,   (key)
	lw		state5,  4(key)
	lw		state6,  8(key)
	lw		state7, 12(key)
	lw		state8, 16(key)
	lw		state9, 20(key)
	lw		state10, 24(key)
	lw		state11, 28(key)

	/*
	 * state[12,13] = counter
	 *
	 * state12 also carries cnt_hi in its upper half.  That is harmless:
	 * only the low 32 bits of a state register are ever consumed (by
	 * addw, by ROTRI's 32-bit shifts, or by sw).
	 */
	mv		state12, cnt
	srli		state13, cnt, 32

	/* state[14,15] = 0 */
	mv		state14, zero
	mv		state15, zero

	/* 10 iterations of (odd round + even round) = 20 rounds */
	li		i, 10
.Lpermute:
	/* odd round */
	OP_4REG	addw	line0, line1
	OP_4REG	xor	line3, line0
	OP_4REG	ROTRI	line3, _16

	OP_4REG	addw	line2, line3
	OP_4REG	xor	line1, line2
	OP_4REG	ROTRI	line1, _20

	OP_4REG	addw	line0, line1
	OP_4REG	xor	line3, line0
	OP_4REG	ROTRI	line3, _24

	OP_4REG	addw	line2, line3
	OP_4REG	xor	line1, line2
	OP_4REG	ROTRI	line1, _25

	/* even round */
	OP_4REG	addw	line0, line1_perm
	OP_4REG	xor	line3_perm, line0
	OP_4REG	ROTRI	line3_perm, _16

	OP_4REG	addw	line2_perm, line3_perm
	OP_4REG	xor	line1_perm, line2_perm
	OP_4REG	ROTRI	line1_perm, _20

	OP_4REG	addw	line0, line1_perm
	OP_4REG	xor	line3_perm, line0
	OP_4REG	ROTRI	line3_perm, _24

	OP_4REG	addw	line2_perm, line3_perm
	OP_4REG	xor	line1_perm, line2_perm
	OP_4REG	ROTRI	line1_perm, _25

	addi		i, i, -1
	bnez		i, .Lpermute

	/* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */
	OP_4REG	addw	line0, copy
	sw		state0,   (output)
	sw		state1,  4(output)
	sw		state2,  8(output)
	sw		state3, 12(output)

	/* from now on state[0,1,2,3] are scratch registers */

	/* state[0,1,2,3] = lo(key) */
	lw		state0,   (key)
	lw		state1,  4(key)
	lw		state2,  8(key)
	lw		state3, 12(key)

	/* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */
	OP_4REG	addw	line1, line0
	sw		state4, 16(output)
	sw		state5, 20(output)
	sw		state6, 24(output)
	sw		state7, 28(output)

	/* state[0,1,2,3] = hi(key) */
	lw		state0, 16(key)
	lw		state1, 20(key)
	lw		state2, 24(key)
	lw		state3, 28(key)

	/* output[8,9,10,11] = state[0,1,2,3] + state[8,9,10,11] */
	OP_4REG	addw	line2, line0
	sw		state8,  32(output)
	sw		state9,  36(output)
	sw		state10, 40(output)
	sw		state11, 44(output)

	/* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */
	addw		state12, state12, cnt
	srli		state0, cnt, 32
	addw		state13, state13, state0
	sw		state12, 48(output)
	sw		state13, 52(output)
	sw		state14, 56(output)
	sw		state15, 60(output)

	/* ++counter */
	addi		cnt, cnt, 1

	/* output += 64 */
	addi		output, output, 64
	/* --nblocks; tested only here, so nblocks == 0 on entry is invalid */
	addi		nblocks, nblocks, -1
	bnez		nblocks, .Lblock

	/* counter = [cnt_lo, cnt_hi] */
	sd		cnt, (counter)

	/*
	 * Zero out the potentially sensitive regs, in case nothing uses these
	 * again.  At this point copy[0,1,2,3] just contain "expand 32-byte k"
	 * and state[0,...,11] are s0-s11, which the epilogue below overwrites
	 * with the caller's values.  That leaves state[12,...,15] and t0: t0
	 * is ROTRI's scratch register and still holds the left-shifted half
	 * of state[4] from the last rotate of the final round.
	 */
	mv		state12, zero
	mv		state13, zero
	mv		state14, zero
	mv		state15, zero
	mv		t0, zero

	REG_L		s0,         (sp)
	REG_L		s1,    SZREG(sp)
	REG_L		s2,  2*SZREG(sp)
	REG_L		s3,  3*SZREG(sp)
	REG_L		s4,  4*SZREG(sp)
	REG_L		s5,  5*SZREG(sp)
	REG_L		s6,  6*SZREG(sp)
	REG_L		s7,  7*SZREG(sp)
	REG_L		s8,  8*SZREG(sp)
	REG_L		s9,  9*SZREG(sp)
	REG_L		s10, 10*SZREG(sp)
	REG_L		s11, 11*SZREG(sp)
	addi		sp, sp, 12*SZREG

	ret
SYM_FUNC_END(__arch_chacha20_blocks_nostack)