/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SHA-1 optimized for PowerPC
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */
#include <asm/switch_to.h>
#include <linux/preempt.h>
#ifdef CONFIG_SPE
/*
 * MAX_BYTES defines the number of bytes that are allowed to be processed
 * between preempt_disable() and preempt_enable(). SHA1 takes ~1000
 * operations per 64 bytes. e500 cores can issue two arithmetic instructions
 * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
 * Thus 2KB of input data will need an estimated maximum of 18,000 cycles.
 * Headroom for cache misses included. Even with the low end model clocked
 * at 667 MHz this equals a critical time window of less than 27us.
 *
 */
#define MAX_BYTES 2048
/*
 * SPE block transform used by sha1_blocks() below. Its definition is not
 * part of this file (asmlinkage suggests an assembly implementation --
 * confirm against the build).
 *
 * As called here: @state is the running hash state, @data points at the
 * input, and @nblocks is the number of SHA1_BLOCK_SIZE-byte blocks to
 * consume, never more than MAX_BYTES / SHA1_BLOCK_SIZE per call. Every call
 * is made between spe_begin() and spe_end().
 */
asmlinkage void ppc_spe_sha1_transform(struct sha1_block_state *state,
				       const u8 *data, u32 nblocks);
/*
 * Open a section in which the SPE transform may run in kernel context.
 *
 * Preemption is disabled first and stays disabled until the matching
 * spe_end(), so the work done in between has to be kept short (see
 * MAX_BYTES).
 */
static void spe_begin(void)
{
	/* We just start SPE operations and will save SPE registers later. */
	preempt_disable();
	enable_kernel_spe();
}
/*
 * Close the section opened by spe_begin(): end kernel SPE use through
 * disable_kernel_spe(), then allow preemption again. The calls mirror
 * spe_begin() in reverse order.
 */
static void spe_end(void)
{
	disable_kernel_spe();
	/* reenable preemption */
	preempt_enable();
}
/*
 * Feed @nblocks blocks of SHA1_BLOCK_SIZE bytes starting at @data into
 * @state using the SPE transform.
 *
 * The input is consumed in slices of at most MAX_BYTES so that each
 * preempt-disabled section between spe_begin() and spe_end() stays short.
 * The transform is invoked at least once, even when @nblocks is 0 (in which
 * case it is handed a block count of 0).
 */
static void sha1_blocks(struct sha1_block_state *state,
			const u8 *data, size_t nblocks)
{
	/* Upper bound on blocks handled inside one non-preemptible section. */
	const size_t max_chunk = MAX_BYTES / SHA1_BLOCK_SIZE;

	for (;;) {
		u32 chunk = min_t(size_t, nblocks, max_chunk);

		spe_begin();
		ppc_spe_sha1_transform(state, data, chunk);
		spe_end();

		nblocks -= chunk;
		if (!nblocks)
			break;

		/* More input remains: step past the slice just hashed. */
		data += chunk * SHA1_BLOCK_SIZE;
	}
}
#else /* CONFIG_SPE */
/*
 * Single-block transform used by sha1_blocks() below when CONFIG_SPE is not
 * set. Its definition is not part of this file (asmlinkage suggests an
 * assembly implementation -- confirm against the build).
 *
 * As called here: @state is the running hash state and @data points at one
 * SHA1_BLOCK_SIZE-byte input block.
 */
asmlinkage void powerpc_sha_transform(struct sha1_block_state *state,
				      const u8 data[SHA1_BLOCK_SIZE]);
/*
 * Feed @nblocks blocks of SHA1_BLOCK_SIZE bytes starting at @data into
 * @state, one powerpc_sha_transform() call per block.
 *
 * @nblocks must be non-zero: the first block is hashed unconditionally and
 * the counter is decremented before it is tested, so a value of 0 would
 * wrap around instead of terminating the loop.
 */
static void sha1_blocks(struct sha1_block_state *state,
			const u8 *data, size_t nblocks)
{
	for (;;) {
		powerpc_sha_transform(state, data);

		if (--nblocks == 0)
			break;

		/* Another block follows: advance to it. */
		data += SHA1_BLOCK_SIZE;
	}
}
#endif /* !CONFIG_SPE */