From 14c174633f349cb41ea90c2c0aaddac157012f74 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 10 Feb 2022 16:40:44 +0100 Subject: random: remove unused tracepoints These explicit tracepoints aren't really used and show sign of aging. It's work to keep these up to date, and before I attempted to keep them up to date, they weren't up to date, which indicates that they're not really used. These days there are better ways of introspecting anyway. Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Reviewed-by: Eric Biggers Signed-off-by: Jason A. Donenfeld --- lib/random32.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib/random32.c') diff --git a/lib/random32.c b/lib/random32.c index a57a0e18819d..3c19820796d0 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -41,7 +41,6 @@ #include #include #include -#include /** * prandom_u32_state - seeded pseudo-random number generator. @@ -387,7 +386,6 @@ u32 prandom_u32(void) struct siprand_state *state = get_cpu_ptr(&net_rand_state); u32 res = siprand_u32(state); - trace_prandom_u32(res); put_cpu_ptr(&net_rand_state); return res; } -- cgit From 5acd35487dc911541672b3ffc322851769c32a56 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 1 Mar 2022 20:03:49 +0100 Subject: random: replace custom notifier chain with standard one We previously rolled our own randomness readiness notifier, which only has two users in the whole kernel. Replace this with a more standard atomic notifier block that serves the same purpose with less code. Also unexport the symbols, because no modules use it, only unconditional builtins. The only drawback is that it's possible for a notification handler returning the "stop" code to prevent further processing, but given that there are only two users, and that we're unexporting this anyway, that doesn't seem like a significant drawback for the simplification we receive here. Cc: Greg Kroah-Hartman Cc: Theodore Ts'o Reviewed-by: Dominik Brodowski Signed-off-by: Jason A. Donenfeld --- lib/random32.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'lib/random32.c') diff --git a/lib/random32.c b/lib/random32.c index 3c19820796d0..976632003ec6 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -551,9 +551,11 @@ static void prandom_reseed(struct timer_list *unused) * To avoid worrying about whether it's safe to delay that interrupt * long enough to seed all CPUs, just schedule an immediate timer event. */ -static void prandom_timer_start(struct random_ready_callback *unused) +static int prandom_timer_start(struct notifier_block *nb, + unsigned long action, void *data) { mod_timer(&seed_timer, jiffies); + return 0; } #ifdef CONFIG_RANDOM32_SELFTEST @@ -617,13 +619,13 @@ core_initcall(prandom32_state_selftest); */ static int __init prandom_init_late(void) { - static struct random_ready_callback random_ready = { - .func = prandom_timer_start + static struct notifier_block random_ready = { + .notifier_call = prandom_timer_start }; - int ret = add_random_ready_callback(&random_ready); + int ret = register_random_ready_notifier(&random_ready); if (ret == -EALREADY) { - prandom_timer_start(&random_ready); + prandom_timer_start(&random_ready, 0, NULL); ret = 0; } return ret; -- cgit From d4150779e60fb6c49be25572596b2cdfc5d46a09 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 11 May 2022 16:11:29 +0200 Subject: random32: use real rng for non-deterministic randomness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit random32.c has two random number generators in it: one that is meant to be used deterministically, with some predefined seed, and one that does the same exact thing as random.c, except does it poorly. The first one has some use cases. The second one no longer does and can be replaced with calls to random.c's proper random number generator. The relatively recent siphash-based bad random32.c code was added in response to concerns that the prior random32.c was too deterministic. Out of fears that random.c was (at the time) too slow, this code was anonymously contributed. Then out of that emerged a kind of shadow entropy gathering system, with its own tentacles throughout various net code, added willy nilly. Stop👏making👏bespoke👏random👏number👏generators👏. Fortunately, recent advances in random.c mean that we can stop playing with this sketchiness, and just use get_random_u32(), which is now fast enough. In micro benchmarks using RDPMC, I'm seeing the same median cycle count between the two functions, with the mean being _slightly_ higher due to batches refilling (which we can optimize further need be). However, when doing *real* benchmarks of the net functions that actually use these random numbers, the mean cycles actually *decreased* slightly (with the median still staying the same), likely because the additional prandom code means icache misses and complexity, whereas random.c is generally already being used by something else nearby. The biggest benefit of this is that there are many users of prandom who probably should be using cryptographically secure random numbers. This makes all of those accidental cases become secure by just flipping a switch. Later on, we can do a tree-wide cleanup to remove the static inline wrapper functions that this commit adds. There are also some low-ish hanging fruits for making this even faster in the future: a get_random_u16() function for use in the networking stack will give a 2x performance boost there, using SIMD for ChaCha20 will let us compute 4 or 8 or 16 blocks of output in parallel, instead of just one, giving us large buffers for cheap, and introducing a get_random_*_bh() function that assumes irqs are already disabled will shave off a few cycles for ordinary calls. These are things we can chip away at down the road. Acked-by: Jakub Kicinski Acked-by: Theodore Ts'o Signed-off-by: Jason A. Donenfeld --- lib/random32.c | 347 ++------------------------------------------------------- 1 file changed, 7 insertions(+), 340 deletions(-) (limited to 'lib/random32.c') diff --git a/lib/random32.c b/lib/random32.c index 976632003ec6..d5d9029362cb 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -245,25 +245,13 @@ static struct prandom_test2 { { 407983964U, 921U, 728767059U }, }; -static u32 __extract_hwseed(void) -{ - unsigned int val = 0; - - (void)(arch_get_random_seed_int(&val) || - arch_get_random_int(&val)); - - return val; -} - -static void prandom_seed_early(struct rnd_state *state, u32 seed, - bool mix_with_hwseed) +static void prandom_state_selftest_seed(struct rnd_state *state, u32 seed) { #define LCG(x) ((x) * 69069U) /* super-duper LCG */ -#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0) - state->s1 = __seed(HWSEED() ^ LCG(seed), 2U); - state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U); - state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U); - state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U); + state->s1 = __seed(LCG(seed), 2U); + state->s2 = __seed(LCG(state->s1), 8U); + state->s3 = __seed(LCG(state->s2), 16U); + state->s4 = __seed(LCG(state->s3), 128U); } static int __init prandom_state_selftest(void) @@ -274,7 +262,7 @@ static int __init prandom_state_selftest(void) for (i = 0; i < ARRAY_SIZE(test1); i++) { struct rnd_state state; - prandom_seed_early(&state, test1[i].seed, false); + prandom_state_selftest_seed(&state, test1[i].seed); prandom_warmup(&state); if (test1[i].result != prandom_u32_state(&state)) @@ -289,7 +277,7 @@ static int __init prandom_state_selftest(void) for (i = 0; i < ARRAY_SIZE(test2); i++) { struct rnd_state state; - prandom_seed_early(&state, test2[i].seed, false); + prandom_state_selftest_seed(&state, test2[i].seed); prandom_warmup(&state); for (j = 0; j < test2[i].iteration - 1; j++) @@ -310,324 +298,3 @@ static int __init prandom_state_selftest(void) } core_initcall(prandom_state_selftest); #endif - -/* - * The prandom_u32() implementation is now completely separate from the - * prandom_state() functions, which are retained (for now) for compatibility. - * - * Because of (ab)use in the networking code for choosing random TCP/UDP port - * numbers, which open DoS possibilities if guessable, we want something - * stronger than a standard PRNG. But the performance requirements of - * the network code do not allow robust crypto for this application. - * - * So this is a homebrew Junior Spaceman implementation, based on the - * lowest-latency trustworthy crypto primitive available, SipHash. - * (The authors of SipHash have not been consulted about this abuse of - * their work.) - * - * Standard SipHash-2-4 uses 2n+4 rounds to hash n words of input to - * one word of output. This abbreviated version uses 2 rounds per word - * of output. - */ - -struct siprand_state { - unsigned long v0; - unsigned long v1; - unsigned long v2; - unsigned long v3; -}; - -static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy; -DEFINE_PER_CPU(unsigned long, net_rand_noise); -EXPORT_PER_CPU_SYMBOL(net_rand_noise); - -/* - * This is the core CPRNG function. As "pseudorandom", this is not used - * for truly valuable things, just intended to be a PITA to guess. - * For maximum speed, we do just two SipHash rounds per word. This is - * the same rate as 4 rounds per 64 bits that SipHash normally uses, - * so hopefully it's reasonably secure. - * - * There are two changes from the official SipHash finalization: - * - We omit some constants XORed with v2 in the SipHash spec as irrelevant; - * they are there only to make the output rounds distinct from the input - * rounds, and this application has no input rounds. - * - Rather than returning v0^v1^v2^v3, return v1+v3. - * If you look at the SipHash round, the last operation on v3 is - * "v3 ^= v0", so "v0 ^ v3" just undoes that, a waste of time. - * Likewise "v1 ^= v2". (The rotate of v2 makes a difference, but - * it still cancels out half of the bits in v2 for no benefit.) - * Second, since the last combining operation was xor, continue the - * pattern of alternating xor/add for a tiny bit of extra non-linearity. - */ -static inline u32 siprand_u32(struct siprand_state *s) -{ - unsigned long v0 = s->v0, v1 = s->v1, v2 = s->v2, v3 = s->v3; - unsigned long n = raw_cpu_read(net_rand_noise); - - v3 ^= n; - PRND_SIPROUND(v0, v1, v2, v3); - PRND_SIPROUND(v0, v1, v2, v3); - v0 ^= n; - s->v0 = v0; s->v1 = v1; s->v2 = v2; s->v3 = v3; - return v1 + v3; -} - - -/** - * prandom_u32 - pseudo random number generator - * - * A 32 bit pseudo-random number is generated using a fast - * algorithm suitable for simulation. This algorithm is NOT - * considered safe for cryptographic use. - */ -u32 prandom_u32(void) -{ - struct siprand_state *state = get_cpu_ptr(&net_rand_state); - u32 res = siprand_u32(state); - - put_cpu_ptr(&net_rand_state); - return res; -} -EXPORT_SYMBOL(prandom_u32); - -/** - * prandom_bytes - get the requested number of pseudo-random bytes - * @buf: where to copy the pseudo-random bytes to - * @bytes: the requested number of bytes - */ -void prandom_bytes(void *buf, size_t bytes) -{ - struct siprand_state *state = get_cpu_ptr(&net_rand_state); - u8 *ptr = buf; - - while (bytes >= sizeof(u32)) { - put_unaligned(siprand_u32(state), (u32 *)ptr); - ptr += sizeof(u32); - bytes -= sizeof(u32); - } - - if (bytes > 0) { - u32 rem = siprand_u32(state); - - do { - *ptr++ = (u8)rem; - rem >>= BITS_PER_BYTE; - } while (--bytes > 0); - } - put_cpu_ptr(&net_rand_state); -} -EXPORT_SYMBOL(prandom_bytes); - -/** - * prandom_seed - add entropy to pseudo random number generator - * @entropy: entropy value - * - * Add some additional seed material to the prandom pool. - * The "entropy" is actually our IP address (the only caller is - * the network code), not for unpredictability, but to ensure that - * different machines are initialized differently. - */ -void prandom_seed(u32 entropy) -{ - int i; - - add_device_randomness(&entropy, sizeof(entropy)); - - for_each_possible_cpu(i) { - struct siprand_state *state = per_cpu_ptr(&net_rand_state, i); - unsigned long v0 = state->v0, v1 = state->v1; - unsigned long v2 = state->v2, v3 = state->v3; - - do { - v3 ^= entropy; - PRND_SIPROUND(v0, v1, v2, v3); - PRND_SIPROUND(v0, v1, v2, v3); - v0 ^= entropy; - } while (unlikely(!v0 || !v1 || !v2 || !v3)); - - WRITE_ONCE(state->v0, v0); - WRITE_ONCE(state->v1, v1); - WRITE_ONCE(state->v2, v2); - WRITE_ONCE(state->v3, v3); - } -} -EXPORT_SYMBOL(prandom_seed); - -/* - * Generate some initially weak seeding values to allow - * the prandom_u32() engine to be started. - */ -static int __init prandom_init_early(void) -{ - int i; - unsigned long v0, v1, v2, v3; - - if (!arch_get_random_long(&v0)) - v0 = jiffies; - if (!arch_get_random_long(&v1)) - v1 = random_get_entropy(); - v2 = v0 ^ PRND_K0; - v3 = v1 ^ PRND_K1; - - for_each_possible_cpu(i) { - struct siprand_state *state; - - v3 ^= i; - PRND_SIPROUND(v0, v1, v2, v3); - PRND_SIPROUND(v0, v1, v2, v3); - v0 ^= i; - - state = per_cpu_ptr(&net_rand_state, i); - state->v0 = v0; state->v1 = v1; - state->v2 = v2; state->v3 = v3; - } - - return 0; -} -core_initcall(prandom_init_early); - - -/* Stronger reseeding when available, and periodically thereafter. */ -static void prandom_reseed(struct timer_list *unused); - -static DEFINE_TIMER(seed_timer, prandom_reseed); - -static void prandom_reseed(struct timer_list *unused) -{ - unsigned long expires; - int i; - - /* - * Reinitialize each CPU's PRNG with 128 bits of key. - * No locking on the CPUs, but then somewhat random results are, - * well, expected. - */ - for_each_possible_cpu(i) { - struct siprand_state *state; - unsigned long v0 = get_random_long(), v2 = v0 ^ PRND_K0; - unsigned long v1 = get_random_long(), v3 = v1 ^ PRND_K1; -#if BITS_PER_LONG == 32 - int j; - - /* - * On 32-bit machines, hash in two extra words to - * approximate 128-bit key length. Not that the hash - * has that much security, but this prevents a trivial - * 64-bit brute force. - */ - for (j = 0; j < 2; j++) { - unsigned long m = get_random_long(); - - v3 ^= m; - PRND_SIPROUND(v0, v1, v2, v3); - PRND_SIPROUND(v0, v1, v2, v3); - v0 ^= m; - } -#endif - /* - * Probably impossible in practice, but there is a - * theoretical risk that a race between this reseeding - * and the target CPU writing its state back could - * create the all-zero SipHash fixed point. - * - * To ensure that never happens, ensure the state - * we write contains no zero words. - */ - state = per_cpu_ptr(&net_rand_state, i); - WRITE_ONCE(state->v0, v0 ? v0 : -1ul); - WRITE_ONCE(state->v1, v1 ? v1 : -1ul); - WRITE_ONCE(state->v2, v2 ? v2 : -1ul); - WRITE_ONCE(state->v3, v3 ? v3 : -1ul); - } - - /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ - expires = round_jiffies(jiffies + 40 * HZ + prandom_u32_max(40 * HZ)); - mod_timer(&seed_timer, expires); -} - -/* - * The random ready callback can be called from almost any interrupt. - * To avoid worrying about whether it's safe to delay that interrupt - * long enough to seed all CPUs, just schedule an immediate timer event. - */ -static int prandom_timer_start(struct notifier_block *nb, - unsigned long action, void *data) -{ - mod_timer(&seed_timer, jiffies); - return 0; -} - -#ifdef CONFIG_RANDOM32_SELFTEST -/* Principle: True 32-bit random numbers will all have 16 differing bits on - * average. For each 32-bit number, there are 601M numbers differing by 16 - * bits, and 89% of the numbers differ by at least 12 bits. Note that more - * than 16 differing bits also implies a correlation with inverted bits. Thus - * we take 1024 random numbers and compare each of them to the other ones, - * counting the deviation of correlated bits to 16. Constants report 32, - * counters 32-log2(TEST_SIZE), and pure randoms, around 6 or lower. With the - * u32 total, TEST_SIZE may be as large as 4096 samples. - */ -#define TEST_SIZE 1024 -static int __init prandom32_state_selftest(void) -{ - unsigned int x, y, bits, samples; - u32 xor, flip; - u32 total; - u32 *data; - - data = kmalloc(sizeof(*data) * TEST_SIZE, GFP_KERNEL); - if (!data) - return 0; - - for (samples = 0; samples < TEST_SIZE; samples++) - data[samples] = prandom_u32(); - - flip = total = 0; - for (x = 0; x < samples; x++) { - for (y = 0; y < samples; y++) { - if (x == y) - continue; - xor = data[x] ^ data[y]; - flip |= xor; - bits = hweight32(xor); - total += (bits - 16) * (bits - 16); - } - } - - /* We'll return the average deviation as 2*sqrt(corr/samples), which - * is also sqrt(4*corr/samples) which provides a better resolution. - */ - bits = int_sqrt(total / (samples * (samples - 1)) * 4); - if (bits > 6) - pr_warn("prandom32: self test failed (at least %u bits" - " correlated, fixed_mask=%#x fixed_value=%#x\n", - bits, ~flip, data[0] & ~flip); - else - pr_info("prandom32: self test passed (less than %u bits" - " correlated)\n", - bits+1); - kfree(data); - return 0; -} -core_initcall(prandom32_state_selftest); -#endif /* CONFIG_RANDOM32_SELFTEST */ - -/* - * Start periodic full reseeding as soon as strong - * random numbers are available. - */ -static int __init prandom_init_late(void) -{ - static struct notifier_block random_ready = { - .notifier_call = prandom_timer_start - }; - int ret = register_random_ready_notifier(&random_ready); - - if (ret == -EALREADY) { - prandom_timer_start(&random_ready, 0, NULL); - ret = 0; - } - return ret; -} -late_initcall(prandom_init_late); -- cgit From a251c17aa558d8e3128a528af5cf8b9d7caae4fd Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 5 Oct 2022 17:43:22 +0200 Subject: treewide: use get_random_u32() when possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prandom_u32() function has been a deprecated inline wrapper around get_random_u32() for several releases now, and compiles down to the exact same code. Replace the deprecated wrapper with a direct call to the real function. The same also applies to get_random_int(), which is just a wrapper around get_random_u32(). This was done as a basic find and replace. Reviewed-by: Greg Kroah-Hartman Reviewed-by: Kees Cook Reviewed-by: Yury Norov Reviewed-by: Jan Kara # for ext4 Acked-by: Toke Høiland-Jørgensen # for sch_cake Acked-by: Chuck Lever # for nfsd Acked-by: Jakub Kicinski Acked-by: Mika Westerberg # for thunderbolt Acked-by: Darrick J. Wong # for xfs Acked-by: Helge Deller # for parisc Acked-by: Heiko Carstens # for s390 Signed-off-by: Jason A. Donenfeld --- lib/random32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/random32.c') diff --git a/lib/random32.c b/lib/random32.c index d5d9029362cb..d4f19e1a69d4 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -47,7 +47,7 @@ * @state: pointer to state structure holding seeded state. * * This is used for pseudo-randomness with no outside seeding. - * For more random results, use prandom_u32(). + * For more random results, use get_random_u32(). */ u32 prandom_u32_state(struct rnd_state *state) { -- cgit From 197173db990cad244221ba73c43b1df6170ae278 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 5 Oct 2022 17:49:46 +0200 Subject: treewide: use get_random_bytes() when possible The prandom_bytes() function has been a deprecated inline wrapper around get_random_bytes() for several releases now, and compiles down to the exact same code. Replace the deprecated wrapper with a direct call to the real function. This was done as a basic find and replace. Reviewed-by: Greg Kroah-Hartman Reviewed-by: Kees Cook Reviewed-by: Yury Norov Reviewed-by: Christophe Leroy # powerpc Acked-by: Jakub Kicinski Signed-off-by: Jason A. Donenfeld --- lib/random32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/random32.c') diff --git a/lib/random32.c b/lib/random32.c index d4f19e1a69d4..32060b852668 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -69,7 +69,7 @@ EXPORT_SYMBOL(prandom_u32_state); * @bytes: the requested number of bytes * * This is used for pseudo-randomness with no outside seeding. - * For more random results, use prandom_bytes(). + * For more random results, use get_random_bytes(). */ void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes) { -- cgit