2022-05-04 17:37:24

by Jason A. Donenfeld

[permalink] [raw]
Subject: [PATCH v3] random: use first 128 bits of input as fast init

Before, the first 64 bytes of input, regardless of how entropic it was,
would be used to mutate the crng base key directly, and none of those
bytes would be credited as having entropy. Then 256 bits of credited
input would be accumulated, and only then would the rng transition from
the earlier "fast init" phase into being actually initialized.

The thinking was that by mixing and matching fast init and real init, an
attacker who compromised the fast init state, considered easy to do
given how little entropy might be in those first 64 bytes, would then be
able to bruteforce bits from the actual initialization. By keeping these
separate, bruteforcing became impossible.

However, by not crediting potentially creditable bits from those first 64
bytes of input, we delay initialization, and actually make the problem
worse, because it means the user is drawing worse random numbers for a
longer period of time.

Instead, we can take the first 128 bits as fast init, and allow them to
be credited, and then hold off on the next 128 bits until they've
accumulated. This is still a wide enough margin to prevent bruteforcing
the rng state, while still initializing much faster.

Then, rather than trying to piecemeal inject into the base crng key at
various points, instead just extract from the pool when we need it, for
the crng_init==0 phase. Performance may even be better for the various
inputs here, since there are likely more calls to mix_pool_bytes() then
there are to get_random_bytes() during this phase of system execution.

Since the preinit injection code is gone, bootloader randomness can then
do something significantly more straight forward, removing the weird
system_wq hack in hwgenerator randomness.

Cc: Theodore Ts'o <[email protected]>
Cc: Dominik Brodowski <[email protected]>
Signed-off-by: Jason A. Donenfeld <[email protected]>
---
drivers/char/random.c | 140 ++++++++++++++----------------------------
1 file changed, 46 insertions(+), 94 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 4cf02bff7642..ff9e42073bf8 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -232,10 +232,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void
*
*********************************************************************/

-enum {
- CRNG_RESEED_INTERVAL = 300 * HZ,
- CRNG_INIT_CNT_THRESH = 2 * CHACHA_KEY_SIZE
-};
+enum { CRNG_RESEED_INTERVAL = 300 * HZ };

static struct {
u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long));
@@ -259,6 +256,8 @@ static DEFINE_PER_CPU(struct crng, crngs) = {

/* Used by crng_reseed() to extract a new seed from the input pool. */
static bool drain_entropy(void *buf, size_t nbytes, bool force);
+/* Used by crng_make_state() to extract a new seed when crng_init==0. */
+static void extract_entropy(void *buf, size_t nbytes);

/*
* This extracts a new crng key from the input pool, but only if there is a
@@ -383,17 +382,20 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
/*
* For the fast path, we check whether we're ready, unlocked first, and
* then re-check once locked later. In the case where we're really not
- * ready, we do fast key erasure with the base_crng directly, because
- * this is what crng_pre_init_inject() mutates during early init.
+ * ready, we do fast key erasure with the base_crng directly, extracting
+ * when crng_init==0.
*/
if (!crng_ready()) {
bool ready;

spin_lock_irqsave(&base_crng.lock, flags);
ready = crng_ready();
- if (!ready)
+ if (!ready) {
+ if (crng_init == 0)
+ extract_entropy(base_crng.key, sizeof(base_crng.key));
crng_fast_key_erasure(base_crng.key, chacha_state,
random_data, random_data_len);
+ }
spin_unlock_irqrestore(&base_crng.lock, flags);
if (!ready)
return;
@@ -434,48 +436,6 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS],
local_unlock_irqrestore(&crngs.lock, flags);
}

-/*
- * This function is for crng_init == 0 only. It loads entropy directly
- * into the crng's key, without going through the input pool. It is,
- * generally speaking, not very safe, but we use this only at early
- * boot time when it's better to have something there rather than
- * nothing.
- *
- * If account is set, then the crng_init_cnt counter is incremented.
- * This shouldn't be set by functions like add_device_randomness(),
- * where we can't trust the buffer passed to it is guaranteed to be
- * unpredictable (so it might not have any entropy at all).
- */
-static void crng_pre_init_inject(const void *input, size_t len, bool account)
-{
- static int crng_init_cnt = 0;
- struct blake2s_state hash;
- unsigned long flags;
-
- blake2s_init(&hash, sizeof(base_crng.key));
-
- spin_lock_irqsave(&base_crng.lock, flags);
- if (crng_init != 0) {
- spin_unlock_irqrestore(&base_crng.lock, flags);
- return;
- }
-
- blake2s_update(&hash, base_crng.key, sizeof(base_crng.key));
- blake2s_update(&hash, input, len);
- blake2s_final(&hash, base_crng.key);
-
- if (account) {
- crng_init_cnt += min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt);
- if (crng_init_cnt >= CRNG_INIT_CNT_THRESH)
- crng_init = 1;
- }
-
- spin_unlock_irqrestore(&base_crng.lock, flags);
-
- if (crng_init == 1)
- pr_notice("fast init done\n");
-}
-
static void _get_random_bytes(void *buf, size_t nbytes)
{
u32 chacha_state[CHACHA_STATE_WORDS];
@@ -788,7 +748,8 @@ EXPORT_SYMBOL(get_random_bytes_arch);

enum {
POOL_BITS = BLAKE2S_HASH_SIZE * 8,
- POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */
+ POOL_MIN_BITS = POOL_BITS, /* No point in settling for less. */
+ POOL_FAST_INIT_BITS = POOL_MIN_BITS / 2
};

/* For notifying userspace should write into /dev/random. */
@@ -825,24 +786,6 @@ static void mix_pool_bytes(const void *in, size_t nbytes)
spin_unlock_irqrestore(&input_pool.lock, flags);
}

-static void credit_entropy_bits(size_t nbits)
-{
- unsigned int entropy_count, orig, add;
-
- if (!nbits)
- return;
-
- add = min_t(size_t, nbits, POOL_BITS);
-
- do {
- orig = READ_ONCE(input_pool.entropy_count);
- entropy_count = min_t(unsigned int, POOL_BITS, orig + add);
- } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig);
-
- if (!crng_ready() && entropy_count >= POOL_MIN_BITS)
- crng_reseed(false);
-}
-
/*
* This is an HKDF-like construction for using the hashed collected entropy
* as a PRF key, that's then expanded block-by-block.
@@ -908,6 +851,32 @@ static bool drain_entropy(void *buf, size_t nbytes, bool force)
return true;
}

+static void credit_entropy_bits(size_t nbits)
+{
+ unsigned int entropy_count, orig, add;
+ unsigned long flags;
+
+ if (!nbits)
+ return;
+
+ add = min_t(size_t, nbits, POOL_BITS);
+
+ do {
+ orig = READ_ONCE(input_pool.entropy_count);
+ entropy_count = min_t(unsigned int, POOL_BITS, orig + add);
+ } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig);
+
+ if (!crng_ready() && entropy_count >= POOL_MIN_BITS)
+ crng_reseed(false);
+ else if (unlikely(crng_init == 0 && entropy_count >= POOL_FAST_INIT_BITS)) {
+ spin_lock_irqsave(&base_crng.lock, flags);
+ if (crng_init == 0) {
+ extract_entropy(base_crng.key, sizeof(base_crng.key));
+ crng_init = 1;
+ }
+ spin_unlock_irqrestore(&base_crng.lock, flags);
+ }
+}

/**********************************************************************
*
@@ -1040,8 +1009,8 @@ int __init rand_initialize(void)
_mix_pool_bytes(&now, sizeof(now));
_mix_pool_bytes(utsname(), sizeof(*(utsname())));

- extract_entropy(base_crng.key, sizeof(base_crng.key));
- ++base_crng.generation;
+ if (crng_ready())
+ crng_reseed(true);

if (arch_init && trust_cpu && !crng_ready()) {
crng_init = 2;
@@ -1073,9 +1042,6 @@ void add_device_randomness(const void *buf, size_t size)
unsigned long entropy = random_get_entropy();
unsigned long flags;

- if (crng_init == 0 && size)
- crng_pre_init_inject(buf, size, false);
-
spin_lock_irqsave(&input_pool.lock, flags);
_mix_pool_bytes(&entropy, sizeof(entropy));
_mix_pool_bytes(buf, size);
@@ -1191,12 +1157,6 @@ void rand_initialize_disk(struct gendisk *disk)
void add_hwgenerator_randomness(const void *buffer, size_t count,
size_t entropy)
{
- if (unlikely(crng_init == 0 && entropy < POOL_MIN_BITS)) {
- crng_pre_init_inject(buffer, count, true);
- mix_pool_bytes(buffer, count);
- return;
- }
-
/*
* Throttle writing if we're above the trickle threshold.
* We'll be woken up again once below POOL_MIN_BITS, when
@@ -1204,7 +1164,7 @@ void add_hwgenerator_randomness(const void *buffer, size_t count,
* CRNG_RESEED_INTERVAL has elapsed.
*/
wait_event_interruptible_timeout(random_write_wait,
- !system_wq || kthread_should_stop() ||
+ kthread_should_stop() ||
input_pool.entropy_count < POOL_MIN_BITS,
CRNG_RESEED_INTERVAL);
mix_pool_bytes(buffer, count);
@@ -1213,17 +1173,14 @@ void add_hwgenerator_randomness(const void *buffer, size_t count,
EXPORT_SYMBOL_GPL(add_hwgenerator_randomness);

/*
- * Handle random seed passed by bootloader.
- * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise
- * it would be regarded as device data.
- * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER.
+ * Handle random seed passed by bootloader, and credit it if
+ * CONFIG_RANDOM_TRUST_BOOTLOADER is set.
*/
void add_bootloader_randomness(const void *buf, size_t size)
{
+ mix_pool_bytes(buf, size);
if (trust_bootloader)
- add_hwgenerator_randomness(buf, size, size * 8);
- else
- add_device_randomness(buf, size);
+ credit_entropy_bits(size * 8);
}
EXPORT_SYMBOL_GPL(add_bootloader_randomness);

@@ -1357,13 +1314,8 @@ static void mix_interrupt_randomness(struct work_struct *work)
fast_pool->last = jiffies;
local_irq_enable();

- if (unlikely(crng_init == 0)) {
- crng_pre_init_inject(pool, sizeof(pool), true);
- mix_pool_bytes(pool, sizeof(pool));
- } else {
- mix_pool_bytes(pool, sizeof(pool));
- credit_entropy_bits(1);
- }
+ mix_pool_bytes(pool, sizeof(pool));
+ credit_entropy_bits(1);

memzero_explicit(pool, sizeof(pool));
}
--
2.35.1



2022-05-05 01:11:30

by Jason A. Donenfeld

[permalink] [raw]
Subject: Re: [PATCH v3] random: use first 128 bits of input as fast init

Thanks! Should be fixed by
https://lore.kernel.org/lkml/20220505002910.IAcnpEOE2zR2ibERl4Lh3Y_PMmtb0Rf43lVevgztJiM@z/
which is actually an earlier commit but unearthed by the one you
bisected to.

2022-05-05 11:53:56

by Nathan Chancellor

[permalink] [raw]
Subject: Re: [PATCH v3] random: use first 128 bits of input as fast init

Hi Jason,

On Wed, May 04, 2022 at 01:16:44PM +0200, Jason A. Donenfeld wrote:
> Before, the first 64 bytes of input, regardless of how entropic it was,
> would be used to mutate the crng base key directly, and none of those
> bytes would be credited as having entropy. Then 256 bits of credited
> input would be accumulated, and only then would the rng transition from
> the earlier "fast init" phase into being actually initialized.
>
> The thinking was that by mixing and matching fast init and real init, an
> attacker who compromised the fast init state, considered easy to do
> given how little entropy might be in those first 64 bytes, would then be
> able to bruteforce bits from the actual initialization. By keeping these
> separate, bruteforcing became impossible.
>
> However, by not crediting potentially creditable bits from those first 64
> bytes of input, we delay initialization, and actually make the problem
> worse, because it means the user is drawing worse random numbers for a
> longer period of time.
>
> Instead, we can take the first 128 bits as fast init, and allow them to
> be credited, and then hold off on the next 128 bits until they've
> accumulated. This is still a wide enough margin to prevent bruteforcing
> the rng state, while still initializing much faster.
>
> Then, rather than trying to piecemeal inject into the base crng key at
> various points, instead just extract from the pool when we need it, for
> the crng_init==0 phase. Performance may even be better for the various
> inputs here, since there are likely more calls to mix_pool_bytes() then
> there are to get_random_bytes() during this phase of system execution.
>
> Since the preinit injection code is gone, bootloader randomness can then
> do something significantly more straight forward, removing the weird
> system_wq hack in hwgenerator randomness.
>
> Cc: Theodore Ts'o <[email protected]>
> Cc: Dominik Brodowski <[email protected]>
> Signed-off-by: Jason A. Donenfeld <[email protected]>

Apologies if this has already been reported or noticed, I did not see
anything on lore.kernel.org.

I bisected a QEMU boot failure with ARCH=arm aspeed_g5_defconfig to this
change in -next, initially noticed by our continuous integration:

https://github.com/ClangBuiltLinux/continuous-integration2/runs/6292038704?check_suite_focus=true

# bad: [bb6ee10133faa3c4742ab8343b5e488cdb29445e] Add linux-next specific files for 20220504
# good: [ef8e4d3c2ab1f47f63b6c7e578266b7e5cc9cd1b] Merge tag 'hwmon-for-v5.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
git bisect start 'bb6ee10133faa3c4742ab8343b5e488cdb29445e' 'ef8e4d3c2ab1f47f63b6c7e578266b7e5cc9cd1b'
# good: [fc346f9c7d4404797b1e22e70b10367cfcb65973] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
git bisect good fc346f9c7d4404797b1e22e70b10367cfcb65973
# good: [31ae19714d8019a433c632c8bbc55a8ed51b4dc0] Merge branch 'timers/drivers/next' of git://git.linaro.org/people/daniel.lezcano/linux.git
git bisect good 31ae19714d8019a433c632c8bbc55a8ed51b4dc0
# good: [0a1f00e86626c074bd8b0b4fc4e425ed2cdf182e] Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine.git
git bisect good 0a1f00e86626c074bd8b0b4fc4e425ed2cdf182e
# bad: [64208983cb783cf86d310b713dca86cc48a92e21] Merge branch 'rust-next' of https://github.com/Rust-for-Linux/linux.git
git bisect bad 64208983cb783cf86d310b713dca86cc48a92e21
# good: [c5fab14b25743e24900bbc46c1d674ae085556a3] Merge branch 'renesas-pinctrl' of git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-drivers.git
git bisect good c5fab14b25743e24900bbc46c1d674ae085556a3
# good: [6657c54d345821bec0ababf5566117074e9365e8] Merge branch 'hyperv-next' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git
git bisect good 6657c54d345821bec0ababf5566117074e9365e8
# good: [49ff8dd832746466d8ee736fd2ac17468d96c2c8] Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git
git bisect good 49ff8dd832746466d8ee736fd2ac17468d96c2c8
# good: [0c0f471ec568bb005ea8ca2660df03841f63b30d] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git
git bisect good 0c0f471ec568bb005ea8ca2660df03841f63b30d
# bad: [fee7def4f22dadc01fbb89fc5bf69986b4f7d7a7] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git
git bisect bad fee7def4f22dadc01fbb89fc5bf69986b4f7d7a7
# good: [675ba39e73c5cd58fca0a7efc39c22efc557aadd] nios2: use fallback for random_get_entropy() instead of zero
git bisect good 675ba39e73c5cd58fca0a7efc39c22efc557aadd
# good: [2334ae282252f27ccd44a65be6e36d251936175f] random: vary jitter iterations based on cycle counter speed
git bisect good 2334ae282252f27ccd44a65be6e36d251936175f
# bad: [fbc14042a7fa7820dd7ffdd27d97064edd09d2ea] random: use first 128 bits of input as fast init
git bisect bad fbc14042a7fa7820dd7ffdd27d97064edd09d2ea
# good: [99e2ecc9855eab30603275aeaa34ac7a7ff919b2] random: do not use batches when !crng_ready()
git bisect good 99e2ecc9855eab30603275aeaa34ac7a7ff919b2
# first bad commit: [fbc14042a7fa7820dd7ffdd27d97064edd09d2ea] random: use first 128 bits of input as fast init

It can be reproduced with:

$ make -skj"$(nproc)" ARCH=arm CROSS_COMPILE=arm-linux-gnueabi- mrproper aspeed_g5_defconfig all

$ qemu-system-arm \
-initrd ... \
-machine romulus-bmc \
-no-reboot \
-dtb arch/arm/boot/dts/aspeed-bmc-opp-romulus.dtb \
-display none \
-kernel arch/arm/boot/zImage \
-m 512m \
-nodefaults \
-serial mon:stdio
...
[ 0.000000] random: get_random_u32 called from __kmem_cache_create+0x24/0x46c with crng_init=0
[ 0.000000] 8<--- cut here ---
[ 0.000000] Unable to handle kernel NULL pointer dereference at virtual address 00000000
[ 0.000000] [00000000] *pgd=00000000
[ 0.000000] Internal error: Oops: 5 [#1] SMP ARM
[ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.18.0-rc5-00021-gfbc14042a7fa #1
[ 0.000000] Hardware name: Generic DT based system
[ 0.000000] PC is at random_get_entropy_fallback+0x14/0x24
[ 0.000000] LR is at extract_entropy.constprop.0+0x4c/0x1bc
[ 0.000000] pc : [<8019ea0c>] lr : [<8055f620>] psr: a00001d3
[ 0.000000] sp : 80e01db8 ip : 00000000 fp : 00000000
[ 0.000000] r10: 80e0bb80 r9 : 80eeec94 r8 : 80e01f20
[ 0.000000] r7 : 80e01e84 r6 : 80e01dd0 r5 : 80e01df0 r4 : 80e01dd0
[ 0.000000] r3 : 80ede0c0 r2 : 80eeec94 r1 : 00000000 r0 : 00000000
[ 0.000000] Flags: NzCv IRQs off FIQs off Mode SVC_32 ISA ARM Segment none
[ 0.000000] Control: 00c5387d Table: 80004008 DAC: 00000051
[ 0.000000] Register r0 information: NULL pointer
[ 0.000000] Register r1 information: NULL pointer
[ 0.000000] Register r2 information: non-slab/vmalloc memory
[ 0.000000] Register r3 information: non-slab/vmalloc memory
[ 0.000000] Register r4 information: non-slab/vmalloc memory
[ 0.000000] Register r5 information: non-slab/vmalloc memory
[ 0.000000] Register r6 information: non-slab/vmalloc memory
[ 0.000000] Register r7 information: non-slab/vmalloc memory
[ 0.000000] Register r8 information: non-slab/vmalloc memory
[ 0.000000] Register r9 information: non-slab/vmalloc memory
[ 0.000000] Register r10 information: non-slab/vmalloc memory
[ 0.000000] Register r11 information: NULL pointer
[ 0.000000] Register r12 information: NULL pointer
[ 0.000000] Process swapper (pid: 0, stack limit = 0x(ptrval))
[ 0.000000] Stack: (0x80e01db8 to 0x80e02000)
[ 0.000000] 1da0: 80e01dd0 8055f620
[ 0.000000] 1dc0: 80a98ac8 80eeec6c ffffff10 ffff0a01 80e01e68 80ece060 00000056 80e0bb80
[ 0.000000] 1de0: 3ffff822 ffffff00 ffff0a01 00000000 80e0bb80 00000056 00000000 00000000
[ 0.000000] 1e00: 80ece060 80e0bb80 00000000 80e138d8 80ece060 00000052 80172a4c 80173748
[ 0.000000] 1e20: 80e01e74 80173748 00000052 9e9cd435 00000000 00000000 80b711d4 00000004
[ 0.000000] 1e40: 800001d3 80eeec64 80e01e84 80e01f20 80eeec94 80b1d474 00000000 8055fc3c
[ 0.000000] 1e60: 00000004 80e01f24 80e01e84 00000004 80e0bb80 80d3e0b4 80b1d474 8055fca0
[ 0.000000] 1e80: 0000005c 61723501 006f646e 00000000 00000000 00000000 80e05d60 fffffffe
[ 0.000000] 1ea0: 00000000 80b711d4 00000000 80b1d474 00000000 8017141c 80e01f14 00000000
[ 0.000000] 1ec0: 00000000 00000000 00000000 80e0bb80 801ac33c 801fec4c 00000052 801fecbc
[ 0.000000] 1ee0: 00000052 8016e834 00000052 80171c78 80e01f14 808e4360 80e0bb80 808f4438
[ 0.000000] 1f00: 80e01f14 00000000 80d3e024 80eeec64 80e0bb80 80ee2914 80e57bf8 80560e44
[ 0.000000] 1f20: 80d3e024 00000000 80ee2914 80d3e024 80d3e024 80b3e538 80ee2914 802bfa98
[ 0.000000] 1f40: 00000010 00000020 80d3e024 80b3e538 80ee2914 80e57bf8 80d3e0b4 80b1d474
[ 0.000000] 1f60: 00000000 80d13370 00000000 80ee7e38 80d3e024 80d16578 00000000 00000000
[ 0.000000] 1f80: 00000000 80ec9000 80ec9000 80ec9000 9edffa00 00c0387d 80e05cc0 ffffffff
[ 0.000000] 1fa0: 00000000 80d0115c ffffffff ffffffff 00000000 80d00768 00000000 80e0bb80
[ 0.000000] 1fc0: 00000000 80d39a5c 00000000 00000000 00000000 80d004bc 00000051 00c0387d
[ 0.000000] 1fe0: ffffffff 88392000 410fb767 00c5387d 00000000 00000000 00000000 00000000
[ 0.000000] random_get_entropy_fallback from extract_entropy.constprop.0+0x4c/0x1bc
[ 0.000000] extract_entropy.constprop.0 from crng_make_state+0x184/0x1a4
[ 0.000000] crng_make_state from _get_random_bytes.part.0+0x44/0xe8
[ 0.000000] _get_random_bytes.part.0 from get_random_u32+0xc8/0xe0
[ 0.000000] get_random_u32 from __kmem_cache_create+0x24/0x46c
[ 0.000000] __kmem_cache_create from create_boot_cache+0x94/0xbc
[ 0.000000] create_boot_cache from kmem_cache_init+0x68/0x178
[ 0.000000] kmem_cache_init from start_kernel+0x3b0/0x69c
[ 0.000000] start_kernel from 0x0
[ 0.000000] Code: e52de004 ebfdbebb e59f300c e5930148 (e5903000)
[ 0.000000] ---[ end trace 0000000000000000 ]---
...

Cheers,
Nathan

2022-05-13 14:25:30

by Jason A. Donenfeld

[permalink] [raw]
Subject: Re: [PATCH v3] random: use first 128 bits of input as fast init

Hi Dominik,

On Fri, May 13, 2022 at 08:22:36AM +0200, Dominik Brodowski wrote:
> Am Wed, May 04, 2022 at 01:16:44PM +0200 schrieb Jason A. Donenfeld:
> > Before, the first 64 bytes of input, regardless of how entropic it was,
> > would be used to mutate the crng base key directly, and none of those
> > bytes would be credited as having entropy. Then 256 bits of credited
> > input would be accumulated, and only then would the rng transition from
> > the earlier "fast init" phase into being actually initialized.
> >
> > The thinking was that by mixing and matching fast init and real init, an
> > attacker who compromised the fast init state, considered easy to do
> > given how little entropy might be in those first 64 bytes, would then be
> > able to bruteforce bits from the actual initialization. By keeping these
> > separate, bruteforcing became impossible.
> >
> > However, by not crediting potentially creditable bits from those first 64
> > bytes of input, we delay initialization, and actually make the problem
> > worse, because it means the user is drawing worse random numbers for a
> > longer period of time.
> >
> > Instead, we can take the first 128 bits as fast init, and allow them to
> > be credited, and then hold off on the next 128 bits until they've
> > accumulated. This is still a wide enough margin to prevent bruteforcing
> > the rng state, while still initializing much faster.
> >
> > Then, rather than trying to piecemeal inject into the base crng key at
> > various points, instead just extract from the pool when we need it, for
> > the crng_init==0 phase. Performance may even be better for the various
> > inputs here, since there are likely more calls to mix_pool_bytes() then
> > there are to get_random_bytes() during this phase of system execution.
>
> Have you evaluated this closer, also for systems where it takes ages to
> reach crng_init = 1? And might it make sense to only call extract_entropy()
> if there has been new input between two calls to get_random_bytes()?

Yes. On those systems, the extra calls to extract_entropy() are actually
helping the otherwise abysmal state, because they're adding in some new
cycle counter values on every call. Performance-wise, it's not really
that bad. Actually, by *far* the most expensive thing that
extract_entropy() does is RDSEED/RDRAND, but systems that have those
instructions don't stay in crng_init==CRNG_EARLY for very long anyway.
So all and all, it works out quite nicely.

Jason

2022-05-13 15:03:05

by Dominik Brodowski

[permalink] [raw]
Subject: Re: [PATCH v3] random: use first 128 bits of input as fast init

Am Wed, May 04, 2022 at 01:16:44PM +0200 schrieb Jason A. Donenfeld:
> Before, the first 64 bytes of input, regardless of how entropic it was,
> would be used to mutate the crng base key directly, and none of those
> bytes would be credited as having entropy. Then 256 bits of credited
> input would be accumulated, and only then would the rng transition from
> the earlier "fast init" phase into being actually initialized.
>
> The thinking was that by mixing and matching fast init and real init, an
> attacker who compromised the fast init state, considered easy to do
> given how little entropy might be in those first 64 bytes, would then be
> able to bruteforce bits from the actual initialization. By keeping these
> separate, bruteforcing became impossible.
>
> However, by not crediting potentially creditable bits from those first 64
> bytes of input, we delay initialization, and actually make the problem
> worse, because it means the user is drawing worse random numbers for a
> longer period of time.
>
> Instead, we can take the first 128 bits as fast init, and allow them to
> be credited, and then hold off on the next 128 bits until they've
> accumulated. This is still a wide enough margin to prevent bruteforcing
> the rng state, while still initializing much faster.
>
> Then, rather than trying to piecemeal inject into the base crng key at
> various points, instead just extract from the pool when we need it, for
> the crng_init==0 phase. Performance may even be better for the various
> inputs here, since there are likely more calls to mix_pool_bytes() then
> there are to get_random_bytes() during this phase of system execution.

Have you evaluated this closer, also for systems where it takes ages to
reach crng_init = 1? And might it make sense to only call extract_entropy()
if there has been new input between two calls to get_random_bytes()?

Thanks,
Dominik