2012-08-13 17:26:23

by H. Peter Anvin

[permalink] [raw]
Subject: [PATCH RFC] random: Account for entropy loss due to overwrites

From: "H. Peter Anvin" <[email protected]>

When we write entropy into a non-empty pool, we currently don't
account at all for the fact that we will probabilistically overwrite
some of the entropy in that pool. This means that unless the pool is
fully empty, we are currently *guaranteed* to overestimate the amount
of entropy in the pool!

Assuming Shannon entropy with zero correlations we end up with an
exponentially decaying value of new entropy added:

entropy <- entropy + (pool_size - entropy) *
(1 - exp(-add_entropy/pool_size))

However, calculations involving fractional exponentials are not
practical in the kernel, so apply a piecewise linearization:

For add_entropy <= pool_size then

(1 - exp(-add_entropy/pool_size)) >= (add_entropy/pool_size)*0.632...

... so we can approximate the exponential with
add_entropy/(pool_size*2) and still be on the
safe side by adding at most one pool_size at a time.

In order for the loop not to take arbitrary amounts of time if a bad
ioctl is received, terminate if we are within one bit of full. This
way the loop is guaranteed to terminate after no more than
log2(poolsize) iterations, no matter what the input value is. The
vast majority of the time the loop will be executed exactly once.

The piecewise linearization is very conservative, approaching 1/2 of
the usable input value for small inputs. However, our entropy
estimation is pretty weak at best, especially for small values; we
have no handle on correlation; and the Shannon entropy measure (Rényi
entropy of order 1) is not the correct one to use in the first place,
but rather the correct entropy measure is the min-entropy, the Rényi
entropy of infinite order.

As such, this conservatism seems more than justified. Note, however,
that attempting to add one bit of entropy will never succeed; nor will
two bits unless the pool is completely empty. These roundoff
artifacts could be improved by using fixed-point arithmetic and adding
some number of fractional entropy bits.

Signed-off-by: H. Peter Anvin <[email protected]>
Cc: DJ Johnston <[email protected]>
Cc: <[email protected]>
---
drivers/char/random.c | 104 ++++++++++++++++++++++++++++++++++----------------
1 file changed, 72 insertions(+), 32 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index b86eae9..5d870ad 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -272,10 +272,12 @@
/*
* Configuration information
*/
-#define INPUT_POOL_WORDS 128
-#define OUTPUT_POOL_WORDS 32
-#define SEC_XFER_SIZE 512
-#define EXTRACT_SIZE 10
+#define INPUT_POOL_SHIFT 12
+#define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5))
+#define OUTPUT_POOL_SHIFT 10
+#define OUTPUT_POOL_WORDS (1 << (OUTPUT_POOL_SHIFT-5))
+#define SEC_XFER_SIZE 512
+#define EXTRACT_SIZE 10

#define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long))

@@ -309,40 +311,41 @@ static DEFINE_PER_CPU(int, trickle_count);
* scaled squared error sum) except for the last tap, which is 1 to
* get the twisting happening as fast as possible.
*/
-static struct poolinfo {
+static const struct poolinfo {
+ int poolshift; /* log2(POOLBITS) */
int poolwords;
int tap1, tap2, tap3, tap4, tap5;
} poolinfo_table[] = {
- /* x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 -- 105 */
- { 128, 103, 76, 51, 25, 1 },
- /* x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 -- 15 */
- { 32, 26, 20, 14, 7, 1 },
+ /* x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 -- 105 */
+ { 12, 128, 103, 76, 51, 25, 1 },
+ /* x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 -- 15 */
+ { 10, 32, 26, 20, 14, 7, 1 },
#if 0
- /* x^2048 + x^1638 + x^1231 + x^819 + x^411 + x + 1 -- 115 */
- { 2048, 1638, 1231, 819, 411, 1 },
+ /* x^2048 + x^1638 + x^1231 + x^819 + x^411 + x + 1 -- 115 */
+ { 16, 2048, 1638, 1231, 819, 411, 1 },

- /* x^1024 + x^817 + x^615 + x^412 + x^204 + x + 1 -- 290 */
- { 1024, 817, 615, 412, 204, 1 },
+ /* x^1024 + x^817 + x^615 + x^412 + x^204 + x + 1 -- 290 */
+ { 15, 1024, 817, 615, 412, 204, 1 },

- /* x^1024 + x^819 + x^616 + x^410 + x^207 + x^2 + 1 -- 115 */
- { 1024, 819, 616, 410, 207, 2 },
+ /* x^1024 + x^819 + x^616 + x^410 + x^207 + x^2 + 1 -- 115 */
+ { 15, 1024, 819, 616, 410, 207, 2 },

- /* x^512 + x^411 + x^308 + x^208 + x^104 + x + 1 -- 225 */
- { 512, 411, 308, 208, 104, 1 },
+ /* x^512 + x^411 + x^308 + x^208 + x^104 + x + 1 -- 225 */
+ { 14, 512, 411, 308, 208, 104, 1 },

- /* x^512 + x^409 + x^307 + x^206 + x^102 + x^2 + 1 -- 95 */
- { 512, 409, 307, 206, 102, 2 },
- /* x^512 + x^409 + x^309 + x^205 + x^103 + x^2 + 1 -- 95 */
- { 512, 409, 309, 205, 103, 2 },
+ /* x^512 + x^409 + x^307 + x^206 + x^102 + x^2 + 1 -- 95 */
+ { 14, 512, 409, 307, 206, 102, 2 },
+ /* x^512 + x^409 + x^309 + x^205 + x^103 + x^2 + 1 -- 95 */
+ { 14, 512, 409, 309, 205, 103, 2 },

- /* x^256 + x^205 + x^155 + x^101 + x^52 + x + 1 -- 125 */
- { 256, 205, 155, 101, 52, 1 },
+ /* x^256 + x^205 + x^155 + x^101 + x^52 + x + 1 -- 125 */
+ { 13, 256, 205, 155, 101, 52, 1 },

- /* x^128 + x^103 + x^78 + x^51 + x^27 + x^2 + 1 -- 70 */
- { 128, 103, 78, 51, 27, 2 },
+ /* x^128 + x^103 + x^78 + x^51 + x^27 + x^2 + 1 -- 70 */
+ { 12, 128, 103, 78, 51, 27, 2 },

- /* x^64 + x^52 + x^39 + x^26 + x^14 + x + 1 -- 15 */
- { 64, 52, 39, 26, 14, 1 },
+ /* x^64 + x^52 + x^39 + x^26 + x^14 + x + 1 -- 15 */
+ { 11, 64, 52, 39, 26, 14, 1 },
#endif
};

@@ -424,7 +427,7 @@ module_param(debug, bool, 0644);
struct entropy_store;
struct entropy_store {
/* read-only data: */
- struct poolinfo *poolinfo;
+ const struct poolinfo *poolinfo;
__u32 *pool;
const char *name;
struct entropy_store *pull;
@@ -585,11 +588,13 @@ static void fast_mix(struct fast_pool *f, const void *in, int nbytes)
}

/*
- * Credit (or debit) the entropy store with n bits of entropy
+ * Credit (or debit) the entropy store with n bits of entropy.
+ * The nbits value is given in units of 2^-16 bits, i.e. 0x10000 == 1 bit.
*/
static void credit_entropy_bits(struct entropy_store *r, int nbits)
{
int entropy_count, orig;
+ int pool_size = r->poolinfo->POOLBITS;

if (!nbits)
return;
@@ -597,13 +602,48 @@ static void credit_entropy_bits(struct entropy_store *r, int nbits)
DEBUG_ENT("added %d entropy credits to %s\n", nbits, r->name);
retry:
entropy_count = orig = ACCESS_ONCE(r->entropy_count);
- entropy_count += nbits;
+ if (nbits < 0) {
+ /* Debit. */
+ entropy_count += nbits;
+ } else {
+ /*
+ * Credit: we have to account for the possibility of
+ * overwriting already present entropy. Even in the
+ * ideal case of pure Shannon entropy, new contributions
+ * approach the full value asymptotically:
+ *
+ * entropy <- entropy + (pool_size - entropy) *
+ * (1 - exp(-add_entropy/pool_size))
+ *
+ * For add_entropy <= pool_size then
+ * (1 - exp(-add_entropy/pool_size)) >=
+ * (add_entropy/pool_size)*0.632...
+ * so we can approximate the exponential with
+ * add_entropy/(pool_size*2) and still be on the
+ * safe side by adding at most one pool_size at a time.
+ *
+ * The use of pool_size-1 in the while statement is to
+ * prevent rounding artifacts from making the loop
+ * arbitrarily long; this limits the loop to poolshift
+ * turns no matter how large nbits is.
+ */
+ int pnbits = nbits;
+ int s = r->poolinfo->poolshift + 1;
+
+ do {
+ int anbits = min(pnbits, pool_size);
+
+ entropy_count +=
+ ((pool_size - entropy_count)*anbits) >> s;
+ pnbits -= anbits;
+ } while (unlikely(entropy_count < pool_size-1 && pnbits));
+ }

if (entropy_count < 0) {
DEBUG_ENT("negative entropy/overflow\n");
entropy_count = 0;
- } else if (entropy_count > r->poolinfo->POOLBITS)
- entropy_count = r->poolinfo->POOLBITS;
+ } else if (entropy_count > pool_size)
+ entropy_count = pool_size;
if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig)
goto retry;

--
1.7.11.2


2012-08-15 19:31:20

by Matt Mackall

[permalink] [raw]
Subject: Re: [PATCH RFC] random: Account for entropy loss due to overwrites

On Mon, 2012-08-13 at 10:26 -0700, H. Peter Anvin wrote:
> From: "H. Peter Anvin" <[email protected]>
>
> When we write entropy into a non-empty pool, we currently don't
> account at all for the fact that we will probabilistically overwrite
> some of the entropy in that pool.

Technically, no, nothing is overwritten. The key fact is that the mixing
function is -reversible-. Thus, even if you mix in known data, you can't
learn anything about the state and thus can't destroy any of the
existing entropy.

But you are correct, mixing new actual entropy is not purely additive
(with saturation). For that to happen, we'd need an input mixing
function with perfect maximal cascading. Instead we effectively cascade
across somewhere between 6 and 64 bits. So the truth lies somewhere
between linear and your exponential estimate (which would be the case
for mixing a single bit into the pool with XOR), but much closer to
linear due to combinatoric expansion.

On the other hand, I don't think this sort of thing matters at all.
There is so much more fundamentally wrong with even trying to do entropy
accounting in the first place that these sorts of details don't even
matter. Instead we should stop fooling ourselves and just drop the
pretense of accounting entirely. Now that we've got a much richer set of
inputs, I think the time is ripe... but of course, I'm no longer the
maintainer.

--
Mathematics is the supreme nostalgia of our time.

2012-08-15 20:37:31

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH RFC] random: Account for entropy loss due to overwrites

On 08/15/2012 12:30 PM, Matt Mackall wrote:
> On Mon, 2012-08-13 at 10:26 -0700, H. Peter Anvin wrote:
>> From: "H. Peter Anvin" <[email protected]>
>>
>> When we write entropy into a non-empty pool, we currently don't
>> account at all for the fact that we will probabilistically overwrite
>> some of the entropy in that pool.
>
> Technically, no, nothing is overwritten. The key fact is that the mixing
> function is -reversible-. Thus, even if you mix in known data, you can't
> learn anything about the state and thus can't destroy any of the
> existing entropy.
>
> But you are correct, mixing new actual entropy is not purely additive
> (with saturation). For that to happen, we'd need an input mixing
> function with perfect maximal cascading. Instead we effectively cascade
> across somewhere between 6 and 64 bits. So the truth lies somewhere
> between linear and your exponential estimate (which would be the case
> for mixing a single bit into the pool with XOR), but much closer to
> linear due to combinatoric expansion.
>

I think you have it backwards; if the input was a FIFO, with no mixing
at all, and no reuse, the linear estimate would be correct. The mixing
into an already-existent and potentially-observed pool is what causes
the exponential estimate to apply... although it is assuming perfect
mixing. However, I believe it is still correct in the aggregate.

> On the other hand, I don't think this sort of thing matters at all.
> There is so much more fundamentally wrong with even trying to do entropy
> accounting in the first place that these sorts of details don't even
> matter. Instead we should stop fooling ourselves and just drop the
> pretense of accounting entirely. Now that we've got a much richer set of
> inputs, I think the time is ripe... but of course, I'm no longer the
> maintainer.

If we're going to fundamentally change the structure perhaps we should
actually take the suggestions long offered from the cryptographic
community, and look at structures like Fortuna.

-hpa

2012-10-16 04:09:44

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH RFC] random: Account for entropy loss due to overwrites

On Sat, Sep 29, 2012 at 12:47:04PM -0700, H. Peter Anvin wrote:
> >-static struct poolinfo {
> >+static const struct poolinfo {
> >+ int poolshift; /* log2(POOLBITS) */
> > int poolwords;
> > int tap1, tap2, tap3, tap4, tap5;

Poolshift is duplicated information; it's just log2(poolwords) + 5
(since POOLBITS is poolwords*32).

Granted you don't want to recalculate it every single time you need to
use it, but perhaps it would be better to add poolshift to struct
entropy_store, and set it in init_std_data()?

- Ted

2012-10-16 04:45:28

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH RFC] random: Account for entropy loss due to overwrites

On 10/15/2012 09:08 PM, Theodore Ts'o wrote:
> On Sat, Sep 29, 2012 at 12:47:04PM -0700, H. Peter Anvin wrote:
>>> -static struct poolinfo {
>>> +static const struct poolinfo {
>>> + int poolshift; /* log2(POOLBITS) */
>>> int poolwords;
>>> int tap1, tap2, tap3, tap4, tap5;
>
> Poolshift is duplicated information; it's just log2(poolwords) + 5
> (since POOLBITS is poolwords*32).
>
> Granted you don't want to recalculate it every single time you need to
> use it, but perhaps it would be better to add poolshift to struct
> entropy_store, and set it in init_std_data()?
>

Or we could compute poolwords (and poolbits, and poolbytes) from it,
since shifts generally are cheap. I don't strongly care, whatever your
preference is.

-hpa


2012-10-16 15:53:46

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH RFC] random: Account for entropy loss due to overwrites

On Mon, Oct 15, 2012 at 09:45:23PM -0700, H. Peter Anvin wrote:
>
> Or we could compute poolwords (and poolbits, and poolbytes) from it,
> since shifts generally are cheap. I don't strongly care, whatever your
> preference is.

We are already calculating poolbits from poolwords:

#define POOLBITS poolwords*32
#define POOLBYTES poolwords*4

So you'd basically be suggesting that we define

#define POOLWORDS (1 << (poolshift - 5))
#define POOLBYTES (1 << (poolshift - 3))
#define POOLBITS (1 << poolshift)

Yeah, that works; we don't use poolwords in that many places, and a
data dependent shift is cheap at least on x86 and arm (so probably all
modern platforms).

There was one aesthetic reason for using POOLWORDS, which was that
first term of the generating polynomial was the same as poolwords,
i.e:

/* x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 -- 105 */
{ 128, 103, 76, 51, 25, 1 },
/* x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 -- 15 */
{ 32, 26, 20, 14, 7, 1 },

If we change it to be:

/* x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 -- 105 */
{ 12, 103, 76, 51, 25, 1 },
/* x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 -- 15 */
{ 10, 26, 20, 14, 7, 1 },

It's a wee bit less obvious where the "12" and "10" are coming from. I
don't see an easy way to fix this, though, other than perhaps making
sure it's clear in the comments. Unfortunately we can't count on gcc
doing a built-in optimization for a log2 of a constant as far as I
know.... or can we?

Hmm, this does get optimized correctly at least with gcc 4.7:

#define shiftbits(words) ((int) __builtin_log2((double) (words)) + 5)

... and it looks like include/linux/log2.h already has a definition
for ilog2() which should definitely work for all versions of gcc, so
we could do this instead:

#define shiftbits(w) (ilog2((w)) + 5)

/* x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 -- 105 */
{ shiftbits(128), 103, 76, 51, 25, 1 },
/* x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 -- 15 */
{ shiftbits(32), 26, 20, 14, 7, 1 },

- Ted

2012-10-16 16:11:09

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH RFC] random: Account for entropy loss due to overwrites

On 10/16/2012 08:53 AM, Theodore Ts'o wrote:
>
> ... and it looks like include/linux/log2.h already has a definition
> for ilog2() which should definitely work for all versions of gcc, so
> we could do this instead:
>
> #define shiftbits(w) (ilog2((w)) + 5)
>
> /* x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 -- 105 */
> { shiftbits(128), 103, 76, 51, 25, 1 },
> /* x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 -- 15 */
> { shiftbits(32), 26, 20, 14, 7, 1 },
>

OK, works for me. I'll rev the patch later this week.

-hpa