2013-04-27 22:44:56

by H. Peter Anvin

[permalink] [raw]
Subject: [PATCH v3 0/3] random: Account for entropy loss due to overwrites

From: "H. Peter Anvin" <[email protected]>

When we write entropy into a non-empty pool, we currently don't
account at all for the fact that we will probabilistically overwrite
some of the entropy in that pool. This means that unless the pool is
fully empty, we are currently *guaranteed* to overestimate the amount
of entropy in the pool!

This version of the patchset avoids manually duplicating information
by using a macro. This removes *all* dynamic computation of derived
pool information and replaces them with static information: on just
about every architecture accessing pointer+offset is no more expensive
than just plain pointer, and this lets us get the information we
actually need from the start.

This version of the patchset adds handling of fractional bits, so that
we can continue to add a single bit of entropy without it being
rounded down to zero. This version has 3 bits of fraction, which
means that with a 4096-bit input pool the multiply still cannot
overflow 32 bits; if we want to add the capability of crediting
fractional bits of entropy, which may be useful in itself, then we
probably need more bits of fraction and would have to use a 64-bit
multiply and shift.


2013-04-27 22:44:58

by H. Peter Anvin

[permalink] [raw]
Subject: [PATCH 2/3] random: Allow fractional bits to be tracked

From: "H. Peter Anvin" <[email protected]>

Allow fractional bits of entropy to be tracked by scaling the entropy
counter (fixed point). This will be used in a subsequent patch that
accounts for entropy lost due to overwrites.

Signed-off-by: H. Peter Anvin <[email protected]>
Cc: <[email protected]>
---
drivers/char/random.c | 92 +++++++++++++++++++++++++++++++++++++--------------
1 file changed, 68 insertions(+), 24 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 106b9b2..5cc8e86 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -280,6 +280,14 @@
#define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long))

/*
+ * To allow fractional bits to be tracked, the following fields contain
+ * this many fractional bits:
+ *
+ * entropy_count, trickle_thresh
+ */
+#define ENTROPY_SHIFT 3
+
+/*
* The minimum number of bits of entropy before we wake up a read on
* /dev/random. Should be enough to do a significant reseed.
*/
@@ -296,8 +304,7 @@ static int random_write_wakeup_thresh = 128;
* When the input pool goes over trickle_thresh, start dropping most
* samples to avoid wasting CPU time and reduce lock contention.
*/
-
-static int trickle_thresh __read_mostly = INPUT_POOL_WORDS * 28;
+static const int trickle_thresh = (INPUT_POOL_WORDS * 28) << ENTROPY_SHIFT;

static DEFINE_PER_CPU(int, trickle_count);

@@ -311,8 +318,8 @@ static DEFINE_PER_CPU(int, trickle_count);
*/

static struct poolinfo {
- int poolbitshift, poolwords, poolbytes, poolbits;
-#define S(x) ilog2(x)+5, (x), (x)*4, (x)*32
+ int poolbitshift, poolwords, poolbytes, poolbits, poolfracbits;
+#define S(x) ilog2(x)+5, (x), (x)*4, (x)*32, (x) << (ENTROPY_SHIFT+5)
int tap1, tap2, tap3, tap4, tap5;
} poolinfo_table[] = {
/* x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 -- 105 */
@@ -581,7 +588,9 @@ static void fast_mix(struct fast_pool *f, const void *in, int nbytes)
}

/*
- * Credit (or debit) the entropy store with n bits of entropy
+ * Credit (or debit) the entropy store with n bits of entropy.
+ * Use credit_entropy_bits_safe() if the value comes from userspace
+ * or otherwise should be checked for extreme values.
*/
static void credit_entropy_bits(struct entropy_store *r, int nbits)
{
@@ -593,13 +602,13 @@ static void credit_entropy_bits(struct entropy_store *r, int nbits)
DEBUG_ENT("added %d entropy credits to %s\n", nbits, r->name);
retry:
entropy_count = orig = ACCESS_ONCE(r->entropy_count);
- entropy_count += nbits;
+ entropy_count += nbits << ENTROPY_SHIFT;

if (entropy_count < 0) {
DEBUG_ENT("negative entropy/overflow\n");
entropy_count = 0;
- } else if (entropy_count > r->poolinfo->poolbits)
- entropy_count = r->poolinfo->poolbits;
+ } else if (entropy_count > r->poolinfo->poolfracbits)
+ entropy_count = r->poolinfo->poolfracbits;
if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig)
goto retry;

@@ -613,12 +622,24 @@ retry:
r->entropy_total, _RET_IP_);

/* should we wake readers? */
- if (r == &input_pool && entropy_count >= random_read_wakeup_thresh) {
+ if (r == &input_pool &&
+ (entropy_count >> ENTROPY_SHIFT) >= random_read_wakeup_thresh) {
wake_up_interruptible(&random_read_wait);
kill_fasync(&fasync, SIGIO, POLL_IN);
}
}

+static void credit_entropy_bits_safe(struct entropy_store *r, int nbits)
+{
+ const int nbits_max = (int)(~0U >> (ENTROPY_SHIFT + 1));
+
+ /* Cap the value to avoid overflows */
+ nbits = min(nbits, nbits_max);
+ nbits = max(nbits, -nbits_max);
+
+ credit_entropy_bits(r, nbits);
+}
+
/*********************************************************************
*
* Entropy input management
@@ -813,8 +834,9 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
{
__u32 tmp[OUTPUT_POOL_WORDS];

- if (r->pull && r->entropy_count < nbytes * 8 &&
- r->entropy_count < r->poolinfo->poolbits) {
+ if (r->pull &&
+ r->entropy_count < (nbytes << (ENTROPY_SHIFT + 3)) &&
+ r->entropy_count < r->poolinfo->poolfracbits) {
/* If we're limited, always leave two wakeup worth's BITS */
int rsvd = r->limit ? 0 : random_read_wakeup_thresh/4;
int bytes = nbytes;
@@ -826,7 +848,8 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes)

DEBUG_ENT("going to reseed %s with %d bits "
"(%zu of %d requested)\n",
- r->name, bytes * 8, nbytes * 8, r->entropy_count);
+ r->name, bytes * 8, nbytes * 8,
+ r->entropy_count >> ENTROPY_SHIFT);

bytes = extract_entropy(r->pull, tmp, bytes,
random_read_wakeup_thresh / 8, rsvd);
@@ -852,28 +875,31 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min,
{
unsigned long flags;
int wakeup_write = 0;
+ int have_bytes;

/* Hold lock while accounting */
spin_lock_irqsave(&r->lock, flags);

- BUG_ON(r->entropy_count > r->poolinfo->poolbits);
+ BUG_ON(r->entropy_count > r->poolinfo->poolfracbits);
DEBUG_ENT("trying to extract %zu bits from %s\n",
nbytes * 8, r->name);

/* Can we pull enough? */
- if (r->entropy_count / 8 < min + reserved) {
+ have_bytes = r->entropy_count >> (ENTROPY_SHIFT + 3);
+ if (have_bytes < min + reserved) {
nbytes = 0;
} else {
/* If limited, never pull more than available */
- if (r->limit && nbytes + reserved >= r->entropy_count / 8)
- nbytes = r->entropy_count/8 - reserved;
+ if (r->limit && nbytes + reserved >= have_bytes)
+ nbytes = have_bytes - reserved;

- if (r->entropy_count / 8 >= nbytes + reserved)
- r->entropy_count -= nbytes*8;
+ if (have_bytes >= nbytes + reserved)
+ r->entropy_count -= nbytes << (ENTROPY_SHIFT + 3);
else
- r->entropy_count = reserved;
+ r->entropy_count = reserved << (ENTROPY_SHIFT + 3);

- if (r->entropy_count < random_write_wakeup_thresh)
+ if ((r->entropy_count >> ENTROPY_SHIFT)
+ < random_write_wakeup_thresh)
wakeup_write = 1;
}

@@ -1269,7 +1295,8 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
switch (cmd) {
case RNDGETENTCNT:
/* inherently racy, no point locking */
- if (put_user(input_pool.entropy_count, p))
+ ent_count = input_pool.entropy_count >> ENTROPY_SHIFT;
+ if (put_user(ent_count, p))
return -EFAULT;
return 0;
case RNDADDTOENTCNT:
@@ -1277,7 +1304,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
return -EPERM;
if (get_user(ent_count, p))
return -EFAULT;
- credit_entropy_bits(&input_pool, ent_count);
+ credit_entropy_bits_safe(&input_pool, ent_count);
return 0;
case RNDADDENTROPY:
if (!capable(CAP_SYS_ADMIN))
@@ -1292,7 +1319,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
size);
if (retval < 0)
return retval;
- credit_entropy_bits(&input_pool, ent_count);
+ credit_entropy_bits_safe(&input_pool, ent_count);
return 0;
case RNDZAPENTCNT:
case RNDCLEARPOOL:
@@ -1399,6 +1426,23 @@ static int proc_do_uuid(ctl_table *table, int write,
return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}

+/*
+ * Return entropy available scaled to integral bits
+ */
+static int proc_do_entropy(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ ctl_table fake_table;
+ int entropy_count;
+
+ entropy_count = *(int *)table->data >> ENTROPY_SHIFT;
+
+ fake_table.data = &entropy_count;
+ fake_table.maxlen = sizeof(entropy_count);
+
+ return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
+}
+
static int sysctl_poolsize = INPUT_POOL_WORDS * 32;
extern ctl_table random_table[];
ctl_table random_table[] = {
@@ -1413,7 +1457,7 @@ ctl_table random_table[] = {
.procname = "entropy_avail",
.maxlen = sizeof(int),
.mode = 0444,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_do_entropy,
.data = &input_pool.entropy_count,
},
{
--
1.7.11.7

2013-04-27 22:44:59

by H. Peter Anvin

[permalink] [raw]
Subject: [PATCH 3/3] random: Account for entropy loss due to overwrites

From: "H. Peter Anvin" <[email protected]>

When we write entropy into a non-empty pool, we currently don't
account at all for the fact that we will probabilistically overwrite
some of the entropy in that pool. This means that unless the pool is
fully empty, we are currently *guaranteed* to overestimate the amount
of entropy in the pool!

Assuming Shannon entropy with zero correlations we end up with an
exponentally decaying value of new entropy added:

entropy <- entropy + (pool_size - entropy) *
(1 - exp(-add_entropy/pool_size))

However, calculations involving fractional exponentials are not
practical in the kernel, so apply a piecewise linearization:

For add_entropy <= pool_size/2 then

(1 - exp(-add_entropy/pool_size)) >= (add_entropy/pool_size)*0.7869...

... so we can approximate the exponential with
3/4*add_entropy/pool_size and still be on the
safe side by adding at most pool_size/2 at a time.

In order for the loop not to take arbitrary amounts of time if a bad
ioctl is received, terminate if we are within one bit of full. This
way the loop is guaranteed to terminate after no more than
log2(poolsize) iterations, no matter what the input value is. The
vast majority of the time the loop will be executed exactly once.

The piecewise linearization is very conservative, approaching 3/4 of
the usable input value for small inputs, however, our entropy
estimation is pretty weak at best, especially for small values; we
have no handle on correlation; and the Shannon entropy measure (Rényi
entropy of order 1) is not the correct one to use in the first place,
but rather the correct entropy measure is the min-entropy, the Rényi
entropy of infinite order.

As such, this conservatism seems more than justified.

This does introduce fractional bit values. I have left it to have 3
bits of fraction, so that with a pool of 2^12 bits the multiply in
credit_entropy_bits() can still fit into an int, as 2*(3+12) < 31. It
is definitely possible to allow for more fractional accounting, but
that multiply then would have to be turned into a 32*32 -> 64 multiply.

Signed-off-by: H. Peter Anvin <[email protected]>
Cc: DJ Johnston <[email protected]>
Cc: <[email protected]>
---
drivers/char/random.c | 60 ++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 52 insertions(+), 8 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 5cc8e86..afed01b1 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -272,10 +272,12 @@
/*
* Configuration information
*/
-#define INPUT_POOL_WORDS 128
-#define OUTPUT_POOL_WORDS 32
-#define SEC_XFER_SIZE 512
-#define EXTRACT_SIZE 10
+#define INPUT_POOL_SHIFT 12
+#define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5))
+#define OUTPUT_POOL_SHIFT 10
+#define OUTPUT_POOL_WORDS (1 << (OUTPUT_POOL_SHIFT-5))
+#define SEC_XFER_SIZE 512
+#define EXTRACT_SIZE 10

#define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long))

@@ -284,6 +286,9 @@
* this many fractional bits:
*
* entropy_count, trickle_thresh
+ *
+ * 2*(ENTROPY_SHIFT + log2(poolbits)) must <= 31, or the multiply in
+ * credit_entropy_bits() needs to be 64 bits wide.
*/
#define ENTROPY_SHIFT 3

@@ -426,7 +431,7 @@ module_param(debug, bool, 0644);
struct entropy_store;
struct entropy_store {
/* read-only data: */
- struct poolinfo *poolinfo;
+ const struct poolinfo *poolinfo;
__u32 *pool;
const char *name;
struct entropy_store *pull;
@@ -595,6 +600,8 @@ static void fast_mix(struct fast_pool *f, const void *in, int nbytes)
static void credit_entropy_bits(struct entropy_store *r, int nbits)
{
int entropy_count, orig;
+ const int pool_size = r->poolinfo->poolfracbits;
+ int nfrac = nbits << ENTROPY_SHIFT;

if (!nbits)
return;
@@ -602,13 +609,50 @@ static void credit_entropy_bits(struct entropy_store *r, int nbits)
DEBUG_ENT("added %d entropy credits to %s\n", nbits, r->name);
retry:
entropy_count = orig = ACCESS_ONCE(r->entropy_count);
- entropy_count += nbits << ENTROPY_SHIFT;
+ if (nfrac < 0) {
+ /* Debit */
+ entropy_count += nfrac;
+ } else {
+ /*
+ * Credit: we have to account for the possibility of
+ * overwriting already present entropy. Even in the
+ * ideal case of pure Shannon entropy, new contributions
+ * approach the full value asymptotically:
+ *
+ * entropy <- entropy + (pool_size - entropy) *
+ * (1 - exp(-add_entropy/pool_size))
+ *
+ * For add_entropy <= pool_size/2 then
+ * (1 - exp(-add_entropy/pool_size)) >=
+ * (add_entropy/pool_size)*0.7869...
+ * so we can approximate the exponential with
+ * 3/4*add_entropy/pool_size and still be on the
+ * safe side by adding at most pool_size/2 at a time.
+ *
+ * The use of pool_size-2 in the while statement is to
+ * prevent rounding artifacts from making the loop
+ * arbitrarily long; this limits the loop to log2(pool_size)*2
+ * turns no matter how large nbits is.
+ */
+ int pnfrac = nfrac;
+ const int s = r->poolinfo->poolbitshift + ENTROPY_SHIFT + 2;
+ /* The +2 corresponds to the /4 in the denominator */
+
+ do {
+ unsigned int anfrac = min(pnfrac, pool_size/2);
+ unsigned int add =
+ ((pool_size - entropy_count)*anfrac*3) >> s;
+
+ entropy_count += add;
+ pnfrac -= anfrac;
+ } while (unlikely(entropy_count < pool_size-2 && pnfrac));
+ }

if (entropy_count < 0) {
DEBUG_ENT("negative entropy/overflow\n");
entropy_count = 0;
- } else if (entropy_count > r->poolinfo->poolfracbits)
- entropy_count = r->poolinfo->poolfracbits;
+ } else if (entropy_count > pool_size)
+ entropy_count = pool_size;
if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig)
goto retry;

--
1.7.11.7

2013-04-27 22:45:55

by H. Peter Anvin

[permalink] [raw]
Subject: [PATCH 1/3] random: Statically compute poolbitshift, poolbytes, poolbits

From: "H. Peter Anvin" <[email protected]>

Use a macro to statically compute poolbitshift (will be used in a
subsequent patch), poolbytes, and poolbits. On virtually all
architectures the cost of a memory load with an offset is the same as
the one of a memory load.

It is still possible for this to generate worse code since the C
compiler doesn't know the fixed relationship between these fields, but
that is somewhat unlikely.

Signed-off-by: H. Peter Anvin <[email protected]>
Cc: <[email protected]>
---
drivers/char/random.c | 39 +++++++++++++++++++--------------------
1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 57d4b15..106b9b2 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -309,46 +309,45 @@ static DEFINE_PER_CPU(int, trickle_count);
* scaled squared error sum) except for the last tap, which is 1 to
* get the twisting happening as fast as possible.
*/
+
static struct poolinfo {
- int poolwords;
+ int poolbitshift, poolwords, poolbytes, poolbits;
+#define S(x) ilog2(x)+5, (x), (x)*4, (x)*32
int tap1, tap2, tap3, tap4, tap5;
} poolinfo_table[] = {
/* x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 -- 105 */
- { 128, 103, 76, 51, 25, 1 },
+ { S(128), 103, 76, 51, 25, 1 },
/* x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 -- 15 */
- { 32, 26, 20, 14, 7, 1 },
+ { S(32), 26, 20, 14, 7, 1 },
#if 0
/* x^2048 + x^1638 + x^1231 + x^819 + x^411 + x + 1 -- 115 */
- { 2048, 1638, 1231, 819, 411, 1 },
+ { S(2048), 1638, 1231, 819, 411, 1 },

/* x^1024 + x^817 + x^615 + x^412 + x^204 + x + 1 -- 290 */
- { 1024, 817, 615, 412, 204, 1 },
+ { S(1024), 817, 615, 412, 204, 1 },

/* x^1024 + x^819 + x^616 + x^410 + x^207 + x^2 + 1 -- 115 */
- { 1024, 819, 616, 410, 207, 2 },
+ { S(1024), 819, 616, 410, 207, 2 },

/* x^512 + x^411 + x^308 + x^208 + x^104 + x + 1 -- 225 */
- { 512, 411, 308, 208, 104, 1 },
+ { S(512), 411, 308, 208, 104, 1 },

/* x^512 + x^409 + x^307 + x^206 + x^102 + x^2 + 1 -- 95 */
- { 512, 409, 307, 206, 102, 2 },
+ { S(512), 409, 307, 206, 102, 2 },
/* x^512 + x^409 + x^309 + x^205 + x^103 + x^2 + 1 -- 95 */
- { 512, 409, 309, 205, 103, 2 },
+ { S(512), 409, 309, 205, 103, 2 },

/* x^256 + x^205 + x^155 + x^101 + x^52 + x + 1 -- 125 */
- { 256, 205, 155, 101, 52, 1 },
+ { S(256), 205, 155, 101, 52, 1 },

/* x^128 + x^103 + x^78 + x^51 + x^27 + x^2 + 1 -- 70 */
- { 128, 103, 78, 51, 27, 2 },
+ { S(128), 103, 78, 51, 27, 2 },

/* x^64 + x^52 + x^39 + x^26 + x^14 + x + 1 -- 15 */
- { 64, 52, 39, 26, 14, 1 },
+ { S(64), 52, 39, 26, 14, 1 },
#endif
};

-#define POOLBITS poolwords*32
-#define POOLBYTES poolwords*4
-
/*
* For the purposes of better mixing, we use the CRC-32 polynomial as
* well to make a twisted Generalized Feedback Shift Reigster
@@ -599,8 +598,8 @@ retry:
if (entropy_count < 0) {
DEBUG_ENT("negative entropy/overflow\n");
entropy_count = 0;
- } else if (entropy_count > r->poolinfo->POOLBITS)
- entropy_count = r->poolinfo->POOLBITS;
+ } else if (entropy_count > r->poolinfo->poolbits)
+ entropy_count = r->poolinfo->poolbits;
if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig)
goto retry;

@@ -815,7 +814,7 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
__u32 tmp[OUTPUT_POOL_WORDS];

if (r->pull && r->entropy_count < nbytes * 8 &&
- r->entropy_count < r->poolinfo->POOLBITS) {
+ r->entropy_count < r->poolinfo->poolbits) {
/* If we're limited, always leave two wakeup worth's BITS */
int rsvd = r->limit ? 0 : random_read_wakeup_thresh/4;
int bytes = nbytes;
@@ -857,7 +856,7 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min,
/* Hold lock while accounting */
spin_lock_irqsave(&r->lock, flags);

- BUG_ON(r->entropy_count > r->poolinfo->POOLBITS);
+ BUG_ON(r->entropy_count > r->poolinfo->poolbits);
DEBUG_ENT("trying to extract %zu bits from %s\n",
nbytes * 8, r->name);

@@ -1104,7 +1103,7 @@ static void init_std_data(struct entropy_store *r)
r->entropy_total = 0;
r->last_data_init = false;
mix_pool_bytes(r, &now, sizeof(now), NULL);
- for (i = r->poolinfo->POOLBYTES; i > 0; i -= sizeof(rv)) {
+ for (i = r->poolinfo->poolbytes; i > 0; i -= sizeof(rv)) {
if (!arch_get_random_long(&rv))
break;
mix_pool_bytes(r, &rv, sizeof(rv), NULL);
--
1.7.11.7

2013-04-28 15:04:51

by Jiri Kosina

[permalink] [raw]
Subject: Re: [PATCH 2/3] random: Allow fractional bits to be tracked

On Sat, 27 Apr 2013, H. Peter Anvin wrote:

> From: "H. Peter Anvin" <[email protected]>
>
> Allow fractional bits of entropy to be tracked by scaling the entropy
> counter (fixed point). This will be used in a subsequent patch that
> accounts for entropy lost due to overwrites.
>
> Signed-off-by: H. Peter Anvin <[email protected]>
> Cc: <[email protected]>
> ---
> drivers/char/random.c | 92 +++++++++++++++++++++++++++++++++++++--------------
> 1 file changed, 68 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/char/random.c b/drivers/char/random.c
> index 106b9b2..5cc8e86 100644
> --- a/drivers/char/random.c
> +++ b/drivers/char/random.c
[ ... snip ... ]
> @@ -852,28 +875,31 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min,
> {
> unsigned long flags;
> int wakeup_write = 0;
> + int have_bytes;
>
> /* Hold lock while accounting */
> spin_lock_irqsave(&r->lock, flags);
>
> - BUG_ON(r->entropy_count > r->poolinfo->poolbits);
> + BUG_ON(r->entropy_count > r->poolinfo->poolfracbits);
> DEBUG_ENT("trying to extract %zu bits from %s\n",
> nbytes * 8, r->name);
>
> /* Can we pull enough? */
> - if (r->entropy_count / 8 < min + reserved) {
> + have_bytes = r->entropy_count >> (ENTROPY_SHIFT + 3);
> + if (have_bytes < min + reserved) {
> nbytes = 0;
> } else {
> /* If limited, never pull more than available */
> - if (r->limit && nbytes + reserved >= r->entropy_count / 8)
> - nbytes = r->entropy_count/8 - reserved;
> + if (r->limit && nbytes + reserved >= have_bytes)
> + nbytes = have_bytes - reserved;
>
> - if (r->entropy_count / 8 >= nbytes + reserved)
> - r->entropy_count -= nbytes*8;
> + if (have_bytes >= nbytes + reserved)
> + r->entropy_count -= nbytes << (ENTROPY_SHIFT + 3);
> else
> - r->entropy_count = reserved;
> + r->entropy_count = reserved << (ENTROPY_SHIFT + 3);
>
> - if (r->entropy_count < random_write_wakeup_thresh)
> + if ((r->entropy_count >> ENTROPY_SHIFT)
> + < random_write_wakeup_thresh)
> wakeup_write = 1;
> }

Please note that the code you are patching here (as is in Linus' tree
currently) is buggy. I have sent a fix for this a week ago; it's now
sitting in -mm as
random-fix-accounting-race-condition-with-lockless-irq-entropy_count-update.patch
so you (or whoever is merging your patchset) are likely to get a conflict
here.

See my original submission at:

https://lkml.org/lkml/2013/4/20/33

The patch fixes the issue, and therefore should be merged as soon as
possible methinks ... but more generally, I am still wondering about the
actual improvement the lockless cmpxchg-based aproach is actually
providing. To quote my question from the original submission:

====
BTW, do we have some numbers that would prove how and why exactly is
902c098a3663 fixing real-time throughput by removing the spinlock?

Basically what we have now is producer and consumer over r->entropy_count
being serialized by retried cmpxchg loops, and I would think this could
actually make the whole situation less fair. The reason being that we are
basically spinning anyway in case of conflict on the critical section but
we are lacking the fairness comfort ticket-based spinlocks do provide
us ... hmm?
====

For completness, the patch below.




From: Jiri Kosina <[email protected]>
Subject: random: fix accounting race condition with lockless irq entropy_count update

Commit 902c098a3663 ("random: use lockless techniques in the interrupt
path") turned IRQ path from being spinlock protected into lockless
cmpxchg-retry update.

That commit removed r->lock serialization between crediting entropy bits
from IRQ context and accounting when extracting entropy on userspace read
path, but didn't turn the r->entropy_count reads/updates in account() to
use cmpxchg as well.

It has been observed, that under certain circumstances this leads to
read() on /dev/urandom to return 0 (EOF), as r->entropy_count gets
corrupted and becomes negative, which in turn results in propagating 0 all
the way from account() to the actual read() call.

Convert the accounting code to be the proper lockless counterpart of what
has been partially done by 902c098a3663.

Signed-off-by: Jiri Kosina <[email protected]>
Cc: Theodore Ts'o <[email protected]>
Cc: Greg KH <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---

drivers/char/random.c | 22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)

diff -puN drivers/char/random.c~random-fix-accounting-race-condition-with-lockless-irq-entropy_count-update drivers/char/random.c
--- a/drivers/char/random.c~random-fix-accounting-race-condition-with-lockless-irq-entropy_count-update
+++ a/drivers/char/random.c
@@ -865,16 +865,24 @@ static size_t account(struct entropy_sto
if (r->entropy_count / 8 < min + reserved) {
nbytes = 0;
} else {
+ int entropy_count, orig;
+retry:
+ entropy_count = orig = ACCESS_ONCE(r->entropy_count);
/* If limited, never pull more than available */
- if (r->limit && nbytes + reserved >= r->entropy_count / 8)
- nbytes = r->entropy_count/8 - reserved;
+ if (r->limit && nbytes + reserved >= entropy_count / 8)
+ nbytes = entropy_count/8 - reserved;

- if (r->entropy_count / 8 >= nbytes + reserved)
- r->entropy_count -= nbytes*8;
- else
- r->entropy_count = reserved;
+ if (entropy_count / 8 >= nbytes + reserved) {
+ entropy_count -= nbytes*8;
+ if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig)
+ goto retry;
+ } else {
+ entropy_count = reserved;
+ if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig)
+ goto retry;
+ }

- if (r->entropy_count < random_write_wakeup_thresh)
+ if (entropy_count < random_write_wakeup_thresh)
wakeup_write = 1;
}

--
Jiri Kosina
SUSE Labs