2003-08-02 04:24:56

by Oliver Xymoron

Subject: [PATCH] [1/2] random: SMP locking

This patch adds locking for SMP. Apparently Willy never managed to
revive his laptop with his version, so I revived mine.

The batch pool is copied as a block to avoid long lock hold times
while mixing it into the primary pool.
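
Roughly, the pattern is this (a sketch only, with made-up names like
RING_SIZE, ring, snapshot and mix_one_sample; the real code below uses
batch_entropy_pool, batch_entropy_copy and batch_lock):

	#define RING_SIZE 256	/* power of 2 */

	struct sample { __u32 data[2]; int credit; };

	static struct sample ring[RING_SIZE], snapshot[RING_SIZE];
	static unsigned int ring_head, ring_tail;
	static spinlock_t ring_lock = SPIN_LOCK_UNLOCKED;

	static void drain_ring(void)
	{
		unsigned int head, tail;

		/* hold the lock only long enough to copy the ring out */
		spin_lock_irq(&ring_lock);
		memcpy(snapshot, ring, sizeof(ring));
		head = ring_head;
		tail = ring_tail;
		ring_tail = ring_head;	/* mark everything consumed */
		spin_unlock_irq(&ring_lock);

		/* the expensive mixing runs on the private copy, unlocked */
		while (tail != head) {
			mix_one_sample(&snapshot[tail]);
			tail = (tail + 1) & (RING_SIZE - 1);
		}
	}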

diff -urN -X dontdiff orig/drivers/char/random.c work/drivers/char/random.c
--- orig/drivers/char/random.c 2003-07-13 22:30:36.000000000 -0500
+++ work/drivers/char/random.c 2003-08-01 22:15:42.000000000 -0500
@@ -484,6 +484,7 @@
int extract_count;
struct poolinfo poolinfo;
__u32 *pool;
+ spinlock_t lock;
};

/*
@@ -522,6 +523,7 @@
return -ENOMEM;
}
memset(r->pool, 0, POOLBYTES);
+ r->lock = SPIN_LOCK_UNLOCKED;
*ret_bucket = r;
return 0;
}
@@ -564,6 +566,8 @@
int wordmask = r->poolinfo.poolwords - 1;
__u32 w;

+ spin_lock(&r->lock);
+
while (nwords--) {
w = rotate_left(r->input_rotate, *in++);
i = r->add_ptr = (r->add_ptr - 1) & wordmask;
@@ -587,6 +591,8 @@
w ^= r->pool[i];
r->pool[i] = (w >> 3) ^ twist_table[w & 7];
}
+
+ spin_unlock(&r->lock);
}

/*
@@ -594,6 +600,8 @@
*/
static void credit_entropy_store(struct entropy_store *r, int nbits)
{
+ spin_lock(&r->lock);
+
if (r->entropy_count + nbits < 0) {
DEBUG_ENT("negative entropy/overflow (%d+%d)\n",
r->entropy_count, nbits);
@@ -608,6 +616,8 @@
r == random_state ? "primary" : "unknown",
nbits, r->entropy_count);
}
+
+ spin_unlock(&r->lock);
}

/**********************************************************************
@@ -618,27 +628,33 @@
*
**********************************************************************/

-static __u32 *batch_entropy_pool;
-static int *batch_entropy_credit;
-static int batch_max;
+struct sample {
+ __u32 data[2];
+ int credit;
+};
+
+static struct sample *batch_entropy_pool, *batch_entropy_copy;
static int batch_head, batch_tail;
+static spinlock_t batch_lock = SPIN_LOCK_UNLOCKED;
+
+static int batch_max;
static void batch_entropy_process(void *private_);
static DECLARE_WORK(batch_work, batch_entropy_process, NULL);

/* note: the size must be a power of 2 */
static int __init batch_entropy_init(int size, struct entropy_store *r)
{
- batch_entropy_pool = kmalloc(2*size*sizeof(__u32), GFP_KERNEL);
+ batch_entropy_pool = kmalloc(size*sizeof(struct sample), GFP_KERNEL);
if (!batch_entropy_pool)
return -1;
- batch_entropy_credit =kmalloc(size*sizeof(int), GFP_KERNEL);
- if (!batch_entropy_credit) {
+ batch_entropy_copy = kmalloc(size*sizeof(struct sample), GFP_KERNEL);
+ if (!batch_entropy_copy) {
kfree(batch_entropy_pool);
return -1;
}
batch_head = batch_tail = 0;
- batch_max = size;
batch_work.data = r;
+ batch_max = size;
return 0;
}

@@ -650,27 +666,33 @@
*/
void batch_entropy_store(u32 a, u32 b, int num)
{
- int new;
+ int new;
+ unsigned long flags;

if (!batch_max)
return;
+
+ spin_lock_irqsave(&batch_lock, flags);

- batch_entropy_pool[2*batch_head] = a;
- batch_entropy_pool[(2*batch_head) + 1] = b;
- batch_entropy_credit[batch_head] = num;
+ batch_entropy_pool[batch_head].data[0] = a;
+ batch_entropy_pool[batch_head].data[1] = b;
+ batch_entropy_pool[batch_head].credit = num;

- new = (batch_head+1) & (batch_max-1);
- if ((unsigned)(new - batch_tail) >= (unsigned)(batch_max / 2)) {
+ if (((batch_head - batch_tail) & (batch_max-1)) >= (batch_max / 2)) {
/*
* Schedule it for the next timer tick:
*/
schedule_delayed_work(&batch_work, 1);
- batch_head = new;
- } else if (new == batch_tail) {
+ }
+
+ new = (batch_head+1) & (batch_max-1);
+ if (new == batch_tail) {
DEBUG_ENT("batch entropy buffer full\n");
} else {
batch_head = new;
}
+
+ spin_unlock_irqrestore(&batch_lock, flags);
}

/*
@@ -682,20 +704,34 @@
{
struct entropy_store *r = (struct entropy_store *) private_, *p;
int max_entropy = r->poolinfo.POOLBITS;
+ unsigned head, tail;

- if (!batch_max)
- return;
+ /* Mixing into the pool is expensive, so copy over the batch
+ * data and release the batch lock. The pool is at least half
+ * full, so don't worry too much about copying only the used
+ * part.
+ */
+ spin_lock_irq(&batch_lock);
+
+ memcpy(batch_entropy_copy, batch_entropy_pool,
+ batch_max*sizeof(struct sample));
+
+ head = batch_head;
+ tail = batch_tail;
+ batch_tail = batch_head;
+
+ spin_unlock_irq(&batch_lock);

p = r;
- while (batch_head != batch_tail) {
+ while (head != tail) {
if (r->entropy_count >= max_entropy) {
r = (r == sec_random_state) ? random_state :
sec_random_state;
max_entropy = r->poolinfo.POOLBITS;
}
- add_entropy_words(r, batch_entropy_pool + 2*batch_tail, 2);
- credit_entropy_store(r, batch_entropy_credit[batch_tail]);
- batch_tail = (batch_tail+1) & (batch_max-1);
+ add_entropy_words(r, batch_entropy_copy[tail].data, 2);
+ credit_entropy_store(r, batch_entropy_copy[tail].credit);
+ tail = (tail+1) & (batch_max-1);
}
if (p->entropy_count >= random_read_wakeup_thresh)
wake_up_interruptible(&random_read_wait);
@@ -1286,6 +1322,9 @@

DEBUG_ENT("%s has %d bits, want %d bits\n",
r == sec_random_state ? "secondary" :
+ /* Hold lock while accounting */
+ spin_lock(&r->lock);
+
r == random_state ? "primary" : "unknown",
r->entropy_count, nbytes * 8);

@@ -1298,6 +1337,8 @@
wake_up_interruptible(&random_write_wait);

r->extract_count += nbytes;
+
+ spin_unlock(&r->lock);

ret = 0;
while (nbytes) {
@@ -1619,18 +1660,23 @@
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
p = (int *) arg;
+ spin_lock(&random_state->lock);
ent_count = random_state->entropy_count;
if (put_user(ent_count, p++) ||
get_user(size, p) ||
put_user(random_state->poolinfo.poolwords, p++))
- return -EFAULT;
+ goto fail;
if (size < 0)
- return -EINVAL;
+ goto fail;
if (size > random_state->poolinfo.poolwords)
size = random_state->poolinfo.poolwords;
if (copy_to_user(p, random_state->pool, size * sizeof(__u32)))
- return -EFAULT;
+ goto fail;
+ spin_unlock(&random_state->lock);
return 0;
+ fail:
+ spin_unlock(&random_state->lock);
+ return -EFAULT;
case RNDADDENTROPY:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;

--
"Love the dolphins," she advised him. "Write by W.A.S.T.E.."


2003-08-02 10:59:17

by Andrew Morton

Subject: Re: [PATCH] [1/2] random: SMP locking

Oliver Xymoron <[email protected]> wrote:
>
> This patch adds locking for SMP. Apparently Willy never managed to
> revive his laptop with his version so I revived mine.

hrm. I'm a random ignoramus. I'll look it over...

Are you really sure that all the decisions about where to use spin_lock()
vs spin_lock_irq() vs spin_lock_irqsave() are correct? They are
non-obvious.

> @@ -1619,18 +1660,23 @@
> if (!capable(CAP_SYS_ADMIN))
> return -EPERM;
> p = (int *) arg;
> + spin_lock(&random_state->lock);
> ent_count = random_state->entropy_count;
> if (put_user(ent_count, p++) ||
> get_user(size, p) ||
> put_user(random_state->poolinfo.poolwords, p++))

Cannot perform userspace access while holding a lock - a pagefault could
occur, perform IO, schedule away and the same CPU tries to take the same
lock via a different process.
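
IOW the accounting wants to be pulled out from under the lock, roughly
like so (a sketch of the pattern only; the surrounding ioctl code is
elided and p/err are stand-ins):

	int ent_count, err = 0;

	/* broken: put_user() may fault and sleep with the lock held */
	spin_lock(&random_state->lock);
	if (put_user(random_state->entropy_count, p))
		err = -EFAULT;
	spin_unlock(&random_state->lock);

	/* ok: snapshot under the lock, talk to userspace afterwards */
	spin_lock(&random_state->lock);
	ent_count = random_state->entropy_count;
	spin_unlock(&random_state->lock);
	if (put_user(ent_count, p))
		err = -EFAULT;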

2003-08-02 12:47:08

by Zwane Mwaikambo

Subject: Re: [PATCH] [1/2] random: SMP locking

On Sat, 2 Aug 2003, Andrew Morton wrote:

> Oliver Xymoron <[email protected]> wrote:
> >
> > This patch adds locking for SMP. Apparently Willy never managed to
> > revive his laptop with his version so I revived mine.
>
> hrm. I'm a random ignoramus. I'll look it over...
>
> Are you really sure that all the decisions about where to use spin_lock()
> vs spin_lock_irq() vs spin_lock_irqsave() are correct? They are
> non-obvious.
>
> > @@ -1619,18 +1660,23 @@
> > if (!capable(CAP_SYS_ADMIN))
> > return -EPERM;
> > p = (int *) arg;
> > + spin_lock(&random_state->lock);
> > ent_count = random_state->entropy_count;
> > if (put_user(ent_count, p++) ||
> > get_user(size, p) ||
> > put_user(random_state->poolinfo.poolwords, p++))
>
> Cannot perform userspace access while holding a lock - a pagefault could
> occur, perform IO, schedule away and the same CPU tries to take the same
> lock via a different process.

Perhaps might_sleep() in *_user, copy_* etc is in order?

--
function.linuxpower.ca

2003-08-02 14:32:37

by Matt Mackall

Subject: Re: [PATCH] [1/2] random: SMP locking

On Sat, Aug 02, 2003 at 04:00:15AM -0700, Andrew Morton wrote:
> Oliver Xymoron <[email protected]> wrote:
> >
> > This patch adds locking for SMP. Apparently Willy never managed to
> > revive his laptop with his version so I revived mine.
>
> hrm. I'm a random ignoramus. I'll look it over...
>
> Are you really sure that all the decisions about where to use spin_lock()
> vs spin_lock_irq() vs spin_lock_irqsave() are correct? They are
> non-obvious.

Aside from the put_user stuff below, yes.

&batch_lock
batch_entropy_store can be called from any context, and typically from
interrupts -> spin_lock_irqsave

batch_entropy_process is called via schedule_delayed_work and
runs in process context -> spin_lock_irq

&r->lock
the mixing process is too expensive to be called from an interrupt
context, and the basic worker function extract_entropy can sleep, so
all this stuff can be under a normal spin_lock.
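
Or in code terms (schematic only; the bodies are elided, and the r->lock
case is as argued above):

	void batch_entropy_store(u32 a, u32 b, int num)	/* any context, often irq */
	{
		unsigned long flags;

		spin_lock_irqsave(&batch_lock, flags);
		/* ... stash the sample ... */
		spin_unlock_irqrestore(&batch_lock, flags);
	}

	static void batch_entropy_process(void *private_)	/* keventd, process context */
	{
		spin_lock_irq(&batch_lock);	/* must block the irq-side writer */
		/* ... copy the batch out ... */
		spin_unlock_irq(&batch_lock);
	}

	static void credit_entropy_store(struct entropy_store *r, int nbits)	/* process context only */
	{
		spin_lock(&r->lock);
		/* ... adjust entropy_count ... */
		spin_unlock(&r->lock);
	}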


> > @@ -1619,18 +1660,23 @@
> > if (!capable(CAP_SYS_ADMIN))
> > return -EPERM;
> > p = (int *) arg;
> > + spin_lock(&random_state->lock);
> > ent_count = random_state->entropy_count;
> > if (put_user(ent_count, p++) ||
> > get_user(size, p) ||
> > put_user(random_state->poolinfo.poolwords, p++))
>
> Cannot perform userspace access while holding a lock - a pagefault could
> occur, perform IO, schedule away and the same CPU tries to take the same
> lock via a different process.

Doh. Fixed. All the other userspace transfers are done without locks.

diff -urN -X dontdiff orig/drivers/char/random.c work/drivers/char/random.c
--- orig/drivers/char/random.c 2003-07-13 22:30:36.000000000 -0500
+++ work/drivers/char/random.c 2003-08-02 09:28:04.000000000 -0500
@@ -484,6 +484,7 @@
int extract_count;
struct poolinfo poolinfo;
__u32 *pool;
+ spinlock_t lock;
};

/*
@@ -522,6 +523,7 @@
return -ENOMEM;
}
memset(r->pool, 0, POOLBYTES);
+ r->lock = SPIN_LOCK_UNLOCKED;
*ret_bucket = r;
return 0;
}
@@ -564,6 +566,8 @@
int wordmask = r->poolinfo.poolwords - 1;
__u32 w;

+ spin_lock(&r->lock);
+
while (nwords--) {
w = rotate_left(r->input_rotate, *in++);
i = r->add_ptr = (r->add_ptr - 1) & wordmask;
@@ -587,6 +591,8 @@
w ^= r->pool[i];
r->pool[i] = (w >> 3) ^ twist_table[w & 7];
}
+
+ spin_unlock(&r->lock);
}

/*
@@ -594,6 +600,8 @@
*/
static void credit_entropy_store(struct entropy_store *r, int nbits)
{
+ spin_lock(&r->lock);
+
if (r->entropy_count + nbits < 0) {
DEBUG_ENT("negative entropy/overflow (%d+%d)\n",
r->entropy_count, nbits);
@@ -608,6 +616,8 @@
r == random_state ? "primary" : "unknown",
nbits, r->entropy_count);
}
+
+ spin_unlock(&r->lock);
}

/**********************************************************************
@@ -618,27 +628,33 @@
*
**********************************************************************/

-static __u32 *batch_entropy_pool;
-static int *batch_entropy_credit;
-static int batch_max;
+struct sample {
+ __u32 data[2];
+ int credit;
+};
+
+static struct sample *batch_entropy_pool, *batch_entropy_copy;
static int batch_head, batch_tail;
+static spinlock_t batch_lock = SPIN_LOCK_UNLOCKED;
+
+static int batch_max;
static void batch_entropy_process(void *private_);
static DECLARE_WORK(batch_work, batch_entropy_process, NULL);

/* note: the size must be a power of 2 */
static int __init batch_entropy_init(int size, struct entropy_store *r)
{
- batch_entropy_pool = kmalloc(2*size*sizeof(__u32), GFP_KERNEL);
+ batch_entropy_pool = kmalloc(size*sizeof(struct sample), GFP_KERNEL);
if (!batch_entropy_pool)
return -1;
- batch_entropy_credit =kmalloc(size*sizeof(int), GFP_KERNEL);
- if (!batch_entropy_credit) {
+ batch_entropy_copy = kmalloc(size*sizeof(struct sample), GFP_KERNEL);
+ if (!batch_entropy_copy) {
kfree(batch_entropy_pool);
return -1;
}
batch_head = batch_tail = 0;
- batch_max = size;
batch_work.data = r;
+ batch_max = size;
return 0;
}

@@ -650,27 +666,33 @@
*/
void batch_entropy_store(u32 a, u32 b, int num)
{
- int new;
+ int new;
+ unsigned long flags;

if (!batch_max)
return;
+
+ spin_lock_irqsave(&batch_lock, flags);

- batch_entropy_pool[2*batch_head] = a;
- batch_entropy_pool[(2*batch_head) + 1] = b;
- batch_entropy_credit[batch_head] = num;
+ batch_entropy_pool[batch_head].data[0] = a;
+ batch_entropy_pool[batch_head].data[1] = b;
+ batch_entropy_pool[batch_head].credit = num;

- new = (batch_head+1) & (batch_max-1);
- if ((unsigned)(new - batch_tail) >= (unsigned)(batch_max / 2)) {
+ if (((batch_head - batch_tail) & (batch_max-1)) >= (batch_max / 2)) {
/*
* Schedule it for the next timer tick:
*/
schedule_delayed_work(&batch_work, 1);
- batch_head = new;
- } else if (new == batch_tail) {
+ }
+
+ new = (batch_head+1) & (batch_max-1);
+ if (new == batch_tail) {
DEBUG_ENT("batch entropy buffer full\n");
} else {
batch_head = new;
}
+
+ spin_unlock_irqrestore(&batch_lock, flags);
}

/*
@@ -682,20 +704,34 @@
{
struct entropy_store *r = (struct entropy_store *) private_, *p;
int max_entropy = r->poolinfo.POOLBITS;
+ unsigned head, tail;

- if (!batch_max)
- return;
+ /* Mixing into the pool is expensive, so copy over the batch
+ * data and release the batch lock. The pool is at least half
+ * full, so don't worry too much about copying only the used
+ * part.
+ */
+ spin_lock_irq(&batch_lock);
+
+ memcpy(batch_entropy_copy, batch_entropy_pool,
+ batch_max*sizeof(struct sample));
+
+ head = batch_head;
+ tail = batch_tail;
+ batch_tail = batch_head;
+
+ spin_unlock_irq(&batch_lock);

p = r;
- while (batch_head != batch_tail) {
+ while (head != tail) {
if (r->entropy_count >= max_entropy) {
r = (r == sec_random_state) ? random_state :
sec_random_state;
max_entropy = r->poolinfo.POOLBITS;
}
- add_entropy_words(r, batch_entropy_pool + 2*batch_tail, 2);
- credit_entropy_store(r, batch_entropy_credit[batch_tail]);
- batch_tail = (batch_tail+1) & (batch_max-1);
+ add_entropy_words(r, batch_entropy_copy[tail].data, 2);
+ credit_entropy_store(r, batch_entropy_copy[tail].credit);
+ tail = (tail+1) & (batch_max-1);
}
if (p->entropy_count >= random_read_wakeup_thresh)
wake_up_interruptible(&random_read_wait);
@@ -1286,6 +1322,9 @@

DEBUG_ENT("%s has %d bits, want %d bits\n",
r == sec_random_state ? "secondary" :
+ /* Hold lock while accounting */
+ spin_lock(&r->lock);
+
r == random_state ? "primary" : "unknown",
r->entropy_count, nbytes * 8);

@@ -1298,6 +1337,8 @@
wake_up_interruptible(&random_write_wait);

r->extract_count += nbytes;
+
+ spin_unlock(&r->lock);

ret = 0;
while (nbytes) {
@@ -1593,7 +1634,7 @@
random_ioctl(struct inode * inode, struct file * file,
unsigned int cmd, unsigned long arg)
{
- int *p, size, ent_count;
+ int *p, *tmp, size, ent_count;
int retval;

switch (cmd) {
@@ -1619,18 +1660,40 @@
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
p = (int *) arg;
- ent_count = random_state->entropy_count;
- if (put_user(ent_count, p++) ||
- get_user(size, p) ||
+ if (get_user(size, p) ||
put_user(random_state->poolinfo.poolwords, p++))
- return -EFAULT;
+ goto fail;
if (size < 0)
- return -EINVAL;
+ goto fail;
if (size > random_state->poolinfo.poolwords)
size = random_state->poolinfo.poolwords;
- if (copy_to_user(p, random_state->pool, size * sizeof(__u32)))
- return -EFAULT;
+
+ /* prepare to atomically snapshot pool */
+
+ tmp = kmalloc(size * sizeof(__u32), GFP_KERNEL);
+
+ if (!tmp)
+ goto fail;
+
+ spin_lock(&random_state->lock);
+ ent_count = random_state->entropy_count;
+ memcpy(tmp, random_state->pool, size * sizeof(__u32));
+ spin_unlock(&random_state->lock);
+
+ if (!copy_to_user(p, tmp, size * sizeof(__u32))) {
+ kfree(tmp);
+ goto fail;
+ }
+
+ kfree(tmp);
+
+ if(put_user(ent_count, p++))
+ goto fail;
+
return 0;
+ fail:
+ spin_unlock(&random_state->lock);
+ return -EFAULT;
case RNDADDENTROPY:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;


--
Matt Mackall : http://www.selenic.com : of or relating to the moon

2003-08-02 14:43:20

by Matt Mackall

Subject: Re: [PATCH] [1/2] random: SMP locking

On Sat, Aug 02, 2003 at 08:35:22AM -0400, Zwane Mwaikambo wrote:
> On Sat, 2 Aug 2003, Andrew Morton wrote:
> > Cannot perform userspace access while holding a lock - a pagefault could
> > occur, perform IO, schedule away and the same CPU tries to take the same
> > lock via a different process.
>
> Perhaps might_sleep() in *_user, copy_* etc is in order?

Wouldn't have caught this case - this interface hasn't actually been
used/useful for many years as it only gives access to one of the
pools and has never been atomic either.

--
Matt Mackall : http://www.selenic.com : of or relating to the moon

2003-08-02 15:14:36

by Zwane Mwaikambo

Subject: Re: [PATCH] [1/2] random: SMP locking

On Sat, 2 Aug 2003, Matt Mackall wrote:

> On Sat, Aug 02, 2003 at 08:35:22AM -0400, Zwane Mwaikambo wrote:
> > On Sat, 2 Aug 2003, Andrew Morton wrote:
> > > Cannot perform userspace access while holding a lock - a pagefault could
> > > occur, perform IO, schedule away and the same CPU tries to take the same
> > > lock via a different process.
> >
> > Perhaps might_sleep() in *_user, copy_* etc is in order?
>
> Wouldn't have caught this case - this interface hasn't actually been
> used/useful for many years as it only gives access to one of the
> pools and has never been atomic either.

Aha, thanks for the explanation.

Zwane
--
function.linuxpower.ca

2003-08-02 18:38:12

by Andrew Morton

Subject: Re: [PATCH] [1/2] random: SMP locking

Zwane Mwaikambo <[email protected]> wrote:
>
> Perhaps might_sleep() in *_user, copy_* etc is in order?

Probably, with a little care.

A userspace copy while in an atomic region is actually legal, on behalf of
the read() and write() pagecache copy functions: if we take a fault while
holding an atomic kmap, the fault handler bails and the usercopy returns a
short copy.

In other words: if you stick a might_sleep() in __copy_from_user() and
__copy_to_user(), you get a false positive on every read() and write().

We could probably add it to copy_to_user() and copy_from_user() though.
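
For reference, the pagecache read side does roughly this (a rough sketch,
not the actual mm/filemap.c code; page/buf/offset/count are stand-ins):

	char *kaddr;
	unsigned long left;

	kaddr = kmap_atomic(page, KM_USER0);	/* atomic kmap: no sleeping allowed */
	left = __copy_to_user(buf, kaddr + offset, count);
	kunmap_atomic(kaddr, KM_USER0);

	if (left) {
		/* faulted while atomic: the handler bailed and we got a
		   short copy - retry with a sleeping kmap() */
		kaddr = kmap(page);
		left = __copy_to_user(buf, kaddr + offset, count);
		kunmap(page);
	}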

2003-08-02 18:42:42

by Matt Mackall

Subject: Re: [PATCH] [1/2] random: SMP locking

On Sat, Aug 02, 2003 at 08:35:22AM -0400, Zwane Mwaikambo wrote:
> On Sat, 2 Aug 2003, Andrew Morton wrote:
>
> > Cannot perform userspace access while holding a lock - a pagefault could
> > occur, perform IO, schedule away and the same CPU tries to take the same
> > lock via a different process.
>
> Perhaps might_sleep() in *_user, copy_* etc is in order?

I think it's been suggested before, but I threw a patch together for
i386 anyway.

This only adds the checks to the non-__ versions, as the __ ones are
occasionally called inside things like kmap_atomic pairs, which take a
spinlock with highmem. It's all conditional on CONFIG_DEBUG_SPINLOCK_SLEEP
(which isn't quite the right name), so there's no overhead for normal
builds.

diff -urN -X dontdiff orig/arch/i386/lib/usercopy.c work/arch/i386/lib/usercopy.c
--- orig/arch/i386/lib/usercopy.c 2003-07-13 22:28:54.000000000 -0500
+++ work/arch/i386/lib/usercopy.c 2003-08-02 13:33:43.000000000 -0500
@@ -149,6 +149,7 @@
unsigned long
clear_user(void __user *to, unsigned long n)
{
+ might_sleep();
if (access_ok(VERIFY_WRITE, to, n))
__do_clear_user(to, n);
return n;
@@ -188,6 +189,8 @@
unsigned long mask = -__addr_ok(s);
unsigned long res, tmp;

+ might_sleep();
+
__asm__ __volatile__(
" testl %0, %0\n"
" jz 3f\n"
diff -urN -X dontdiff orig/include/asm-i386/uaccess.h work/include/asm-i386/uaccess.h
--- orig/include/asm-i386/uaccess.h 2003-07-13 22:30:48.000000000 -0500
+++ work/include/asm-i386/uaccess.h 2003-08-02 13:15:42.000000000 -0500
@@ -260,6 +260,7 @@
({ \
long __pu_err = -EFAULT; \
__typeof__(*(ptr)) *__pu_addr = (ptr); \
+ might_sleep(); \
if (access_ok(VERIFY_WRITE,__pu_addr,size)) \
__put_user_size((x),__pu_addr,(size),__pu_err,-EFAULT); \
__pu_err; \
@@ -469,6 +470,7 @@
static inline unsigned long
copy_to_user(void __user *to, const void *from, unsigned long n)
{
+ might_sleep();
if (access_ok(VERIFY_WRITE, to, n))
n = __copy_to_user(to, from, n);
return n;
@@ -493,6 +495,7 @@
static inline unsigned long
copy_from_user(void *to, const void __user *from, unsigned long n)
{
+ might_sleep();
if (access_ok(VERIFY_READ, from, n))
n = __copy_from_user(to, from, n);
else


--
Matt Mackall : http://www.selenic.com : of or relating to the moon

2003-08-02 18:59:22

by Andrew Morton

Subject: Re: [PATCH] [1/2] random: SMP locking

Matt Mackall <[email protected]> wrote:
>
> > Are you really sure that all the decisions about where to use spin_lock()
> > vs spin_lock_irq() vs spin_lock_irqsave() are correct? They are
> > non-obvious.
>
> Aside from the put_user stuff below, yes.

Well I see in Linus's current tree:

ndev->regen_timer.function = ipv6_regen_rndid;

And ipv6_regen_rndid() ends up calling get_random_bytes() which calls
extract_entropy() which now does a bare spin_lock().

So I think if the timer is run while some process-context code on the same
CPU is running get_random_bytes(), we deadlock, don't we?

Probably, we should make get_random_bytes() callable from any context.
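
The usual way to make that safe is to take the pool lock with interrupts
disabled, e.g. (a sketch, not the full extract_entropy(); cpuflags is just
a local):

	unsigned long cpuflags;

	/* A timer or other interrupt on this CPU may itself end up in
	 * get_random_bytes() -> extract_entropy(); with a plain
	 * spin_lock() we would deadlock against ourselves, so keep
	 * interrupts off while the pool lock is held. */
	spin_lock_irqsave(&r->lock, cpuflags);
	/* ... do the entropy accounting ... */
	spin_unlock_irqrestore(&r->lock, cpuflags);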

2003-08-02 19:33:47

by Matt Mackall

Subject: Re: [PATCH] [1/2] random: SMP locking

On Sat, Aug 02, 2003 at 12:00:23PM -0700, Andrew Morton wrote:
> Matt Mackall <[email protected]> wrote:
> >
> > > Are you really sure that all the decisions about where to use spin_lock()
> > > vs spin_lock_irq() vs spin_lock_irqsave() are correct? They are
> > > non-obvious.
> >
> > Aside from the put_user stuff below, yes.
>
> Well I see in Linus's current tree:
>
> ndev->regen_timer.function = ipv6_regen_rndid;
>
> And ipv6_regen_rndid() ends up calling get_random_bytes() which calls
> extract_entropy() which now does a bare spin_lock().
>
> So I think if the timer is run while some process-context code on the same
> CPU is running get_random_bytes() we deadlock don't we?

Sigh. I had hastily assumed the rescheduling in extract_entropy would
break any such users in a non-process context, but that's conditional
on the EXTRACT_ENTROPY_USER flag, so it doesn't apply in the
get_random_bytes path.

> Probably, we should make get_random_bytes() callable from any context.

Ok, new patch follows.

diff -urN -X dontdiff orig/drivers/char/random.c work/drivers/char/random.c
--- orig/drivers/char/random.c 2003-07-13 22:30:36.000000000 -0500
+++ work/drivers/char/random.c 2003-08-02 14:22:51.000000000 -0500
@@ -484,6 +484,7 @@
int extract_count;
struct poolinfo poolinfo;
__u32 *pool;
+ spinlock_t lock;
};

/*
@@ -522,6 +523,7 @@
return -ENOMEM;
}
memset(r->pool, 0, POOLBYTES);
+ r->lock = SPIN_LOCK_UNLOCKED;
*ret_bucket = r;
return 0;
}
@@ -563,6 +565,9 @@
int new_rotate;
int wordmask = r->poolinfo.poolwords - 1;
__u32 w;
+ unsigned long flags;
+
+ spin_lock_irqsave(&r->lock, flags);

while (nwords--) {
w = rotate_left(r->input_rotate, *in++);
@@ -587,6 +592,8 @@
w ^= r->pool[i];
r->pool[i] = (w >> 3) ^ twist_table[w & 7];
}
+
+ spin_unlock_irqrestore(&r->lock, flags);
}

/*
@@ -594,6 +601,10 @@
*/
static void credit_entropy_store(struct entropy_store *r, int nbits)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&r->lock, flags);
+
if (r->entropy_count + nbits < 0) {
DEBUG_ENT("negative entropy/overflow (%d+%d)\n",
r->entropy_count, nbits);
@@ -608,6 +619,8 @@
r == random_state ? "primary" : "unknown",
nbits, r->entropy_count);
}
+
+ spin_unlock_irqrestore(&r->lock, flags);
}

/**********************************************************************
@@ -618,27 +631,33 @@
*
**********************************************************************/

-static __u32 *batch_entropy_pool;
-static int *batch_entropy_credit;
-static int batch_max;
+struct sample {
+ __u32 data[2];
+ int credit;
+};
+
+static struct sample *batch_entropy_pool, *batch_entropy_copy;
static int batch_head, batch_tail;
+static spinlock_t batch_lock = SPIN_LOCK_UNLOCKED;
+
+static int batch_max;
static void batch_entropy_process(void *private_);
static DECLARE_WORK(batch_work, batch_entropy_process, NULL);

/* note: the size must be a power of 2 */
static int __init batch_entropy_init(int size, struct entropy_store *r)
{
- batch_entropy_pool = kmalloc(2*size*sizeof(__u32), GFP_KERNEL);
+ batch_entropy_pool = kmalloc(size*sizeof(struct sample), GFP_KERNEL);
if (!batch_entropy_pool)
return -1;
- batch_entropy_credit =kmalloc(size*sizeof(int), GFP_KERNEL);
- if (!batch_entropy_credit) {
+ batch_entropy_copy = kmalloc(size*sizeof(struct sample), GFP_KERNEL);
+ if (!batch_entropy_copy) {
kfree(batch_entropy_pool);
return -1;
}
batch_head = batch_tail = 0;
- batch_max = size;
batch_work.data = r;
+ batch_max = size;
return 0;
}

@@ -650,27 +669,33 @@
*/
void batch_entropy_store(u32 a, u32 b, int num)
{
- int new;
+ int new;
+ unsigned long flags;

if (!batch_max)
return;
+
+ spin_lock_irqsave(&batch_lock, flags);

- batch_entropy_pool[2*batch_head] = a;
- batch_entropy_pool[(2*batch_head) + 1] = b;
- batch_entropy_credit[batch_head] = num;
+ batch_entropy_pool[batch_head].data[0] = a;
+ batch_entropy_pool[batch_head].data[1] = b;
+ batch_entropy_pool[batch_head].credit = num;

- new = (batch_head+1) & (batch_max-1);
- if ((unsigned)(new - batch_tail) >= (unsigned)(batch_max / 2)) {
+ if (((batch_head - batch_tail) & (batch_max-1)) >= (batch_max / 2)) {
/*
* Schedule it for the next timer tick:
*/
schedule_delayed_work(&batch_work, 1);
- batch_head = new;
- } else if (new == batch_tail) {
+ }
+
+ new = (batch_head+1) & (batch_max-1);
+ if (new == batch_tail) {
DEBUG_ENT("batch entropy buffer full\n");
} else {
batch_head = new;
}
+
+ spin_unlock_irqrestore(&batch_lock, flags);
}

/*
@@ -682,20 +707,34 @@
{
struct entropy_store *r = (struct entropy_store *) private_, *p;
int max_entropy = r->poolinfo.POOLBITS;
+ unsigned head, tail;

- if (!batch_max)
- return;
+ /* Mixing into the pool is expensive, so copy over the batch
+ * data and release the batch lock. The pool is at least half
+ * full, so don't worry too much about copying only the used
+ * part.
+ */
+ spin_lock_irq(&batch_lock);
+
+ memcpy(batch_entropy_copy, batch_entropy_pool,
+ batch_max*sizeof(struct sample));
+
+ head = batch_head;
+ tail = batch_tail;
+ batch_tail = batch_head;
+
+ spin_unlock_irq(&batch_lock);

p = r;
- while (batch_head != batch_tail) {
+ while (head != tail) {
if (r->entropy_count >= max_entropy) {
r = (r == sec_random_state) ? random_state :
sec_random_state;
max_entropy = r->poolinfo.POOLBITS;
}
- add_entropy_words(r, batch_entropy_pool + 2*batch_tail, 2);
- credit_entropy_store(r, batch_entropy_credit[batch_tail]);
- batch_tail = (batch_tail+1) & (batch_max-1);
+ add_entropy_words(r, batch_entropy_copy[tail].data, 2);
+ credit_entropy_store(r, batch_entropy_copy[tail].credit);
+ tail = (tail+1) & (batch_max-1);
}
if (p->entropy_count >= random_read_wakeup_thresh)
wake_up_interruptible(&random_read_wait);
@@ -1274,6 +1313,7 @@
ssize_t ret, i;
__u32 tmp[TMP_BUF_SIZE];
__u32 x;
+ unsigned long cpuflags;

add_timer_randomness(&extract_timer_state, nbytes);

@@ -1284,6 +1324,9 @@
if (flags & EXTRACT_ENTROPY_SECONDARY)
xfer_secondary_pool(r, nbytes, tmp);

+ /* Hold lock while accounting */
+ spin_lock_irqsave(&r->lock, cpuflags);
+
DEBUG_ENT("%s has %d bits, want %d bits\n",
r == sec_random_state ? "secondary" :
r == random_state ? "primary" : "unknown",
@@ -1298,6 +1341,8 @@
wake_up_interruptible(&random_write_wait);

r->extract_count += nbytes;
+
+ spin_unlock_irqrestore(&r->lock, cpuflags);

ret = 0;
while (nbytes) {
@@ -1593,7 +1638,7 @@
random_ioctl(struct inode * inode, struct file * file,
unsigned int cmd, unsigned long arg)
{
- int *p, size, ent_count;
+ int *p, *tmp, size, ent_count;
int retval;

switch (cmd) {
@@ -1619,18 +1664,40 @@
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
p = (int *) arg;
- ent_count = random_state->entropy_count;
- if (put_user(ent_count, p++) ||
- get_user(size, p) ||
+ if (get_user(size, p) ||
put_user(random_state->poolinfo.poolwords, p++))
- return -EFAULT;
+ goto fail;
if (size < 0)
- return -EINVAL;
+ goto fail;
if (size > random_state->poolinfo.poolwords)
size = random_state->poolinfo.poolwords;
- if (copy_to_user(p, random_state->pool, size * sizeof(__u32)))
- return -EFAULT;
+
+ /* prepare to atomically snapshot pool */
+
+ tmp = kmalloc(size * sizeof(__u32), GFP_KERNEL);
+
+ if (!tmp)
+ goto fail;
+
+ spin_lock(&random_state->lock);
+ ent_count = random_state->entropy_count;
+ memcpy(tmp, random_state->pool, size * sizeof(__u32));
+ spin_unlock(&random_state->lock);
+
+ if (!copy_to_user(p, tmp, size * sizeof(__u32))) {
+ kfree(tmp);
+ goto fail;
+ }
+
+ kfree(tmp);
+
+ if(put_user(ent_count, p++))
+ goto fail;
+
return 0;
+ fail:
+ spin_unlock(&random_state->lock);
+ return -EFAULT;
case RNDADDENTROPY:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;


--
Matt Mackall : http://www.selenic.com : of or relating to the moon

2003-08-02 19:45:53

by Andrew Morton

Subject: Re: [PATCH] [1/2] random: SMP locking

Matt Mackall <[email protected]> wrote:
>
> + spin_lock(&random_state->lock);
> + ent_count = random_state->entropy_count;
> + memcpy(tmp, random_state->pool, size * sizeof(__u32));
> + spin_unlock(&random_state->lock);
> +

This needs to be spin_lock_irqsave().

> + if (!copy_to_user(p, tmp, size * sizeof(__u32))) {
> + kfree(tmp);
> + goto fail;
> + }
> +
> + kfree(tmp);
> +
> + if(put_user(ent_count, p++))
> + goto fail;
> +
> return 0;
> + fail:
> + spin_unlock(&random_state->lock);

Double unlock ;)

2003-08-02 20:15:15

by Matt Mackall

Subject: Re: [PATCH] [1/2] random: SMP locking

On Sat, Aug 02, 2003 at 12:46:47PM -0700, Andrew Morton wrote:
> Matt Mackall <[email protected]> wrote:
> >
> > + spin_lock(&random_state->lock);
>
> This needs to be spin_lock_irqsave().
>
> > + fail:
> > + spin_unlock(&random_state->lock);
>
> Double unlock ;)

Damnit. Will you take a patch that just returns -ENOTTY here? Direct
access to the pool is only useful for debugging the driver (or after
you've broken root and want to do backtracking attacks) and hasn't
been useful for that since the second pool was added (sometime in 1.3
IIRC) because you need to be able to snapshot both pools atomically.

Anyway, fixed version.

diff -urN -X dontdiff orig/drivers/char/random.c work/drivers/char/random.c
--- orig/drivers/char/random.c 2003-07-13 22:30:36.000000000 -0500
+++ work/drivers/char/random.c 2003-08-02 15:05:18.000000000 -0500
@@ -484,6 +484,7 @@
int extract_count;
struct poolinfo poolinfo;
__u32 *pool;
+ spinlock_t lock;
};

/*
@@ -522,6 +523,7 @@
return -ENOMEM;
}
memset(r->pool, 0, POOLBYTES);
+ r->lock = SPIN_LOCK_UNLOCKED;
*ret_bucket = r;
return 0;
}
@@ -563,6 +565,9 @@
int new_rotate;
int wordmask = r->poolinfo.poolwords - 1;
__u32 w;
+ unsigned long flags;
+
+ spin_lock_irqsave(&r->lock, flags);

while (nwords--) {
w = rotate_left(r->input_rotate, *in++);
@@ -587,6 +592,8 @@
w ^= r->pool[i];
r->pool[i] = (w >> 3) ^ twist_table[w & 7];
}
+
+ spin_unlock_irqrestore(&r->lock, flags);
}

/*
@@ -594,6 +601,10 @@
*/
static void credit_entropy_store(struct entropy_store *r, int nbits)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&r->lock, flags);
+
if (r->entropy_count + nbits < 0) {
DEBUG_ENT("negative entropy/overflow (%d+%d)\n",
r->entropy_count, nbits);
@@ -608,6 +619,8 @@
r == random_state ? "primary" : "unknown",
nbits, r->entropy_count);
}
+
+ spin_unlock_irqrestore(&r->lock, flags);
}

/**********************************************************************
@@ -618,27 +631,33 @@
*
**********************************************************************/

-static __u32 *batch_entropy_pool;
-static int *batch_entropy_credit;
-static int batch_max;
+struct sample {
+ __u32 data[2];
+ int credit;
+};
+
+static struct sample *batch_entropy_pool, *batch_entropy_copy;
static int batch_head, batch_tail;
+static spinlock_t batch_lock = SPIN_LOCK_UNLOCKED;
+
+static int batch_max;
static void batch_entropy_process(void *private_);
static DECLARE_WORK(batch_work, batch_entropy_process, NULL);

/* note: the size must be a power of 2 */
static int __init batch_entropy_init(int size, struct entropy_store *r)
{
- batch_entropy_pool = kmalloc(2*size*sizeof(__u32), GFP_KERNEL);
+ batch_entropy_pool = kmalloc(size*sizeof(struct sample), GFP_KERNEL);
if (!batch_entropy_pool)
return -1;
- batch_entropy_credit =kmalloc(size*sizeof(int), GFP_KERNEL);
- if (!batch_entropy_credit) {
+ batch_entropy_copy = kmalloc(size*sizeof(struct sample), GFP_KERNEL);
+ if (!batch_entropy_copy) {
kfree(batch_entropy_pool);
return -1;
}
batch_head = batch_tail = 0;
- batch_max = size;
batch_work.data = r;
+ batch_max = size;
return 0;
}

@@ -650,27 +669,33 @@
*/
void batch_entropy_store(u32 a, u32 b, int num)
{
- int new;
+ int new;
+ unsigned long flags;

if (!batch_max)
return;
+
+ spin_lock_irqsave(&batch_lock, flags);

- batch_entropy_pool[2*batch_head] = a;
- batch_entropy_pool[(2*batch_head) + 1] = b;
- batch_entropy_credit[batch_head] = num;
+ batch_entropy_pool[batch_head].data[0] = a;
+ batch_entropy_pool[batch_head].data[1] = b;
+ batch_entropy_pool[batch_head].credit = num;

- new = (batch_head+1) & (batch_max-1);
- if ((unsigned)(new - batch_tail) >= (unsigned)(batch_max / 2)) {
+ if (((batch_head - batch_tail) & (batch_max-1)) >= (batch_max / 2)) {
/*
* Schedule it for the next timer tick:
*/
schedule_delayed_work(&batch_work, 1);
- batch_head = new;
- } else if (new == batch_tail) {
+ }
+
+ new = (batch_head+1) & (batch_max-1);
+ if (new == batch_tail) {
DEBUG_ENT("batch entropy buffer full\n");
} else {
batch_head = new;
}
+
+ spin_unlock_irqrestore(&batch_lock, flags);
}

/*
@@ -682,20 +707,34 @@
{
struct entropy_store *r = (struct entropy_store *) private_, *p;
int max_entropy = r->poolinfo.POOLBITS;
+ unsigned head, tail;

- if (!batch_max)
- return;
+ /* Mixing into the pool is expensive, so copy over the batch
+ * data and release the batch lock. The pool is at least half
+ * full, so don't worry too much about copying only the used
+ * part.
+ */
+ spin_lock_irq(&batch_lock);
+
+ memcpy(batch_entropy_copy, batch_entropy_pool,
+ batch_max*sizeof(struct sample));
+
+ head = batch_head;
+ tail = batch_tail;
+ batch_tail = batch_head;
+
+ spin_unlock_irq(&batch_lock);

p = r;
- while (batch_head != batch_tail) {
+ while (head != tail) {
if (r->entropy_count >= max_entropy) {
r = (r == sec_random_state) ? random_state :
sec_random_state;
max_entropy = r->poolinfo.POOLBITS;
}
- add_entropy_words(r, batch_entropy_pool + 2*batch_tail, 2);
- credit_entropy_store(r, batch_entropy_credit[batch_tail]);
- batch_tail = (batch_tail+1) & (batch_max-1);
+ add_entropy_words(r, batch_entropy_copy[tail].data, 2);
+ credit_entropy_store(r, batch_entropy_copy[tail].credit);
+ tail = (tail+1) & (batch_max-1);
}
if (p->entropy_count >= random_read_wakeup_thresh)
wake_up_interruptible(&random_read_wait);
@@ -1274,6 +1313,7 @@
ssize_t ret, i;
__u32 tmp[TMP_BUF_SIZE];
__u32 x;
+ unsigned long cpuflags;

add_timer_randomness(&extract_timer_state, nbytes);

@@ -1284,6 +1324,9 @@
if (flags & EXTRACT_ENTROPY_SECONDARY)
xfer_secondary_pool(r, nbytes, tmp);

+ /* Hold lock while accounting */
+ spin_lock_irqsave(&r->lock, cpuflags);
+
DEBUG_ENT("%s has %d bits, want %d bits\n",
r == sec_random_state ? "secondary" :
r == random_state ? "primary" : "unknown",
@@ -1298,6 +1341,8 @@
wake_up_interruptible(&random_write_wait);

r->extract_count += nbytes;
+
+ spin_unlock_irqrestore(&r->lock, cpuflags);

ret = 0;
while (nbytes) {
@@ -1593,8 +1638,9 @@
random_ioctl(struct inode * inode, struct file * file,
unsigned int cmd, unsigned long arg)
{
- int *p, size, ent_count;
+ int *p, *tmp, size, ent_count;
int retval;
+ unsigned long flags;

switch (cmd) {
case RNDGETENTCNT:
@@ -1619,17 +1665,36 @@
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
p = (int *) arg;
- ent_count = random_state->entropy_count;
- if (put_user(ent_count, p++) ||
- get_user(size, p) ||
+ if (get_user(size, p) ||
put_user(random_state->poolinfo.poolwords, p++))
return -EFAULT;
if (size < 0)
- return -EINVAL;
+ return -EFAULT;
if (size > random_state->poolinfo.poolwords)
size = random_state->poolinfo.poolwords;
- if (copy_to_user(p, random_state->pool, size * sizeof(__u32)))
+
+ /* prepare to atomically snapshot pool */
+
+ tmp = kmalloc(size * sizeof(__u32), GFP_KERNEL);
+
+ if (!tmp)
+ return -EFAULT;
+
+ spin_lock_irqsave(&random_state->lock, flags);
+ ent_count = random_state->entropy_count;
+ memcpy(tmp, random_state->pool, size * sizeof(__u32));
+ spin_unlock_irqrestore(&random_state->lock, flags);
+
+ if (!copy_to_user(p, tmp, size * sizeof(__u32))) {
+ kfree(tmp);
return -EFAULT;
+ }
+
+ kfree(tmp);
+
+ if(put_user(ent_count, p++))
+ return -EFAULT;
+
return 0;
case RNDADDENTROPY:
if (!capable(CAP_SYS_ADMIN))


--
Matt Mackall : http://www.selenic.com : of or relating to the moon