2019-11-20 17:34:09

by Marco Elver

Subject: [PATCH] spinlock_debug: Fix various data races

This fixes various data races in spinlock_debug. When testing with KCSAN,
the console gets spammed with data race reports, suggesting these races
are extremely frequent.

Example data race report:

read to 0xffff8ab24f403c48 of 4 bytes by task 221 on cpu 2:
debug_spin_lock_before kernel/locking/spinlock_debug.c:85 [inline]
do_raw_spin_lock+0x9b/0x210 kernel/locking/spinlock_debug.c:112
__raw_spin_lock include/linux/spinlock_api_smp.h:143 [inline]
_raw_spin_lock+0x39/0x40 kernel/locking/spinlock.c:151
spin_lock include/linux/spinlock.h:338 [inline]
get_partial_node.isra.0.part.0+0x32/0x2f0 mm/slub.c:1873
get_partial_node mm/slub.c:1870 [inline]
<snip>

write to 0xffff8ab24f403c48 of 4 bytes by task 167 on cpu 3:
debug_spin_unlock kernel/locking/spinlock_debug.c:103 [inline]
do_raw_spin_unlock+0xc9/0x1a0 kernel/locking/spinlock_debug.c:138
__raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:159 [inline]
_raw_spin_unlock_irqrestore+0x2d/0x50 kernel/locking/spinlock.c:191
spin_unlock_irqrestore include/linux/spinlock.h:393 [inline]
free_debug_processing+0x1b3/0x210 mm/slub.c:1214
__slab_free+0x292/0x400 mm/slub.c:2864
<snip>

As a side effect, with KCSAN these races eventually lock up the console,
most likely due to a deadlock, e.g. .. -> printk lock -> spinlock_debug ->
KCSAN detects data race -> kcsan_print_report() -> printk lock ->
deadlock.

This fix 1) avoids the data races, and 2) allows lock debugging to be used
together with KCSAN.

Signed-off-by: Marco Elver <[email protected]>
Reported-by: Qian Cai <[email protected]>
---
kernel/locking/spinlock_debug.c | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
index 399669f7eba8..472dd462a40c 100644
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -51,19 +51,19 @@ EXPORT_SYMBOL(__rwlock_init);

static void spin_dump(raw_spinlock_t *lock, const char *msg)
{
- struct task_struct *owner = NULL;
+ struct task_struct *owner = READ_ONCE(lock->owner);

- if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
- owner = lock->owner;
+ if (owner == SPINLOCK_OWNER_INIT)
+ owner = NULL;
printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
msg, raw_smp_processor_id(),
current->comm, task_pid_nr(current));
printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, "
".owner_cpu: %d\n",
- lock, lock->magic,
+ lock, READ_ONCE(lock->magic),
owner ? owner->comm : "<none>",
owner ? task_pid_nr(owner) : -1,
- lock->owner_cpu);
+ READ_ONCE(lock->owner_cpu));
dump_stack();
}

@@ -80,16 +80,16 @@ static void spin_bug(raw_spinlock_t *lock, const char *msg)
static inline void
debug_spin_lock_before(raw_spinlock_t *lock)
{
- SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
- SPIN_BUG_ON(lock->owner == current, lock, "recursion");
- SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
+ SPIN_BUG_ON(READ_ONCE(lock->magic) != SPINLOCK_MAGIC, lock, "bad magic");
+ SPIN_BUG_ON(READ_ONCE(lock->owner) == current, lock, "recursion");
+ SPIN_BUG_ON(READ_ONCE(lock->owner_cpu) == raw_smp_processor_id(),
lock, "cpu recursion");
}

static inline void debug_spin_lock_after(raw_spinlock_t *lock)
{
- lock->owner_cpu = raw_smp_processor_id();
- lock->owner = current;
+ WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id());
+ WRITE_ONCE(lock->owner, current);
}

static inline void debug_spin_unlock(raw_spinlock_t *lock)
@@ -99,8 +99,8 @@ static inline void debug_spin_unlock(raw_spinlock_t *lock)
SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
lock, "wrong CPU");
- lock->owner = SPINLOCK_OWNER_INIT;
- lock->owner_cpu = -1;
+ WRITE_ONCE(lock->owner, SPINLOCK_OWNER_INIT);
+ WRITE_ONCE(lock->owner_cpu, -1);
}

/*
@@ -187,8 +187,8 @@ static inline void debug_write_lock_before(rwlock_t *lock)

static inline void debug_write_lock_after(rwlock_t *lock)
{
- lock->owner_cpu = raw_smp_processor_id();
- lock->owner = current;
+ WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id());
+ WRITE_ONCE(lock->owner, current);
}

static inline void debug_write_unlock(rwlock_t *lock)
@@ -197,8 +197,8 @@ static inline void debug_write_unlock(rwlock_t *lock)
RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner");
RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
lock, "wrong CPU");
- lock->owner = SPINLOCK_OWNER_INIT;
- lock->owner_cpu = -1;
+ WRITE_ONCE(lock->owner, SPINLOCK_OWNER_INIT);
+ WRITE_ONCE(lock->owner_cpu, -1);
}

void do_raw_write_lock(rwlock_t *lock)
--
2.24.0.432.g9d3f5f5b63-goog



2019-11-21 18:34:38

by Ingo Molnar

Subject: Re: [PATCH] spinlock_debug: Fix various data races


* Marco Elver <[email protected]> wrote:

> static inline void debug_spin_lock_after(raw_spinlock_t *lock)
> {
> - lock->owner_cpu = raw_smp_processor_id();
> - lock->owner = current;
> + WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id());
> + WRITE_ONCE(lock->owner, current);
> }

debug_spin_lock_after() runs inside the spinlock itself - why do these
writes have to be WRITE_ONCE()?

> @@ -197,8 +197,8 @@ static inline void debug_write_unlock(rwlock_t *lock)
> RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner");
> RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
> lock, "wrong CPU");
> - lock->owner = SPINLOCK_OWNER_INIT;
> - lock->owner_cpu = -1;
> + WRITE_ONCE(lock->owner, SPINLOCK_OWNER_INIT);
> + WRITE_ONCE(lock->owner_cpu, -1);
> }

This too is running inside the critical section of the spinlock - why are
the WRITE_ONCE() calls necessary?

Thanks,

Ingo

2019-11-21 19:48:10

by Marco Elver

Subject: Re: [PATCH] spinlock_debug: Fix various data races

On Thu, 21 Nov 2019, Ingo Molnar wrote:

>
> * Marco Elver <[email protected]> wrote:
>
> > static inline void debug_spin_lock_after(raw_spinlock_t *lock)
> > {
> > - lock->owner_cpu = raw_smp_processor_id();
> > - lock->owner = current;
> > + WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id());
> > + WRITE_ONCE(lock->owner, current);
> > }
>
> debug_spin_lock_after() runs inside the spinlock itself - why do these
> writes have to be WRITE_ONCE()?
>
> > @@ -197,8 +197,8 @@ static inline void debug_write_unlock(rwlock_t *lock)
> > RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner");
> > RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
> > lock, "wrong CPU");
> > - lock->owner = SPINLOCK_OWNER_INIT;
> > - lock->owner_cpu = -1;
> > + WRITE_ONCE(lock->owner, SPINLOCK_OWNER_INIT);
> > + WRITE_ONCE(lock->owner_cpu, -1);
> > }
>
> This too is running inside the critical section of the spinlock - why are
> the WRITE_ONCE() calls necessary?

Although the writes are inside the critical section, they are read
concurrently outside the critical section, e.g. in
debug_spin_lock_before(). In other words, the WRITE_ONCE() calls pair with
the READ_ONCE() calls that are *outside* the critical section.
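
To make the pairing concrete, below is a minimal user-space sketch of the
same access pattern (an illustration only, not the kernel code: pthreads
and C11 relaxed atomics stand in for the raw spinlock and for
READ_ONCE()/WRITE_ONCE()):

/*
 * owner_cpu is written only while holding the lock, but it is also read
 * by a checker that does not hold the lock -- the same situation as
 * debug_spin_lock_before() vs. debug_spin_lock_after()/debug_spin_unlock().
 * The relaxed atomic accesses play the role of READ_ONCE()/WRITE_ONCE();
 * without them, the unlocked read and the locked writes form a data race
 * that a race detector (KCSAN in the kernel, TSan here) would report.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
static atomic_int owner_cpu = -1;	/* -1 == not owned */

/* Runs *before* taking the lock, like debug_spin_lock_before(). */
static void check_before_lock(int self)
{
	if (atomic_load_explicit(&owner_cpu, memory_order_relaxed) == self)
		fprintf(stderr, "cpu recursion?\n");
}

static void *worker(void *arg)
{
	int self = *(int *)arg;

	for (int i = 0; i < 100000; i++) {
		check_before_lock(self);	/* unlocked read */
		pthread_mutex_lock(&mutex);
		/* like debug_spin_lock_after() */
		atomic_store_explicit(&owner_cpu, self, memory_order_relaxed);
		/* ... critical section ... */
		/* like debug_spin_unlock() */
		atomic_store_explicit(&owner_cpu, -1, memory_order_relaxed);
		pthread_mutex_unlock(&mutex);
	}
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;
	int id1 = 0, id2 = 1;

	pthread_create(&t1, NULL, worker, &id1);
	pthread_create(&t2, NULL, worker, &id2);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}

Marking only one side of the pair is not enough to make the accesses
non-racy, which is why the patch marks both the reads outside the
critical section and the writes inside it.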

Thanks,
-- Marco

> Thanks,
>
> Ingo

2019-11-29 07:06:42

by Ingo Molnar

Subject: Re: [PATCH] spinlock_debug: Fix various data races


* Marco Elver <[email protected]> wrote:

> On Thu, 21 Nov 2019, Ingo Molnar wrote:
>
> >
> > * Marco Elver <[email protected]> wrote:
> >
> > > static inline void debug_spin_lock_after(raw_spinlock_t *lock)
> > > {
> > > - lock->owner_cpu = raw_smp_processor_id();
> > > - lock->owner = current;
> > > + WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id());
> > > + WRITE_ONCE(lock->owner, current);
> > > }
> >
> > debug_spin_lock_after() runs inside the spinlock itself - why do these
> > writes have to be WRITE_ONCE()?
> >
> > > @@ -197,8 +197,8 @@ static inline void debug_write_unlock(rwlock_t *lock)
> > > RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner");
> > > RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
> > > lock, "wrong CPU");
> > > - lock->owner = SPINLOCK_OWNER_INIT;
> > > - lock->owner_cpu = -1;
> > > + WRITE_ONCE(lock->owner, SPINLOCK_OWNER_INIT);
> > > + WRITE_ONCE(lock->owner_cpu, -1);
> > > }
> >
> > This too is running inside the critical section of the spinlock - why are
> > the WRITE_ONCE() calls necessary?
>
> Although the writes are inside the critical section, they are read
> concurrently outside the critical section, e.g. in
> debug_spin_lock_before(). In other words, the WRITE_ONCE() calls pair with
> the READ_ONCE() calls that are *outside* the critical section.

Fair enough!

Thanks,

Ingo

Subject: [tip: locking/core] locking/spinlock/debug: Fix various data races

The following commit has been merged into the locking/core branch of tip:

Commit-ID: 1a365e822372ba24c9da0822bc583894f6f3d821
Gitweb: https://git.kernel.org/tip/1a365e822372ba24c9da0822bc583894f6f3d821
Author: Marco Elver <[email protected]>
AuthorDate: Wed, 20 Nov 2019 16:57:15 +01:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Fri, 29 Nov 2019 08:03:27 +01:00

locking/spinlock/debug: Fix various data races

This fixes various data races in spinlock_debug. When testing with KCSAN,
the console gets spammed with data race reports, suggesting these races
are extremely frequent.

Example data race report:

read to 0xffff8ab24f403c48 of 4 bytes by task 221 on cpu 2:
debug_spin_lock_before kernel/locking/spinlock_debug.c:85 [inline]
do_raw_spin_lock+0x9b/0x210 kernel/locking/spinlock_debug.c:112
__raw_spin_lock include/linux/spinlock_api_smp.h:143 [inline]
_raw_spin_lock+0x39/0x40 kernel/locking/spinlock.c:151
spin_lock include/linux/spinlock.h:338 [inline]
get_partial_node.isra.0.part.0+0x32/0x2f0 mm/slub.c:1873
get_partial_node mm/slub.c:1870 [inline]
<snip>

write to 0xffff8ab24f403c48 of 4 bytes by task 167 on cpu 3:
debug_spin_unlock kernel/locking/spinlock_debug.c:103 [inline]
do_raw_spin_unlock+0xc9/0x1a0 kernel/locking/spinlock_debug.c:138
__raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:159 [inline]
_raw_spin_unlock_irqrestore+0x2d/0x50 kernel/locking/spinlock.c:191
spin_unlock_irqrestore include/linux/spinlock.h:393 [inline]
free_debug_processing+0x1b3/0x210 mm/slub.c:1214
__slab_free+0x292/0x400 mm/slub.c:2864
<snip>

As a side effect, with KCSAN these races eventually lock up the console,
most likely due to a deadlock, e.g. .. -> printk lock -> spinlock_debug ->
KCSAN detects data race -> kcsan_print_report() -> printk lock ->
deadlock.

This fix 1) avoids the data races, and 2) allows lock debugging to be used
together with KCSAN.

Reported-by: Qian Cai <[email protected]>
Signed-off-by: Marco Elver <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Paul E. McKenney <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Will Deacon <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/locking/spinlock_debug.c | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
index 399669f..472dd46 100644
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -51,19 +51,19 @@ EXPORT_SYMBOL(__rwlock_init);

static void spin_dump(raw_spinlock_t *lock, const char *msg)
{
- struct task_struct *owner = NULL;
+ struct task_struct *owner = READ_ONCE(lock->owner);

- if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
- owner = lock->owner;
+ if (owner == SPINLOCK_OWNER_INIT)
+ owner = NULL;
printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
msg, raw_smp_processor_id(),
current->comm, task_pid_nr(current));
printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, "
".owner_cpu: %d\n",
- lock, lock->magic,
+ lock, READ_ONCE(lock->magic),
owner ? owner->comm : "<none>",
owner ? task_pid_nr(owner) : -1,
- lock->owner_cpu);
+ READ_ONCE(lock->owner_cpu));
dump_stack();
}

@@ -80,16 +80,16 @@ static void spin_bug(raw_spinlock_t *lock, const char *msg)
static inline void
debug_spin_lock_before(raw_spinlock_t *lock)
{
- SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
- SPIN_BUG_ON(lock->owner == current, lock, "recursion");
- SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
+ SPIN_BUG_ON(READ_ONCE(lock->magic) != SPINLOCK_MAGIC, lock, "bad magic");
+ SPIN_BUG_ON(READ_ONCE(lock->owner) == current, lock, "recursion");
+ SPIN_BUG_ON(READ_ONCE(lock->owner_cpu) == raw_smp_processor_id(),
lock, "cpu recursion");
}

static inline void debug_spin_lock_after(raw_spinlock_t *lock)
{
- lock->owner_cpu = raw_smp_processor_id();
- lock->owner = current;
+ WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id());
+ WRITE_ONCE(lock->owner, current);
}

static inline void debug_spin_unlock(raw_spinlock_t *lock)
@@ -99,8 +99,8 @@ static inline void debug_spin_unlock(raw_spinlock_t *lock)
SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
lock, "wrong CPU");
- lock->owner = SPINLOCK_OWNER_INIT;
- lock->owner_cpu = -1;
+ WRITE_ONCE(lock->owner, SPINLOCK_OWNER_INIT);
+ WRITE_ONCE(lock->owner_cpu, -1);
}

/*
@@ -187,8 +187,8 @@ static inline void debug_write_lock_before(rwlock_t *lock)

static inline void debug_write_lock_after(rwlock_t *lock)
{
- lock->owner_cpu = raw_smp_processor_id();
- lock->owner = current;
+ WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id());
+ WRITE_ONCE(lock->owner, current);
}

static inline void debug_write_unlock(rwlock_t *lock)
@@ -197,8 +197,8 @@ static inline void debug_write_unlock(rwlock_t *lock)
RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner");
RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
lock, "wrong CPU");
- lock->owner = SPINLOCK_OWNER_INIT;
- lock->owner_cpu = -1;
+ WRITE_ONCE(lock->owner, SPINLOCK_OWNER_INIT);
+ WRITE_ONCE(lock->owner_cpu, -1);
}

void do_raw_write_lock(rwlock_t *lock)