2023-03-17 03:15:21

by Boqun Feng

[permalink] [raw]
Subject: [PATCH rcu 4/7] locking/lockdep: Improve the deadlock scenario print for sync and read lock

Lock scenario print is always a weak spot of lockdep splats. Improvement
can be made if we rework the dependency search and the error printing.

However without touching the graph search, we can improve a little for
the circular deadlock case, since we have the to-be-added lock
dependency, and know whether these two locks are read/write/sync.

In order to know whether a held_lock is sync or not, a bit was
"stolen" from ->references, which reduce our limit for the same lock
class nesting from 2^12 to 2^11, and it should still be good enough.

Besides, since we now have bit in held_lock for sync, we don't need the
"hardirqoffs being 1" trick, and also we can avoid the __lock_release()
if we jump out of __lock_acquire() before the held_lock stored.

With these changes, a deadlock case evolved with read lock and sync gets
a better print-out from:

[...] Possible unsafe locking scenario:
[...]
[...] CPU0 CPU1
[...] ---- ----
[...] lock(srcuA);
[...] lock(srcuB);
[...] lock(srcuA);
[...] lock(srcuB);

to

[...] Possible unsafe locking scenario:
[...]
[...] CPU0 CPU1
[...] ---- ----
[...] rlock(srcuA);
[...] lock(srcuB);
[...] lock(srcuA);
[...] sync(srcuB);

Signed-off-by: Boqun Feng <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
Signed-off-by: Boqun Feng <[email protected]>
---
include/linux/lockdep.h | 3 ++-
kernel/locking/lockdep.c | 48 ++++++++++++++++++++++++++--------------
2 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 14d9dbedc6c1..b32256e9e944 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -134,7 +134,8 @@ struct held_lock {
unsigned int read:2; /* see lock_acquire() comment */
unsigned int check:1; /* see lock_acquire() comment */
unsigned int hardirqs_off:1;
- unsigned int references:12; /* 32 bits */
+ unsigned int sync:1;
+ unsigned int references:11; /* 32 bits */
unsigned int pin_count;
};

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 36430cf8e407..dcd1d5bfc1e0 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1881,6 +1881,8 @@ print_circular_lock_scenario(struct held_lock *src,
struct lock_class *source = hlock_class(src);
struct lock_class *target = hlock_class(tgt);
struct lock_class *parent = prt->class;
+ int src_read = src->read;
+ int tgt_read = tgt->read;

/*
* A direct locking problem where unsafe_class lock is taken
@@ -1908,7 +1910,10 @@ print_circular_lock_scenario(struct held_lock *src,
printk(" Possible unsafe locking scenario:\n\n");
printk(" CPU0 CPU1\n");
printk(" ---- ----\n");
- printk(" lock(");
+ if (tgt_read != 0)
+ printk(" rlock(");
+ else
+ printk(" lock(");
__print_lock_name(target);
printk(KERN_CONT ");\n");
printk(" lock(");
@@ -1917,7 +1922,12 @@ print_circular_lock_scenario(struct held_lock *src,
printk(" lock(");
__print_lock_name(target);
printk(KERN_CONT ");\n");
- printk(" lock(");
+ if (src_read != 0)
+ printk(" rlock(");
+ else if (src->sync)
+ printk(" sync(");
+ else
+ printk(" lock(");
__print_lock_name(source);
printk(KERN_CONT ");\n");
printk("\n *** DEADLOCK ***\n\n");
@@ -4531,7 +4541,13 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
return 0;
}
}
- if (!hlock->hardirqs_off) {
+
+ /*
+ * For lock_sync(), don't mark the ENABLED usage, since lock_sync()
+ * creates no critical section and no extra dependency can be introduced
+ * by interrupts
+ */
+ if (!hlock->hardirqs_off && !hlock->sync) {
if (hlock->read) {
if (!mark_lock(curr, hlock,
LOCK_ENABLED_HARDIRQ_READ))
@@ -4910,7 +4926,7 @@ static int __lock_is_held(const struct lockdep_map *lock, int read);
static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
int trylock, int read, int check, int hardirqs_off,
struct lockdep_map *nest_lock, unsigned long ip,
- int references, int pin_count)
+ int references, int pin_count, int sync)
{
struct task_struct *curr = current;
struct lock_class *class = NULL;
@@ -4961,7 +4977,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,

class_idx = class - lock_classes;

- if (depth) { /* we're holding locks */
+ if (depth && !sync) {
+ /* we're holding locks and the new held lock is not a sync */
hlock = curr->held_locks + depth - 1;
if (hlock->class_idx == class_idx && nest_lock) {
if (!references)
@@ -4995,6 +5012,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
hlock->trylock = trylock;
hlock->read = read;
hlock->check = check;
+ hlock->sync = !!sync;
hlock->hardirqs_off = !!hardirqs_off;
hlock->references = references;
#ifdef CONFIG_LOCK_STAT
@@ -5056,6 +5074,10 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
if (!validate_chain(curr, hlock, chain_head, chain_key))
return 0;

+ /* For lock_sync(), we are done here since no actual critical section */
+ if (hlock->sync)
+ return 1;
+
curr->curr_chain_key = chain_key;
curr->lockdep_depth++;
check_chain_key(curr);
@@ -5197,7 +5219,7 @@ static int reacquire_held_locks(struct task_struct *curr, unsigned int depth,
hlock->read, hlock->check,
hlock->hardirqs_off,
hlock->nest_lock, hlock->acquire_ip,
- hlock->references, hlock->pin_count)) {
+ hlock->references, hlock->pin_count, 0)) {
case 0:
return 1;
case 1:
@@ -5667,7 +5689,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,

lockdep_recursion_inc();
__lock_acquire(lock, subclass, trylock, read, check,
- irqs_disabled_flags(flags), nest_lock, ip, 0, 0);
+ irqs_disabled_flags(flags), nest_lock, ip, 0, 0, 0);
lockdep_recursion_finish();
raw_local_irq_restore(flags);
}
@@ -5700,11 +5722,6 @@ EXPORT_SYMBOL_GPL(lock_release);
* APIs are used to wait for one or multiple critical sections (on other CPUs
* or threads), and it means that calling these APIs inside these critical
* sections is potential deadlock.
- *
- * This annotation acts as an acqurie+release anontation pair with hardirqoff
- * being 1. Since there's no critical section, no interrupt can create extra
- * dependencies "inside" the annotation, hardirqoff == 1 allows us to avoid
- * false positives.
*/
void lock_sync(struct lockdep_map *lock, unsigned subclass, int read,
int check, struct lockdep_map *nest_lock, unsigned long ip)
@@ -5718,10 +5735,9 @@ void lock_sync(struct lockdep_map *lock, unsigned subclass, int read,
check_flags(flags);

lockdep_recursion_inc();
- __lock_acquire(lock, subclass, 0, read, check, 1, nest_lock, ip, 0, 0);
-
- if (__lock_release(lock, ip))
- check_chain_key(current);
+ __lock_acquire(lock, subclass, 0, read, check,
+ irqs_disabled_flags(flags), nest_lock, ip, 0, 0, 1);
+ check_chain_key(current);
lockdep_recursion_finish();
raw_local_irq_restore(flags);
}
--
2.39.2



2023-03-20 12:13:39

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH rcu 4/7] locking/lockdep: Improve the deadlock scenario print for sync and read lock

On Thu, Mar 16, 2023 at 08:13:36PM -0700, Boqun Feng wrote:
> Lock scenario print is always a weak spot of lockdep splats. Improvement
> can be made if we rework the dependency search and the error printing.
>
> However without touching the graph search, we can improve a little for
> the circular deadlock case, since we have the to-be-added lock
> dependency, and know whether these two locks are read/write/sync.
>
> In order to know whether a held_lock is sync or not, a bit was
> "stolen" from ->references, which reduce our limit for the same lock
> class nesting from 2^12 to 2^11, and it should still be good enough.
>
> Besides, since we now have bit in held_lock for sync, we don't need the
> "hardirqoffs being 1" trick, and also we can avoid the __lock_release()
> if we jump out of __lock_acquire() before the held_lock stored.
>
> With these changes, a deadlock case evolved with read lock and sync gets
> a better print-out from:
>
> [...] Possible unsafe locking scenario:
> [...]
> [...] CPU0 CPU1
> [...] ---- ----
> [...] lock(srcuA);
> [...] lock(srcuB);
> [...] lock(srcuA);
> [...] lock(srcuB);
>
> to
>
> [...] Possible unsafe locking scenario:
> [...]
> [...] CPU0 CPU1
> [...] ---- ----
> [...] rlock(srcuA);
> [...] lock(srcuB);
> [...] lock(srcuA);
> [...] sync(srcuB);
>
> Signed-off-by: Boqun Feng <[email protected]>
> Signed-off-by: Paul E. McKenney <[email protected]>
> Signed-off-by: Boqun Feng <[email protected]>
> ---
> include/linux/lockdep.h | 3 ++-
> kernel/locking/lockdep.c | 48 ++++++++++++++++++++++++++--------------
> 2 files changed, 34 insertions(+), 17 deletions(-)
>
> diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
> index 14d9dbedc6c1..b32256e9e944 100644
> --- a/include/linux/lockdep.h
> +++ b/include/linux/lockdep.h
> @@ -134,7 +134,8 @@ struct held_lock {
> unsigned int read:2; /* see lock_acquire() comment */
> unsigned int check:1; /* see lock_acquire() comment */
> unsigned int hardirqs_off:1;
> - unsigned int references:12; /* 32 bits */
> + unsigned int sync:1;
> + unsigned int references:11; /* 32 bits */
> unsigned int pin_count;
> };
>

Yeah, I suppose we can do that -- another option is to steal some bits
from pin_count, but whatever (references used to be 11 a long while ago,
no problem going back to that).

Acked-by: Peter Zijlstra (Intel) <[email protected]>

2023-03-20 17:57:22

by Boqun Feng

[permalink] [raw]
Subject: Re: [PATCH rcu 4/7] locking/lockdep: Improve the deadlock scenario print for sync and read lock

On Mon, Mar 20, 2023 at 01:13:05PM +0100, Peter Zijlstra wrote:
> On Thu, Mar 16, 2023 at 08:13:36PM -0700, Boqun Feng wrote:
> > Lock scenario print is always a weak spot of lockdep splats. Improvement
> > can be made if we rework the dependency search and the error printing.
> >
> > However without touching the graph search, we can improve a little for
> > the circular deadlock case, since we have the to-be-added lock
> > dependency, and know whether these two locks are read/write/sync.
> >
> > In order to know whether a held_lock is sync or not, a bit was
> > "stolen" from ->references, which reduce our limit for the same lock
> > class nesting from 2^12 to 2^11, and it should still be good enough.
> >
> > Besides, since we now have bit in held_lock for sync, we don't need the
> > "hardirqoffs being 1" trick, and also we can avoid the __lock_release()
> > if we jump out of __lock_acquire() before the held_lock stored.
> >
> > With these changes, a deadlock case evolved with read lock and sync gets
> > a better print-out from:
> >
> > [...] Possible unsafe locking scenario:
> > [...]
> > [...] CPU0 CPU1
> > [...] ---- ----
> > [...] lock(srcuA);
> > [...] lock(srcuB);
> > [...] lock(srcuA);
> > [...] lock(srcuB);
> >
> > to
> >
> > [...] Possible unsafe locking scenario:
> > [...]
> > [...] CPU0 CPU1
> > [...] ---- ----
> > [...] rlock(srcuA);
> > [...] lock(srcuB);
> > [...] lock(srcuA);
> > [...] sync(srcuB);
> >
> > Signed-off-by: Boqun Feng <[email protected]>
> > Signed-off-by: Paul E. McKenney <[email protected]>
> > Signed-off-by: Boqun Feng <[email protected]>
> > ---
> > include/linux/lockdep.h | 3 ++-
> > kernel/locking/lockdep.c | 48 ++++++++++++++++++++++++++--------------
> > 2 files changed, 34 insertions(+), 17 deletions(-)
> >
> > diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
> > index 14d9dbedc6c1..b32256e9e944 100644
> > --- a/include/linux/lockdep.h
> > +++ b/include/linux/lockdep.h
> > @@ -134,7 +134,8 @@ struct held_lock {
> > unsigned int read:2; /* see lock_acquire() comment */
> > unsigned int check:1; /* see lock_acquire() comment */
> > unsigned int hardirqs_off:1;
> > - unsigned int references:12; /* 32 bits */
> > + unsigned int sync:1;
> > + unsigned int references:11; /* 32 bits */
> > unsigned int pin_count;
> > };
> >
>
> Yeah, I suppose we can do that -- another option is to steal some bits
> from pin_count, but whatever (references used to be 11 a long while ago,
> no problem going back to that).

Thanks!

>
> Acked-by: Peter Zijlstra (Intel) <[email protected]>

Applied locally.

Regards,
Boqun