Subject: [tip: locking/core] lockdep: Fix lockdep recursion

The following commit has been merged into the locking/core branch of tip:

Commit-ID: 4d004099a668c41522242aa146a38cc4eb59cb1e
Gitweb: https://git.kernel.org/tip/4d004099a668c41522242aa146a38cc4eb59cb1e
Author: Peter Zijlstra <[email protected]>
AuthorDate: Fri, 02 Oct 2020 11:04:21 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Fri, 09 Oct 2020 08:53:30 +02:00

lockdep: Fix lockdep recursion

Steve reported that lockdep_assert*irq*(), when nested inside lockdep
itself, will trigger a false-positive.

One example is the stack-trace code, as called from inside lockdep,
triggering tracing, which in turn calls RCU, which then uses
lockdep_assert_irqs_disabled().

Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu variables")
Reported-by: Steven Rostedt <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
include/linux/lockdep.h | 13 +++--
kernel/locking/lockdep.c | 99 ++++++++++++++++++++++----------------
2 files changed, 67 insertions(+), 45 deletions(-)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 6a584b3..b1227be 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -534,6 +534,7 @@ do { \

DECLARE_PER_CPU(int, hardirqs_enabled);
DECLARE_PER_CPU(int, hardirq_context);
+DECLARE_PER_CPU(unsigned int, lockdep_recursion);

/*
* The below lockdep_assert_*() macros use raw_cpu_read() to access the above
@@ -543,25 +544,27 @@ DECLARE_PER_CPU(int, hardirq_context);
* read the value from our previous CPU.
*/

+#define __lockdep_enabled (debug_locks && !raw_cpu_read(lockdep_recursion))
+
#define lockdep_assert_irqs_enabled() \
do { \
- WARN_ON_ONCE(debug_locks && !raw_cpu_read(hardirqs_enabled)); \
+ WARN_ON_ONCE(__lockdep_enabled && !raw_cpu_read(hardirqs_enabled)); \
} while (0)

#define lockdep_assert_irqs_disabled() \
do { \
- WARN_ON_ONCE(debug_locks && raw_cpu_read(hardirqs_enabled)); \
+ WARN_ON_ONCE(__lockdep_enabled && raw_cpu_read(hardirqs_enabled)); \
} while (0)

#define lockdep_assert_in_irq() \
do { \
- WARN_ON_ONCE(debug_locks && !raw_cpu_read(hardirq_context)); \
+ WARN_ON_ONCE(__lockdep_enabled && !raw_cpu_read(hardirq_context)); \
} while (0)

#define lockdep_assert_preemption_enabled() \
do { \
WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \
- debug_locks && \
+ __lockdep_enabled && \
(preempt_count() != 0 || \
!raw_cpu_read(hardirqs_enabled))); \
} while (0)
@@ -569,7 +572,7 @@ do { \
#define lockdep_assert_preemption_disabled() \
do { \
WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \
- debug_locks && \
+ __lockdep_enabled && \
(preempt_count() == 0 && \
raw_cpu_read(hardirqs_enabled))); \
} while (0)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index a430fbb..85d15f0 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -76,6 +76,23 @@ module_param(lock_stat, int, 0644);
#define lock_stat 0
#endif

+DEFINE_PER_CPU(unsigned int, lockdep_recursion);
+EXPORT_PER_CPU_SYMBOL_GPL(lockdep_recursion);
+
+static inline bool lockdep_enabled(void)
+{
+ if (!debug_locks)
+ return false;
+
+ if (raw_cpu_read(lockdep_recursion))
+ return false;
+
+ if (current->lockdep_recursion)
+ return false;
+
+ return true;
+}
+
/*
* lockdep_lock: protects the lockdep graph, the hashes and the
* class/list/hash allocators.
@@ -93,7 +110,7 @@ static inline void lockdep_lock(void)

arch_spin_lock(&__lock);
__owner = current;
- current->lockdep_recursion++;
+ __this_cpu_inc(lockdep_recursion);
}

static inline void lockdep_unlock(void)
@@ -101,7 +118,7 @@ static inline void lockdep_unlock(void)
if (debug_locks && DEBUG_LOCKS_WARN_ON(__owner != current))
return;

- current->lockdep_recursion--;
+ __this_cpu_dec(lockdep_recursion);
__owner = NULL;
arch_spin_unlock(&__lock);
}
@@ -393,10 +410,15 @@ void lockdep_init_task(struct task_struct *task)
task->lockdep_recursion = 0;
}

+static __always_inline void lockdep_recursion_inc(void)
+{
+ __this_cpu_inc(lockdep_recursion);
+}
+
static __always_inline void lockdep_recursion_finish(void)
{
- if (WARN_ON_ONCE((--current->lockdep_recursion) & LOCKDEP_RECURSION_MASK))
- current->lockdep_recursion = 0;
+ if (WARN_ON_ONCE(__this_cpu_dec_return(lockdep_recursion)))
+ __this_cpu_write(lockdep_recursion, 0);
}

void lockdep_set_selftest_task(struct task_struct *task)
@@ -3659,7 +3681,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip)
if (unlikely(in_nmi()))
return;

- if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
+ if (unlikely(__this_cpu_read(lockdep_recursion)))
return;

if (unlikely(lockdep_hardirqs_enabled())) {
@@ -3695,7 +3717,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip)

current->hardirq_chain_key = current->curr_chain_key;

- current->lockdep_recursion++;
+ lockdep_recursion_inc();
__trace_hardirqs_on_caller();
lockdep_recursion_finish();
}
@@ -3728,7 +3750,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
goto skip_checks;
}

- if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
+ if (unlikely(__this_cpu_read(lockdep_recursion)))
return;

if (lockdep_hardirqs_enabled()) {
@@ -3781,7 +3803,7 @@ void noinstr lockdep_hardirqs_off(unsigned long ip)
if (in_nmi()) {
if (!IS_ENABLED(CONFIG_TRACE_IRQFLAGS_NMI))
return;
- } else if (current->lockdep_recursion & LOCKDEP_RECURSION_MASK)
+ } else if (__this_cpu_read(lockdep_recursion))
return;

/*
@@ -3814,7 +3836,7 @@ void lockdep_softirqs_on(unsigned long ip)
{
struct irqtrace_events *trace = &current->irqtrace;

- if (unlikely(!debug_locks || current->lockdep_recursion))
+ if (unlikely(!lockdep_enabled()))
return;

/*
@@ -3829,7 +3851,7 @@ void lockdep_softirqs_on(unsigned long ip)
return;
}

- current->lockdep_recursion++;
+ lockdep_recursion_inc();
/*
* We'll do an OFF -> ON transition:
*/
@@ -3852,7 +3874,7 @@ void lockdep_softirqs_on(unsigned long ip)
*/
void lockdep_softirqs_off(unsigned long ip)
{
- if (unlikely(!debug_locks || current->lockdep_recursion))
+ if (unlikely(!lockdep_enabled()))
return;

/*
@@ -4233,11 +4255,11 @@ void lockdep_init_map_waits(struct lockdep_map *lock, const char *name,
if (subclass) {
unsigned long flags;

- if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion))
+ if (DEBUG_LOCKS_WARN_ON(!lockdep_enabled()))
return;

raw_local_irq_save(flags);
- current->lockdep_recursion++;
+ lockdep_recursion_inc();
register_lock_class(lock, subclass, 1);
lockdep_recursion_finish();
raw_local_irq_restore(flags);
@@ -4920,11 +4942,11 @@ void lock_set_class(struct lockdep_map *lock, const char *name,
{
unsigned long flags;

- if (unlikely(current->lockdep_recursion))
+ if (unlikely(!lockdep_enabled()))
return;

raw_local_irq_save(flags);
- current->lockdep_recursion++;
+ lockdep_recursion_inc();
check_flags(flags);
if (__lock_set_class(lock, name, key, subclass, ip))
check_chain_key(current);
@@ -4937,11 +4959,11 @@ void lock_downgrade(struct lockdep_map *lock, unsigned long ip)
{
unsigned long flags;

- if (unlikely(current->lockdep_recursion))
+ if (unlikely(!lockdep_enabled()))
return;

raw_local_irq_save(flags);
- current->lockdep_recursion++;
+ lockdep_recursion_inc();
check_flags(flags);
if (__lock_downgrade(lock, ip))
check_chain_key(current);
@@ -4979,7 +5001,7 @@ static void verify_lock_unused(struct lockdep_map *lock, struct held_lock *hlock

static bool lockdep_nmi(void)
{
- if (current->lockdep_recursion & LOCKDEP_RECURSION_MASK)
+ if (raw_cpu_read(lockdep_recursion))
return false;

if (!in_nmi())
@@ -5000,7 +5022,10 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,

trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);

- if (unlikely(current->lockdep_recursion)) {
+ if (!debug_locks)
+ return;
+
+ if (unlikely(!lockdep_enabled())) {
/* XXX allow trylock from NMI ?!? */
if (lockdep_nmi() && !trylock) {
struct held_lock hlock;
@@ -5023,7 +5048,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
raw_local_irq_save(flags);
check_flags(flags);

- current->lockdep_recursion++;
+ lockdep_recursion_inc();
__lock_acquire(lock, subclass, trylock, read, check,
irqs_disabled_flags(flags), nest_lock, ip, 0, 0);
lockdep_recursion_finish();
@@ -5037,13 +5062,13 @@ void lock_release(struct lockdep_map *lock, unsigned long ip)

trace_lock_release(lock, ip);

- if (unlikely(current->lockdep_recursion))
+ if (unlikely(!lockdep_enabled()))
return;

raw_local_irq_save(flags);
check_flags(flags);

- current->lockdep_recursion++;
+ lockdep_recursion_inc();
if (__lock_release(lock, ip))
check_chain_key(current);
lockdep_recursion_finish();
@@ -5056,13 +5081,13 @@ noinstr int lock_is_held_type(const struct lockdep_map *lock, int read)
unsigned long flags;
int ret = 0;

- if (unlikely(current->lockdep_recursion))
+ if (unlikely(!lockdep_enabled()))
return 1; /* avoid false negative lockdep_assert_held() */

raw_local_irq_save(flags);
check_flags(flags);

- current->lockdep_recursion++;
+ lockdep_recursion_inc();
ret = __lock_is_held(lock, read);
lockdep_recursion_finish();
raw_local_irq_restore(flags);
@@ -5077,13 +5102,13 @@ struct pin_cookie lock_pin_lock(struct lockdep_map *lock)
struct pin_cookie cookie = NIL_COOKIE;
unsigned long flags;

- if (unlikely(current->lockdep_recursion))
+ if (unlikely(!lockdep_enabled()))
return cookie;

raw_local_irq_save(flags);
check_flags(flags);

- current->lockdep_recursion++;
+ lockdep_recursion_inc();
cookie = __lock_pin_lock(lock);
lockdep_recursion_finish();
raw_local_irq_restore(flags);
@@ -5096,13 +5121,13 @@ void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
{
unsigned long flags;

- if (unlikely(current->lockdep_recursion))
+ if (unlikely(!lockdep_enabled()))
return;

raw_local_irq_save(flags);
check_flags(flags);

- current->lockdep_recursion++;
+ lockdep_recursion_inc();
__lock_repin_lock(lock, cookie);
lockdep_recursion_finish();
raw_local_irq_restore(flags);
@@ -5113,13 +5138,13 @@ void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
{
unsigned long flags;

- if (unlikely(current->lockdep_recursion))
+ if (unlikely(!lockdep_enabled()))
return;

raw_local_irq_save(flags);
check_flags(flags);

- current->lockdep_recursion++;
+ lockdep_recursion_inc();
__lock_unpin_lock(lock, cookie);
lockdep_recursion_finish();
raw_local_irq_restore(flags);
@@ -5249,15 +5274,12 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)

trace_lock_acquired(lock, ip);

- if (unlikely(!lock_stat || !debug_locks))
- return;
-
- if (unlikely(current->lockdep_recursion))
+ if (unlikely(!lock_stat || !lockdep_enabled()))
return;

raw_local_irq_save(flags);
check_flags(flags);
- current->lockdep_recursion++;
+ lockdep_recursion_inc();
__lock_contended(lock, ip);
lockdep_recursion_finish();
raw_local_irq_restore(flags);
@@ -5270,15 +5292,12 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip)

trace_lock_contended(lock, ip);

- if (unlikely(!lock_stat || !debug_locks))
- return;
-
- if (unlikely(current->lockdep_recursion))
+ if (unlikely(!lock_stat || !lockdep_enabled()))
return;

raw_local_irq_save(flags);
check_flags(flags);
- current->lockdep_recursion++;
+ lockdep_recursion_inc();
__lock_acquired(lock, ip);
lockdep_recursion_finish();
raw_local_irq_restore(flags);
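
For illustration only, and not part of the patch above: a minimal user-space
sketch of the guard pattern the fix introduces, with debug_checks standing in
for debug_locks and check_recursion standing in for the per-CPU
lockdep_recursion counter (a single thread stands in for "this CPU"):

#include <stdio.h>

static int debug_checks = 1;	/* stands in for debug_locks                  */
static int check_recursion;	/* stands in for this CPU's lockdep_recursion */

/* Mirrors __lockdep_enabled: assertions are off while the checker runs. */
#define checks_enabled() (debug_checks && !check_recursion)

static void assert_cond(int cond)
{
	/* Like lockdep_assert_*(): only warn when not recursing. */
	if (checks_enabled() && !cond)
		fprintf(stderr, "assertion fired\n");
}

static void work_done_by_checker(void)
{
	/* Nested assertion, e.g. stack-trace -> tracing -> RCU -> assert. */
	assert_cond(0);
}

static void run_checker(void)
{
	check_recursion++;		/* lockdep_recursion_inc()       */
	work_done_by_checker();		/* suppressed: no false positive */
	check_recursion--;		/* lockdep_recursion_finish()    */
}

int main(void)
{
	run_checker();			/* prints nothing                */
	assert_cond(0);			/* prints "assertion fired"      */
	return 0;
}

In the patch itself the counter moves from current->lockdep_recursion to a
per-CPU variable so that the assertion macros in lockdep.h can test it with
raw_cpu_read(), next to the existing per-CPU hardirqs_enabled and
hardirq_context state.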


2020-10-09 14:04:41

by Qian Cai

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Fri, 2020-10-09 at 07:58 +0000, tip-bot2 for Peter Zijlstra wrote:
> The following commit has been merged into the locking/core branch of tip:
>
> Commit-ID: 4d004099a668c41522242aa146a38cc4eb59cb1e
> Gitweb:
> https://git.kernel.org/tip/4d004099a668c41522242aa146a38cc4eb59cb1e
> Author: Peter Zijlstra <[email protected]>
> AuthorDate: Fri, 02 Oct 2020 11:04:21 +02:00
> Committer: Ingo Molnar <[email protected]>
> CommitterDate: Fri, 09 Oct 2020 08:53:30 +02:00
>
> lockdep: Fix lockdep recursion
>
> Steve reported that lockdep_assert*irq*(), when nested inside lockdep
> itself, will trigger a false-positive.
>
> One example is the stack-trace code, as called from inside lockdep,
> triggering tracing, which in turn calls RCU, which then uses
> lockdep_assert_irqs_disabled().
>
> Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu variables")
> Reported-by: Steven Rostedt <[email protected]>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> Signed-off-by: Ingo Molnar <[email protected]>

Reverting this linux-next commit fixed booting RCU-list warnings everywhere.

== x86 ==
[ 8.101841][ T1] rcu: Hierarchical SRCU implementation.
[ 8.110615][ T5] NMI watchdog: Enabled. Permanently consumes one hw-PMU counter.
[ 8.153506][ T1] smp: Bringing up secondary CPUs ...
[ 8.163075][ T1] x86: Booting SMP configuration:
[ 8.167843][ T1] .... node #0, CPUs: #1
[ 4.002695][ T0]
[ 4.002695][ T0] =============================
[ 4.002695][ T0] WARNING: suspicious RCU usage
[ 4.002695][ T0] 5.9.0-rc8-next-20201009 #2 Not tainted
[ 4.002695][ T0] -----------------------------
[ 4.002695][ T0] kernel/locking/lockdep.c:3497 RCU-list traversed in non-reader section!!
[ 4.002695][ T0]
[ 4.002695][ T0] other info that might help us debug this:
[ 4.002695][ T0]
[ 4.002695][ T0]
[ 4.002695][ T0] RCU used illegally from offline CPU!
[ 4.002695][ T0] rcu_scheduler_active = 1, debug_locks = 1
[ 4.002695][ T0] no locks held by swapper/1/0.
[ 4.002695][ T0]
[ 4.002695][ T0] stack backtrace:
[ 4.002695][ T0] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.9.0-rc8-next-20201009 #2
[ 4.002695][ T0] Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019
[ 4.002695][ T0] Call Trace:
[ 4.002695][ T0] dump_stack+0x99/0xcb
[ 4.002695][ T0] __lock_acquire.cold.76+0x2ad/0x3e0
lookup_chain_cache at kernel/locking/lockdep.c:3497
(inlined by) lookup_chain_cache_add at kernel/locking/lockdep.c:3517
(inlined by) validate_chain at kernel/locking/lockdep.c:3572
(inlined by) __lock_acquire at kernel/locking/lockdep.c:4837
[ 4.002695][ T0] ? lockdep_hardirqs_on_prepare+0x3d0/0x3d0
[ 4.002695][ T0] lock_acquire+0x1c8/0x820
lockdep_recursion_finish at kernel/locking/lockdep.c:435
(inlined by) lock_acquire at kernel/locking/lockdep.c:5444
(inlined by) lock_acquire at kernel/locking/lockdep.c:5407
[ 4.002695][ T0] ? __debug_object_init+0xb4/0xf50
[ 4.002695][ T0] ? memset+0x1f/0x40
[ 4.002695][ T0] ? rcu_read_unlock+0x40/0x40
[ 4.002695][ T0] ? mce_gather_info+0x170/0x170
[ 4.002695][ T0] ? arch_freq_get_on_cpu+0x270/0x270
[ 4.002695][ T0] ? mce_cpu_restart+0x40/0x40
[ 4.002695][ T0] _raw_spin_lock_irqsave+0x30/0x50
[ 4.002695][ T0] ? __debug_object_init+0xb4/0xf50
[ 4.002695][ T0] __debug_object_init+0xb4/0xf50
[ 4.002695][ T0] ? mce_amd_feature_init+0x80c/0xa70
[ 4.002695][ T0] ? debug_object_fixup+0x30/0x30
[ 4.002695][ T0] ? machine_check_poll+0x2d0/0x2d0
[ 4.002695][ T0] ? mce_cpu_restart+0x40/0x40
[ 4.002695][ T0] init_timer_key+0x29/0x220
[ 4.002695][ T0] identify_cpu+0xfcb/0x1980
[ 4.002695][ T0] identify_secondary_cpu+0x1d/0x190
[ 4.002695][ T0] smp_store_cpu_info+0x167/0x1f0
[ 4.002695][ T0] start_secondary+0x5b/0x290
[ 4.002695][ T0] secondary_startup_64_no_verify+0xb8/0xbb
[ 8.379508][ T1] #2
[ 8.389728][ T1] #3
[ 8.399901][ T1]

== s390 ==
00: [ 1.539768] rcu: Hierarchical SRCU implementation.
00: [ 1.561622] smp: Bringing up secondary CPUs ...
00: [ 1.568677]
00: [ 1.568681] =============================
00: [ 1.568682] WARNING: suspicious RCU usage
00: [ 1.568688] 5.9.0-rc8-next-20201009 #2 Not tainted
00: [ 1.568688] -----------------------------
00: [ 1.568691] kernel/locking/lockdep.c:3497 RCU-list traversed in non-reade
00: r section!!
00: [ 1.568692]
00: [ 1.568692] other info that might help us debug this:
00: [ 1.568692]
00: [ 1.568694]
00: [ 1.568694] RCU used illegally from offline CPU!
00: [ 1.568694] rcu_scheduler_active = 1, debug_locks = 1
00: [ 1.568697] no locks held by swapper/1/0.
00: [ 1.568697]
00: [ 1.568697] stack backtrace:
00: [ 1.568702] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.9.0-rc8-next-2020
00: 1009 #2
00: [ 1.568704] Hardware name: IBM 2964 N96 400 (z/VM 6.4.0)
00: [ 1.568706] Call Trace:
00: [ 1.568719] [<000000011fb85370>] show_stack+0x158/0x1f0
00: [ 1.568723] [<000000011fb90402>] dump_stack+0x1f2/0x238
00: [ 1.568730] [<000000011ebd89d8>] __lock_acquire+0x2640/0x4dd0
lookup_chain_cache at kernel/locking/lockdep.c:3497
(inlined by) lookup_chain_cache_add at kernel/locking/lockdep.c:3517
(inlined by) validate_chain at kernel/locking/lockdep.c:3572
(inlined by) __lock_acquire at kernel/locking/lockdep.c:4837
00: [ 1.568732] [<000000011ebdd230>] lock_acquire+0x3a8/0xd08
lockdep_recursion_finish at kernel/locking/lockdep.c:435
(inlined by) lock_acquire at kernel/locking/lockdep.c:5444
(inlined by) lock_acquire at kernel/locking/lockdep.c:5407
00: [ 1.568738] [<000000011fbb5ca8>] _raw_spin_lock_irqsave+0xc0/0xf0
__raw_spin_lock_irqsave at include/linux/spinlock_api_smp.h:117
(inlined by) _raw_spin_lock_irqsave at kernel/locking/spinlock.c:159
00: [ 1.568745] [<000000011ec6e7e8>] clockevents_register_device+0xa8/0x528
00:
00: [ 1.568748] [<000000011ea55246>] init_cpu_timer+0x33e/0x468
00: [ 1.568754] [<000000011ea7f4d2>] smp_init_secondary+0x11a/0x328
00: [ 1.568757] [<000000011ea7f3b2>] smp_start_secondary+0x82/0x88
smp_start_secondary at arch/s390/kernel/smp.c:892
00: [ 1.568759] no locks held by swapper/1/0.
00: [ 1.569956] smp: Brought up 1 node, 2 CPUs


2020-10-09 15:32:36

by Qian Cai

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Fri, 2020-10-09 at 06:58 -0700, Paul E. McKenney wrote:
> On Fri, Oct 09, 2020 at 09:41:24AM -0400, Qian Cai wrote:
> > On Fri, 2020-10-09 at 07:58 +0000, tip-bot2 for Peter Zijlstra wrote:
> > > The following commit has been merged into the locking/core branch of tip:
> > >
> > > Commit-ID: 4d004099a668c41522242aa146a38cc4eb59cb1e
> > > Gitweb:
> > > https://git.kernel.org/tip/4d004099a668c41522242aa146a38cc4eb59cb1e
> > > Author: Peter Zijlstra <[email protected]>
> > > AuthorDate: Fri, 02 Oct 2020 11:04:21 +02:00
> > > Committer: Ingo Molnar <[email protected]>
> > > CommitterDate: Fri, 09 Oct 2020 08:53:30 +02:00
> > >
> > > lockdep: Fix lockdep recursion
> > >
> > > Steve reported that lockdep_assert*irq*(), when nested inside lockdep
> > > itself, will trigger a false-positive.
> > >
> > > One example is the stack-trace code, as called from inside lockdep,
> > > triggering tracing, which in turn calls RCU, which then uses
> > > lockdep_assert_irqs_disabled().
> > >
> > > Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu variables")
> > > Reported-by: Steven Rostedt <[email protected]>
> > > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > > Signed-off-by: Ingo Molnar <[email protected]>
> >
> > Reverting this linux-next commit fixed booting RCU-list warnings everywhere.
>
> Is it possible that the RCU-list warnings were being wrongly suppressed
> without a21ee6055c30? As in are you certain that these RCU-list warnings
> are in fact false positives?

I guess you mean this commit a046a86082cc ("lockdep: Fix lockdep recursion")
instead of a21ee6055c30. It is unclear to me how commit a046a86082cc would
suddenly start generating those warnings, although I can see that it starts to
use per-cpu variables even though the CPU is not yet set online.

DECLARE_PER_CPU(unsigned int, lockdep_recursion);

Anyway, the problem is that early in the boot we have:

start_secondary()
smp_init_secondary()
init_cpu_timer()
clockevents_register_device()

We are taking a lock there while the CPU is not yet online, and
__lock_acquire() then calls things like hlist_for_each_entry_rcu() from
lookup_chain_cache() or register_lock_class(), thus triggering the
RCU-list warnings from an offline CPU.

I am not entirely sure how to fix those though.

2020-10-09 16:15:08

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Fri, Oct 09, 2020 at 11:30:38AM -0400, Qian Cai wrote:
> On Fri, 2020-10-09 at 06:58 -0700, Paul E. McKenney wrote:
> > On Fri, Oct 09, 2020 at 09:41:24AM -0400, Qian Cai wrote:
> > > On Fri, 2020-10-09 at 07:58 +0000, tip-bot2 for Peter Zijlstra wrote:
> > > > The following commit has been merged into the locking/core branch of tip:
> > > >
> > > > Commit-ID: 4d004099a668c41522242aa146a38cc4eb59cb1e
> > > > Gitweb:
> > > > https://git.kernel.org/tip/4d004099a668c41522242aa146a38cc4eb59cb1e
> > > > Author: Peter Zijlstra <[email protected]>
> > > > AuthorDate: Fri, 02 Oct 2020 11:04:21 +02:00
> > > > Committer: Ingo Molnar <[email protected]>
> > > > CommitterDate: Fri, 09 Oct 2020 08:53:30 +02:00
> > > >
> > > > lockdep: Fix lockdep recursion
> > > >
> > > > Steve reported that lockdep_assert*irq*(), when nested inside lockdep
> > > > itself, will trigger a false-positive.
> > > >
> > > > One example is the stack-trace code, as called from inside lockdep,
> > > > triggering tracing, which in turn calls RCU, which then uses
> > > > lockdep_assert_irqs_disabled().
> > > >
> > > > Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu variables")
> > > > Reported-by: Steven Rostedt <[email protected]>
> > > > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > > > Signed-off-by: Ingo Molnar <[email protected]>
> > >
> > > Reverting this linux-next commit fixed booting RCU-list warnings everywhere.
> >
> > Is it possible that the RCU-list warnings were being wrongly suppressed
> > without a21ee6055c30? As in are you certain that these RCU-list warnings
> > are in fact false positives?
>
> I guess you mean this commit a046a86082cc ("lockdep: Fix lockdep recursion")
> instead of a21ee6055c30. It is unclear to me how commit a046a86082cc would
> suddenly start generating those warnings, although I can see that it starts to
> use per-cpu variables even though the CPU is not yet set online.
>
> DECLARE_PER_CPU(unsigned int, lockdep_recursion);
>
> Anyway, the problem is that early in the boot we have:
>
> start_secondary()
> smp_init_secondary()
> init_cpu_timer()
> clockevents_register_device()
>
> We are taking a lock there while the CPU is not yet online, and
> __lock_acquire() then calls things like hlist_for_each_entry_rcu() from
> lookup_chain_cache() or register_lock_class(), thus triggering the
> RCU-list warnings from an offline CPU.
>
> I am not entirely sure how to fix those though.

One approach is to move the call to rcu_cpu_starting() earlier in the
start_secondary() processing. It is OK to invoke rcu_cpu_starting()
multiple times, so for experimental purposes you should be able to add
a new call to it just before that lock is acquired.

Thanx, Paul
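
(Illustration only, not a posted or tested patch: applied to the s390 path in
the backtrace above, this experiment might look roughly like the hunk below,
assuming smp_init_secondary() in arch/s390/kernel/smp.c is the right spot to
add the call before init_cpu_timer() takes its first lock. Peter's reply below
does the analogous thing for x86 by moving the existing rcu_cpu_starting()
call from mtrr_ap_init() into identify_secondary_cpu().)

--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ ... @@ static void smp_init_secondary(void)
+	/* Hypothetical: report this CPU to RCU before the first lock is taken. */
+	rcu_cpu_starting(smp_processor_id());
+
 	init_cpu_timer();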

2020-10-09 16:26:12

by Peter Zijlstra

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Fri, Oct 09, 2020 at 06:58:37AM -0700, Paul E. McKenney wrote:
> On Fri, Oct 09, 2020 at 09:41:24AM -0400, Qian Cai wrote:
> > On Fri, 2020-10-09 at 07:58 +0000, tip-bot2 for Peter Zijlstra wrote:
> > > The following commit has been merged into the locking/core branch of tip:
> > >
> > > Commit-ID: 4d004099a668c41522242aa146a38cc4eb59cb1e
> > > Gitweb:
> > > https://git.kernel.org/tip/4d004099a668c41522242aa146a38cc4eb59cb1e
> > > Author: Peter Zijlstra <[email protected]>
> > > AuthorDate: Fri, 02 Oct 2020 11:04:21 +02:00
> > > Committer: Ingo Molnar <[email protected]>
> > > CommitterDate: Fri, 09 Oct 2020 08:53:30 +02:00
> > >
> > > lockdep: Fix lockdep recursion
> > >
> > > Steve reported that lockdep_assert*irq*(), when nested inside lockdep
> > > itself, will trigger a false-positive.
> > >
> > > One example is the stack-trace code, as called from inside lockdep,
> > > triggering tracing, which in turn calls RCU, which then uses
> > > lockdep_assert_irqs_disabled().
> > >
> > > Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu variables")
> > > Reported-by: Steven Rostedt <[email protected]>
> > > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > > Signed-off-by: Ingo Molnar <[email protected]>
> >
> > Reverting this linux-next commit fixed booting RCU-list warnings everywhere.
>
> Is it possible that the RCU-list warnings were being wrongly suppressed
> without a21ee6055c30? As in are you certain that these RCU-list warnings
> are in fact false positives?

> > [ 4.002695][ T0] init_timer_key+0x29/0x220
> > [ 4.002695][ T0] identify_cpu+0xfcb/0x1980
> > [ 4.002695][ T0] identify_secondary_cpu+0x1d/0x190
> > [ 4.002695][ T0] smp_store_cpu_info+0x167/0x1f0
> > [ 4.002695][ T0] start_secondary+0x5b/0x290
> > [ 4.002695][ T0] secondary_startup_64_no_verify+0xb8/0xbb


They're actually correct warnings; this is trying to use RCU before that
CPU is reported to RCU.

Possibly something like the below works, but I've not tested it, nor
have I really thought hard about it; CPU bringup is tricky and this is
just moving code.

---

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 35ad8480c464..9173d64ee69d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1670,6 +1670,9 @@ void __init identify_boot_cpu(void)
void identify_secondary_cpu(struct cpuinfo_x86 *c)
{
BUG_ON(c == &boot_cpu_data);
+
+ rcu_cpu_starting(smp_processor_id());
+
identify_cpu(c);
#ifdef CONFIG_X86_32
enable_sep_cpu();
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index 6a80f36b5d59..5f436cb4f7c4 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -794,8 +794,6 @@ void mtrr_ap_init(void)
if (!use_intel() || mtrr_aps_delayed_init)
return;

- rcu_cpu_starting(smp_processor_id());
-
/*
* Ideally we should hold mtrr_mutex here to avoid mtrr entries
* changed, but this routine will be called in cpu boot time,

2020-10-09 16:39:32

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Fri, Oct 09, 2020 at 06:23:52PM +0200, Peter Zijlstra wrote:
> On Fri, Oct 09, 2020 at 06:58:37AM -0700, Paul E. McKenney wrote:
> > On Fri, Oct 09, 2020 at 09:41:24AM -0400, Qian Cai wrote:
> > > On Fri, 2020-10-09 at 07:58 +0000, tip-bot2 for Peter Zijlstra wrote:
> > > > The following commit has been merged into the locking/core branch of tip:
> > > >
> > > > Commit-ID: 4d004099a668c41522242aa146a38cc4eb59cb1e
> > > > Gitweb:
> > > > https://git.kernel.org/tip/4d004099a668c41522242aa146a38cc4eb59cb1e
> > > > Author: Peter Zijlstra <[email protected]>
> > > > AuthorDate: Fri, 02 Oct 2020 11:04:21 +02:00
> > > > Committer: Ingo Molnar <[email protected]>
> > > > CommitterDate: Fri, 09 Oct 2020 08:53:30 +02:00
> > > >
> > > > lockdep: Fix lockdep recursion
> > > >
> > > > Steve reported that lockdep_assert*irq*(), when nested inside lockdep
> > > > itself, will trigger a false-positive.
> > > >
> > > > One example is the stack-trace code, as called from inside lockdep,
> > > > triggering tracing, which in turn calls RCU, which then uses
> > > > lockdep_assert_irqs_disabled().
> > > >
> > > > Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu variables")
> > > > Reported-by: Steven Rostedt <[email protected]>
> > > > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > > > Signed-off-by: Ingo Molnar <[email protected]>
> > >
> > > Reverting this linux-next commit fixed booting RCU-list warnings everywhere.
> >
> > Is it possible that the RCU-list warnings were being wrongly suppressed
> > without a21ee6055c30? As in are you certain that these RCU-list warnings
> > are in fact false positives?
>
> > > [ 4.002695][ T0] init_timer_key+0x29/0x220
> > > [ 4.002695][ T0] identify_cpu+0xfcb/0x1980
> > > [ 4.002695][ T0] identify_secondary_cpu+0x1d/0x190
> > > [ 4.002695][ T0] smp_store_cpu_info+0x167/0x1f0
> > > [ 4.002695][ T0] start_secondary+0x5b/0x290
> > > [ 4.002695][ T0] secondary_startup_64_no_verify+0xb8/0xbb
>
>
> They're actually correct warnings; this is trying to use RCU before that
> CPU is reported to RCU.
>
> Possibly something like the below works, but I've not tested it, nor
> have I really thought hard about it; CPU bringup is tricky and this is
> just moving code.

Looks to me like a good thing to try! ;-)

Thanx, Paul

> ---
>
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index 35ad8480c464..9173d64ee69d 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -1670,6 +1670,9 @@ void __init identify_boot_cpu(void)
> void identify_secondary_cpu(struct cpuinfo_x86 *c)
> {
> BUG_ON(c == &boot_cpu_data);
> +
> + rcu_cpu_starting(smp_processor_id());
> +
> identify_cpu(c);
> #ifdef CONFIG_X86_32
> enable_sep_cpu();
> diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
> index 6a80f36b5d59..5f436cb4f7c4 100644
> --- a/arch/x86/kernel/cpu/mtrr/mtrr.c
> +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
> @@ -794,8 +794,6 @@ void mtrr_ap_init(void)
> if (!use_intel() || mtrr_aps_delayed_init)
> return;
>
> - rcu_cpu_starting(smp_processor_id());
> -
> /*
> * Ideally we should hold mtrr_mutex here to avoid mtrr entries
> * changed, but this routine will be called in cpu boot time,

2020-10-09 18:41:06

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Fri, Oct 09, 2020 at 09:41:24AM -0400, Qian Cai wrote:
> On Fri, 2020-10-09 at 07:58 +0000, tip-bot2 for Peter Zijlstra wrote:
> > The following commit has been merged into the locking/core branch of tip:
> >
> > Commit-ID: 4d004099a668c41522242aa146a38cc4eb59cb1e
> > Gitweb:
> > https://git.kernel.org/tip/4d004099a668c41522242aa146a38cc4eb59cb1e
> > Author: Peter Zijlstra <[email protected]>
> > AuthorDate: Fri, 02 Oct 2020 11:04:21 +02:00
> > Committer: Ingo Molnar <[email protected]>
> > CommitterDate: Fri, 09 Oct 2020 08:53:30 +02:00
> >
> > lockdep: Fix lockdep recursion
> >
> > Steve reported that lockdep_assert*irq*(), when nested inside lockdep
> > itself, will trigger a false-positive.
> >
> > One example is the stack-trace code, as called from inside lockdep,
> > triggering tracing, which in turn calls RCU, which then uses
> > lockdep_assert_irqs_disabled().
> >
> > Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu variables")
> > Reported-by: Steven Rostedt <[email protected]>
> > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > Signed-off-by: Ingo Molnar <[email protected]>
>
> Reverting this linux-next commit fixed booting RCU-list warnings everywhere.

Is it possible that the RCU-list warnings were being wrongly suppressed
without a21ee6055c30? As in are you certain that these RCU-list warnings
are in fact false positives?

Thanx, Paul

2020-10-09 23:34:50

by Qian Cai

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Fri, 2020-10-09 at 13:36 -0400, Qian Cai wrote:
> Back to x86, we have:
>
> start_secondary()
> smp_callin()
> apic_ap_setup()
> setup_local_APIC()
> printk() in certain conditions.
>
> which is before smp_store_cpu_info().
>
> Can't we add a rcu_cpu_starting() at the very top for each start_secondary(),
> secondary_start_kernel(), smp_start_secondary() etc, so we don't worry about
> any printk() later?

This is rather ironic: rcu_cpu_starting() takes a lock and then reports
itself.

[ 8.826732][ T0] __lock_acquire.cold.76+0x2ad/0x3e0
[ 8.826732][ T0] lock_acquire+0x1c8/0x820
[ 8.826732][ T0] _raw_spin_lock_irqsave+0x30/0x50
[ 8.826732][ T0] rcu_cpu_starting+0xd0/0x2c0
[ 8.826732][ T0] start_secondary+0x10/0x2a0
[ 8.826732][ T0] secondary_startup_64_no_verify+0xb8/0xbb


2020-10-09 23:34:50

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Fri, Oct 09, 2020 at 01:36:47PM -0400, Qian Cai wrote:
> On Fri, 2020-10-09 at 18:23 +0200, Peter Zijlstra wrote:
> > On Fri, Oct 09, 2020 at 06:58:37AM -0700, Paul E. McKenney wrote:
> > > On Fri, Oct 09, 2020 at 09:41:24AM -0400, Qian Cai wrote:
> > > > On Fri, 2020-10-09 at 07:58 +0000, tip-bot2 for Peter Zijlstra wrote:
> > > > > The following commit has been merged into the locking/core branch of
> > > > > tip:
> > > > >
> > > > > Commit-ID: 4d004099a668c41522242aa146a38cc4eb59cb1e
> > > > > Gitweb:
> > > > > https://git.kernel.org/tip/4d004099a668c41522242aa146a38cc4eb59cb1e
> > > > > Author: Peter Zijlstra <[email protected]>
> > > > > AuthorDate: Fri, 02 Oct 2020 11:04:21 +02:00
> > > > > Committer: Ingo Molnar <[email protected]>
> > > > > CommitterDate: Fri, 09 Oct 2020 08:53:30 +02:00
> > > > >
> > > > > lockdep: Fix lockdep recursion
> > > > >
> > > > > Steve reported that lockdep_assert*irq*(), when nested inside lockdep
> > > > > itself, will trigger a false-positive.
> > > > >
> > > > > One example is the stack-trace code, as called from inside lockdep,
> > > > > triggering tracing, which in turn calls RCU, which then uses
> > > > > lockdep_assert_irqs_disabled().
> > > > >
> > > > > Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to
> > > > > per-cpu
> > > > > variables")
> > > > > Reported-by: Steven Rostedt <[email protected]>
> > > > > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > > > > Signed-off-by: Ingo Molnar <[email protected]>
> > > >
> > > > Reverting this linux-next commit fixed booting RCU-list warnings
> > > > everywhere.
> > >
> > > Is it possible that the RCU-list warnings were being wrongly suppressed
> > > without a21ee6055c30? As in are you certain that these RCU-list warnings
> > > are in fact false positives?
> > > > [ 4.002695][ T0] init_timer_key+0x29/0x220
> > > > [ 4.002695][ T0] identify_cpu+0xfcb/0x1980
> > > > [ 4.002695][ T0] identify_secondary_cpu+0x1d/0x190
> > > > [ 4.002695][ T0] smp_store_cpu_info+0x167/0x1f0
> > > > [ 4.002695][ T0] start_secondary+0x5b/0x290
> > > > [ 4.002695][ T0] secondary_startup_64_no_verify+0xb8/0xbb
> >
> > They're actually correct warnings, this is trying to use RCU before that
> > CPU is reported to RCU.
> >
> > Possibly something like the below works, but I've not tested it, nor
> > have I really thought hard about it, bring up tricky and this is just
> > moving code.
>
> I don't think this will always work. Basically, anything like printk() would
> trigger the warning because it tries to acquire a lock. For example, on arm64:
>
> [ 0.418627] lockdep_rcu_suspicious+0x134/0x14c
> [ 0.418629] __lock_acquire+0x1c30/0x2600
> [ 0.418631] lock_acquire+0x274/0xc48
> [ 0.418632] _raw_spin_lock+0xc8/0x140
> [ 0.418634] vprintk_emit+0x90/0x3d0
> [ 0.418636] vprintk_default+0x34/0x40
> [ 0.418638] vprintk_func+0x378/0x590
> [ 0.418640] printk+0xa8/0xd4
> [ 0.418642] __cpuinfo_store_cpu+0x71c/0x868
> [ 0.418644] cpuinfo_store_cpu+0x2c/0xc8
> [ 0.418645] secondary_start_kernel+0x244/0x318
>
> Back to x86, we have:
>
> start_secondary()
> smp_callin()
> apic_ap_setup()
> setup_local_APIC()
> printk() in certain conditions.
>
> which is before smp_store_cpu_info().
>
> Can't we add a rcu_cpu_starting() at the very top for each start_secondary(),
> secondary_start_kernel(), smp_start_secondary() etc, so we don't worry about any
> printk() later?

I can give you a definite "I do not know". As Peter said, CPU bringup
is a tricky process.

But why not try it and see what happens?

Thanx, Paul

> > ---
> >
> > diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> > index 35ad8480c464..9173d64ee69d 100644
> > --- a/arch/x86/kernel/cpu/common.c
> > +++ b/arch/x86/kernel/cpu/common.c
> > @@ -1670,6 +1670,9 @@ void __init identify_boot_cpu(void)
> > void identify_secondary_cpu(struct cpuinfo_x86 *c)
> > {
> > BUG_ON(c == &boot_cpu_data);
> > +
> > + rcu_cpu_starting(smp_processor_id());
> > +
> > identify_cpu(c);
> > #ifdef CONFIG_X86_32
> > enable_sep_cpu();
> > diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
> > index 6a80f36b5d59..5f436cb4f7c4 100644
> > --- a/arch/x86/kernel/cpu/mtrr/mtrr.c
> > +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
> > @@ -794,8 +794,6 @@ void mtrr_ap_init(void)
> > if (!use_intel() || mtrr_aps_delayed_init)
> > return;
> >
> > - rcu_cpu_starting(smp_processor_id());
> > -
> > /*
> > * Ideally we should hold mtrr_mutex here to avoid mtrr entries
> > * changed, but this routine will be called in cpu boot time,
> >
>

2020-10-10 00:13:08

by Qian Cai

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Fri, 2020-10-09 at 18:23 +0200, Peter Zijlstra wrote:
> On Fri, Oct 09, 2020 at 06:58:37AM -0700, Paul E. McKenney wrote:
> > On Fri, Oct 09, 2020 at 09:41:24AM -0400, Qian Cai wrote:
> > > On Fri, 2020-10-09 at 07:58 +0000, tip-bot2 for Peter Zijlstra wrote:
> > > > The following commit has been merged into the locking/core branch of
> > > > tip:
> > > >
> > > > Commit-ID: 4d004099a668c41522242aa146a38cc4eb59cb1e
> > > > Gitweb:
> > > > https://git.kernel.org/tip/4d004099a668c41522242aa146a38cc4eb59cb1e
> > > > Author: Peter Zijlstra <[email protected]>
> > > > AuthorDate: Fri, 02 Oct 2020 11:04:21 +02:00
> > > > Committer: Ingo Molnar <[email protected]>
> > > > CommitterDate: Fri, 09 Oct 2020 08:53:30 +02:00
> > > >
> > > > lockdep: Fix lockdep recursion
> > > >
> > > > Steve reported that lockdep_assert*irq*(), when nested inside lockdep
> > > > itself, will trigger a false-positive.
> > > >
> > > > One example is the stack-trace code, as called from inside lockdep,
> > > > triggering tracing, which in turn calls RCU, which then uses
> > > > lockdep_assert_irqs_disabled().
> > > >
> > > > Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to
> > > > per-cpu
> > > > variables")
> > > > Reported-by: Steven Rostedt <[email protected]>
> > > > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > > > Signed-off-by: Ingo Molnar <[email protected]>
> > >
> > > Reverting this linux-next commit fixed booting RCU-list warnings
> > > everywhere.
> >
> > Is it possible that the RCU-list warnings were being wrongly suppressed
> > without a21ee6055c30? As in are you certain that these RCU-list warnings
> > are in fact false positives?
> > > [ 4.002695][ T0] init_timer_key+0x29/0x220
> > > [ 4.002695][ T0] identify_cpu+0xfcb/0x1980
> > > [ 4.002695][ T0] identify_secondary_cpu+0x1d/0x190
> > > [ 4.002695][ T0] smp_store_cpu_info+0x167/0x1f0
> > > [ 4.002695][ T0] start_secondary+0x5b/0x290
> > > [ 4.002695][ T0] secondary_startup_64_no_verify+0xb8/0xbb
>
> They're actually correct warnings, this is trying to use RCU before that
> CPU is reported to RCU.
>
> Possibly something like the below works, but I've not tested it, nor
> have I really thought hard about it, bring up tricky and this is just
> moving code.

I don't think this will always work. Basically, anything like printk() would
trigger the warning because it tries to acquire a lock. For example, on arm64:

[ 0.418627] lockdep_rcu_suspicious+0x134/0x14c
[ 0.418629] __lock_acquire+0x1c30/0x2600
[ 0.418631] lock_acquire+0x274/0xc48
[ 0.418632] _raw_spin_lock+0xc8/0x140
[ 0.418634] vprintk_emit+0x90/0x3d0
[ 0.418636] vprintk_default+0x34/0x40
[ 0.418638] vprintk_func+0x378/0x590
[ 0.418640] printk+0xa8/0xd4
[ 0.418642] __cpuinfo_store_cpu+0x71c/0x868
[ 0.418644] cpuinfo_store_cpu+0x2c/0xc8
[ 0.418645] secondary_start_kernel+0x244/0x318

Back to x86, we have:

start_secondary()
smp_callin()
apic_ap_setup()
setup_local_APIC()
printk() in certain conditions.

which is before smp_store_cpu_info().

Can't we add a rcu_cpu_starting() at the very top for each start_secondary(),
secondary_start_kernel(), smp_start_secondary() etc, so we don't worry about any
printk() later?
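
For concreteness, a minimal sketch of that idea (purely illustrative: the
function below is not the real arch entry point, just the shape of the
suggestion, reusing the existing rcu_cpu_starting() helper):

/* Sketch only -- not the merged fix, nor the real start_secondary(). */
static void notrace start_secondary_sketch(void *unused)
{
	/*
	 * Report this CPU to RCU before anything that can printk() or
	 * take a lockdep-tracked lock runs on it.
	 */
	rcu_cpu_starting(raw_smp_processor_id());

	/* ... the usual bringup (smp_callin(), smp_store_cpu_info(), ...) ... */
}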

>
> ---
>
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index 35ad8480c464..9173d64ee69d 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -1670,6 +1670,9 @@ void __init identify_boot_cpu(void)
> void identify_secondary_cpu(struct cpuinfo_x86 *c)
> {
> BUG_ON(c == &boot_cpu_data);
> +
> + rcu_cpu_starting(smp_processor_id());
> +
> identify_cpu(c);
> #ifdef CONFIG_X86_32
> enable_sep_cpu();
> diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
> index 6a80f36b5d59..5f436cb4f7c4 100644
> --- a/arch/x86/kernel/cpu/mtrr/mtrr.c
> +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
> @@ -794,8 +794,6 @@ void mtrr_ap_init(void)
> if (!use_intel() || mtrr_aps_delayed_init)
> return;
>
> - rcu_cpu_starting(smp_processor_id());
> -
> /*
> * Ideally we should hold mtrr_mutex here to avoid mtrr entries
> * changed, but this routine will be called in cpu boot time,
>

2020-10-10 00:23:18

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Fri, Oct 09, 2020 at 01:54:34PM -0400, Qian Cai wrote:
> On Fri, 2020-10-09 at 13:36 -0400, Qian Cai wrote:
> > Back to x86, we have:
> >
> > start_secondary()
> > smp_callin()
> > apic_ap_setup()
> > setup_local_APIC()
> > printk() in certain conditions.
> >
> > which is before smp_store_cpu_info().
> >
> > Can't we add a rcu_cpu_starting() at the very top for each start_secondary(),
> > secondary_start_kernel(), smp_start_secondary() etc, so we don't worry about
> > any printk() later?
>
> This is rather irony. rcu_cpu_starting() is taking a lock and then reports
> itself.
>
> [ 8.826732][ T0] __lock_acquire.cold.76+0x2ad/0x3e0
> [ 8.826732][ T0] lock_acquire+0x1c8/0x820
> [ 8.826732][ T0] _raw_spin_lock_irqsave+0x30/0x50
> [ 8.826732][ T0] rcu_cpu_starting+0xd0/0x2c0
> [ 8.826732][ T0] start_secondary+0x10/0x2a0
> [ 8.826732][ T0] secondary_startup_64_no_verify+0xb8/0xbb

Fun!!!

There should be some way around this. I cannot safely record the
offline-to-online transition without acquiring a lock. I suppose
I could trick lockdep into thinking that it was a recursive lockdep
report. Any other approaches?

Thanx, Paul
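
(As an aside, a sketch of the "trick lockdep" idea, using the existing
lockdep_off()/lockdep_on() helpers -- a later message in this thread tries
exactly this and notes it only silences lockdep without making the
tracepoints' RCU usage safe; the function name and lock argument here are
made up for illustration:)

/* Sketch: hide the offline-to-online locking from lockdep. */
static void sketch_record_cpu_online(raw_spinlock_t *lock)
{
	unsigned long flags;

	lockdep_off();		/* lockdep treats what follows as recursion */
	raw_spin_lock_irqsave(lock, flags);
	/* ... record the offline-to-online transition ... */
	raw_spin_unlock_irqrestore(lock, flags);
	lockdep_on();
}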

2020-10-12 04:42:41

by Boqun Feng

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

Hi,

On Fri, Oct 09, 2020 at 09:41:24AM -0400, Qian Cai wrote:
> On Fri, 2020-10-09 at 07:58 +0000, tip-bot2 for Peter Zijlstra wrote:
> > The following commit has been merged into the locking/core branch of tip:
> >
> > Commit-ID: 4d004099a668c41522242aa146a38cc4eb59cb1e
> > Gitweb:
> > https://git.kernel.org/tip/4d004099a668c41522242aa146a38cc4eb59cb1e
> > Author: Peter Zijlstra <[email protected]>
> > AuthorDate: Fri, 02 Oct 2020 11:04:21 +02:00
> > Committer: Ingo Molnar <[email protected]>
> > CommitterDate: Fri, 09 Oct 2020 08:53:30 +02:00
> >
> > lockdep: Fix lockdep recursion
> >
> > Steve reported that lockdep_assert*irq*(), when nested inside lockdep
> > itself, will trigger a false-positive.
> >
> > One example is the stack-trace code, as called from inside lockdep,
> > triggering tracing, which in turn calls RCU, which then uses
> > lockdep_assert_irqs_disabled().
> >
> > Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu
> > variables")
> > Reported-by: Steven Rostedt <[email protected]>
> > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > Signed-off-by: Ingo Molnar <[email protected]>
>
> Reverting this linux-next commit fixed booting RCU-list warnings everywhere.
>

I think this happened because debug_lockdep_rcu_enabled() wasn't adapted
to the change in this commit that made lockdep_recursion a percpu
variable.

Qian, mind trying the following?

Although, arguably, the problem still exists: we still have an RCU
read-side critical section inside lock_acquire(), which may be called on
a yet-to-online CPU that RCU doesn't watch. I think this used to be OK
because we don't "free" anything from lockdep, IOW, there is no
synchronize_rcu() or call_rcu() that _needs_ to wait for the RCU
read-side critical sections inside lockdep. But now we have lock class
recycling, so it might be a problem.

That said, currently validate_chain() and lock class recycling are
mutually excluded via graph_lock, so we are safe for this one ;-)

----------->8
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 39334d2d2b37..35d9bab65b75 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -275,8 +275,8 @@ EXPORT_SYMBOL_GPL(rcu_callback_map);

noinstr int notrace debug_lockdep_rcu_enabled(void)
{
- return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
- current->lockdep_recursion == 0;
+ return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE &&
+ __lockdep_enabled;
}
EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);


> == x86 ==
> [ 8.101841][ T1] rcu: Hierarchical SRCU implementation.
> [ 8.110615][ T5] NMI watchdog: Enabled. Permanently consumes one hw-PMU counter.
> [ 8.153506][ T1] smp: Bringing up secondary CPUs ...
> [ 8.163075][ T1] x86: Booting SMP configuration:
> [ 8.167843][ T1] .... node #0, CPUs: #1
> [ 4.002695][ T0]
> [ 4.002695][ T0] =============================
> [ 4.002695][ T0] WARNING: suspicious RCU usage
> [ 4.002695][ T0] 5.9.0-rc8-next-20201009 #2 Not tainted
> [ 4.002695][ T0] -----------------------------
> [ 4.002695][ T0] kernel/locking/lockdep.c:3497 RCU-list traversed in non-reader section!!
> [ 4.002695][ T0]
> [ 4.002695][ T0] other info that might help us debug this:
> [ 4.002695][ T0]
> [ 4.002695][ T0]
> [ 4.002695][ T0] RCU used illegally from offline CPU!
> [ 4.002695][ T0] rcu_scheduler_active = 1, debug_locks = 1
> [ 4.002695][ T0] no locks held by swapper/1/0.
> [ 4.002695][ T0]
> [ 4.002695][ T0] stack backtrace:
> [ 4.002695][ T0] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.9.0-rc8-next-20201009 #2
> [ 4.002695][ T0] Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019
> [ 4.002695][ T0] Call Trace:
> [ 4.002695][ T0] dump_stack+0x99/0xcb
> [ 4.002695][ T0] __lock_acquire.cold.76+0x2ad/0x3e0
> lookup_chain_cache at kernel/locking/lockdep.c:3497
> (inlined by) lookup_chain_cache_add at kernel/locking/lockdep.c:3517
> (inlined by) validate_chain at kernel/locking/lockdep.c:3572
> (inlined by) __lock_acquire at kernel/locking/lockdep.c:4837
> [ 4.002695][ T0] ? lockdep_hardirqs_on_prepare+0x3d0/0x3d0
> [ 4.002695][ T0] lock_acquire+0x1c8/0x820
> lockdep_recursion_finish at kernel/locking/lockdep.c:435
> (inlined by) lock_acquire at kernel/locking/lockdep.c:5444
> (inlined by) lock_acquire at kernel/locking/lockdep.c:5407
> [ 4.002695][ T0] ? __debug_object_init+0xb4/0xf50
> [ 4.002695][ T0] ? memset+0x1f/0x40
> [ 4.002695][ T0] ? rcu_read_unlock+0x40/0x40
> [ 4.002695][ T0] ? mce_gather_info+0x170/0x170
> [ 4.002695][ T0] ? arch_freq_get_on_cpu+0x270/0x270
> [ 4.002695][ T0] ? mce_cpu_restart+0x40/0x40
> [ 4.002695][ T0] _raw_spin_lock_irqsave+0x30/0x50
> [ 4.002695][ T0] ? __debug_object_init+0xb4/0xf50
> [ 4.002695][ T0] __debug_object_init+0xb4/0xf50
> [ 4.002695][ T0] ? mce_amd_feature_init+0x80c/0xa70
> [ 4.002695][ T0] ? debug_object_fixup+0x30/0x30
> [ 4.002695][ T0] ? machine_check_poll+0x2d0/0x2d0
> [ 4.002695][ T0] ? mce_cpu_restart+0x40/0x40
> [ 4.002695][ T0] init_timer_key+0x29/0x220
> [ 4.002695][ T0] identify_cpu+0xfcb/0x1980
> [ 4.002695][ T0] identify_secondary_cpu+0x1d/0x190
> [ 4.002695][ T0] smp_store_cpu_info+0x167/0x1f0
> [ 4.002695][ T0] start_secondary+0x5b/0x290
> [ 4.002695][ T0] secondary_startup_64_no_verify+0xb8/0xbb
> [ 8.379508][ T1] #2
> [ 8.389728][ T1] #3
> [ 8.399901][ T1]
>
> == s390 ==
> 00: [ 1.539768] rcu: Hierarchical SRCU implementation.
> 00: [ 1.561622] smp: Bringing up secondary CPUs ...
> 00: [ 1.568677]
> 00: [ 1.568681] =============================
> 00: [ 1.568682] WARNING: suspicious RCU usage
> 00: [ 1.568688] 5.9.0-rc8-next-20201009 #2 Not tainted
> 00: [ 1.568688] -----------------------------
> 00: [ 1.568691] kernel/locking/lockdep.c:3497 RCU-list traversed in non-reade
> 00: r section!!
> 00: [ 1.568692]
> 00: [ 1.568692] other info that might help us debug this:
> 00: [ 1.568692]
> 00: [ 1.568694]
> 00: [ 1.568694] RCU used illegally from offline CPU!
> 00: [ 1.568694] rcu_scheduler_active = 1, debug_locks = 1
> 00: [ 1.568697] no locks held by swapper/1/0.
> 00: [ 1.568697]
> 00: [ 1.568697] stack backtrace:
> 00: [ 1.568702] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.9.0-rc8-next-2020
> 00: 1009 #2
> 00: [ 1.568704] Hardware name: IBM 2964 N96 400 (z/VM 6.4.0)
> 00: [ 1.568706] Call Trace:
> 00: [ 1.568719] [<000000011fb85370>] show_stack+0x158/0x1f0
> 00: [ 1.568723] [<000000011fb90402>] dump_stack+0x1f2/0x238
> 00: [ 1.568730] [<000000011ebd89d8>] __lock_acquire+0x2640/0x4dd0
> lookup_chain_cache at kernel/locking/lockdep.c:3497
> (inlined by) lookup_chain_cache_add at kernel/locking/lockdep.c:3517
> (inlined by) validate_chain at kernel/locking/lockdep.c:3572
> (inlined by) __lock_acquire at kernel/locking/lockdep.c:4837
> 00: [ 1.568732] [<000000011ebdd230>] lock_acquire+0x3a8/0xd08
> lockdep_recursion_finish at kernel/locking/lockdep.c:435
> (inlined by) lock_acquire at kernel/locking/lockdep.c:5444
> (inlined by) lock_acquire at kernel/locking/lockdep.c:5407
> 00: [ 1.568738] [<000000011fbb5ca8>] _raw_spin_lock_irqsave+0xc0/0xf0
> __raw_spin_lock_irqsave at include/linux/spinlock_api_smp.h:117
> (inlined by) _raw_spin_lock_irqsave at kernel/locking/spinlock.c:159
> 00: [ 1.568745] [<000000011ec6e7e8>] clockevents_register_device+0xa8/0x528
> 00:
> 00: [ 1.568748] [<000000011ea55246>] init_cpu_timer+0x33e/0x468
> 00: [ 1.568754] [<000000011ea7f4d2>] smp_init_secondary+0x11a/0x328
> 00: [ 1.568757] [<000000011ea7f3b2>] smp_start_secondary+0x82/0x88
> smp_start_secondary at arch/s390/kernel/smp.c:892
> 00: [ 1.568759] no locks held by swapper/1/0.
> 00: [ 1.569956] smp: Brought up 1 node, 2 CPUs
>
> > ---
> > include/linux/lockdep.h | 13 +++--
> > kernel/locking/lockdep.c | 99 ++++++++++++++++++++++----------------
> > 2 files changed, 67 insertions(+), 45 deletions(-)
> >
> > diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
> > index 6a584b3..b1227be 100644
> > --- a/include/linux/lockdep.h
> > +++ b/include/linux/lockdep.h
> > @@ -534,6 +534,7 @@ do {
> > \
> >
> > DECLARE_PER_CPU(int, hardirqs_enabled);
> > DECLARE_PER_CPU(int, hardirq_context);
> > +DECLARE_PER_CPU(unsigned int, lockdep_recursion);
> >
> > /*
> > * The below lockdep_assert_*() macros use raw_cpu_read() to access the above
> > @@ -543,25 +544,27 @@ DECLARE_PER_CPU(int, hardirq_context);
> > * read the value from our previous CPU.
> > */
> >
> > +#define __lockdep_enabled (debug_locks &&
> > !raw_cpu_read(lockdep_recursion))
> > +
> > #define lockdep_assert_irqs_enabled()
> > \
> > do { \
> > - WARN_ON_ONCE(debug_locks && !raw_cpu_read(hardirqs_enabled)); \
> > + WARN_ON_ONCE(__lockdep_enabled && !raw_cpu_read(hardirqs_enabled)); \
> > } while (0)
> >
> > #define lockdep_assert_irqs_disabled()
> > \
> > do { \
> > - WARN_ON_ONCE(debug_locks && raw_cpu_read(hardirqs_enabled)); \
> > + WARN_ON_ONCE(__lockdep_enabled && raw_cpu_read(hardirqs_enabled)); \
> > } while (0)
> >
> > #define lockdep_assert_in_irq()
> > \
> > do { \
> > - WARN_ON_ONCE(debug_locks && !raw_cpu_read(hardirq_context)); \
> > + WARN_ON_ONCE(__lockdep_enabled && !raw_cpu_read(hardirq_context)); \
> > } while (0)
> >
> > #define lockdep_assert_preemption_enabled() \
> > do { \
> > WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \
> > - debug_locks && \
> > + __lockdep_enabled && \
> > (preempt_count() != 0 || \
> > !raw_cpu_read(hardirqs_enabled))); \
> > } while (0)
> > @@ -569,7 +572,7 @@ do {
> > \
> > #define lockdep_assert_preemption_disabled() \
> > do { \
> > WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \
> > - debug_locks && \
> > + __lockdep_enabled && \
> > (preempt_count() == 0 && \
> > raw_cpu_read(hardirqs_enabled))); \
> > } while (0)
> > diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
> > index a430fbb..85d15f0 100644
> > --- a/kernel/locking/lockdep.c
> > +++ b/kernel/locking/lockdep.c
> > @@ -76,6 +76,23 @@ module_param(lock_stat, int, 0644);
> > #define lock_stat 0
> > #endif
> >
> > +DEFINE_PER_CPU(unsigned int, lockdep_recursion);
> > +EXPORT_PER_CPU_SYMBOL_GPL(lockdep_recursion);
> > +
> > +static inline bool lockdep_enabled(void)
> > +{
> > + if (!debug_locks)
> > + return false;
> > +
> > + if (raw_cpu_read(lockdep_recursion))
> > + return false;
> > +
> > + if (current->lockdep_recursion)
> > + return false;
> > +
> > + return true;
> > +}
> > +
> > /*
> > * lockdep_lock: protects the lockdep graph, the hashes and the
> > * class/list/hash allocators.
> > @@ -93,7 +110,7 @@ static inline void lockdep_lock(void)
> >
> > arch_spin_lock(&__lock);
> > __owner = current;
> > - current->lockdep_recursion++;
> > + __this_cpu_inc(lockdep_recursion);
> > }
> >
> > static inline void lockdep_unlock(void)
> > @@ -101,7 +118,7 @@ static inline void lockdep_unlock(void)
> > if (debug_locks && DEBUG_LOCKS_WARN_ON(__owner != current))
> > return;
> >
> > - current->lockdep_recursion--;
> > + __this_cpu_dec(lockdep_recursion);
> > __owner = NULL;
> > arch_spin_unlock(&__lock);
> > }
> > @@ -393,10 +410,15 @@ void lockdep_init_task(struct task_struct *task)
> > task->lockdep_recursion = 0;
> > }
> >
> > +static __always_inline void lockdep_recursion_inc(void)
> > +{
> > + __this_cpu_inc(lockdep_recursion);
> > +}
> > +
> > static __always_inline void lockdep_recursion_finish(void)
> > {
> > - if (WARN_ON_ONCE((--current->lockdep_recursion) &
> > LOCKDEP_RECURSION_MASK))
> > - current->lockdep_recursion = 0;
> > + if (WARN_ON_ONCE(__this_cpu_dec_return(lockdep_recursion)))
> > + __this_cpu_write(lockdep_recursion, 0);
> > }
> >
> > void lockdep_set_selftest_task(struct task_struct *task)
> > @@ -3659,7 +3681,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip)
> > if (unlikely(in_nmi()))
> > return;
> >
> > - if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
> > + if (unlikely(__this_cpu_read(lockdep_recursion)))
> > return;
> >
> > if (unlikely(lockdep_hardirqs_enabled())) {
> > @@ -3695,7 +3717,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip)
> >
> > current->hardirq_chain_key = current->curr_chain_key;
> >
> > - current->lockdep_recursion++;
> > + lockdep_recursion_inc();
> > __trace_hardirqs_on_caller();
> > lockdep_recursion_finish();
> > }
> > @@ -3728,7 +3750,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
> > goto skip_checks;
> > }
> >
> > - if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
> > + if (unlikely(__this_cpu_read(lockdep_recursion)))
> > return;
> >
> > if (lockdep_hardirqs_enabled()) {
> > @@ -3781,7 +3803,7 @@ void noinstr lockdep_hardirqs_off(unsigned long ip)
> > if (in_nmi()) {
> > if (!IS_ENABLED(CONFIG_TRACE_IRQFLAGS_NMI))
> > return;
> > - } else if (current->lockdep_recursion & LOCKDEP_RECURSION_MASK)
> > + } else if (__this_cpu_read(lockdep_recursion))
> > return;
> >
> > /*
> > @@ -3814,7 +3836,7 @@ void lockdep_softirqs_on(unsigned long ip)
> > {
> > struct irqtrace_events *trace = &current->irqtrace;
> >
> > - if (unlikely(!debug_locks || current->lockdep_recursion))
> > + if (unlikely(!lockdep_enabled()))
> > return;
> >
> > /*
> > @@ -3829,7 +3851,7 @@ void lockdep_softirqs_on(unsigned long ip)
> > return;
> > }
> >
> > - current->lockdep_recursion++;
> > + lockdep_recursion_inc();
> > /*
> > * We'll do an OFF -> ON transition:
> > */
> > @@ -3852,7 +3874,7 @@ void lockdep_softirqs_on(unsigned long ip)
> > */
> > void lockdep_softirqs_off(unsigned long ip)
> > {
> > - if (unlikely(!debug_locks || current->lockdep_recursion))
> > + if (unlikely(!lockdep_enabled()))
> > return;
> >
> > /*
> > @@ -4233,11 +4255,11 @@ void lockdep_init_map_waits(struct lockdep_map *lock,
> > const char *name,
> > if (subclass) {
> > unsigned long flags;
> >
> > - if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion))
> > + if (DEBUG_LOCKS_WARN_ON(!lockdep_enabled()))
> > return;
> >
> > raw_local_irq_save(flags);
> > - current->lockdep_recursion++;
> > + lockdep_recursion_inc();
> > register_lock_class(lock, subclass, 1);
> > lockdep_recursion_finish();
> > raw_local_irq_restore(flags);
> > @@ -4920,11 +4942,11 @@ void lock_set_class(struct lockdep_map *lock, const
> > char *name,
> > {
> > unsigned long flags;
> >
> > - if (unlikely(current->lockdep_recursion))
> > + if (unlikely(!lockdep_enabled()))
> > return;
> >
> > raw_local_irq_save(flags);
> > - current->lockdep_recursion++;
> > + lockdep_recursion_inc();
> > check_flags(flags);
> > if (__lock_set_class(lock, name, key, subclass, ip))
> > check_chain_key(current);
> > @@ -4937,11 +4959,11 @@ void lock_downgrade(struct lockdep_map *lock, unsigned
> > long ip)
> > {
> > unsigned long flags;
> >
> > - if (unlikely(current->lockdep_recursion))
> > + if (unlikely(!lockdep_enabled()))
> > return;
> >
> > raw_local_irq_save(flags);
> > - current->lockdep_recursion++;
> > + lockdep_recursion_inc();
> > check_flags(flags);
> > if (__lock_downgrade(lock, ip))
> > check_chain_key(current);
> > @@ -4979,7 +5001,7 @@ static void verify_lock_unused(struct lockdep_map *lock,
> > struct held_lock *hlock
> >
> > static bool lockdep_nmi(void)
> > {
> > - if (current->lockdep_recursion & LOCKDEP_RECURSION_MASK)
> > + if (raw_cpu_read(lockdep_recursion))
> > return false;
> >
> > if (!in_nmi())
> > @@ -5000,7 +5022,10 @@ void lock_acquire(struct lockdep_map *lock, unsigned
> > int subclass,
> >
> > trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
> >
> > - if (unlikely(current->lockdep_recursion)) {
> > + if (!debug_locks)
> > + return;
> > +
> > + if (unlikely(!lockdep_enabled())) {
> > /* XXX allow trylock from NMI ?!? */
> > if (lockdep_nmi() && !trylock) {
> > struct held_lock hlock;
> > @@ -5023,7 +5048,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int
> > subclass,
> > raw_local_irq_save(flags);
> > check_flags(flags);
> >
> > - current->lockdep_recursion++;
> > + lockdep_recursion_inc();
> > __lock_acquire(lock, subclass, trylock, read, check,
> > irqs_disabled_flags(flags), nest_lock, ip, 0, 0);
> > lockdep_recursion_finish();
> > @@ -5037,13 +5062,13 @@ void lock_release(struct lockdep_map *lock, unsigned
> > long ip)
> >
> > trace_lock_release(lock, ip);
> >
> > - if (unlikely(current->lockdep_recursion))
> > + if (unlikely(!lockdep_enabled()))
> > return;
> >
> > raw_local_irq_save(flags);
> > check_flags(flags);
> >
> > - current->lockdep_recursion++;
> > + lockdep_recursion_inc();
> > if (__lock_release(lock, ip))
> > check_chain_key(current);
> > lockdep_recursion_finish();
> > @@ -5056,13 +5081,13 @@ noinstr int lock_is_held_type(const struct lockdep_map
> > *lock, int read)
> > unsigned long flags;
> > int ret = 0;
> >
> > - if (unlikely(current->lockdep_recursion))
> > + if (unlikely(!lockdep_enabled()))
> > return 1; /* avoid false negative lockdep_assert_held() */
> >
> > raw_local_irq_save(flags);
> > check_flags(flags);
> >
> > - current->lockdep_recursion++;
> > + lockdep_recursion_inc();
> > ret = __lock_is_held(lock, read);
> > lockdep_recursion_finish();
> > raw_local_irq_restore(flags);
> > @@ -5077,13 +5102,13 @@ struct pin_cookie lock_pin_lock(struct lockdep_map
> > *lock)
> > struct pin_cookie cookie = NIL_COOKIE;
> > unsigned long flags;
> >
> > - if (unlikely(current->lockdep_recursion))
> > + if (unlikely(!lockdep_enabled()))
> > return cookie;
> >
> > raw_local_irq_save(flags);
> > check_flags(flags);
> >
> > - current->lockdep_recursion++;
> > + lockdep_recursion_inc();
> > cookie = __lock_pin_lock(lock);
> > lockdep_recursion_finish();
> > raw_local_irq_restore(flags);
> > @@ -5096,13 +5121,13 @@ void lock_repin_lock(struct lockdep_map *lock, struct
> > pin_cookie cookie)
> > {
> > unsigned long flags;
> >
> > - if (unlikely(current->lockdep_recursion))
> > + if (unlikely(!lockdep_enabled()))
> > return;
> >
> > raw_local_irq_save(flags);
> > check_flags(flags);
> >
> > - current->lockdep_recursion++;
> > + lockdep_recursion_inc();
> > __lock_repin_lock(lock, cookie);
> > lockdep_recursion_finish();
> > raw_local_irq_restore(flags);
> > @@ -5113,13 +5138,13 @@ void lock_unpin_lock(struct lockdep_map *lock, struct
> > pin_cookie cookie)
> > {
> > unsigned long flags;
> >
> > - if (unlikely(current->lockdep_recursion))
> > + if (unlikely(!lockdep_enabled()))
> > return;
> >
> > raw_local_irq_save(flags);
> > check_flags(flags);
> >
> > - current->lockdep_recursion++;
> > + lockdep_recursion_inc();
> > __lock_unpin_lock(lock, cookie);
> > lockdep_recursion_finish();
> > raw_local_irq_restore(flags);
> > @@ -5249,15 +5274,12 @@ void lock_contended(struct lockdep_map *lock, unsigned
> > long ip)
> >
> > trace_lock_acquired(lock, ip);
> >
> > - if (unlikely(!lock_stat || !debug_locks))
> > - return;
> > -
> > - if (unlikely(current->lockdep_recursion))
> > + if (unlikely(!lock_stat || !lockdep_enabled()))
> > return;
> >
> > raw_local_irq_save(flags);
> > check_flags(flags);
> > - current->lockdep_recursion++;
> > + lockdep_recursion_inc();
> > __lock_contended(lock, ip);
> > lockdep_recursion_finish();
> > raw_local_irq_restore(flags);
> > @@ -5270,15 +5292,12 @@ void lock_acquired(struct lockdep_map *lock, unsigned
> > long ip)
> >
> > trace_lock_contended(lock, ip);
> >
> > - if (unlikely(!lock_stat || !debug_locks))
> > - return;
> > -
> > - if (unlikely(current->lockdep_recursion))
> > + if (unlikely(!lock_stat || !lockdep_enabled()))
> > return;
> >
> > raw_local_irq_save(flags);
> > check_flags(flags);
> > - current->lockdep_recursion++;
> > + lockdep_recursion_inc();
> > __lock_acquired(lock, ip);
> > lockdep_recursion_finish();
> > raw_local_irq_restore(flags);
>

2020-10-12 14:16:08

by Qian Cai

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Mon, 2020-10-12 at 11:11 +0800, Boqun Feng wrote:
> Hi,
>
> On Fri, Oct 09, 2020 at 09:41:24AM -0400, Qian Cai wrote:
> > On Fri, 2020-10-09 at 07:58 +0000, tip-bot2 for Peter Zijlstra wrote:
> > > The following commit has been merged into the locking/core branch of tip:
> > >
> > > Commit-ID: 4d004099a668c41522242aa146a38cc4eb59cb1e
> > > Gitweb:
> > > https://git.kernel.org/tip/4d004099a668c41522242aa146a38cc4eb59cb1e
> > > Author: Peter Zijlstra <[email protected]>
> > > AuthorDate: Fri, 02 Oct 2020 11:04:21 +02:00
> > > Committer: Ingo Molnar <[email protected]>
> > > CommitterDate: Fri, 09 Oct 2020 08:53:30 +02:00
> > >
> > > lockdep: Fix lockdep recursion
> > >
> > > Steve reported that lockdep_assert*irq*(), when nested inside lockdep
> > > itself, will trigger a false-positive.
> > >
> > > One example is the stack-trace code, as called from inside lockdep,
> > > triggering tracing, which in turn calls RCU, which then uses
> > > lockdep_assert_irqs_disabled().
> > >
> > > Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-
> > > cpu
> > > variables")
> > > Reported-by: Steven Rostedt <[email protected]>
> > > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > > Signed-off-by: Ingo Molnar <[email protected]>
> >
> > Reverting this linux-next commit fixed booting RCU-list warnings everywhere.
> >
>
> I think this happened because in this commit debug_lockdep_rcu_enabled()
> didn't adopt to the change that made lockdep_recursion a percpu
> variable?
>
> Qian, mind to try the following?

Yes, it works fine.

>
> Although, arguably the problem still exists, i.e. we still have an RCU
> read-side critical section inside lock_acquire(), which may be called on
> a yet-to-online CPU, which RCU doesn't watch. I think this used to be OK
> because we don't "free" anything from lockdep, IOW, there is no
> synchronize_rcu() or call_rcu() that _needs_ to wait for the RCU
> read-side critical sections inside lockdep. But now we lock class
> recycling, so it might be a problem.
>
> That said, currently validate_chain() and lock class recycling are
> mutually excluded via graph_lock, so we are safe for this one ;-)
>
> ----------->8
> diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> index 39334d2d2b37..35d9bab65b75 100644
> --- a/kernel/rcu/update.c
> +++ b/kernel/rcu/update.c
> @@ -275,8 +275,8 @@ EXPORT_SYMBOL_GPL(rcu_callback_map);
>
> noinstr int notrace debug_lockdep_rcu_enabled(void)
> {
> - return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
> - current->lockdep_recursion == 0;
> + return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE &&
> + __lockdep_enabled;
> }
> EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
>

2020-10-13 12:03:46

by Peter Zijlstra

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Tue, Oct 13, 2020 at 12:34:06PM +0200, Peter Zijlstra wrote:
> On Mon, Oct 12, 2020 at 02:28:12PM -0700, Paul E. McKenney wrote:
> > It is certainly an accident waiting to happen. Would something like
> > the following make sense?
>
> Sadly no.
>
> > ------------------------------------------------------------------------
> >
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index bfd38f2..52a63bc 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -4067,6 +4067,7 @@ void rcu_cpu_starting(unsigned int cpu)
> >
> > rnp = rdp->mynode;
> > mask = rdp->grpmask;
> > + lockdep_off();
> > raw_spin_lock_irqsave_rcu_node(rnp, flags);
> > WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
> > newcpu = !(rnp->expmaskinitnext & mask);
> > @@ -4086,6 +4087,7 @@ void rcu_cpu_starting(unsigned int cpu)
> > } else {
> > raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> > }
> > + lockdep_on();
> > smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
> > }
>
> This will just shut it up, but will not fix the actual problem of that
> spin-lock ending up in trace_lock_acquire() which relies on RCU which
> isn't looking.
>
> What we need here is to supress tracing not lockdep. Let me consider.

We appear to have a similar problem with rcu_report_dead(): its
raw_spin_unlock()s can end up in trace_lock_release() while we just
killed RCU.


2020-10-13 12:04:35

by Peter Zijlstra

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Mon, Oct 12, 2020 at 02:28:12PM -0700, Paul E. McKenney wrote:
> It is certainly an accident waiting to happen. Would something like
> the following make sense?

Sadly no.

> ------------------------------------------------------------------------
>
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index bfd38f2..52a63bc 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -4067,6 +4067,7 @@ void rcu_cpu_starting(unsigned int cpu)
>
> rnp = rdp->mynode;
> mask = rdp->grpmask;
> + lockdep_off();
> raw_spin_lock_irqsave_rcu_node(rnp, flags);
> WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
> newcpu = !(rnp->expmaskinitnext & mask);
> @@ -4086,6 +4087,7 @@ void rcu_cpu_starting(unsigned int cpu)
> } else {
> raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> }
> + lockdep_on();
> smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
> }

This will just shut it up, but will not fix the actual problem of that
spin-lock ending up in trace_lock_acquire(), which relies on RCU, which
isn't looking.

What we need here is to suppress tracing, not lockdep. Let me consider.

2020-10-13 12:05:23

by Peter Zijlstra

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Mon, Oct 12, 2020 at 11:11:10AM +0800, Boqun Feng wrote:

> I think this happened because in this commit debug_lockdep_rcu_enabled()
> didn't adopt to the change that made lockdep_recursion a percpu
> variable?
>
> Qian, mind to try the following?
>
> Although, arguably the problem still exists, i.e. we still have an RCU
> read-side critical section inside lock_acquire(), which may be called on

There is actual RCU usage in trace_lock_acquire().

> a yet-to-online CPU, which RCU doesn't watch. I think this used to be OK
> because we don't "free" anything from lockdep, IOW, there is no
> synchronize_rcu() or call_rcu() that _needs_ to wait for the RCU
> read-side critical sections inside lockdep. But now we lock class
> recycling, so it might be a problem.
>
> That said, currently validate_chain() and lock class recycling are
> mutually excluded via graph_lock, so we are safe for this one ;-)

We should have a comment on that somewhere; could you write one?
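
FWIW, one possible shape for such a comment, as a sketch of the reasoning
above (exact wording and placement -- presumably next to graph_lock or the
class-zapping code -- are assumptions):

	/*
	 * validate_chain() and lock class recycling (zap_class()) are
	 * mutually exclusive via graph_lock, so a class cannot be
	 * recycled while a chain walk still references it.  This is why
	 * the RCU read-side section in lock_acquire() is tolerable even
	 * on a CPU that RCU is not yet watching.
	 */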

> ----------->8
> diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> index 39334d2d2b37..35d9bab65b75 100644
> --- a/kernel/rcu/update.c
> +++ b/kernel/rcu/update.c
> @@ -275,8 +275,8 @@ EXPORT_SYMBOL_GPL(rcu_callback_map);
>
> noinstr int notrace debug_lockdep_rcu_enabled(void)
> {
> - return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
> - current->lockdep_recursion == 0;
> + return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE &&
> + __lockdep_enabled;
> }
> EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);

Urgh, I didn't expect (and forgot to grep) lockdep_recursion users
outside of lockdep itself :/ It looks like this is indeed the only one.


2020-10-13 12:05:57

by Peter Zijlstra

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Tue, Oct 13, 2020 at 12:44:50PM +0200, Peter Zijlstra wrote:
> On Tue, Oct 13, 2020 at 12:34:06PM +0200, Peter Zijlstra wrote:
> > On Mon, Oct 12, 2020 at 02:28:12PM -0700, Paul E. McKenney wrote:
> > > It is certainly an accident waiting to happen. Would something like
> > > the following make sense?
> >
> > Sadly no.
> >
> > > ------------------------------------------------------------------------
> > >
> > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > index bfd38f2..52a63bc 100644
> > > --- a/kernel/rcu/tree.c
> > > +++ b/kernel/rcu/tree.c
> > > @@ -4067,6 +4067,7 @@ void rcu_cpu_starting(unsigned int cpu)
> > >
> > > rnp = rdp->mynode;
> > > mask = rdp->grpmask;
> > > + lockdep_off();
> > > raw_spin_lock_irqsave_rcu_node(rnp, flags);
> > > WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
> > > newcpu = !(rnp->expmaskinitnext & mask);
> > > @@ -4086,6 +4087,7 @@ void rcu_cpu_starting(unsigned int cpu)
> > > } else {
> > > raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> > > }
> > > + lockdep_on();
> > > smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
> > > }
> >
> > This will just shut it up, but will not fix the actual problem of that
> > spin-lock ending up in trace_lock_acquire() which relies on RCU which
> > isn't looking.
> >
> > What we need here is to supress tracing not lockdep. Let me consider.
>
> We appear to have a similar problem with rcu_report_dead(), it's
> raw_spin_unlock()s can end up in trace_lock_release() while we just
> killed RCU.

So we can deal with the explicit trace_*() calls like the below, but I
really don't like it much. It also doesn't help with function tracing.
This is really early/late in the hotplug cycle and should be considered
entry code; we shouldn't be tracing anything here.

Paul, would it be possible to use a scheme similar to IRQ/NMI for
hotplug? That seems to mostly rely on atomic ops, not locks.
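
(Roughly, a sketch of what "like IRQ/NMI" could look like -- an ordered
atomic transition instead of a tracked lock; the names and counter layout
below are made up for illustration:)

/* Hypothetical per-CPU counter: odd value == RCU is watching this CPU. */
static DEFINE_PER_CPU(atomic_t, rcu_watching_sketch);

static void sketch_mark_cpu_watched(void)
{
	/* Fully ordered RMW, as the NMI entry path does for dynticks. */
	int seq = atomic_add_return(1, this_cpu_ptr(&rcu_watching_sketch));

	WARN_ON_ONCE(!(seq & 1));	/* must now be odd: watched */
}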

---
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index d05db575f60f..22e3a3523ad3 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -159,7 +159,7 @@ extern void lockdep_init_task(struct task_struct *task);
*/
#define LOCKDEP_RECURSION_BITS 16
#define LOCKDEP_OFF (1U << LOCKDEP_RECURSION_BITS)
-#define LOCKDEP_RECURSION_MASK (LOCKDEP_OFF - 1)
+#define LOCKDEP_TRACE_MASK (LOCKDEP_OFF - 1)

/*
* lockdep_{off,on}() are macros to avoid tracing and kprobes; not inlines due
@@ -176,6 +176,16 @@ do { \
current->lockdep_recursion -= LOCKDEP_OFF; \
} while (0)

+#define lockdep_trace_off() \
+do { \
+ current->lockdep_recursion++; \
+} while (0)
+
+#define lockdep_trace_on() \
+do { \
+ current->lockdep_recursion--; \
+} while (0)
+
extern void lockdep_register_key(struct lock_class_key *key);
extern void lockdep_unregister_key(struct lock_class_key *key);

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 3e99dfef8408..2df98abee82e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -87,7 +87,7 @@ static inline bool lockdep_enabled(void)
if (raw_cpu_read(lockdep_recursion))
return false;

- if (current->lockdep_recursion)
+ if (current->lockdep_recursion >> LOCKDEP_RECURSION_BITS)
return false;

return true;
@@ -5410,7 +5410,8 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
{
unsigned long flags;

- trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
+ if (!(current->lockdep_recursion & LOCKDEP_TRACE_MASK))
+ trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);

if (!debug_locks)
return;
@@ -5450,7 +5451,8 @@ void lock_release(struct lockdep_map *lock, unsigned long ip)
{
unsigned long flags;

- trace_lock_release(lock, ip);
+ if (!(current->lockdep_recursion & LOCKDEP_TRACE_MASK))
+ trace_lock_release(lock, ip);

if (unlikely(!lockdep_enabled()))
return;
@@ -5662,7 +5664,8 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
{
unsigned long flags;

- trace_lock_acquired(lock, ip);
+ if (!(current->lockdep_recursion & LOCKDEP_TRACE_MASK))
+ trace_lock_acquired(lock, ip);

if (unlikely(!lock_stat || !lockdep_enabled()))
return;
@@ -5680,7 +5683,8 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip)
{
unsigned long flags;

- trace_lock_contended(lock, ip);
+ if (!(current->lockdep_recursion & LOCKDEP_TRACE_MASK))
+ trace_lock_contended(lock, ip);

if (unlikely(!lock_stat || !lockdep_enabled()))
return;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index edeabc232c21..dbd56603fc0a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4047,6 +4047,11 @@ void rcu_cpu_starting(unsigned int cpu)

rnp = rdp->mynode;
mask = rdp->grpmask;
+
+ /*
+ * Lockdep will call tracing, which requires RCU, but RCU isn't on yet.
+ */
+ lockdep_trace_off();
raw_spin_lock_irqsave_rcu_node(rnp, flags);
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
newcpu = !(rnp->expmaskinitnext & mask);
@@ -4064,6 +4069,7 @@ void rcu_cpu_starting(unsigned int cpu)
} else {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
+ lockdep_trace_on();
smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
}

@@ -4091,6 +4097,11 @@ void rcu_report_dead(unsigned int cpu)

/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
mask = rdp->grpmask;
+
+ /*
+ * Lockdep will call tracing, which requires RCU, but we're switching RCU off.
+ */
+ lockdep_trace_off();
raw_spin_lock(&rcu_state.ofl_lock);
raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
@@ -4101,8 +4112,10 @@ void rcu_report_dead(unsigned int cpu)
raw_spin_lock_irqsave_rcu_node(rnp, flags);
}
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
+ /* RCU is off, locks must not call into tracing */
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
raw_spin_unlock(&rcu_state.ofl_lock);
+ lockdep_trace_on();

rdp->cpu_started = false;
}
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 39334d2d2b37..403b138f7cd4 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -275,8 +275,8 @@ EXPORT_SYMBOL_GPL(rcu_callback_map);

noinstr int notrace debug_lockdep_rcu_enabled(void)
{
- return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
- current->lockdep_recursion == 0;
+ return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && __lockdep_enabled;
+
}
EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);

2020-10-13 14:18:50

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Mon, Oct 12, 2020 at 11:11:10AM +0800, Boqun Feng wrote:
> Hi,
>
> On Fri, Oct 09, 2020 at 09:41:24AM -0400, Qian Cai wrote:
> > On Fri, 2020-10-09 at 07:58 +0000, tip-bot2 for Peter Zijlstra wrote:
> > > The following commit has been merged into the locking/core branch of tip:
> > >
> > > Commit-ID: 4d004099a668c41522242aa146a38cc4eb59cb1e
> > > Gitweb:
> > > https://git.kernel.org/tip/4d004099a668c41522242aa146a38cc4eb59cb1e
> > > Author: Peter Zijlstra <[email protected]>
> > > AuthorDate: Fri, 02 Oct 2020 11:04:21 +02:00
> > > Committer: Ingo Molnar <[email protected]>
> > > CommitterDate: Fri, 09 Oct 2020 08:53:30 +02:00
> > >
> > > lockdep: Fix lockdep recursion
> > >
> > > Steve reported that lockdep_assert*irq*(), when nested inside lockdep
> > > itself, will trigger a false-positive.
> > >
> > > One example is the stack-trace code, as called from inside lockdep,
> > > triggering tracing, which in turn calls RCU, which then uses
> > > lockdep_assert_irqs_disabled().
> > >
> > > Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu
> > > variables")
> > > Reported-by: Steven Rostedt <[email protected]>
> > > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > > Signed-off-by: Ingo Molnar <[email protected]>
> >
> > Reverting this linux-next commit fixed booting RCU-list warnings everywhere.
> >
>
> I think this happened because in this commit debug_lockdep_rcu_enabled()
> didn't adopt to the change that made lockdep_recursion a percpu
> variable?
>
> Qian, mind to try the following?
>
> Although, arguably the problem still exists, i.e. we still have an RCU
> read-side critical section inside lock_acquire(), which may be called on
> a yet-to-online CPU, which RCU doesn't watch. I think this used to be OK
> because we don't "free" anything from lockdep, IOW, there is no
> synchronize_rcu() or call_rcu() that _needs_ to wait for the RCU
> read-side critical sections inside lockdep. But now we lock class
> recycling, so it might be a problem.

It is certainly an accident waiting to happen. Would something like
the following make sense?

Thanx, Paul

------------------------------------------------------------------------

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index bfd38f2..52a63bc 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4067,6 +4067,7 @@ void rcu_cpu_starting(unsigned int cpu)

rnp = rdp->mynode;
mask = rdp->grpmask;
+ lockdep_off();
raw_spin_lock_irqsave_rcu_node(rnp, flags);
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
newcpu = !(rnp->expmaskinitnext & mask);
@@ -4086,6 +4087,7 @@ void rcu_cpu_starting(unsigned int cpu)
} else {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
+ lockdep_on();
smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
}


> That said, currently validate_chain() and lock class recycling are
> mutually excluded via graph_lock, so we are safe for this one ;-)
>
> ----------->8
> diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> index 39334d2d2b37..35d9bab65b75 100644
> --- a/kernel/rcu/update.c
> +++ b/kernel/rcu/update.c
> @@ -275,8 +275,8 @@ EXPORT_SYMBOL_GPL(rcu_callback_map);
>
> noinstr int notrace debug_lockdep_rcu_enabled(void)
> {
> - return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
> - current->lockdep_recursion == 0;
> + return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE &&
> + __lockdep_enabled;
> }
> EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
>
>
> > == x86 ==
> > [ 8.101841][ T1] rcu: Hierarchical SRCU implementation.
> > [ 8.110615][ T5] NMI watchdog: Enabled. Permanently consumes one hw-PMU counter.
> > [ 8.153506][ T1] smp: Bringing up secondary CPUs ...
> > [ 8.163075][ T1] x86: Booting SMP configuration:
> > [ 8.167843][ T1] .... node #0, CPUs: #1
> > [ 4.002695][ T0]
> > [ 4.002695][ T0] =============================
> > [ 4.002695][ T0] WARNING: suspicious RCU usage
> > [ 4.002695][ T0] 5.9.0-rc8-next-20201009 #2 Not tainted
> > [ 4.002695][ T0] -----------------------------
> > [ 4.002695][ T0] kernel/locking/lockdep.c:3497 RCU-list traversed in non-reader section!!
> > [ 4.002695][ T0]
> > [ 4.002695][ T0] other info that might help us debug this:
> > [ 4.002695][ T0]
> > [ 4.002695][ T0]
> > [ 4.002695][ T0] RCU used illegally from offline CPU!
> > [ 4.002695][ T0] rcu_scheduler_active = 1, debug_locks = 1
> > [ 4.002695][ T0] no locks held by swapper/1/0.
> > [ 4.002695][ T0]
> > [ 4.002695][ T0] stack backtrace:
> > [ 4.002695][ T0] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.9.0-rc8-next-20201009 #2
> > [ 4.002695][ T0] Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019
> > [ 4.002695][ T0] Call Trace:
> > [ 4.002695][ T0] dump_stack+0x99/0xcb
> > [ 4.002695][ T0] __lock_acquire.cold.76+0x2ad/0x3e0
> > lookup_chain_cache at kernel/locking/lockdep.c:3497
> > (inlined by) lookup_chain_cache_add at kernel/locking/lockdep.c:3517
> > (inlined by) validate_chain at kernel/locking/lockdep.c:3572
> > (inlined by) __lock_acquire at kernel/locking/lockdep.c:4837
> > [ 4.002695][ T0] ? lockdep_hardirqs_on_prepare+0x3d0/0x3d0
> > [ 4.002695][ T0] lock_acquire+0x1c8/0x820
> > lockdep_recursion_finish at kernel/locking/lockdep.c:435
> > (inlined by) lock_acquire at kernel/locking/lockdep.c:5444
> > (inlined by) lock_acquire at kernel/locking/lockdep.c:5407
> > [ 4.002695][ T0] ? __debug_object_init+0xb4/0xf50
> > [ 4.002695][ T0] ? memset+0x1f/0x40
> > [ 4.002695][ T0] ? rcu_read_unlock+0x40/0x40
> > [ 4.002695][ T0] ? mce_gather_info+0x170/0x170
> > [ 4.002695][ T0] ? arch_freq_get_on_cpu+0x270/0x270
> > [ 4.002695][ T0] ? mce_cpu_restart+0x40/0x40
> > [ 4.002695][ T0] _raw_spin_lock_irqsave+0x30/0x50
> > [ 4.002695][ T0] ? __debug_object_init+0xb4/0xf50
> > [ 4.002695][ T0] __debug_object_init+0xb4/0xf50
> > [ 4.002695][ T0] ? mce_amd_feature_init+0x80c/0xa70
> > [ 4.002695][ T0] ? debug_object_fixup+0x30/0x30
> > [ 4.002695][ T0] ? machine_check_poll+0x2d0/0x2d0
> > [ 4.002695][ T0] ? mce_cpu_restart+0x40/0x40
> > [ 4.002695][ T0] init_timer_key+0x29/0x220
> > [ 4.002695][ T0] identify_cpu+0xfcb/0x1980
> > [ 4.002695][ T0] identify_secondary_cpu+0x1d/0x190
> > [ 4.002695][ T0] smp_store_cpu_info+0x167/0x1f0
> > [ 4.002695][ T0] start_secondary+0x5b/0x290
> > [ 4.002695][ T0] secondary_startup_64_no_verify+0xb8/0xbb
> > [ 8.379508][ T1] #2
> > [ 8.389728][ T1] #3
> > [ 8.399901][ T1]
> >
> > == s390 ==
> > 00: [ 1.539768] rcu: Hierarchical SRCU implementation.
> > 00: [ 1.561622] smp: Bringing up secondary CPUs ...
> > 00: [ 1.568677]
> > 00: [ 1.568681] =============================
> > 00: [ 1.568682] WARNING: suspicious RCU usage
> > 00: [ 1.568688] 5.9.0-rc8-next-20201009 #2 Not tainted
> > 00: [ 1.568688] -----------------------------
> > 00: [ 1.568691] kernel/locking/lockdep.c:3497 RCU-list traversed in non-reade
> > 00: r section!!
> > 00: [ 1.568692]
> > 00: [ 1.568692] other info that might help us debug this:
> > 00: [ 1.568692]
> > 00: [ 1.568694]
> > 00: [ 1.568694] RCU used illegally from offline CPU!
> > 00: [ 1.568694] rcu_scheduler_active = 1, debug_locks = 1
> > 00: [ 1.568697] no locks held by swapper/1/0.
> > 00: [ 1.568697]
> > 00: [ 1.568697] stack backtrace:
> > 00: [ 1.568702] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.9.0-rc8-next-2020
> > 00: 1009 #2
> > 00: [ 1.568704] Hardware name: IBM 2964 N96 400 (z/VM 6.4.0)
> > 00: [ 1.568706] Call Trace:
> > 00: [ 1.568719] [<000000011fb85370>] show_stack+0x158/0x1f0
> > 00: [ 1.568723] [<000000011fb90402>] dump_stack+0x1f2/0x238
> > 00: [ 1.568730] [<000000011ebd89d8>] __lock_acquire+0x2640/0x4dd0
> > lookup_chain_cache at kernel/locking/lockdep.c:3497
> > (inlined by) lookup_chain_cache_add at kernel/locking/lockdep.c:3517
> > (inlined by) validate_chain at kernel/locking/lockdep.c:3572
> > (inlined by) __lock_acquire at kernel/locking/lockdep.c:4837
> > 00: [ 1.568732] [<000000011ebdd230>] lock_acquire+0x3a8/0xd08
> > lockdep_recursion_finish at kernel/locking/lockdep.c:435
> > (inlined by) lock_acquire at kernel/locking/lockdep.c:5444
> > (inlined by) lock_acquire at kernel/locking/lockdep.c:5407
> > 00: [ 1.568738] [<000000011fbb5ca8>] _raw_spin_lock_irqsave+0xc0/0xf0
> > __raw_spin_lock_irqsave at include/linux/spinlock_api_smp.h:117
> > (inlined by) _raw_spin_lock_irqsave at kernel/locking/spinlock.c:159
> > 00: [ 1.568745] [<000000011ec6e7e8>] clockevents_register_device+0xa8/0x528
> > 00:
> > 00: [ 1.568748] [<000000011ea55246>] init_cpu_timer+0x33e/0x468
> > 00: [ 1.568754] [<000000011ea7f4d2>] smp_init_secondary+0x11a/0x328
> > 00: [ 1.568757] [<000000011ea7f3b2>] smp_start_secondary+0x82/0x88
> > smp_start_secondary at arch/s390/kernel/smp.c:892
> > 00: [ 1.568759] no locks held by swapper/1/0.
> > 00: [ 1.569956] smp: Brought up 1 node, 2 CPUs
> >

2020-10-13 18:43:51

by Boqun Feng

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Tue, Oct 13, 2020 at 12:27:15PM +0200, Peter Zijlstra wrote:
> On Mon, Oct 12, 2020 at 11:11:10AM +0800, Boqun Feng wrote:
>
> > I think this happened because in this commit debug_lockdep_rcu_enabled()
> > didn't adopt to the change that made lockdep_recursion a percpu
> > variable?
> >
> > Qian, mind to try the following?
> >
> > Although, arguably the problem still exists, i.e. we still have an RCU
> > read-side critical section inside lock_acquire(), which may be called on
>
> There is actual RCU usage from the trace_lock_acquire().
>
> > a yet-to-online CPU, which RCU doesn't watch. I think this used to be OK
> > because we don't "free" anything from lockdep, IOW, there is no
> > synchronize_rcu() or call_rcu() that _needs_ to wait for the RCU
> > read-side critical sections inside lockdep. But now we lock class
> > recycling, so it might be a problem.
> >
> > That said, currently validate_chain() and lock class recycling are
> > mutually excluded via graph_lock, so we are safe for this one ;-)
>
> We should have a comment on that somewhere, could you write one?
>

Sure, I will write something tomorrow.
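
Roughly, I'm thinking of something along the lines of the sketch below
(wording is only illustrative, not the final comment), next to the RCU
list traversals that triggered the splat:

	/*
	 * The chain-hash and lock-class lists traversed here are RCU lists,
	 * and lock_acquire() can be reached from a CPU that RCU does not
	 * (yet) consider online, e.g. during early secondary CPU bringup.
	 * That is tolerable only because nothing here depends on an RCU
	 * grace period: lock-class/chain recycling and validate_chain()
	 * are mutually excluded via graph_lock, so nothing traversed here
	 * can be recycled concurrently.
	 */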

Regards,
Boqun

> > ----------->8
> > diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> > index 39334d2d2b37..35d9bab65b75 100644
> > --- a/kernel/rcu/update.c
> > +++ b/kernel/rcu/update.c
> > @@ -275,8 +275,8 @@ EXPORT_SYMBOL_GPL(rcu_callback_map);
> >
> > noinstr int notrace debug_lockdep_rcu_enabled(void)
> > {
> > - return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
> > - current->lockdep_recursion == 0;
> > + return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE &&
> > + __lockdep_enabled;
> > }
> > EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
>
> Urgh, I didn't expect (and forgot to grep) lockdep_recursion users
> outside of lockdep itself :/ It looks like this is indeed the only one.
>
>

2020-10-13 18:44:31

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Tue, Oct 13, 2020 at 01:25:44PM +0200, Peter Zijlstra wrote:
> On Tue, Oct 13, 2020 at 12:44:50PM +0200, Peter Zijlstra wrote:
> > On Tue, Oct 13, 2020 at 12:34:06PM +0200, Peter Zijlstra wrote:
> > > On Mon, Oct 12, 2020 at 02:28:12PM -0700, Paul E. McKenney wrote:
> > > > It is certainly an accident waiting to happen. Would something like
> > > > the following make sense?
> > >
> > > Sadly no.
> > >
> > > > ------------------------------------------------------------------------
> > > >
> > > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > > index bfd38f2..52a63bc 100644
> > > > --- a/kernel/rcu/tree.c
> > > > +++ b/kernel/rcu/tree.c
> > > > @@ -4067,6 +4067,7 @@ void rcu_cpu_starting(unsigned int cpu)
> > > >
> > > > rnp = rdp->mynode;
> > > > mask = rdp->grpmask;
> > > > + lockdep_off();
> > > > raw_spin_lock_irqsave_rcu_node(rnp, flags);
> > > > WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
> > > > newcpu = !(rnp->expmaskinitnext & mask);
> > > > @@ -4086,6 +4087,7 @@ void rcu_cpu_starting(unsigned int cpu)
> > > > } else {
> > > > raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> > > > }
> > > > + lockdep_on();
> > > > smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
> > > > }
> > >
> > > This will just shut it up, but will not fix the actual problem of that
> > > spin-lock ending up in trace_lock_acquire() which relies on RCU which
> > > isn't looking.
> > >
> > > What we need here is to suppress tracing, not lockdep. Let me consider.
> >
> > We appear to have a similar problem with rcu_report_dead(), it's
> > raw_spin_unlock()s can end up in trace_lock_release() while we just
> > killed RCU.
>
> So we can deal with the explicit trace_*() calls like the below, but I
> really don't like it much. It also doesn't help with function tracing.
> This is really early/late in the hotplug cycle and should be considered
> entry, we shouldn't be tracing anything here.
>
> Paul, would it be possible to use a scheme similar to IRQ/NMI for
> hotplug? That seems to mostly rely on atomic ops, not locks.

The rest of the rcu_node tree and the various grace-period/hotplug races
makes that question non-trivial. I will look into it, but I have no
reason for optimism.

But there is only one way to find out... ;-)

Thanx, Paul

> ---
> diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
> index d05db575f60f..22e3a3523ad3 100644
> --- a/include/linux/lockdep.h
> +++ b/include/linux/lockdep.h
> @@ -159,7 +159,7 @@ extern void lockdep_init_task(struct task_struct *task);
> */
> #define LOCKDEP_RECURSION_BITS 16
> #define LOCKDEP_OFF (1U << LOCKDEP_RECURSION_BITS)
> -#define LOCKDEP_RECURSION_MASK (LOCKDEP_OFF - 1)
> +#define LOCKDEP_TRACE_MASK (LOCKDEP_OFF - 1)
>
> /*
> * lockdep_{off,on}() are macros to avoid tracing and kprobes; not inlines due
> @@ -176,6 +176,16 @@ do { \
> current->lockdep_recursion -= LOCKDEP_OFF; \
> } while (0)
>
> +#define lockdep_trace_off() \
> +do { \
> + current->lockdep_recursion++; \
> +} while (0)
> +
> +#define lockdep_trace_on() \
> +do { \
> + current->lockdep_recursion-- \
> +} while (0)
> +
> extern void lockdep_register_key(struct lock_class_key *key);
> extern void lockdep_unregister_key(struct lock_class_key *key);
>
> diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
> index 3e99dfef8408..2df98abee82e 100644
> --- a/kernel/locking/lockdep.c
> +++ b/kernel/locking/lockdep.c
> @@ -87,7 +87,7 @@ static inline bool lockdep_enabled(void)
> if (raw_cpu_read(lockdep_recursion))
> return false;
>
> - if (current->lockdep_recursion)
> + if (current->lockdep_recursion >> LOCKDEP_RECURSION_BITS)
> return false;
>
> return true;
> @@ -5410,7 +5410,8 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
> {
> unsigned long flags;
>
> - trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
> + if (!(current->lockdep_recursion & LOCKDEP_TRACE_MASK))
> + trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
>
> if (!debug_locks)
> return;
> @@ -5450,7 +5451,8 @@ void lock_release(struct lockdep_map *lock, unsigned long ip)
> {
> unsigned long flags;
>
> - trace_lock_release(lock, ip);
> + if (!(current->lockdep_recursion & LOCKDEP_TRACE_MASK))
> + trace_lock_release(lock, ip);
>
> if (unlikely(!lockdep_enabled()))
> return;
> @@ -5662,7 +5664,8 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
> {
> unsigned long flags;
>
> - trace_lock_acquired(lock, ip);
> + if (!(current->lockdep_recursion & LOCKDEP_TRACE_MASK))
> + trace_lock_acquired(lock, ip);
>
> if (unlikely(!lock_stat || !lockdep_enabled()))
> return;
> @@ -5680,7 +5683,8 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip)
> {
> unsigned long flags;
>
> - trace_lock_contended(lock, ip);
> + if (!(current->lockdep_recursion & LOCKDEP_TRACE_MASK))
> + trace_lock_contended(lock, ip);
>
> if (unlikely(!lock_stat || !lockdep_enabled()))
> return;
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index edeabc232c21..dbd56603fc0a 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -4047,6 +4047,11 @@ void rcu_cpu_starting(unsigned int cpu)
>
> rnp = rdp->mynode;
> mask = rdp->grpmask;
> +
> + /*
> + * Lockdep will call tracing, which requires RCU, but RCU isn't on yet.
> + */
> + lockdep_trace_off();
> raw_spin_lock_irqsave_rcu_node(rnp, flags);
> WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
> newcpu = !(rnp->expmaskinitnext & mask);
> @@ -4064,6 +4069,7 @@ void rcu_cpu_starting(unsigned int cpu)
> } else {
> raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> }
> + lockdep_trace_on();
> smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
> }
>
> @@ -4091,6 +4097,11 @@ void rcu_report_dead(unsigned int cpu)
>
> /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
> mask = rdp->grpmask;
> +
> + /*
> + * Lockdep will call tracing, which requires RCU, but we're switching RCU off.
> + */
> + lockdep_trace_off();
> raw_spin_lock(&rcu_state.ofl_lock);
> raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
> rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
> @@ -4101,8 +4112,10 @@ void rcu_report_dead(unsigned int cpu)
> raw_spin_lock_irqsave_rcu_node(rnp, flags);
> }
> WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
> + /* RCU is off, locks must not call into tracing */
> raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> raw_spin_unlock(&rcu_state.ofl_lock);
> + lockdep_trace_on();
>
> rdp->cpu_started = false;
> }
> diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> index 39334d2d2b37..403b138f7cd4 100644
> --- a/kernel/rcu/update.c
> +++ b/kernel/rcu/update.c
> @@ -275,8 +275,8 @@ EXPORT_SYMBOL_GPL(rcu_callback_map);
>
> noinstr int notrace debug_lockdep_rcu_enabled(void)
> {
> - return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
> - current->lockdep_recursion == 0;
> + return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && __lockdep_enabled;
> +
> }
> EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
>

2020-10-14 02:11:19

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Tue, Oct 13, 2020 at 12:44:50PM +0200, Peter Zijlstra wrote:
> On Tue, Oct 13, 2020 at 12:34:06PM +0200, Peter Zijlstra wrote:
> > On Mon, Oct 12, 2020 at 02:28:12PM -0700, Paul E. McKenney wrote:
> > > It is certainly an accident waiting to happen. Would something like
> > > the following make sense?
> >
> > Sadly no.

Hey, I was hoping! ;-)

> > > ------------------------------------------------------------------------
> > >
> > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > index bfd38f2..52a63bc 100644
> > > --- a/kernel/rcu/tree.c
> > > +++ b/kernel/rcu/tree.c
> > > @@ -4067,6 +4067,7 @@ void rcu_cpu_starting(unsigned int cpu)
> > >
> > > rnp = rdp->mynode;
> > > mask = rdp->grpmask;
> > > + lockdep_off();
> > > raw_spin_lock_irqsave_rcu_node(rnp, flags);
> > > WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
> > > newcpu = !(rnp->expmaskinitnext & mask);
> > > @@ -4086,6 +4087,7 @@ void rcu_cpu_starting(unsigned int cpu)
> > > } else {
> > > raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> > > }
> > > + lockdep_on();
> > > smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
> > > }
> >
> > This will just shut it up, but will not fix the actual problem of that
> > spin-lock ending up in trace_lock_acquire() which relies on RCU which
> > isn't looking.
> >
> > What we need here is to suppress tracing, not lockdep. Let me consider.

OK, I certainly didn't think in those terms.

> We appear to have a similar problem with rcu_report_dead(), it's
> raw_spin_unlock()s can end up in trace_lock_release() while we just
> killed RCU.

In theory, rcu_report_dead() is just fine. The reason is that a new
grace period that is ignoring the outgoing CPU cannot start until after:

1. This CPU releases the leaf rcu_node ->lock -and-

2. The grace-period kthread acquires this same lock.
Multiple times.

In practice, too bad about those diagnostics! :-(

So good catch!!!

Thanx, Paul

2020-10-14 06:22:38

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Tue, Oct 13, 2020 at 09:26:50AM -0700, Paul E. McKenney wrote:
> On Tue, Oct 13, 2020 at 01:25:44PM +0200, Peter Zijlstra wrote:
> > On Tue, Oct 13, 2020 at 12:44:50PM +0200, Peter Zijlstra wrote:
> > > On Tue, Oct 13, 2020 at 12:34:06PM +0200, Peter Zijlstra wrote:
> > > > On Mon, Oct 12, 2020 at 02:28:12PM -0700, Paul E. McKenney wrote:
> > > > > It is certainly an accident waiting to happen. Would something like
> > > > > the following make sense?
> > > >
> > > > Sadly no.
> > > >
> > > > > ------------------------------------------------------------------------
> > > > >
> > > > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > > > index bfd38f2..52a63bc 100644
> > > > > --- a/kernel/rcu/tree.c
> > > > > +++ b/kernel/rcu/tree.c
> > > > > @@ -4067,6 +4067,7 @@ void rcu_cpu_starting(unsigned int cpu)
> > > > >
> > > > > rnp = rdp->mynode;
> > > > > mask = rdp->grpmask;
> > > > > + lockdep_off();
> > > > > raw_spin_lock_irqsave_rcu_node(rnp, flags);
> > > > > WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
> > > > > newcpu = !(rnp->expmaskinitnext & mask);
> > > > > @@ -4086,6 +4087,7 @@ void rcu_cpu_starting(unsigned int cpu)
> > > > > } else {
> > > > > raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> > > > > }
> > > > > + lockdep_on();
> > > > > smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
> > > > > }
> > > >
> > > > This will just shut it up, but will not fix the actual problem of that
> > > > spin-lock ending up in trace_lock_acquire() which relies on RCU which
> > > > isn't looking.
> > > >
> > > > What we need here is to suppress tracing, not lockdep. Let me consider.
> > >
> > > We appear to have a similar problem with rcu_report_dead(), it's
> > > raw_spin_unlock()s can end up in trace_lock_release() while we just
> > > killed RCU.
> >
> > So we can deal with the explicit trace_*() calls like the below, but I
> > really don't like it much. It also doesn't help with function tracing.
> > This is really early/late in the hotplug cycle and should be considered
> > entry, we shouldn't be tracing anything here.
> >
> > Paul, would it be possible to use a scheme similar to IRQ/NMI for
> > hotplug? That seems to mostly rely on atomic ops, not locks.
>
> The rest of the rcu_node tree and the various grace-period/hotplug races
> makes that question non-trivial. I will look into it, but I have no
> reason for optimism.
>
> But there is only one way to find out... ;-)

The aforementioned races get really ugly really fast. So I do not
believe that a lockless approach is a strategy to win here.

But why not use something sort of like a sequence counter, but adapted
for local on-CPU use? This should quiet the diagnostics for the full
time that RCU needs its locks. Untested patch below.

Thoughts?

Thanx, Paul

------------------------------------------------------------------------

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1d42909..5b06886 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1152,13 +1152,15 @@ bool rcu_lockdep_current_cpu_online(void)
struct rcu_data *rdp;
struct rcu_node *rnp;
bool ret = false;
+ unsigned long seq;

if (in_nmi() || !rcu_scheduler_fully_active)
return true;
preempt_disable_notrace();
rdp = this_cpu_ptr(&rcu_data);
rnp = rdp->mynode;
- if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
+ seq = READ_ONCE(rnp->ofl_seq) & ~0x1;
+ if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || seq != READ_ONCE(rnp->ofl_seq))
ret = true;
preempt_enable_notrace();
return ret;
@@ -4065,6 +4067,8 @@ void rcu_cpu_starting(unsigned int cpu)

rnp = rdp->mynode;
mask = rdp->grpmask;
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
raw_spin_lock_irqsave_rcu_node(rnp, flags);
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
newcpu = !(rnp->expmaskinitnext & mask);
@@ -4084,6 +4088,8 @@ void rcu_cpu_starting(unsigned int cpu)
} else {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(rnp->ofl_seq & 0x1);
smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
}

@@ -4111,6 +4117,8 @@ void rcu_report_dead(unsigned int cpu)

/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
mask = rdp->grpmask;
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
raw_spin_lock(&rcu_state.ofl_lock);
raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
@@ -4123,6 +4131,8 @@ void rcu_report_dead(unsigned int cpu)
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
raw_spin_unlock(&rcu_state.ofl_lock);
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(rnp->ofl_seq & 0x1);

rdp->cpu_started = false;
}
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 805c9eb..7d802b6 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -57,6 +57,7 @@ struct rcu_node {
/* beginning of each grace period. */
unsigned long qsmaskinitnext;
/* Online CPUs for next grace period. */
+ unsigned long ofl_seq; /* CPU-hotplug operation sequence count. */
unsigned long expmask; /* CPUs or groups that need to check in */
/* to allow the current expedited GP */
/* to complete. */

2020-10-14 18:36:13

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Tue, Oct 13, 2020 at 12:30:25PM -0700, Paul E. McKenney wrote:
> On Tue, Oct 13, 2020 at 09:26:50AM -0700, Paul E. McKenney wrote:
> > On Tue, Oct 13, 2020 at 01:25:44PM +0200, Peter Zijlstra wrote:
> > > On Tue, Oct 13, 2020 at 12:44:50PM +0200, Peter Zijlstra wrote:
> > > > On Tue, Oct 13, 2020 at 12:34:06PM +0200, Peter Zijlstra wrote:
> > > > > On Mon, Oct 12, 2020 at 02:28:12PM -0700, Paul E. McKenney wrote:
> > > > > > It is certainly an accident waiting to happen. Would something like
> > > > > > the following make sense?
> > > > >
> > > > > Sadly no.
> > > > >
> > > > > > ------------------------------------------------------------------------
> > > > > >
> > > > > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > > > > index bfd38f2..52a63bc 100644
> > > > > > --- a/kernel/rcu/tree.c
> > > > > > +++ b/kernel/rcu/tree.c
> > > > > > @@ -4067,6 +4067,7 @@ void rcu_cpu_starting(unsigned int cpu)
> > > > > >
> > > > > > rnp = rdp->mynode;
> > > > > > mask = rdp->grpmask;
> > > > > > + lockdep_off();
> > > > > > raw_spin_lock_irqsave_rcu_node(rnp, flags);
> > > > > > WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
> > > > > > newcpu = !(rnp->expmaskinitnext & mask);
> > > > > > @@ -4086,6 +4087,7 @@ void rcu_cpu_starting(unsigned int cpu)
> > > > > > } else {
> > > > > > raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> > > > > > }
> > > > > > + lockdep_on();
> > > > > > smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
> > > > > > }
> > > > >
> > > > > This will just shut it up, but will not fix the actual problem of that
> > > > > spin-lock ending up in trace_lock_acquire() which relies on RCU which
> > > > > isn't looking.
> > > > >
> > > > > What we need here is to suppress tracing, not lockdep. Let me consider.
> > > >
> > > > We appear to have a similar problem with rcu_report_dead(), it's
> > > > raw_spin_unlock()s can end up in trace_lock_release() while we just
> > > > killed RCU.
> > >
> > > So we can deal with the explicit trace_*() calls like the below, but I
> > > really don't like it much. It also doesn't help with function tracing.
> > > This is really early/late in the hotplug cycle and should be considered
> > > entry, we shouldn't be tracing anything here.
> > >
> > > Paul, would it be possible to use a scheme similar to IRQ/NMI for
> > > hotplug? That seems to mostly rely on atomic ops, not locks.
> >
> > The rest of the rcu_node tree and the various grace-period/hotplug races
> > makes that question non-trivial. I will look into it, but I have no
> > reason for optimism.
> >
> > But there is only one way to find out... ;-)
>
> The aforementioned races get really ugly really fast. So I do not
> believe that a lockless approach is a strategy to win here.
>
> But why not use something sort of like a sequence counter, but adapted
> for local on-CPU use? This should quiet the diagnostics for the full
> time that RCU needs its locks. Untested patch below.
>
> Thoughts?

Well, one problem with the previous patch is that it can result in false
negatives. If a CPU incorrectly executes an RCU operation while offline,
but does so just when some other CPU sharing that same leaf rcu_node
structure is itself coming online or going offline, the incorrect RCU
operation will be ignored. So this version avoids these false negatives
by putting the new ->ofl_seq field in the rcu_data structure instead of
the rcu_node structure.

Thanx, Paul

------------------------------------------------------------------------

commit 7deaa04b02298001426730ed0e6214ac20d1a1c1
Author: Paul E. McKenney <[email protected]>
Date: Tue Oct 13 12:39:23 2020 -0700

rcu: Prevent lockdep-RCU splats on lock acquisition/release

The rcu_cpu_starting() and rcu_report_dead() functions transition the
current CPU between online and offline state from an RCU perspective.
Unfortunately, this means that the rcu_cpu_starting() function's lock
acquisition and the rcu_report_dead() function's lock releases happen
while the CPU is offline from an RCU perspective, which can result in
lockdep-RCU splats about using RCU from an offline CPU. In reality,
aside from the splats, both transitions are safe because a new grace
period cannot start until these functions release their locks.

But the false-positive splats nevertheless need to be eliminated.

This commit therefore uses sequence-count-like synchronization to forgive
use of RCU while RCU thinks a CPU is offline across the full extent of
the rcu_cpu_starting() and rcu_report_dead() function's lock acquisitions
and releases.

One approach would have been to use the actual sequence-count primitives
provided by the Linux kernel. Unfortunately, the resulting code looks
completely broken and wrong, and is likely to result in patches that
break RCU in an attempt to address this broken wrongness. Plus there
is little or no net savings in lines of code.

Therefore, this sequence count is instead implemented by a new ->ofl_seq
field in the rcu_data structure. This field could instead be placed in
the rcu_node structure, but the resulting improved cache locality is not
important for the debug kernels in which this will be used on the read
side, and the update side is only invoked twice per CPU-hotplug operation,
so the improvement is not important there, either. More importantly,
placing this in the rcu_data structure allows each CPU to have its own
counter, which avoids possible false negatives that could otherwise occur
when a buggy access from an offline CPU occurred while another CPU sharing
that same leaf rcu_node structure was undergoing a CPU-hotplug operation.
Therefore, this new ->ofl_seq field is added to the rcu_data structure,
not the rcu_node structure.

Signed-off-by: Paul E. McKenney <[email protected]>

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1d42909..286dc0a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1152,13 +1152,15 @@ bool rcu_lockdep_current_cpu_online(void)
struct rcu_data *rdp;
struct rcu_node *rnp;
bool ret = false;
+ unsigned long seq;

if (in_nmi() || !rcu_scheduler_fully_active)
return true;
preempt_disable_notrace();
rdp = this_cpu_ptr(&rcu_data);
rnp = rdp->mynode;
- if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
+ seq = READ_ONCE(rdp->ofl_seq) & ~0x1;
+ if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || seq != READ_ONCE(rdp->ofl_seq))
ret = true;
preempt_enable_notrace();
return ret;
@@ -4065,6 +4067,8 @@ void rcu_cpu_starting(unsigned int cpu)

rnp = rdp->mynode;
mask = rdp->grpmask;
+ WRITE_ONCE(rdp->ofl_seq, rdp->ofl_seq + 1);
+ WARN_ON_ONCE(!(rdp->ofl_seq & 0x1));
raw_spin_lock_irqsave_rcu_node(rnp, flags);
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
newcpu = !(rnp->expmaskinitnext & mask);
@@ -4084,6 +4088,8 @@ void rcu_cpu_starting(unsigned int cpu)
} else {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
+ WRITE_ONCE(rdp->ofl_seq, rdp->ofl_seq + 1);
+ WARN_ON_ONCE(rdp->ofl_seq & 0x1);
smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
}

@@ -4111,6 +4117,8 @@ void rcu_report_dead(unsigned int cpu)

/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
mask = rdp->grpmask;
+ WRITE_ONCE(rdp->ofl_seq, rdp->ofl_seq + 1);
+ WARN_ON_ONCE(!(rdp->ofl_seq & 0x1));
raw_spin_lock(&rcu_state.ofl_lock);
raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
@@ -4123,6 +4131,8 @@ void rcu_report_dead(unsigned int cpu)
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
raw_spin_unlock(&rcu_state.ofl_lock);
+ WRITE_ONCE(rdp->ofl_seq, rdp->ofl_seq + 1);
+ WARN_ON_ONCE(rdp->ofl_seq & 0x1);

rdp->cpu_started = false;
}
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 805c9eb..bf0198d 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -250,6 +250,7 @@ struct rcu_data {
unsigned long rcu_onl_gp_seq; /* ->gp_seq at last online. */
short rcu_onl_gp_flags; /* ->gp_flags at last online. */
unsigned long last_fqs_resched; /* Time of last rcu_resched(). */
+ unsigned long ofl_seq; /* CPU-hotplug operation sequence count. */

int cpu;
};

2020-10-15 03:25:27

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Thu, Oct 15, 2020 at 12:39:54AM +0200, Peter Zijlstra wrote:
> On Wed, Oct 14, 2020 at 03:11:52PM -0700, Paul E. McKenney wrote:
> > On Wed, Oct 14, 2020 at 11:53:19PM +0200, Peter Zijlstra wrote:
> > > On Wed, Oct 14, 2020 at 11:34:05AM -0700, Paul E. McKenney wrote:
> > > > commit 7deaa04b02298001426730ed0e6214ac20d1a1c1
> > > > Author: Paul E. McKenney <[email protected]>
> > > > Date: Tue Oct 13 12:39:23 2020 -0700
> > > >
> > > > rcu: Prevent lockdep-RCU splats on lock acquisition/release
> > > >
> > > > The rcu_cpu_starting() and rcu_report_dead() functions transition the
> > > > current CPU between online and offline state from an RCU perspective.
> > > > Unfortunately, this means that the rcu_cpu_starting() function's lock
> > > > acquisition and the rcu_report_dead() function's lock releases happen
> > > > while the CPU is offline from an RCU perspective, which can result in
> > > > lockdep-RCU splats about using RCU from an offline CPU. In reality,
> > > > aside from the splats, both transitions are safe because a new grace
> > > > period cannot start until these functions release their locks.
> > >
> > > But we call the trace_* crud before we acquire the lock. Are you sure
> > > that's a false-positive?
> >
> > You lost me on this one.
> >
> > I am assuming that you are talking about rcu_cpu_starting(), because
> > that is the one where RCU is not initially watching, that is, the
> > case where tracing before the lock acquisition would be a problem.
> > You cannot be talking about rcu_cpu_starting() itself, because it does
> > not do any tracing before acquiring the lock. But if you are talking
> > about the caller of rcu_cpu_starting(), then that caller should put the
> > rcu_cpu_starting() before the tracing. But that would be the other
> > patch earlier in this thread that was proposing moving the call to
> > rcu_cpu_starting() much earlier in CPU bringup.
> >
> > So what am I missing here?
>
> rcu_cpu_starting();
> raw_spin_lock_irqsave();
> local_irq_save();
> preempt_disable();
> spin_acquire()
> lock_acquire()
> trace_lock_acquire() <--- *whoopsie-doodle*
> /* uses RCU for tracing */
> arch_spin_lock_flags() <--- the actual spinlock

Gah! Idiot here left out the most important part, so good catch!!!
Much easier this way than finding out about it the hard way...

I should have asked myself harder questions earlier today about moving
the counter from the rcu_node structure to the rcu_data structure.

Perhaps something like the following untested patch on top of the
earlier patch?

Thanx, Paul

------------------------------------------------------------------------

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 286dc0a..8b5215e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1159,8 +1159,8 @@ bool rcu_lockdep_current_cpu_online(void)
preempt_disable_notrace();
rdp = this_cpu_ptr(&rcu_data);
rnp = rdp->mynode;
- seq = READ_ONCE(rdp->ofl_seq) & ~0x1;
- if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || seq != READ_ONCE(rdp->ofl_seq))
+ seq = READ_ONCE(rnp->ofl_seq) & ~0x1;
+ if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || seq != READ_ONCE(rnp->ofl_seq))
ret = true;
preempt_enable_notrace();
return ret;
@@ -1982,6 +1982,7 @@ static void rcu_gp_fqs_loop(void)
static void rcu_gp_cleanup(void)
{
int cpu;
+ unsigned long firstseq;
bool needgp = false;
unsigned long gp_duration;
unsigned long new_gp_seq;
@@ -2019,6 +2020,12 @@ static void rcu_gp_cleanup(void)
new_gp_seq = rcu_state.gp_seq;
rcu_seq_end(&new_gp_seq);
rcu_for_each_node_breadth_first(rnp) {
+ smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
+ firstseq = READ_ONCE(rnp->ofl_seq);
+ if (firstseq & 0x1)
+ while (firstseq == smp_load_acquire(&rnp->ofl_seq))
+ schedule_timeout_idle(1); // Can't wake unless RCU is watching.
+ smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
raw_spin_lock_irq_rcu_node(rnp);
if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
dump_blkd_tasks(rnp, 10);
@@ -4067,8 +4074,9 @@ void rcu_cpu_starting(unsigned int cpu)

rnp = rdp->mynode;
mask = rdp->grpmask;
- WRITE_ONCE(rdp->ofl_seq, rdp->ofl_seq + 1);
- WARN_ON_ONCE(!(rdp->ofl_seq & 0x1));
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+ smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
raw_spin_lock_irqsave_rcu_node(rnp, flags);
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
newcpu = !(rnp->expmaskinitnext & mask);
@@ -4088,8 +4096,9 @@ void rcu_cpu_starting(unsigned int cpu)
} else {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
- WRITE_ONCE(rdp->ofl_seq, rdp->ofl_seq + 1);
- WARN_ON_ONCE(rdp->ofl_seq & 0x1);
+ smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(rnp->ofl_seq & 0x1);
smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
}

@@ -4117,8 +4126,9 @@ void rcu_report_dead(unsigned int cpu)

/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
mask = rdp->grpmask;
- WRITE_ONCE(rdp->ofl_seq, rdp->ofl_seq + 1);
- WARN_ON_ONCE(!(rdp->ofl_seq & 0x1));
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+ smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
raw_spin_lock(&rcu_state.ofl_lock);
raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
@@ -4131,8 +4141,9 @@ void rcu_report_dead(unsigned int cpu)
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
raw_spin_unlock(&rcu_state.ofl_lock);
- WRITE_ONCE(rdp->ofl_seq, rdp->ofl_seq + 1);
- WARN_ON_ONCE(rdp->ofl_seq & 0x1);
+ smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(rnp->ofl_seq & 0x1);

rdp->cpu_started = false;
}
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index bf0198d..7708ed1 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -56,6 +56,7 @@ struct rcu_node {
/* Initialized from ->qsmaskinitnext at the */
/* beginning of each grace period. */
unsigned long qsmaskinitnext;
+ unsigned long ofl_seq; /* CPU-hotplug operation sequence count. */
/* Online CPUs for next grace period. */
unsigned long expmask; /* CPUs or groups that need to check in */
/* to allow the current expedited GP */
@@ -250,7 +251,6 @@ struct rcu_data {
unsigned long rcu_onl_gp_seq; /* ->gp_seq at last online. */
short rcu_onl_gp_flags; /* ->gp_flags at last online. */
unsigned long last_fqs_resched; /* Time of last rcu_resched(). */
- unsigned long ofl_seq; /* CPU-hotplug operation sequence count. */

int cpu;
};

2020-10-15 03:40:07

by Peter Zijlstra

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Wed, Oct 14, 2020 at 11:34:05AM -0700, Paul E. McKenney wrote:
> commit 7deaa04b02298001426730ed0e6214ac20d1a1c1
> Author: Paul E. McKenney <[email protected]>
> Date: Tue Oct 13 12:39:23 2020 -0700
>
> rcu: Prevent lockdep-RCU splats on lock acquisition/release
>
> The rcu_cpu_starting() and rcu_report_dead() functions transition the
> current CPU between online and offline state from an RCU perspective.
> Unfortunately, this means that the rcu_cpu_starting() function's lock
> acquisition and the rcu_report_dead() function's lock releases happen
> while the CPU is offline from an RCU perspective, which can result in
> lockdep-RCU splats about using RCU from an offline CPU. In reality,
> aside from the splats, both transitions are safe because a new grace
> period cannot start until these functions release their locks.

But we call the trace_* crud before we acquire the lock. Are you sure
that's a false-positive?

2020-10-15 04:13:34

by Peter Zijlstra

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Wed, Oct 14, 2020 at 03:11:52PM -0700, Paul E. McKenney wrote:
> On Wed, Oct 14, 2020 at 11:53:19PM +0200, Peter Zijlstra wrote:
> > On Wed, Oct 14, 2020 at 11:34:05AM -0700, Paul E. McKenney wrote:
> > > commit 7deaa04b02298001426730ed0e6214ac20d1a1c1
> > > Author: Paul E. McKenney <[email protected]>
> > > Date: Tue Oct 13 12:39:23 2020 -0700
> > >
> > > rcu: Prevent lockdep-RCU splats on lock acquisition/release
> > >
> > > The rcu_cpu_starting() and rcu_report_dead() functions transition the
> > > current CPU between online and offline state from an RCU perspective.
> > > Unfortunately, this means that the rcu_cpu_starting() function's lock
> > > acquisition and the rcu_report_dead() function's lock releases happen
> > > while the CPU is offline from an RCU perspective, which can result in
> > > lockdep-RCU splats about using RCU from an offline CPU. In reality,
> > > aside from the splats, both transitions are safe because a new grace
> > > period cannot start until these functions release their locks.
> >
> > But we call the trace_* crud before we acquire the lock. Are you sure
> > that's a false-positive?
>
> You lost me on this one.
>
> I am assuming that you are talking about rcu_cpu_starting(), because
> that is the one where RCU is not initially watching, that is, the
> case where tracing before the lock acquisition would be a problem.
> You cannot be talking about rcu_cpu_starting() itself, because it does
> not do any tracing before acquiring the lock. But if you are talking
> about the caller of rcu_cpu_starting(), then that caller should put the
> rcu_cpu_starting() before the tracing. But that would be the other
> patch earlier in this thread that was proposing moving the call to
> rcu_cpu_starting() much earlier in CPU bringup.
>
> So what am I missing here?

rcu_cpu_starting();
raw_spin_lock_irqsave();
local_irq_save();
preempt_disable();
spin_acquire()
lock_acquire()
trace_lock_acquire() <--- *whoopsie-doodle*
/* uses RCU for tracing */
arch_spin_lock_flags() <--- the actual spinlock

2020-10-15 04:50:59

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Wed, Oct 14, 2020 at 04:55:53PM -0700, Paul E. McKenney wrote:
> On Thu, Oct 15, 2020 at 12:39:54AM +0200, Peter Zijlstra wrote:
> > On Wed, Oct 14, 2020 at 03:11:52PM -0700, Paul E. McKenney wrote:
> > > On Wed, Oct 14, 2020 at 11:53:19PM +0200, Peter Zijlstra wrote:
> > > > On Wed, Oct 14, 2020 at 11:34:05AM -0700, Paul E. McKenney wrote:
> > > > > commit 7deaa04b02298001426730ed0e6214ac20d1a1c1
> > > > > Author: Paul E. McKenney <[email protected]>
> > > > > Date: Tue Oct 13 12:39:23 2020 -0700
> > > > >
> > > > > rcu: Prevent lockdep-RCU splats on lock acquisition/release
> > > > >
> > > > > The rcu_cpu_starting() and rcu_report_dead() functions transition the
> > > > > current CPU between online and offline state from an RCU perspective.
> > > > > Unfortunately, this means that the rcu_cpu_starting() function's lock
> > > > > acquisition and the rcu_report_dead() function's lock releases happen
> > > > > while the CPU is offline from an RCU perspective, which can result in
> > > > > lockdep-RCU splats about using RCU from an offline CPU. In reality,
> > > > > aside from the splats, both transitions are safe because a new grace
> > > > > period cannot start until these functions release their locks.
> > > >
> > > > But we call the trace_* crud before we acquire the lock. Are you sure
> > > > that's a false-positive?
> > >
> > > You lost me on this one.
> > >
> > > I am assuming that you are talking about rcu_cpu_starting(), because
> > > that is the one where RCU is not initially watching, that is, the
> > > case where tracing before the lock acquisition would be a problem.
> > > You cannot be talking about rcu_cpu_starting() itself, because it does
> > > not do any tracing before acquiring the lock. But if you are talking
> > > about the caller of rcu_cpu_starting(), then that caller should put the
> > > rcu_cpu_starting() before the tracing. But that would be the other
> > > patch earlier in this thread that was proposing moving the call to
> > > rcu_cpu_starting() much earlier in CPU bringup.
> > >
> > > So what am I missing here?
> >
> > rcu_cpu_starting();
> > raw_spin_lock_irqsave();
> > local_irq_save();
> > preempt_disable();
> > spin_acquire()
> > lock_acquire()
> > trace_lock_acquire() <--- *whoopsie-doodle*
> > /* uses RCU for tracing */
> > arch_spin_lock_flags() <--- the actual spinlock
>
> Gah! Idiot here left out the most important part, so good catch!!!
> Much easier this way than finding out about it the hard way...
>
> I should have asked myself harder questions earlier today about moving
> the counter from the rcu_node structure to the rcu_data structure.
>
> Perhaps something like the following untested patch on top of the
> earlier patch?

Except that this is subtly flawed as well. The delay cannot be at
rcu_gp_cleanup() time because by the time we are working on the last
leaf rcu_node structure, callbacks might already have started being
invoked on CPUs corresponding to the earlier leaf rcu_node structures.

So the (untested) patch below (on top of the other two) moves the delay
to rcu_gp_init(), in particular, to the first loop that traverses only
the leaf rcu_node structures handling CPU hotplug.

Hopefully getting closer!

Oh, and the second smp_mb() added to rcu_gp_init() is probably
redundant given the full barrier implied by the later call to
raw_spin_lock_irq_rcu_node(). But one thing at a time...

Thanx, Paul

------------------------------------------------------------------------

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8b5215e..5904b63 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1725,6 +1725,7 @@ static void rcu_strict_gp_boundary(void *unused)
*/
static bool rcu_gp_init(void)
{
+ unsigned long firstseq;
unsigned long flags;
unsigned long oldmask;
unsigned long mask;
@@ -1768,6 +1769,12 @@ static bool rcu_gp_init(void)
*/
rcu_state.gp_state = RCU_GP_ONOFF;
rcu_for_each_leaf_node(rnp) {
+ smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
+ firstseq = READ_ONCE(rnp->ofl_seq);
+ if (firstseq & 0x1)
+ while (firstseq == smp_load_acquire(&rnp->ofl_seq))
+ schedule_timeout_idle(1); // Can't wake unless RCU is watching.
+ smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
raw_spin_lock(&rcu_state.ofl_lock);
raw_spin_lock_irq_rcu_node(rnp);
if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
@@ -1982,7 +1989,6 @@ static void rcu_gp_fqs_loop(void)
static void rcu_gp_cleanup(void)
{
int cpu;
- unsigned long firstseq;
bool needgp = false;
unsigned long gp_duration;
unsigned long new_gp_seq;
@@ -2020,12 +2026,6 @@ static void rcu_gp_cleanup(void)
new_gp_seq = rcu_state.gp_seq;
rcu_seq_end(&new_gp_seq);
rcu_for_each_node_breadth_first(rnp) {
- smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
- firstseq = READ_ONCE(rnp->ofl_seq);
- if (firstseq & 0x1)
- while (firstseq == smp_load_acquire(&rnp->ofl_seq))
- schedule_timeout_idle(1); // Can't wake unless RCU is watching.
- smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
raw_spin_lock_irq_rcu_node(rnp);
if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
dump_blkd_tasks(rnp, 10);

2020-10-15 07:23:27

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Wed, Oct 14, 2020 at 11:53:19PM +0200, Peter Zijlstra wrote:
> On Wed, Oct 14, 2020 at 11:34:05AM -0700, Paul E. McKenney wrote:
> > commit 7deaa04b02298001426730ed0e6214ac20d1a1c1
> > Author: Paul E. McKenney <[email protected]>
> > Date: Tue Oct 13 12:39:23 2020 -0700
> >
> > rcu: Prevent lockdep-RCU splats on lock acquisition/release
> >
> > The rcu_cpu_starting() and rcu_report_dead() functions transition the
> > current CPU between online and offline state from an RCU perspective.
> > Unfortunately, this means that the rcu_cpu_starting() function's lock
> > acquisition and the rcu_report_dead() function's lock releases happen
> > while the CPU is offline from an RCU perspective, which can result in
> > lockdep-RCU splats about using RCU from an offline CPU. In reality,
> > aside from the splats, both transitions are safe because a new grace
> > period cannot start until these functions release their locks.
>
> But we call the trace_* crud before we acquire the lock. Are you sure
> that's a false-positive?

You lost me on this one.

I am assuming that you are talking about rcu_cpu_starting(), because
that is the one where RCU is not initially watching, that is, the
case where tracing before the lock acquisition would be a problem.
You cannot be talking about rcu_cpu_starting() itself, because it does
not do any tracing before acquiring the lock. But if you are talking
about the caller of rcu_cpu_starting(), then that caller should put the
rcu_cpu_starting() before the tracing. But that would be the other
patch earlier in this thread that was proposing moving the call to
rcu_cpu_starting() much earlier in CPU bringup.

So what am I missing here?

Thanx, Paul

2020-10-15 11:44:08

by Peter Zijlstra

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Wed, Oct 14, 2020 at 08:41:28PM -0700, Paul E. McKenney wrote:
> So the (untested) patch below (on top of the other two) moves the delay
> to rcu_gp_init(), in particular, to the first loop that traverses only
> the leaf rcu_node structures handling CPU hotplug.
>
> Hopefully getting closer!

So, if I composed things right, we end up with this. Comments below.


--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1143,13 +1143,15 @@ bool rcu_lockdep_current_cpu_online(void
struct rcu_data *rdp;
struct rcu_node *rnp;
bool ret = false;
+ unsigned long seq;

if (in_nmi() || !rcu_scheduler_fully_active)
return true;
preempt_disable_notrace();
rdp = this_cpu_ptr(&rcu_data);
rnp = rdp->mynode;
- if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
+ seq = READ_ONCE(rnp->ofl_seq) & ~0x1;
+ if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || seq != READ_ONCE(rnp->ofl_seq))
ret = true;
preempt_enable_notrace();
return ret;
@@ -1715,6 +1717,7 @@ static void rcu_strict_gp_boundary(void
*/
static bool rcu_gp_init(void)
{
+ unsigned long firstseq;
unsigned long flags;
unsigned long oldmask;
unsigned long mask;
@@ -1758,6 +1761,12 @@ static bool rcu_gp_init(void)
*/
rcu_state.gp_state = RCU_GP_ONOFF;
rcu_for_each_leaf_node(rnp) {
+ smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
+ firstseq = READ_ONCE(rnp->ofl_seq);
+ if (firstseq & 0x1)
+ while (firstseq == smp_load_acquire(&rnp->ofl_seq))
+ schedule_timeout_idle(1); // Can't wake unless RCU is watching.
+ smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
raw_spin_lock(&rcu_state.ofl_lock);
raw_spin_lock_irq_rcu_node(rnp);
if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
@@ -4047,6 +4056,9 @@ void rcu_cpu_starting(unsigned int cpu)

rnp = rdp->mynode;
mask = rdp->grpmask;
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+ smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
raw_spin_lock_irqsave_rcu_node(rnp, flags);
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
newcpu = !(rnp->expmaskinitnext & mask);
@@ -4064,6 +4076,9 @@ void rcu_cpu_starting(unsigned int cpu)
} else {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
+ smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(rnp->ofl_seq & 0x1);
smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
}

@@ -4091,6 +4106,9 @@ void rcu_report_dead(unsigned int cpu)

/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
mask = rdp->grpmask;
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+ smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
raw_spin_lock(&rcu_state.ofl_lock);
raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
@@ -4103,6 +4121,9 @@ void rcu_report_dead(unsigned int cpu)
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
raw_spin_unlock(&rcu_state.ofl_lock);
+ smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(rnp->ofl_seq & 0x1);

rdp->cpu_started = false;
}
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -56,6 +56,7 @@ struct rcu_node {
/* Initialized from ->qsmaskinitnext at the */
/* beginning of each grace period. */
unsigned long qsmaskinitnext;
+ unsigned long ofl_seq; /* CPU-hotplug operation sequence count. */
/* Online CPUs for next grace period. */
unsigned long expmask; /* CPUs or groups that need to check in */
/* to allow the current expedited GP */


Let's see if I can understand this.

- we seqcount wrap online/offline, such that they're odd while
in-progress. Full memory barriers, such that, unlike with regular
seqcount, it also orders later reads, important?

- when odd, we ensure it is seen as online; notable detail seems to be
that this function is expected to be called in PO relative to the
seqcount ops. It is unsafe concurrently. This seems sufficient for
our goals today.

- when odd, we delay the current gp.


It is that last point where I think I'd like to suggest a change. Given
that both rcu_cpu_starting() and rcu_report_dead() (the naming is
slightly inconsistent) are run with IRQs disabled, spin-waiting seems
like a more natural match.

Also, I don't see the purpose of your smp_load_acquire(), you don't
actually do anything before then calling a full smp_mb().


--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1764,8 +1764,7 @@ static bool rcu_gp_init(void)
smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
firstseq = READ_ONCE(rnp->ofl_seq);
if (firstseq & 0x1)
- while (firstseq == smp_load_acquire(&rnp->ofl_seq))
- schedule_timeout_idle(1); // Can't wake unless RCU is watching.
+ smp_cond_load_relaxed(&rnp->ofl_seq, VAL == firstseq);
smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
raw_spin_lock(&rcu_state.ofl_lock);
raw_spin_lock_irq_rcu_node(rnp);
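
For reference, where an architecture does not provide its own implementation,
smp_cond_load_relaxed() falls back to an open-coded polling loop; roughly (a
simplified sketch of the generic fallback in include/asm-generic/barrier.h):

#define smp_cond_load_relaxed(ptr, cond_expr) ({	\
	typeof(*(ptr)) VAL;				\
	for (;;) {					\
		/* Re-read the variable until cond_expr (written in terms of VAL) holds. */ \
		VAL = READ_ONCE(*(ptr));		\
		if (cond_expr)				\
			break;				\
		cpu_relax();				\
	}						\
	VAL;						\
})

The suggestion is thus to spin with cpu_relax() until ->ofl_seq moves past the
odd value, on the grounds that the update side (rcu_cpu_starting() and
rcu_report_dead()) runs with IRQs disabled, so the window in which the count
stays odd is short.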

2020-10-15 11:46:41

by Peter Zijlstra

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Thu, Oct 15, 2020 at 11:49:26AM +0200, Peter Zijlstra wrote:
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1764,8 +1764,7 @@ static bool rcu_gp_init(void)
> smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
> firstseq = READ_ONCE(rnp->ofl_seq);
> if (firstseq & 0x1)
> - while (firstseq == smp_load_acquire(&rnp->ofl_seq))
> - schedule_timeout_idle(1); // Can't wake unless RCU is watching.
> + smp_cond_load_relaxed(&rnp->ofl_seq, VAL == firstseq);

My bad, that should be: VAL != firstseq.

> smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
> raw_spin_lock(&rcu_state.ofl_lock);
> raw_spin_lock_irq_rcu_node(rnp);

2020-10-15 16:17:22

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Thu, Oct 15, 2020 at 11:49:26AM +0200, Peter Zijlstra wrote:
> On Wed, Oct 14, 2020 at 08:41:28PM -0700, Paul E. McKenney wrote:
> > So the (untested) patch below (on top of the other two) moves the delay
> > to rcu_gp_init(), in particular, to the first loop that traverses only
> > the leaf rcu_node structures handling CPU hotplug.
> >
> > Hopefully getting closer!
>
> So, if I composed things right, we end up with this. Comments below.
>
>
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1143,13 +1143,15 @@ bool rcu_lockdep_current_cpu_online(void
> struct rcu_data *rdp;
> struct rcu_node *rnp;
> bool ret = false;
> + unsigned long seq;
>
> if (in_nmi() || !rcu_scheduler_fully_active)
> return true;
> preempt_disable_notrace();
> rdp = this_cpu_ptr(&rcu_data);
> rnp = rdp->mynode;
> - if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
> + seq = READ_ONCE(rnp->ofl_seq) & ~0x1;
> + if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || seq != READ_ONCE(rnp->ofl_seq))
> ret = true;
> preempt_enable_notrace();
> return ret;
> @@ -1715,6 +1717,7 @@ static void rcu_strict_gp_boundary(void
> */
> static bool rcu_gp_init(void)
> {
> + unsigned long firstseq;
> unsigned long flags;
> unsigned long oldmask;
> unsigned long mask;
> @@ -1758,6 +1761,12 @@ static bool rcu_gp_init(void)
> */
> rcu_state.gp_state = RCU_GP_ONOFF;
> rcu_for_each_leaf_node(rnp) {
> + smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
> + firstseq = READ_ONCE(rnp->ofl_seq);
> + if (firstseq & 0x1)
> + while (firstseq == smp_load_acquire(&rnp->ofl_seq))
> + schedule_timeout_idle(1); // Can't wake unless RCU is watching.
> + smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
> raw_spin_lock(&rcu_state.ofl_lock);
> raw_spin_lock_irq_rcu_node(rnp);
> if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
> @@ -4047,6 +4056,9 @@ void rcu_cpu_starting(unsigned int cpu)
>
> rnp = rdp->mynode;
> mask = rdp->grpmask;
> + WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
> + WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
> + smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
> raw_spin_lock_irqsave_rcu_node(rnp, flags);
> WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
> newcpu = !(rnp->expmaskinitnext & mask);
> @@ -4064,6 +4076,9 @@ void rcu_cpu_starting(unsigned int cpu)
> } else {
> raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> }
> + smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
> + WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
> + WARN_ON_ONCE(rnp->ofl_seq & 0x1);
> smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
> }
>
> @@ -4091,6 +4106,9 @@ void rcu_report_dead(unsigned int cpu)
>
> /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
> mask = rdp->grpmask;
> + WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
> + WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
> + smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
> raw_spin_lock(&rcu_state.ofl_lock);
> raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
> rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
> @@ -4103,6 +4121,9 @@ void rcu_report_dead(unsigned int cpu)
> WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
> raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> raw_spin_unlock(&rcu_state.ofl_lock);
> + smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
> + WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
> + WARN_ON_ONCE(rnp->ofl_seq & 0x1);
>
> rdp->cpu_started = false;
> }
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -56,6 +56,7 @@ struct rcu_node {
> /* Initialized from ->qsmaskinitnext at the */
> /* beginning of each grace period. */
> unsigned long qsmaskinitnext;
> + unsigned long ofl_seq; /* CPU-hotplug operation sequence count. */
> /* Online CPUs for next grace period. */
> unsigned long expmask; /* CPUs or groups that need to check in */
> /* to allow the current expedited GP */
>
>
> Lets see if I can understand this.
>
> - we seqcount wrap online/offline, such that they're odd while
> in-progress. Full memory barriers, such that, unlike with regular
> seqcount, it also orders later reads, important?

Yes.

> - when odd, we ensure it is seen as online; notable detail seems to be
> that this function is expected to be called in PO relative to the
> seqcount ops. It is unsafe concurrently. This seems sufficient for
> our goals today.

Where "this function" is rcu_lockdep_current_cpu_online(), yes it
must be called on the CPU in question. Otherwise, you might get
racy results. Which is sometimes OK.

> - when odd, we delay the current gp.

Yes.

> It is that last point where I think I'd like to suggest change. Given
> that both rcu_cpu_starting() and rcu_report_dead() (the naming is
> slightly inconsistent) are ran with IRQs disabled, spin-waiting seems
> like a more natural match.
>
> Also, I don't see the purpose of your smp_load_acquire(), you don't
> actually do anything before then calling a full smp_mb().
>
>
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1764,8 +1764,7 @@ static bool rcu_gp_init(void)
> smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
> firstseq = READ_ONCE(rnp->ofl_seq);
> if (firstseq & 0x1)
> - while (firstseq == smp_load_acquire(&rnp->ofl_seq))
> - schedule_timeout_idle(1); // Can't wake unless RCU is watching.
> + smp_cond_load_relaxed(&rnp->ofl_seq, VAL == firstseq);
> smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
> raw_spin_lock(&rcu_state.ofl_lock);
> raw_spin_lock_irq_rcu_node(rnp);

This would work, and would be absolutely necessary if grace periods
took only (say) 500 nanoseconds to complete. But given that they take
multiple milliseconds at best, and given that this race is extremely
unlikely, and given the heavy use of virtualization, I have to stick
with the schedule_timeout_idle().

In fact, I have on my list to force this race to happen on the grounds
that if it ain't tested, it don't work...

Thanx, Paul

2020-10-15 16:18:34

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Thu, Oct 15, 2020 at 11:50:33AM +0200, Peter Zijlstra wrote:
> On Thu, Oct 15, 2020 at 11:49:26AM +0200, Peter Zijlstra wrote:
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -1143,13 +1143,15 @@ bool rcu_lockdep_current_cpu_online(void
> > struct rcu_data *rdp;
> > struct rcu_node *rnp;
> > bool ret = false;
> > + unsigned long seq;
> >
> > if (in_nmi() || !rcu_scheduler_fully_active)
> > return true;
> > preempt_disable_notrace();
> > rdp = this_cpu_ptr(&rcu_data);
> > rnp = rdp->mynode;
> > - if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
> > + seq = READ_ONCE(rnp->ofl_seq) & ~0x1;
> > + if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || seq != READ_ONCE(rnp->ofl_seq))
> > ret = true;
> > preempt_enable_notrace();
> > return ret;
>
> Also, here, are the two loads important? Wouldn't:
>
> || READ_ONCE(rnp->ofl_seq) & 0x1
>
> be sufficient?

Indeed it would! Good catch, thank you!!!

Thanx, Paul

2020-10-15 16:23:29

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Thu, Oct 15, 2020 at 11:52:35AM +0200, Peter Zijlstra wrote:
> On Thu, Oct 15, 2020 at 11:49:26AM +0200, Peter Zijlstra wrote:
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -1764,8 +1764,7 @@ static bool rcu_gp_init(void)
> > smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
> > firstseq = READ_ONCE(rnp->ofl_seq);
> > if (firstseq & 0x1)
> > - while (firstseq == smp_load_acquire(&rnp->ofl_seq))
> > - schedule_timeout_idle(1); // Can't wake unless RCU is watching.
> > + smp_cond_load_relaxed(&rnp->ofl_seq, VAL == firstseq);
>
> My bad, that should be: VAL != firstseq.

Looks like something -I- would do! ;-)

> > smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
> > raw_spin_lock(&rcu_state.ofl_lock);
> > raw_spin_lock_irq_rcu_node(rnp);

And somewhere or another you asked about the smp_load_acquire(). You are
right, that is not needed. It was a holdover from when I was thinking
I could get away with weaker barriers. It is now READ_ONCE().

Here is the updated patch. Thoughts?

Oh, and special thanks for taking this one seriously, as my environment
is a bit less distraction-free than it usually is.

Thanx, Paul

------------------------------------------------------------------------

commit 99ed1f30fd4e7db4fc5cf8f4cd6329897b2f6ed1
Author: Paul E. McKenney <[email protected]>
Date: Tue Oct 13 12:39:23 2020 -0700

rcu: Prevent lockdep-RCU splats on lock acquisition/release

The rcu_cpu_starting() and rcu_report_dead() functions transition the
current CPU between online and offline state from an RCU perspective.
Unfortunately, this means that the rcu_cpu_starting() function's lock
acquisition and the rcu_report_dead() function's lock releases happen
while the CPU is offline from an RCU perspective, which can result in
lockdep-RCU splats about using RCU from an offline CPU. In reality,
aside from the splats, both transitions are safe because a new grace
period cannot start until these functions release their locks.

But the false-positive splats nevertheless need to be eliminated.

This commit therefore uses sequence-count-like synchronization to forgive
use of RCU while RCU thinks a CPU is offline across the full extent of
the rcu_cpu_starting() and rcu_report_dead() function's lock acquisitions
and releases.

One approach would have been to use the actual sequence-count primitives
provided by the Linux kernel. Unfortunately, the resulting code looks
completely broken and wrong, and is likely to result in patches that
break RCU in an attempt to address this appearance of broken wrongness.
Plus there is no net savings in lines of code, given the additional
explicit memory barriers required.

Therefore, this sequence count is instead implemented by a new ->ofl_seq
field in the rcu_node structure. If this counter's value is an odd
number, RCU forgives RCU read-side critical sections on other CPUs covered
by the same rcu_node structure, even if those CPUs are offline from
an RCU perspective. In addition, if a given leaf rcu_node structure's
->ofl_seq counter value is an odd number, rcu_gp_init() delays starting
the grace period until that counter value changes.

[ paulmck: Apply Peter Zijlstra feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1d42909..6d906fb 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1158,7 +1158,7 @@ bool rcu_lockdep_current_cpu_online(void)
preempt_disable_notrace();
rdp = this_cpu_ptr(&rcu_data);
rnp = rdp->mynode;
- if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
+ if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1)
ret = true;
preempt_enable_notrace();
return ret;
@@ -1723,6 +1723,7 @@ static void rcu_strict_gp_boundary(void *unused)
*/
static bool rcu_gp_init(void)
{
+ unsigned long firstseq;
unsigned long flags;
unsigned long oldmask;
unsigned long mask;
@@ -1766,6 +1767,12 @@ static bool rcu_gp_init(void)
*/
rcu_state.gp_state = RCU_GP_ONOFF;
rcu_for_each_leaf_node(rnp) {
+ smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
+ firstseq = READ_ONCE(rnp->ofl_seq);
+ if (firstseq & 0x1)
+ while (firstseq == READ_ONCE(rnp->ofl_seq))
+ schedule_timeout_idle(1); // Can't wake unless RCU is watching.
+ smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
raw_spin_lock(&rcu_state.ofl_lock);
raw_spin_lock_irq_rcu_node(rnp);
if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
@@ -4065,6 +4072,9 @@ void rcu_cpu_starting(unsigned int cpu)

rnp = rdp->mynode;
mask = rdp->grpmask;
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+ smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
raw_spin_lock_irqsave_rcu_node(rnp, flags);
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
newcpu = !(rnp->expmaskinitnext & mask);
@@ -4084,6 +4094,9 @@ void rcu_cpu_starting(unsigned int cpu)
} else {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
+ smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(rnp->ofl_seq & 0x1);
smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
}

@@ -4111,6 +4124,9 @@ void rcu_report_dead(unsigned int cpu)

/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
mask = rdp->grpmask;
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
+ smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
raw_spin_lock(&rcu_state.ofl_lock);
raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
@@ -4123,6 +4139,9 @@ void rcu_report_dead(unsigned int cpu)
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
raw_spin_unlock(&rcu_state.ofl_lock);
+ smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
+ WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
+ WARN_ON_ONCE(rnp->ofl_seq & 0x1);

rdp->cpu_started = false;
}
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 805c9eb..7708ed1 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -56,6 +56,7 @@ struct rcu_node {
/* Initialized from ->qsmaskinitnext at the */
/* beginning of each grace period. */
unsigned long qsmaskinitnext;
+ unsigned long ofl_seq; /* CPU-hotplug operation sequence count. */
/* Online CPUs for next grace period. */
unsigned long expmask; /* CPUs or groups that need to check in */
/* to allow the current expedited GP */
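
Condensed, the update-side bracketing that this patch adds to
rcu_cpu_starting() and rcu_report_dead() amounts to the following (an
illustrative sketch mirroring the hunks above; the locking and the actual
->qsmaskinitnext updates are elided):

/* Update side: runs on the incoming/outgoing CPU with IRQs disabled. */
static void ofl_seq_bracket(struct rcu_node *rnp)
{
	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);	/* Now odd: transition in flight. */
	WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
	smp_mb();	/* Order the odd value before the ->qsmaskinitnext update. */

	/* ... update rnp->qsmaskinitnext under the usual locks ... */

	smp_mb();	/* Order the ->qsmaskinitnext update before the even value. */
	WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);	/* Even again: transition done. */
	WARN_ON_ONCE(rnp->ofl_seq & 0x1);
}

On the read side, rcu_lockdep_current_cpu_online() treats an odd count as
"online", and rcu_gp_init() waits for an odd count to change before
initializing that leaf rcu_node structure for the new grace period.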

2020-10-15 18:26:47

by Peter Zijlstra

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Thu, Oct 15, 2020 at 11:49:26AM +0200, Peter Zijlstra wrote:
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1143,13 +1143,15 @@ bool rcu_lockdep_current_cpu_online(void
> struct rcu_data *rdp;
> struct rcu_node *rnp;
> bool ret = false;
> + unsigned long seq;
>
> if (in_nmi() || !rcu_scheduler_fully_active)
> return true;
> preempt_disable_notrace();
> rdp = this_cpu_ptr(&rcu_data);
> rnp = rdp->mynode;
> - if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
> + seq = READ_ONCE(rnp->ofl_seq) & ~0x1;
> + if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || seq != READ_ONCE(rnp->ofl_seq))
> ret = true;
> preempt_enable_notrace();
> return ret;

Also, here, are the two loads important? Wouldn't:

|| READ_ONCE(rnp->ofl_seq) & 0x1

be sufficient?

2020-10-15 21:21:38

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Thu, Oct 15, 2020 at 09:15:01AM -0700, Paul E. McKenney wrote:
> On Thu, Oct 15, 2020 at 11:49:26AM +0200, Peter Zijlstra wrote:
> > On Wed, Oct 14, 2020 at 08:41:28PM -0700, Paul E. McKenney wrote:

[ . . . ]

> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -1764,8 +1764,7 @@ static bool rcu_gp_init(void)
> > smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
> > firstseq = READ_ONCE(rnp->ofl_seq);
> > if (firstseq & 0x1)
> > - while (firstseq == smp_load_acquire(&rnp->ofl_seq))
> > - schedule_timeout_idle(1); // Can't wake unless RCU is watching.
> > + smp_cond_load_relaxed(&rnp->ofl_seq, VAL == firstseq);
> > smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
> > raw_spin_lock(&rcu_state.ofl_lock);
> > raw_spin_lock_irq_rcu_node(rnp);
>
> This would work, and would be absolutely necessary if grace periods
> took only (say) 500 nanoseconds to complete. But given that they take
> multiple milliseconds at best, and given that this race is extremely
> unlikely, and given the heavy use of virtualization, I have to stick
> with the schedule_timeout_idle().
>
> In fact, I have on my list to force this race to happen on the grounds
> that if it ain't tested, it don't work...

And it only took about 1000 seconds of TREE03 to make this happen, so we
should be good just relying on rcutorture. ;-)

Thanx, Paul

2020-10-28 20:54:39

by Qian Cai

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Mon, 2020-10-12 at 11:11 +0800, Boqun Feng wrote:
> Hi,
>
> On Fri, Oct 09, 2020 at 09:41:24AM -0400, Qian Cai wrote:
> > On Fri, 2020-10-09 at 07:58 +0000, tip-bot2 for Peter Zijlstra wrote:
> > > The following commit has been merged into the locking/core branch of tip:
> > >
> > > Commit-ID: 4d004099a668c41522242aa146a38cc4eb59cb1e
> > > Gitweb:
> > > https://git.kernel.org/tip/4d004099a668c41522242aa146a38cc4eb59cb1e
> > > Author: Peter Zijlstra <[email protected]>
> > > AuthorDate: Fri, 02 Oct 2020 11:04:21 +02:00
> > > Committer: Ingo Molnar <[email protected]>
> > > CommitterDate: Fri, 09 Oct 2020 08:53:30 +02:00
> > >
> > > lockdep: Fix lockdep recursion
> > >
> > > Steve reported that lockdep_assert*irq*(), when nested inside lockdep
> > > itself, will trigger a false-positive.
> > >
> > > One example is the stack-trace code, as called from inside lockdep,
> > > triggering tracing, which in turn calls RCU, which then uses
> > > lockdep_assert_irqs_disabled().
> > >
> > > Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-
> > > cpu
> > > variables")
> > > Reported-by: Steven Rostedt <[email protected]>
> > > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > > Signed-off-by: Ingo Molnar <[email protected]>
> >
> > Reverting this linux-next commit fixed booting RCU-list warnings everywhere.
> >
>
> I think this happened because in this commit debug_lockdep_rcu_enabled()
> didn't adapt to the change that made lockdep_recursion a percpu
> variable?
>
> Qian, mind to try the following?

Boqun, Paul, may I ask what's the latest with the fixes? I must admit that I got
lost in this thread, but I remember that the patch from Boqun below at least
silences quite a few of those warnings, if not all. The problem is that some of
those warnings would trigger a lockdep circular-locking warning due to printk()
being called with some locks held, which in turn disables lockdep and makes our
test runs inefficient.

>
> Although, arguably the problem still exists, i.e. we still have an RCU
> read-side critical section inside lock_acquire(), which may be called on
> a yet-to-online CPU, which RCU doesn't watch. I think this used to be OK
> because we don't "free" anything from lockdep, IOW, there is no
> synchronize_rcu() or call_rcu() that _needs_ to wait for the RCU
> read-side critical sections inside lockdep. But now we have lock class
> recycling, so it might be a problem.
>
> That said, currently validate_chain() and lock class recycling are
> mutually excluded via graph_lock, so we are safe for this one ;-)
>
> ----------->8
> diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> index 39334d2d2b37..35d9bab65b75 100644
> --- a/kernel/rcu/update.c
> +++ b/kernel/rcu/update.c
> @@ -275,8 +275,8 @@ EXPORT_SYMBOL_GPL(rcu_callback_map);
>
> noinstr int notrace debug_lockdep_rcu_enabled(void)
> {
> - return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
> - current->lockdep_recursion == 0;
> + return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE &&
> + __lockdep_enabled;
> }
> EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);


2020-10-28 21:56:54

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Wed, Oct 28, 2020 at 10:39:47AM -0400, Qian Cai wrote:
> On Tue, 2020-10-27 at 20:01 -0700, Paul E. McKenney wrote:
> > If I have the right email thread associated with the right fixes, these
> > commits in -rcu should be what you are looking for:
> >
> > 73b658b6b7d5 ("rcu: Prevent lockdep-RCU splats on lock acquisition/release")
> > 626b79aa935a ("x86/smpboot: Move rcu_cpu_starting() earlier")
> >
> > And maybe this one as well:
> >
> > 3a6f638cb95b ("rcu,ftrace: Fix ftrace recursion")
> >
> > Please let me know if these commits do not fix things.
> While those patches silence the warnings for x86. Other arches are still
> suffering. It is only after applying the patch from Boqun below fixed
> everything.

Fair point!

> Is it a good idea for Boqun to write a formal patch or we should fix all arches
> individually like "x86/smpboot: Move rcu_cpu_starting() earlier"?

By Boqun's patch, you mean the change to debug_lockdep_rcu_enabled()
shown below? Peter Zijlstra showed that real failures can happen, so we
do not want to cover them up. So we are firmly in "fix all architectures"
space here, sorry!

I am happy to accumulate those patches, but cannot commit to creating
or testing them.

Thanx, Paul

> > > > diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> > > > index 39334d2d2b37..35d9bab65b75 100644
> > > > --- a/kernel/rcu/update.c
> > > > +++ b/kernel/rcu/update.c
> > > > @@ -275,8 +275,8 @@ EXPORT_SYMBOL_GPL(rcu_callback_map);
> > > >
> > > > noinstr int notrace debug_lockdep_rcu_enabled(void)
> > > > {
> > > > - return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
> > > > - current->lockdep_recursion == 0;
> > > > + return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE &&
> > > > + __lockdep_enabled;
> > > > }
> > > > EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
>
> The warnings for each arch are:
>
> == powerpc ==
> [ 0.176044][ T1] smp: Bringing up secondary CPUs ...
> [ 0.179731][ T0]
> [ 0.179734][ T0] =============================
> [ 0.179736][ T0] WARNING: suspicious RCU usage
> [ 0.179739][ T0] 5.10.0-rc1-next-20201028+ #2 Not tainted
> [ 0.179741][ T0] -----------------------------
> [ 0.179744][ T0] kernel/locking/lockdep.c:3497 RCU-list traversed in non-reader section!!
> [ 0.179745][ T0]
> [ 0.179745][ T0] other info that might help us debug this:
> [ 0.179745][ T0]
> [ 0.179748][ T0]
> [ 0.179748][ T0] RCU used illegally from offline CPU!
> [ 0.179748][ T0] rcu_scheduler_active = 1, debug_locks = 1
> [ 0.179750][ T0] no locks held by swapper/1/0.
> [ 0.179752][ T0]
> [ 0.179752][ T0] stack backtrace:
> [ 0.179757][ T0] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.10.0-rc1-next-20201028+ #2
> [ 0.179759][ T0] Call Trace:
> [ 0.179767][ T0] [c000000015b27ab0] [c000000000657188] dump_stack+0xec/0x144 (unreliable)
> [ 0.179776][ T0] [c000000015b27af0] [c00000000014d0d4] lockdep_rcu_suspicious+0x128/0x14c
> [ 0.179782][ T0] [c000000015b27b70] [c000000000148920] __lock_acquire+0x1060/0x1c60
> [ 0.179788][ T0] [c000000015b27ca0] [c00000000014a1d0] lock_acquire+0x140/0x5f0
> [ 0.179794][ T0] [c000000015b27d90] [c0000000008f22f4] _raw_spin_lock_irqsave+0x64/0xb0
> [ 0.179801][ T0] [c000000015b27dd0] [c0000000001a1094] clockevents_register_device+0x74/0x270
> [ 0.179808][ T0] [c000000015b27e80] [c00000000001f194] register_decrementer_clockevent+0x94/0x110
> [ 0.179814][ T0] [c000000015b27ef0] [c00000000003fd84] start_secondary+0x134/0x800
> [ 0.179819][ T0] [c000000015b27f90] [c00000000000c454] start_secondary_prolog+0x10/0x14
> [ 0.179855][ T0]
> [ 0.179857][ T0] =============================
> [ 0.179858][ T0] WARNING: suspicious RCU usage
> [ 0.179860][ T0] 5.10.0-rc1-next-20201028+ #2 Not tainted
> [ 0.179862][ T0] -----------------------------
> [ 0.179864][ T0] kernel/locking/lockdep.c:886 RCU-list traversed in non-reader section!!
> [ 0.179866][ T0]
> [ 0.179866][ T0] other info that might help us debug this:
> [ 0.179866][ T0]
> [ 0.179868][ T0]
> [ 0.179868][ T0] RCU used illegally from offline CPU!
> [ 0.179868][ T0] rcu_scheduler_active = 1, debug_locks = 1
> [ 0.179870][ T0] no locks held by swapper/1/0.
> [ 0.179871][ T0]
> [ 0.179871][ T0] stack backtrace:
> [ 0.179875][ T0] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.10.0-rc1-next-20201028+ #2
> [ 0.179876][ T0] Call Trace:
> [ 0.179880][ T0] [c000000015b27980] [c000000000657188] dump_stack+0xec/0x144 (unreliable)
> [ 0.179886][ T0] [c000000015b279c0] [c00000000014d0d4] lockdep_rcu_suspicious+0x128/0x14c
> [ 0.179892][ T0] [c000000015b27a40] [c00000000014b010] register_lock_class+0x680/0xc70
> [ 0.179896][ T0] [c000000015b27b50] [c00000000014795c] __lock_acquire+0x9c/0x1c60
> [ 0.179901][ T0] [c000000015b27c80] [c00000000014a1d0] lock_acquire+0x140/0x5f0
> [ 0.179906][ T0] [c000000015b27d70] [c0000000008f22f4] _raw_spin_lock_irqsave+0x64/0xb0
> [ 0.179912][ T0] [c000000015b27db0] [c0000000003a2fb4] __delete_object+0x44/0x80
> [ 0.179917][ T0] [c000000015b27de0] [c00000000035a964] slab_free_freelist_hook+0x174/0x300
> [ 0.179921][ T0] [c000000015b27e50] [c00000000035f848] kfree+0xf8/0x500
> [ 0.179926][ T0] [c000000015b27ed0] [c000000000656878] free_cpumask_var+0x18/0x30
> [ 0.179931][ T0] [c000000015b27ef0] [c00000000003fff0] start_secondary+0x3a0/0x800
> add_cpu_to_masks at arch/powerpc/kernel/smp.c:1390
> (inlined by) start_secondary at arch/powerpc/kernel/smp.c:1420
> [ 0.179936][ T0] [c000000015b27f90] [c00000000000c454] start_secondary_prolog+0x10/0x14
> [ 0.955418][ T1] smp: Brought up 2 nodes, 128 CPUs
>
> == arm64 ==
> [ 0.473124][ T0] CPU1: Booted secondary processor 0x0000000100 [0x431f0af1]
> [ 0.473180][ C1]
> [ 0.473183][ C1] =============================
> [ 0.473186][ C1] WARNING: suspicious RCU usage
> [ 0.473188][ C1] 5.10.0-rc1-next-20201028+ #3 Not tainted
> [ 0.473190][ C1] -----------------------------
> [ 0.473193][ C1] kernel/locking/lockdep.c:3497 RCU-list traversed in non-reader section!!
> [ 0.473194][ C1]
> [ 0.473197][ C1] other info that might help us debug this:
> [ 0.473198][ C1]
> [ 0.473200][ C1]
> [ 0.473202][ C1] RCU used illegally from offline CPU!
> [ 0.473204][ C1] rcu_scheduler_active = 1, debug_locks = 1
> [ 0.473206][ C1] no locks held by swapper/1/0.
> [ 0.473208][ C1]
> [ 0.473210][ C1] stack backtrace:
> [ 0.473212][ C1] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.10.0-rc1-next-20201028+ #3
> [ 0.473215][ C1] Call trace:
> [ 0.473217][ C1] dump_backtrace+0x0/0x3c8
> [ 0.473219][ C1] show_stack+0x14/0x60
> [ 0.473221][ C1] dump_stack+0x14c/0x1c4
> [ 0.473223][ C1] lockdep_rcu_suspicious+0x134/0x14c
> [ 0.473225][ C1] __lock_acquire+0x1c30/0x2600
> [ 0.473227][ C1] lock_acquire+0x274/0xc48
> [ 0.473229][ C1] _raw_spin_lock+0xc8/0x140
> [ 0.473231][ C1] vprintk_emit+0x90/0x3d0
> [ 0.473233][ C1] vprintk_default+0x34/0x40
> [ 0.473235][ C1] vprintk_func+0x378/0x590
> [ 0.473236][ C1] printk+0xa8/0xd4
> [ 0.473239][ C1] __cpuinfo_store_cpu+0x71c/0x868
> [ 0.473241][ C1] cpuinfo_store_cpu+0x2c/0xc8
> [ 0.473243][ C1] secondary_start_kernel+0x244/0x318
> [ 0.547541][ T0] Detected PIPT I-cache on CPU2
> [ 0.547562][ T0] GICv3: CPU2: found redistributor 200 region 0:0x0000000401100000
>
> == s390 ==
> 00: [ 0.603404] WARNING: suspicious RCU usage
> 00: [ 0.603408] 5.10.0-rc1-next-20201027 #1 Not tainted
> 00: [ 0.603409] -----------------------------
> 00: [ 0.603459] kernel/locking/lockdep.c:3497 RCU-list traversed in non-reade
> 00: r section!!
> 00: [ 0.603460]
> 00: [ 0.603460] other info that might help us debug this:
> 00: [ 0.603460]
> 00: [ 0.603462]
> 00: [ 0.603462] RCU used illegally from offline CPU!
> 00: [ 0.603462] rcu_scheduler_active = 1, debug_locks = 1
> 00: [ 0.603463] no locks held by swapper/1/0.
> 00: [ 0.603464]
> 00: [ 0.603464] stack backtrace:
> 00: [ 0.603467] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.10.0-rc1-next-202
> 00: 01027 #1
> 00: [ 0.603469] Hardware name: IBM 2964 N96 400 (z/VM 6.4.0)
> 00: [ 0.603471] Call Trace:
> 00: [ 0.603484] [<00000000d262a778>] show_stack+0x158/0x1f0
> 00: [ 0.603487] [<00000000d2635872>] dump_stack+0x1f2/0x238
> 00: [ 0.603491] [<00000000d167a550>] __lock_acquire+0x2640/0x4dd0
> 00: [ 0.603493] [<00000000d167eda8>] lock_acquire+0x3a8/0xd08
> 00: [ 0.603498] [<00000000d265b088>] _raw_spin_lock_irqsave+0xc0/0xf0
> 00: [ 0.603502] [<00000000d17103f8>] clockevents_register_device+0xa8/0x528
> 00:
> 00: [ 0.603516] [<00000000d14f5246>] init_cpu_timer+0x33e/0x468
> 00: [ 0.603521] [<00000000d151f44a>] smp_init_secondary+0x11a/0x328
> 00: [ 0.603525] [<00000000d151f32a>] smp_start_secondary+0x82/0x88
>

2020-10-28 22:11:22

by Qian Cai

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Wed, 2020-10-28 at 08:53 -0700, Paul E. McKenney wrote:
> On Wed, Oct 28, 2020 at 10:39:47AM -0400, Qian Cai wrote:
> > On Tue, 2020-10-27 at 20:01 -0700, Paul E. McKenney wrote:
> > > If I have the right email thread associated with the right fixes, these
> > > commits in -rcu should be what you are looking for:
> > >
> > > 73b658b6b7d5 ("rcu: Prevent lockdep-RCU splats on lock
> > > acquisition/release")
> > > 626b79aa935a ("x86/smpboot: Move rcu_cpu_starting() earlier")
> > >
> > > And maybe this one as well:
> > >
> > > 3a6f638cb95b ("rcu,ftrace: Fix ftrace recursion")
> > >
> > > Please let me know if these commits do not fix things.
> > While those patches silence the warnings for x86. Other arches are still
> > suffering. It is only after applying the patch from Boqun below fixed
> > everything.
>
> Fair point!
>
> > Is it a good idea for Boqun to write a formal patch or we should fix all
> > arches
> > individually like "x86/smpboot: Move rcu_cpu_starting() earlier"?
>
> By Boqun's patch, you mean the change to debug_lockdep_rcu_enabled()
> shown below? Peter Zijlstra showed that real failures can happen, so we

Yes.

> do not want to cover them up. So we are firmly in "fix all architectures"
> space here, sorry!
>
> I am happy to accumulate those patches, but cannot commit to creating
> or testing them.

Okay, I posted 3 patches, one for each arch, and CC'ed you. BTW, it looks like
something is wrong with @vger.kernel.org today; I received many of these:

4.7.1 Hello [216.205.24.124], for recipient address <[email protected]> the policy analysis reported: zpostgrey: connect: Connection refused

and I can see your previous mails did not even reach there either.

https://lore.kernel.org/lkml/



2020-10-29 00:54:31

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Wed, Oct 28, 2020 at 04:08:59PM -0400, Qian Cai wrote:
> On Wed, 2020-10-28 at 08:53 -0700, Paul E. McKenney wrote:
> > On Wed, Oct 28, 2020 at 10:39:47AM -0400, Qian Cai wrote:
> > > On Tue, 2020-10-27 at 20:01 -0700, Paul E. McKenney wrote:
> > > > If I have the right email thread associated with the right fixes, these
> > > > commits in -rcu should be what you are looking for:
> > > >
> > > > 73b658b6b7d5 ("rcu: Prevent lockdep-RCU splats on lock
> > > > acquisition/release")
> > > > 626b79aa935a ("x86/smpboot: Move rcu_cpu_starting() earlier")
> > > >
> > > > And maybe this one as well:
> > > >
> > > > 3a6f638cb95b ("rcu,ftrace: Fix ftrace recursion")
> > > >
> > > > Please let me know if these commits do not fix things.
> > > While those patches silence the warnings for x86. Other arches are still
> > > suffering. It is only after applying the patch from Boqun below fixed
> > > everything.
> >
> > Fair point!
> >
> > > Is it a good idea for Boqun to write a formal patch or we should fix all
> > > arches
> > > individually like "x86/smpboot: Move rcu_cpu_starting() earlier"?
> >
> > By Boqun's patch, you mean the change to debug_lockdep_rcu_enabled()
> > shown below? Peter Zijlstra showed that real failures can happen, so we
>
> Yes.
>
> > do not want to cover them up. So we are firmly in "fix all architectures"
> > space here, sorry!
> >
> > I am happy to accumulate those patches, but cannot commit to creating
> > or testing them.
>
> Okay, I posted 3 patches for each arch and CC'ed you.

Very good! I have acked them.

> BTW, it looks like
> something is wrong on @vger.kernel.org today where I received many of those,
>
> 4.7.1 Hello [216.205.24.124], for recipient address <[email protected]> the policy analysis reported: zpostgrey: connect: Connection refused
>
> and I can see your previous mails did not even reach there either.
>
> https://lore.kernel.org/lkml/

It does seem to be having some difficulty, and some people are looking
into it. Hopefully someone who can actually make the needed changes will
get to it soon. ;-)

Thanx, Paul

2020-10-29 00:56:30

by Qian Cai

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Tue, 2020-10-27 at 20:01 -0700, Paul E. McKenney wrote:
> If I have the right email thread associated with the right fixes, these
> commits in -rcu should be what you are looking for:
>
> 73b658b6b7d5 ("rcu: Prevent lockdep-RCU splats on lock acquisition/release")
> 626b79aa935a ("x86/smpboot: Move rcu_cpu_starting() earlier")
>
> And maybe this one as well:
>
> 3a6f638cb95b ("rcu,ftrace: Fix ftrace recursion")
>
> Please let me know if these commits do not fix things.
While those patches silence the warnings for x86, other arches are still
suffering. It is only after applying the patch from Boqun below that
everything was fixed.

Is it a good idea for Boqun to write a formal patch, or should we fix all arches
individually like "x86/smpboot: Move rcu_cpu_starting() earlier"?

> > > diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> > > index 39334d2d2b37..35d9bab65b75 100644
> > > --- a/kernel/rcu/update.c
> > > +++ b/kernel/rcu/update.c
> > > @@ -275,8 +275,8 @@ EXPORT_SYMBOL_GPL(rcu_callback_map);
> > >
> > > noinstr int notrace debug_lockdep_rcu_enabled(void)
> > > {
> > > - return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
> > > - current->lockdep_recursion == 0;
> > > + return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE &&
> > > + __lockdep_enabled;
> > > }
> > > EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);

The warnings for each arch are:

== powerpc ==
[ 0.176044][ T1] smp: Bringing up secondary CPUs ...
[ 0.179731][ T0]
[ 0.179734][ T0] =============================
[ 0.179736][ T0] WARNING: suspicious RCU usage
[ 0.179739][ T0] 5.10.0-rc1-next-20201028+ #2 Not tainted
[ 0.179741][ T0] -----------------------------
[ 0.179744][ T0] kernel/locking/lockdep.c:3497 RCU-list traversed in non-reader section!!
[ 0.179745][ T0]
[ 0.179745][ T0] other info that might help us debug this:
[ 0.179745][ T0]
[ 0.179748][ T0]
[ 0.179748][ T0] RCU used illegally from offline CPU!
[ 0.179748][ T0] rcu_scheduler_active = 1, debug_locks = 1
[ 0.179750][ T0] no locks held by swapper/1/0.
[ 0.179752][ T0]
[ 0.179752][ T0] stack backtrace:
[ 0.179757][ T0] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.10.0-rc1-next-20201028+ #2
[ 0.179759][ T0] Call Trace:
[ 0.179767][ T0] [c000000015b27ab0] [c000000000657188] dump_stack+0xec/0x144 (unreliable)
[ 0.179776][ T0] [c000000015b27af0] [c00000000014d0d4] lockdep_rcu_suspicious+0x128/0x14c
[ 0.179782][ T0] [c000000015b27b70] [c000000000148920] __lock_acquire+0x1060/0x1c60
[ 0.179788][ T0] [c000000015b27ca0] [c00000000014a1d0] lock_acquire+0x140/0x5f0
[ 0.179794][ T0] [c000000015b27d90] [c0000000008f22f4] _raw_spin_lock_irqsave+0x64/0xb0
[ 0.179801][ T0] [c000000015b27dd0] [c0000000001a1094] clockevents_register_device+0x74/0x270
[ 0.179808][ T0] [c000000015b27e80] [c00000000001f194] register_decrementer_clockevent+0x94/0x110
[ 0.179814][ T0] [c000000015b27ef0] [c00000000003fd84] start_secondary+0x134/0x800
[ 0.179819][ T0] [c000000015b27f90] [c00000000000c454] start_secondary_prolog+0x10/0x14
[ 0.179855][ T0]
[ 0.179857][ T0] =============================
[ 0.179858][ T0] WARNING: suspicious RCU usage
[ 0.179860][ T0] 5.10.0-rc1-next-20201028+ #2 Not tainted
[ 0.179862][ T0] -----------------------------
[ 0.179864][ T0] kernel/locking/lockdep.c:886 RCU-list traversed in non-reader section!!
[ 0.179866][ T0]
[ 0.179866][ T0] other info that might help us debug this:
[ 0.179866][ T0]
[ 0.179868][ T0]
[ 0.179868][ T0] RCU used illegally from offline CPU!
[ 0.179868][ T0] rcu_scheduler_active = 1, debug_locks = 1
[ 0.179870][ T0] no locks held by swapper/1/0.
[ 0.179871][ T0]
[ 0.179871][ T0] stack backtrace:
[ 0.179875][ T0] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.10.0-rc1-next-20201028+ #2
[ 0.179876][ T0] Call Trace:
[ 0.179880][ T0] [c000000015b27980] [c000000000657188] dump_stack+0xec/0x144 (unreliable)
[ 0.179886][ T0] [c000000015b279c0] [c00000000014d0d4] lockdep_rcu_suspicious+0x128/0x14c
[ 0.179892][ T0] [c000000015b27a40] [c00000000014b010] register_lock_class+0x680/0xc70
[ 0.179896][ T0] [c000000015b27b50] [c00000000014795c] __lock_acquire+0x9c/0x1c60
[ 0.179901][ T0] [c000000015b27c80] [c00000000014a1d0] lock_acquire+0x140/0x5f0
[ 0.179906][ T0] [c000000015b27d70] [c0000000008f22f4] _raw_spin_lock_irqsave+0x64/0xb0
[ 0.179912][ T0] [c000000015b27db0] [c0000000003a2fb4] __delete_object+0x44/0x80
[ 0.179917][ T0] [c000000015b27de0] [c00000000035a964] slab_free_freelist_hook+0x174/0x300
[ 0.179921][ T0] [c000000015b27e50] [c00000000035f848] kfree+0xf8/0x500
[ 0.179926][ T0] [c000000015b27ed0] [c000000000656878] free_cpumask_var+0x18/0x30
[ 0.179931][ T0] [c000000015b27ef0] [c00000000003fff0] start_secondary+0x3a0/0x800
add_cpu_to_masks at arch/powerpc/kernel/smp.c:1390
(inlined by) start_secondary at arch/powerpc/kernel/smp.c:1420
[ 0.179936][ T0] [c000000015b27f90] [c00000000000c454] start_secondary_prolog+0x10/0x14
[ 0.955418][ T1] smp: Brought up 2 nodes, 128 CPUs

== arm64 ==
[ 0.473124][ T0] CPU1: Booted secondary processor 0x0000000100 [0x431f0af1]
[ 0.473180][ C1]
[ 0.473183][ C1] =============================
[ 0.473186][ C1] WARNING: suspicious RCU usage
[ 0.473188][ C1] 5.10.0-rc1-next-20201028+ #3 Not tainted
[ 0.473190][ C1] -----------------------------
[ 0.473193][ C1] kernel/locking/lockdep.c:3497 RCU-list traversed in non-reader section!!
[ 0.473194][ C1]
[ 0.473197][ C1] other info that might help us debug this:
[ 0.473198][ C1]
[ 0.473200][ C1]
[ 0.473202][ C1] RCU used illegally from offline CPU!
[ 0.473204][ C1] rcu_scheduler_active = 1, debug_locks = 1
[ 0.473206][ C1] no locks held by swapper/1/0.
[ 0.473208][ C1]
[ 0.473210][ C1] stack backtrace:
[ 0.473212][ C1] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.10.0-rc1-next-20201028+ #3
[ 0.473215][ C1] Call trace:
[ 0.473217][ C1] dump_backtrace+0x0/0x3c8
[ 0.473219][ C1] show_stack+0x14/0x60
[ 0.473221][ C1] dump_stack+0x14c/0x1c4
[ 0.473223][ C1] lockdep_rcu_suspicious+0x134/0x14c
[ 0.473225][ C1] __lock_acquire+0x1c30/0x2600
[ 0.473227][ C1] lock_acquire+0x274/0xc48
[ 0.473229][ C1] _raw_spin_lock+0xc8/0x140
[ 0.473231][ C1] vprintk_emit+0x90/0x3d0
[ 0.473233][ C1] vprintk_default+0x34/0x40
[ 0.473235][ C1] vprintk_func+0x378/0x590
[ 0.473236][ C1] printk+0xa8/0xd4
[ 0.473239][ C1] __cpuinfo_store_cpu+0x71c/0x868
[ 0.473241][ C1] cpuinfo_store_cpu+0x2c/0xc8
[ 0.473243][ C1] secondary_start_kernel+0x244/0x318
[ 0.547541][ T0] Detected PIPT I-cache on CPU2
[ 0.547562][ T0] GICv3: CPU2: found redistributor 200 region 0:0x0000000401100000

== s390 ==
00: [ 0.603404] WARNING: suspicious RCU usage
00: [ 0.603408] 5.10.0-rc1-next-20201027 #1 Not tainted
00: [ 0.603409] -----------------------------
00: [ 0.603459] kernel/locking/lockdep.c:3497 RCU-list traversed in non-reade
00: r section!!
00: [ 0.603460]
00: [ 0.603460] other info that might help us debug this:
00: [ 0.603460]
00: [ 0.603462]
00: [ 0.603462] RCU used illegally from offline CPU!
00: [ 0.603462] rcu_scheduler_active = 1, debug_locks = 1
00: [ 0.603463] no locks held by swapper/1/0.
00: [ 0.603464]
00: [ 0.603464] stack backtrace:
00: [ 0.603467] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.10.0-rc1-next-202
00: 01027 #1
00: [ 0.603469] Hardware name: IBM 2964 N96 400 (z/VM 6.4.0)
00: [ 0.603471] Call Trace:
00: [ 0.603484] [<00000000d262a778>] show_stack+0x158/0x1f0
00: [ 0.603487] [<00000000d2635872>] dump_stack+0x1f2/0x238
00: [ 0.603491] [<00000000d167a550>] __lock_acquire+0x2640/0x4dd0
00: [ 0.603493] [<00000000d167eda8>] lock_acquire+0x3a8/0xd08
00: [ 0.603498] [<00000000d265b088>] _raw_spin_lock_irqsave+0xc0/0xf0
00: [ 0.603502] [<00000000d17103f8>] clockevents_register_device+0xa8/0x528
00:
00: [ 0.603516] [<00000000d14f5246>] init_cpu_timer+0x33e/0x468
00: [ 0.603521] [<00000000d151f44a>] smp_init_secondary+0x11a/0x328
00: [ 0.603525] [<00000000d151f32a>] smp_start_secondary+0x82/0x88

2020-10-29 08:45:04

by Paul E. McKenney

Subject: Re: [tip: locking/core] lockdep: Fix lockdep recursion

On Tue, Oct 27, 2020 at 03:31:41PM -0400, Qian Cai wrote:
> On Mon, 2020-10-12 at 11:11 +0800, Boqun Feng wrote:
> > Hi,
> >
> > On Fri, Oct 09, 2020 at 09:41:24AM -0400, Qian Cai wrote:
> > > On Fri, 2020-10-09 at 07:58 +0000, tip-bot2 for Peter Zijlstra wrote:
> > > > The following commit has been merged into the locking/core branch of tip:
> > > >
> > > > Commit-ID: 4d004099a668c41522242aa146a38cc4eb59cb1e
> > > > Gitweb:
> > > > https://git.kernel.org/tip/4d004099a668c41522242aa146a38cc4eb59cb1e
> > > > Author: Peter Zijlstra <[email protected]>
> > > > AuthorDate: Fri, 02 Oct 2020 11:04:21 +02:00
> > > > Committer: Ingo Molnar <[email protected]>
> > > > CommitterDate: Fri, 09 Oct 2020 08:53:30 +02:00
> > > >
> > > > lockdep: Fix lockdep recursion
> > > >
> > > > Steve reported that lockdep_assert*irq*(), when nested inside lockdep
> > > > itself, will trigger a false-positive.
> > > >
> > > > One example is the stack-trace code, as called from inside lockdep,
> > > > triggering tracing, which in turn calls RCU, which then uses
> > > > lockdep_assert_irqs_disabled().
> > > >
> > > > Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-
> > > > cpu
> > > > variables")
> > > > Reported-by: Steven Rostedt <[email protected]>
> > > > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > > > Signed-off-by: Ingo Molnar <[email protected]>
> > >
> > > Reverting this linux-next commit fixed booting RCU-list warnings everywhere.
> > >
> >
> > I think this happened because in this commit debug_lockdep_rcu_enabled()
> > didn't adapt to the change that made lockdep_recursion a percpu
> > variable?
> >
> > Qian, mind to try the following?
>
> Boqun, Paul, may I ask what's the latest with the fixes? I must admit that I got
> lost in this thread, but I remember that the patch from Boqun below at least
> silence quite some of those warnings if not all. The problem is that some of
> those warnings would trigger a lockdep circular locks warning due to printk()
> with some locks held which in turn disabling the lockdep, makes our test runs
> inefficient.

If I have the right email thread associated with the right fixes, these
commits in -rcu should be what you are looking for:

73b658b6b7d5 ("rcu: Prevent lockdep-RCU splats on lock acquisition/release")
626b79aa935a ("x86/smpboot: Move rcu_cpu_starting() earlier")

And maybe this one as well:

3a6f638cb95b ("rcu,ftrace: Fix ftrace recursion")

Please let me know if these commits do not fix things.

Thanx, Paul

> > Although, arguably the problem still exists, i.e. we still have an RCU
> > read-side critical section inside lock_acquire(), which may be called on
> > a yet-to-online CPU, which RCU doesn't watch. I think this used to be OK
> > because we don't "free" anything from lockdep, IOW, there is no
> > synchronize_rcu() or call_rcu() that _needs_ to wait for the RCU
> > read-side critical sections inside lockdep. But now we have lock class
> > recycling, so it might be a problem.
> >
> > That said, currently validate_chain() and lock class recycling are
> > mutually excluded via graph_lock, so we are safe for this one ;-)
> >
> > ----------->8
> > diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> > index 39334d2d2b37..35d9bab65b75 100644
> > --- a/kernel/rcu/update.c
> > +++ b/kernel/rcu/update.c
> > @@ -275,8 +275,8 @@ EXPORT_SYMBOL_GPL(rcu_callback_map);
> >
> > noinstr int notrace debug_lockdep_rcu_enabled(void)
> > {
> > - return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
> > - current->lockdep_recursion == 0;
> > + return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE &&
> > + __lockdep_enabled;
> > }
> > EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
>
>