If an interrupt is not masked by local_irq_disable (e.g., a powerpc perf
interrupt), then it can hit in local_irq_enable() after trace_hardirqs_on()
and before raw_local_irq_enable().
If that interrupt handler calls local_irq_save(), it will call
trace_hardirqs_off() but the local_irq_restore() will not call
trace_hardirqs_on() again because raw_irqs_disabled_flags(flags) is true.
This can lead lockdep_assert_irqs_enabled() to trigger false positive
warnings.
Fix this by being careful to only enable and disable trace_hardirqs with
the outer-most irq enable/disable.
Reported-by: Alexey Kardashevskiy <[email protected]>
Signed-off-by: Nicholas Piggin <[email protected]>
---
I haven't tested on other architectures but I imagine NMIs in general
might cause a similar problem.
Other architectures might have to be updated for patch 2, but there's
a lot of asm around interrupt/return, so I didn't have a very good
lock. The warnings should be harmless enough and uncover most places
that need updating.
arch/powerpc/include/asm/hw_irq.h | 11 ++++-------
include/linux/irqflags.h | 29 ++++++++++++++++++-----------
2 files changed, 22 insertions(+), 18 deletions(-)
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 3a0db7b0b46e..35060be09073 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -200,17 +200,14 @@ static inline bool arch_irqs_disabled(void)
#define powerpc_local_irq_pmu_save(flags) \
do { \
raw_local_irq_pmu_save(flags); \
- trace_hardirqs_off(); \
+ if (!raw_irqs_disabled_flags(flags)) \
+ trace_hardirqs_off(); \
} while(0)
#define powerpc_local_irq_pmu_restore(flags) \
do { \
- if (raw_irqs_disabled_flags(flags)) { \
- raw_local_irq_pmu_restore(flags); \
- trace_hardirqs_off(); \
- } else { \
+ if (!raw_irqs_disabled_flags(flags)) \
trace_hardirqs_on(); \
- raw_local_irq_pmu_restore(flags); \
- } \
+ raw_local_irq_pmu_restore(flags); \
} while(0)
#else
#define powerpc_local_irq_pmu_save(flags) \
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 6384d2813ded..571ee29ecefc 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -163,26 +163,33 @@ do { \
* if !TRACE_IRQFLAGS.
*/
#ifdef CONFIG_TRACE_IRQFLAGS
-#define local_irq_enable() \
- do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
-#define local_irq_disable() \
- do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
+#define local_irq_enable() \
+ do { \
+ trace_hardirqs_on(); \
+ raw_local_irq_enable(); \
+ } while (0)
+
+#define local_irq_disable() \
+ do { \
+ bool was_disabled = raw_irqs_disabled(); \
+ raw_local_irq_disable(); \
+ if (!was_disabled) \
+ trace_hardirqs_off(); \
+ } while (0)
+
#define local_irq_save(flags) \
do { \
raw_local_irq_save(flags); \
- trace_hardirqs_off(); \
+ if (!raw_irqs_disabled_flags(flags)) \
+ trace_hardirqs_off(); \
} while (0)
#define local_irq_restore(flags) \
do { \
- if (raw_irqs_disabled_flags(flags)) { \
- raw_local_irq_restore(flags); \
- trace_hardirqs_off(); \
- } else { \
+ if (!raw_irqs_disabled_flags(flags)) \
trace_hardirqs_on(); \
- raw_local_irq_restore(flags); \
- } \
+ raw_local_irq_restore(flags); \
} while (0)
#define safe_halt() \
--
2.23.0
With the previous patch, lockdep hardirq state changes should not be
redundant. Softirq state changes already follow that pattern.
So warn on unexpected enable-when-enabled or disable-when-disabled
conditions, to catch possible errors or sloppy patterns that could
lead to similar bad behavior due to NMIs etc.
Signed-off-by: Nicholas Piggin <[email protected]>
---
kernel/locking/lockdep.c | 80 +++++++++++++-----------------
kernel/locking/lockdep_internals.h | 4 --
kernel/locking/lockdep_proc.c | 10 +---
3 files changed, 35 insertions(+), 59 deletions(-)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 29a8de4c50b9..138458fb2234 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3649,15 +3649,8 @@ void lockdep_hardirqs_on_prepare(unsigned long ip)
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
- if (unlikely(current->hardirqs_enabled)) {
- /*
- * Neither irq nor preemption are disabled here
- * so this is racy by nature but losing one hit
- * in a stat is not a big deal.
- */
- __debug_atomic_inc(redundant_hardirqs_on);
+ if (DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled))
return;
- }
/*
* We're enabling irqs and according to our state above irqs weren't
@@ -3695,15 +3688,8 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
if (unlikely(!debug_locks || curr->lockdep_recursion))
return;
- if (curr->hardirqs_enabled) {
- /*
- * Neither irq nor preemption are disabled here
- * so this is racy by nature but losing one hit
- * in a stat is not a big deal.
- */
- __debug_atomic_inc(redundant_hardirqs_on);
+ if (DEBUG_LOCKS_WARN_ON(curr->hardirqs_enabled))
return;
- }
/*
* We're enabling irqs and according to our state above irqs weren't
@@ -3738,6 +3724,9 @@ void noinstr lockdep_hardirqs_off(unsigned long ip)
if (unlikely(!debug_locks || curr->lockdep_recursion))
return;
+ if (DEBUG_LOCKS_WARN_ON(!curr->hardirqs_enabled))
+ return;
+
/*
* So we're supposed to get called after you mask local IRQs, but for
* some reason the hardware doesn't quite think you did a proper job.
@@ -3745,17 +3734,13 @@ void noinstr lockdep_hardirqs_off(unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;
- if (curr->hardirqs_enabled) {
- /*
- * We have done an ON -> OFF transition:
- */
- curr->hardirqs_enabled = 0;
- curr->hardirq_disable_ip = ip;
- curr->hardirq_disable_event = ++curr->irq_events;
- debug_atomic_inc(hardirqs_off_events);
- } else {
- debug_atomic_inc(redundant_hardirqs_off);
- }
+ /*
+ * We have done an ON -> OFF transition:
+ */
+ curr->hardirqs_enabled = 0;
+ curr->hardirq_disable_ip = ip;
+ curr->hardirq_disable_event = ++curr->irq_events;
+ debug_atomic_inc(hardirqs_off_events);
}
EXPORT_SYMBOL_GPL(lockdep_hardirqs_off);
@@ -3769,6 +3754,9 @@ void lockdep_softirqs_on(unsigned long ip)
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
+ if (DEBUG_LOCKS_WARN_ON(curr->softirqs_enabled))
+ return;
+
/*
* We fancy IRQs being disabled here, see softirq.c, avoids
* funny state and nesting things.
@@ -3776,11 +3764,6 @@ void lockdep_softirqs_on(unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;
- if (curr->softirqs_enabled) {
- debug_atomic_inc(redundant_softirqs_on);
- return;
- }
-
current->lockdep_recursion++;
/*
* We'll do an OFF -> ON transition:
@@ -3809,26 +3792,26 @@ void lockdep_softirqs_off(unsigned long ip)
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
+ if (DEBUG_LOCKS_WARN_ON(!curr->softirqs_enabled))
+ return;
+
/*
* We fancy IRQs being disabled here, see softirq.c
*/
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;
- if (curr->softirqs_enabled) {
- /*
- * We have done an ON -> OFF transition:
- */
- curr->softirqs_enabled = 0;
- curr->softirq_disable_ip = ip;
- curr->softirq_disable_event = ++curr->irq_events;
- debug_atomic_inc(softirqs_off_events);
- /*
- * Whoops, we wanted softirqs off, so why aren't they?
- */
- DEBUG_LOCKS_WARN_ON(!softirq_count());
- } else
- debug_atomic_inc(redundant_softirqs_off);
+ /*
+ * We have done an ON -> OFF transition:
+ */
+ curr->softirqs_enabled = 0;
+ curr->softirq_disable_ip = ip;
+ curr->softirq_disable_event = ++curr->irq_events;
+ debug_atomic_inc(softirqs_off_events);
+ /*
+ * Whoops, we wanted softirqs off, so why aren't they?
+ */
+ DEBUG_LOCKS_WARN_ON(!softirq_count());
}
static int
@@ -5684,6 +5667,11 @@ void __init lockdep_init(void)
printk(" per task-struct memory footprint: %zu bytes\n",
sizeof(((struct task_struct *)NULL)->held_locks));
+
+ WARN_ON(irqs_disabled());
+
+ current->hardirqs_enabled = 1;
+ current->softirqs_enabled = 1;
}
static void
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index baca699b94e9..6dd8b1f06dc4 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -180,12 +180,8 @@ struct lockdep_stats {
unsigned int chain_lookup_misses;
unsigned long hardirqs_on_events;
unsigned long hardirqs_off_events;
- unsigned long redundant_hardirqs_on;
- unsigned long redundant_hardirqs_off;
unsigned long softirqs_on_events;
unsigned long softirqs_off_events;
- unsigned long redundant_softirqs_on;
- unsigned long redundant_softirqs_off;
int nr_unused_locks;
unsigned int nr_redundant_checks;
unsigned int nr_redundant;
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index 5525cd3ba0c8..98f204220ed9 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -172,12 +172,8 @@ static void lockdep_stats_debug_show(struct seq_file *m)
#ifdef CONFIG_DEBUG_LOCKDEP
unsigned long long hi1 = debug_atomic_read(hardirqs_on_events),
hi2 = debug_atomic_read(hardirqs_off_events),
- hr1 = debug_atomic_read(redundant_hardirqs_on),
- hr2 = debug_atomic_read(redundant_hardirqs_off),
si1 = debug_atomic_read(softirqs_on_events),
- si2 = debug_atomic_read(softirqs_off_events),
- sr1 = debug_atomic_read(redundant_softirqs_on),
- sr2 = debug_atomic_read(redundant_softirqs_off);
+ si2 = debug_atomic_read(softirqs_off_events);
seq_printf(m, " chain lookup misses: %11llu\n",
debug_atomic_read(chain_lookup_misses));
@@ -196,12 +192,8 @@ static void lockdep_stats_debug_show(struct seq_file *m)
seq_printf(m, " hardirq on events: %11llu\n", hi1);
seq_printf(m, " hardirq off events: %11llu\n", hi2);
- seq_printf(m, " redundant hardirq ons: %11llu\n", hr1);
- seq_printf(m, " redundant hardirq offs: %11llu\n", hr2);
seq_printf(m, " softirq on events: %11llu\n", si1);
seq_printf(m, " softirq off events: %11llu\n", si2);
- seq_printf(m, " redundant softirq ons: %11llu\n", sr1);
- seq_printf(m, " redundant softirq offs: %11llu\n", sr2);
#endif
}
--
2.23.0
On Thu, Jul 23, 2020 at 08:56:14PM +1000, Nicholas Piggin wrote:
> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
> index 3a0db7b0b46e..35060be09073 100644
> --- a/arch/powerpc/include/asm/hw_irq.h
> +++ b/arch/powerpc/include/asm/hw_irq.h
> @@ -200,17 +200,14 @@ static inline bool arch_irqs_disabled(void)
> #define powerpc_local_irq_pmu_save(flags) \
> do { \
> raw_local_irq_pmu_save(flags); \
> - trace_hardirqs_off(); \
> + if (!raw_irqs_disabled_flags(flags)) \
> + trace_hardirqs_off(); \
> } while(0)
> #define powerpc_local_irq_pmu_restore(flags) \
> do { \
> - if (raw_irqs_disabled_flags(flags)) { \
> - raw_local_irq_pmu_restore(flags); \
> - trace_hardirqs_off(); \
> - } else { \
> + if (!raw_irqs_disabled_flags(flags)) \
> trace_hardirqs_on(); \
> - raw_local_irq_pmu_restore(flags); \
> - } \
> + raw_local_irq_pmu_restore(flags); \
> } while(0)
You shouldn't be calling lockdep from NMI context! That is, I recently
added suport for that on x86:
https://lkml.kernel.org/r/[email protected]
https://lkml.kernel.org/r/[email protected]
But you need to be very careful on how you order things, as you can see
the above relies on preempt_count() already having been incremented with
NMI_MASK.
Excerpts from Peter Zijlstra's message of July 23, 2020 9:40 pm:
> On Thu, Jul 23, 2020 at 08:56:14PM +1000, Nicholas Piggin wrote:
>
>> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
>> index 3a0db7b0b46e..35060be09073 100644
>> --- a/arch/powerpc/include/asm/hw_irq.h
>> +++ b/arch/powerpc/include/asm/hw_irq.h
>> @@ -200,17 +200,14 @@ static inline bool arch_irqs_disabled(void)
>> #define powerpc_local_irq_pmu_save(flags) \
>> do { \
>> raw_local_irq_pmu_save(flags); \
>> - trace_hardirqs_off(); \
>> + if (!raw_irqs_disabled_flags(flags)) \
>> + trace_hardirqs_off(); \
>> } while(0)
>> #define powerpc_local_irq_pmu_restore(flags) \
>> do { \
>> - if (raw_irqs_disabled_flags(flags)) { \
>> - raw_local_irq_pmu_restore(flags); \
>> - trace_hardirqs_off(); \
>> - } else { \
>> + if (!raw_irqs_disabled_flags(flags)) \
>> trace_hardirqs_on(); \
>> - raw_local_irq_pmu_restore(flags); \
>> - } \
>> + raw_local_irq_pmu_restore(flags); \
>> } while(0)
>
> You shouldn't be calling lockdep from NMI context!
After this patch it doesn't.
trace_hardirqs_on/off implementation appears to expect to be called in NMI
context though, for some reason.
> That is, I recently
> added suport for that on x86:
>
> https://lkml.kernel.org/r/[email protected]
> https://lkml.kernel.org/r/[email protected]
>
> But you need to be very careful on how you order things, as you can see
> the above relies on preempt_count() already having been incremented with
> NMI_MASK.
Hmm. My patch seems simpler.
I don't know this stuff very well, I don't really understand what your patch
enables for x86 but at least it shouldn't be incompatible with this one
AFAIKS.
Thanks,
Nick
On Thu, Jul 23, 2020 at 11:11:03PM +1000, Nicholas Piggin wrote:
> Excerpts from Peter Zijlstra's message of July 23, 2020 9:40 pm:
> > On Thu, Jul 23, 2020 at 08:56:14PM +1000, Nicholas Piggin wrote:
> >
> >> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
> >> index 3a0db7b0b46e..35060be09073 100644
> >> --- a/arch/powerpc/include/asm/hw_irq.h
> >> +++ b/arch/powerpc/include/asm/hw_irq.h
> >> @@ -200,17 +200,14 @@ static inline bool arch_irqs_disabled(void)
> >> #define powerpc_local_irq_pmu_save(flags) \
> >> do { \
> >> raw_local_irq_pmu_save(flags); \
> >> - trace_hardirqs_off(); \
> >> + if (!raw_irqs_disabled_flags(flags)) \
> >> + trace_hardirqs_off(); \
> >> } while(0)
> >> #define powerpc_local_irq_pmu_restore(flags) \
> >> do { \
> >> - if (raw_irqs_disabled_flags(flags)) { \
> >> - raw_local_irq_pmu_restore(flags); \
> >> - trace_hardirqs_off(); \
> >> - } else { \
> >> + if (!raw_irqs_disabled_flags(flags)) \
> >> trace_hardirqs_on(); \
> >> - raw_local_irq_pmu_restore(flags); \
> >> - } \
> >> + raw_local_irq_pmu_restore(flags); \
> >> } while(0)
> >
> > You shouldn't be calling lockdep from NMI context!
>
> After this patch it doesn't.
You sure, trace_hardirqs_{on,off}() calls into lockdep. (FWIW they're
also broken vs entry ordering, but that's another story).
> trace_hardirqs_on/off implementation appears to expect to be called in NMI
> context though, for some reason.
Hurpm, not sure.. I'll have to go grep arch code now :/ The generic NMI
code didn't touch that stuff.
Argh, yes, there might be broken there... damn! I'll go frob around.
Excerpts from Peter Zijlstra's message of July 24, 2020 12:59 am:
> On Thu, Jul 23, 2020 at 11:11:03PM +1000, Nicholas Piggin wrote:
>> Excerpts from Peter Zijlstra's message of July 23, 2020 9:40 pm:
>> > On Thu, Jul 23, 2020 at 08:56:14PM +1000, Nicholas Piggin wrote:
>> >
>> >> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
>> >> index 3a0db7b0b46e..35060be09073 100644
>> >> --- a/arch/powerpc/include/asm/hw_irq.h
>> >> +++ b/arch/powerpc/include/asm/hw_irq.h
>> >> @@ -200,17 +200,14 @@ static inline bool arch_irqs_disabled(void)
>> >> #define powerpc_local_irq_pmu_save(flags) \
>> >> do { \
>> >> raw_local_irq_pmu_save(flags); \
>> >> - trace_hardirqs_off(); \
>> >> + if (!raw_irqs_disabled_flags(flags)) \
>> >> + trace_hardirqs_off(); \
>> >> } while(0)
>> >> #define powerpc_local_irq_pmu_restore(flags) \
>> >> do { \
>> >> - if (raw_irqs_disabled_flags(flags)) { \
>> >> - raw_local_irq_pmu_restore(flags); \
>> >> - trace_hardirqs_off(); \
>> >> - } else { \
>> >> + if (!raw_irqs_disabled_flags(flags)) \
>> >> trace_hardirqs_on(); \
>> >> - raw_local_irq_pmu_restore(flags); \
>> >> - } \
>> >> + raw_local_irq_pmu_restore(flags); \
>> >> } while(0)
>> >
>> > You shouldn't be calling lockdep from NMI context!
>>
>> After this patch it doesn't.
>
> You sure, trace_hardirqs_{on,off}() calls into lockdep.
At least for irq enable/disable functions yes. NMI runs with
interrupts disabled so these will never call trace_hardirqs_on/off
after this patch.
> (FWIW they're
> also broken vs entry ordering, but that's another story).
>
>> trace_hardirqs_on/off implementation appears to expect to be called in NMI
>> context though, for some reason.
>
> Hurpm, not sure.. I'll have to go grep arch code now :/ The generic NMI
> code didn't touch that stuff.
>
> Argh, yes, there might be broken there... damn! I'll go frob around.
>
Thanks,
Nick
Hi Nicholas,
I love your patch! Yet something to improve:
[auto build test ERROR on linux/master]
[also build test ERROR on powerpc/next linus/master v5.8-rc6 next-20200723]
[cannot apply to tip/locking/core]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/lockdep-improve-current-hard-soft-irqs_enabled-synchronisation-with-actual-irq-state/20200723-185938
base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 9ebcfadb0610322ac537dd7aa5d9cbc2b2894c68
config: nds32-allyesconfig (attached as .config)
compiler: nds32le-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=nds32
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
All errors (new ones prefixed by >>):
In file included from include/asm-generic/bitops.h:14,
from ./arch/nds32/include/generated/asm/bitops.h:1,
from include/linux/bitops.h:29,
from include/linux/kernel.h:12,
from include/linux/list.h:9,
from include/linux/rculist.h:10,
from include/linux/pid.h:5,
from include/linux/sched.h:14,
from arch/nds32/kernel/asm-offsets.c:4:
include/linux/spinlock_api_smp.h: In function '__raw_spin_lock_irq':
>> include/linux/irqflags.h:158:31: error: implicit declaration of function 'arch_irqs_disabled'; did you mean 'raw_irqs_disabled'? [-Werror=implicit-function-declaration]
158 | #define raw_irqs_disabled() (arch_irqs_disabled())
| ^~~~~~~~~~~~~~~~~~
include/linux/irqflags.h:174:23: note: in expansion of macro 'raw_irqs_disabled'
174 | bool was_disabled = raw_irqs_disabled(); \
| ^~~~~~~~~~~~~~~~~
include/linux/spinlock_api_smp.h:126:2: note: in expansion of macro 'local_irq_disable'
126 | local_irq_disable();
| ^~~~~~~~~~~~~~~~~
cc1: some warnings being treated as errors
make[2]: *** [scripts/Makefile.build:114: arch/nds32/kernel/asm-offsets.s] Error 1
make[2]: Target '__build' not remade because of errors.
make[1]: *** [Makefile:1175: prepare0] Error 2
make[1]: Target 'prepare' not remade because of errors.
make: *** [Makefile:185: __sub-make] Error 2
make: Target 'prepare' not remade because of errors.
vim +158 include/linux/irqflags.h
81d68a96a398448 Steven Rostedt 2008-05-12 132
df9ee29270c11db David Howells 2010-10-07 133 /*
df9ee29270c11db David Howells 2010-10-07 134 * Wrap the arch provided IRQ routines to provide appropriate checks.
df9ee29270c11db David Howells 2010-10-07 135 */
df9ee29270c11db David Howells 2010-10-07 136 #define raw_local_irq_disable() arch_local_irq_disable()
df9ee29270c11db David Howells 2010-10-07 137 #define raw_local_irq_enable() arch_local_irq_enable()
df9ee29270c11db David Howells 2010-10-07 138 #define raw_local_irq_save(flags) \
df9ee29270c11db David Howells 2010-10-07 139 do { \
df9ee29270c11db David Howells 2010-10-07 140 typecheck(unsigned long, flags); \
df9ee29270c11db David Howells 2010-10-07 141 flags = arch_local_irq_save(); \
df9ee29270c11db David Howells 2010-10-07 142 } while (0)
df9ee29270c11db David Howells 2010-10-07 143 #define raw_local_irq_restore(flags) \
df9ee29270c11db David Howells 2010-10-07 144 do { \
df9ee29270c11db David Howells 2010-10-07 145 typecheck(unsigned long, flags); \
df9ee29270c11db David Howells 2010-10-07 146 arch_local_irq_restore(flags); \
df9ee29270c11db David Howells 2010-10-07 147 } while (0)
df9ee29270c11db David Howells 2010-10-07 148 #define raw_local_save_flags(flags) \
df9ee29270c11db David Howells 2010-10-07 149 do { \
df9ee29270c11db David Howells 2010-10-07 150 typecheck(unsigned long, flags); \
df9ee29270c11db David Howells 2010-10-07 151 flags = arch_local_save_flags(); \
df9ee29270c11db David Howells 2010-10-07 152 } while (0)
df9ee29270c11db David Howells 2010-10-07 153 #define raw_irqs_disabled_flags(flags) \
df9ee29270c11db David Howells 2010-10-07 154 ({ \
df9ee29270c11db David Howells 2010-10-07 155 typecheck(unsigned long, flags); \
df9ee29270c11db David Howells 2010-10-07 156 arch_irqs_disabled_flags(flags); \
df9ee29270c11db David Howells 2010-10-07 157 })
df9ee29270c11db David Howells 2010-10-07 @158 #define raw_irqs_disabled() (arch_irqs_disabled())
df9ee29270c11db David Howells 2010-10-07 159 #define raw_safe_halt() arch_safe_halt()
de30a2b355ea853 Ingo Molnar 2006-07-03 160
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
Hi Nicholas,
I love your patch! Yet something to improve:
[auto build test ERROR on linux/master]
[also build test ERROR on powerpc/next linus/master v5.8-rc6]
[cannot apply to tip/locking/core next-20200723]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/lockdep-improve-current-hard-soft-irqs_enabled-synchronisation-with-actual-irq-state/20200723-185938
base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 9ebcfadb0610322ac537dd7aa5d9cbc2b2894c68
config: x86_64-randconfig-a002-20200723 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-14) 9.3.0
reproduce (this is a W=1 build):
# save the attached .config to linux build tree
make W=1 ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
All errors (new ones prefixed by >>):
kernel/locking/lockdep.c: In function 'lockdep_init':
>> kernel/locking/lockdep.c:5673:9: error: 'struct task_struct' has no member named 'hardirqs_enabled'
5673 | current->hardirqs_enabled = 1;
| ^~
>> kernel/locking/lockdep.c:5674:9: error: 'struct task_struct' has no member named 'softirqs_enabled'
5674 | current->softirqs_enabled = 1;
| ^~
In file included from kernel/locking/lockdep.c:60:
At top level:
kernel/locking/lockdep_internals.h:64:28: warning: 'LOCKF_USED_IN_IRQ_READ' defined but not used [-Wunused-const-variable=]
64 | static const unsigned long LOCKF_USED_IN_IRQ_READ =
| ^~~~~~~~~~~~~~~~~~~~~~
In file included from kernel/locking/lockdep.c:60:
kernel/locking/lockdep_internals.h:58:28: warning: 'LOCKF_ENABLED_IRQ_READ' defined but not used [-Wunused-const-variable=]
58 | static const unsigned long LOCKF_ENABLED_IRQ_READ =
| ^~~~~~~~~~~~~~~~~~~~~~
In file included from kernel/locking/lockdep.c:60:
kernel/locking/lockdep_internals.h:52:28: warning: 'LOCKF_USED_IN_IRQ' defined but not used [-Wunused-const-variable=]
52 | static const unsigned long LOCKF_USED_IN_IRQ =
| ^~~~~~~~~~~~~~~~~
In file included from kernel/locking/lockdep.c:60:
kernel/locking/lockdep_internals.h:46:28: warning: 'LOCKF_ENABLED_IRQ' defined but not used [-Wunused-const-variable=]
46 | static const unsigned long LOCKF_ENABLED_IRQ =
| ^~~~~~~~~~~~~~~~~
vim +5673 kernel/locking/lockdep.c
5667
5668 printk(" per task-struct memory footprint: %zu bytes\n",
5669 sizeof(((struct task_struct *)NULL)->held_locks));
5670
5671 WARN_ON(irqs_disabled());
5672
> 5673 current->hardirqs_enabled = 1;
> 5674 current->softirqs_enabled = 1;
5675 }
5676
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
Hi Nicholas,
I love your patch! Perhaps something to improve:
[auto build test WARNING on linux/master]
[also build test WARNING on powerpc/next linus/master v5.8-rc6 next-20200723]
[cannot apply to tip/locking/core]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/lockdep-improve-current-hard-soft-irqs_enabled-synchronisation-with-actual-irq-state/20200723-185938
base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 9ebcfadb0610322ac537dd7aa5d9cbc2b2894c68
config: i386-randconfig-s001-20200723 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-14) 9.3.0
reproduce:
# apt-get install sparse
# sparse version: v0.6.2-93-g4c6cbe55-dirty
# save the attached .config to linux build tree
make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
sparse warnings: (new ones prefixed by >>)
kernel/locking/spinlock.c:149:17: sparse: sparse: context imbalance in '_raw_spin_lock' - wrong count at exit
kernel/locking/spinlock.c: note: in included file (through include/linux/preempt.h):
>> arch/x86/include/asm/preempt.h:79:9: sparse: sparse: context imbalance in '_raw_spin_lock_irqsave' - wrong count at exit
>> arch/x86/include/asm/preempt.h:79:9: sparse: sparse: context imbalance in '_raw_spin_lock_irq' - wrong count at exit
kernel/locking/spinlock.c:173:17: sparse: sparse: context imbalance in '_raw_spin_lock_bh' - wrong count at exit
kernel/locking/spinlock.c:181:17: sparse: sparse: context imbalance in '_raw_spin_unlock' - unexpected unlock
kernel/locking/spinlock.c:189:17: sparse: sparse: context imbalance in '_raw_spin_unlock_irqrestore' - unexpected unlock
kernel/locking/spinlock.c:197:17: sparse: sparse: context imbalance in '_raw_spin_unlock_irq' - unexpected unlock
kernel/locking/spinlock.c:205:17: sparse: sparse: context imbalance in '_raw_spin_unlock_bh' - unexpected unlock
kernel/locking/spinlock.c:221:17: sparse: sparse: context imbalance in '_raw_read_lock' - wrong count at exit
>> arch/x86/include/asm/preempt.h:79:9: sparse: sparse: context imbalance in '_raw_read_lock_irqsave' - wrong count at exit
>> arch/x86/include/asm/preempt.h:79:9: sparse: sparse: context imbalance in '_raw_read_lock_irq' - wrong count at exit
kernel/locking/spinlock.c:245:17: sparse: sparse: context imbalance in '_raw_read_lock_bh' - wrong count at exit
kernel/locking/spinlock.c:253:17: sparse: sparse: context imbalance in '_raw_read_unlock' - unexpected unlock
kernel/locking/spinlock.c:261:17: sparse: sparse: context imbalance in '_raw_read_unlock_irqrestore' - unexpected unlock
kernel/locking/spinlock.c:269:17: sparse: sparse: context imbalance in '_raw_read_unlock_irq' - unexpected unlock
kernel/locking/spinlock.c:277:17: sparse: sparse: context imbalance in '_raw_read_unlock_bh' - unexpected unlock
kernel/locking/spinlock.c:293:17: sparse: sparse: context imbalance in '_raw_write_lock' - wrong count at exit
>> arch/x86/include/asm/preempt.h:79:9: sparse: sparse: context imbalance in '_raw_write_lock_irqsave' - wrong count at exit
>> arch/x86/include/asm/preempt.h:79:9: sparse: sparse: context imbalance in '_raw_write_lock_irq' - wrong count at exit
kernel/locking/spinlock.c:317:17: sparse: sparse: context imbalance in '_raw_write_lock_bh' - wrong count at exit
kernel/locking/spinlock.c:325:17: sparse: sparse: context imbalance in '_raw_write_unlock' - unexpected unlock
kernel/locking/spinlock.c:333:17: sparse: sparse: context imbalance in '_raw_write_unlock_irqrestore' - unexpected unlock
kernel/locking/spinlock.c:341:17: sparse: sparse: context imbalance in '_raw_write_unlock_irq' - unexpected unlock
kernel/locking/spinlock.c:349:17: sparse: sparse: context imbalance in '_raw_write_unlock_bh' - unexpected unlock
kernel/locking/spinlock.c:358:17: sparse: sparse: context imbalance in '_raw_spin_lock_nested' - wrong count at exit
>> arch/x86/include/asm/preempt.h:79:9: sparse: sparse: context imbalance in '_raw_spin_lock_irqsave_nested' - wrong count at exit
kernel/locking/spinlock.c:380:17: sparse: sparse: context imbalance in '_raw_spin_lock_nest_lock' - wrong count at exit
--
kernel/trace/ring_buffer.c:699:32: sparse: sparse: incorrect type in return expression (different base types) @@ expected restricted __poll_t @@ got int @@
kernel/trace/ring_buffer.c:699:32: sparse: expected restricted __poll_t
kernel/trace/ring_buffer.c:699:32: sparse: got int
kernel/trace/ring_buffer.c: note: in included file (through include/linux/irqflags.h, arch/x86/include/asm/special_insns.h, arch/x86/include/asm/processor.h, ...):
>> arch/x86/include/asm/irqflags.h:162:28: sparse: sparse: context imbalance in 'ring_buffer_peek' - different lock contexts for basic block
>> arch/x86/include/asm/irqflags.h:162:28: sparse: sparse: context imbalance in 'ring_buffer_consume' - different lock contexts for basic block
>> arch/x86/include/asm/irqflags.h:162:28: sparse: sparse: context imbalance in 'ring_buffer_empty' - different lock contexts for basic block
>> arch/x86/include/asm/irqflags.h:162:28: sparse: sparse: context imbalance in 'ring_buffer_empty_cpu' - different lock contexts for basic block
vim +/_raw_spin_lock_irqsave +79 arch/x86/include/asm/preempt.h
c2daa3bed53a811 Peter Zijlstra 2013-08-14 72
c2daa3bed53a811 Peter Zijlstra 2013-08-14 73 /*
c2daa3bed53a811 Peter Zijlstra 2013-08-14 74 * The various preempt_count add/sub methods
c2daa3bed53a811 Peter Zijlstra 2013-08-14 75 */
c2daa3bed53a811 Peter Zijlstra 2013-08-14 76
c2daa3bed53a811 Peter Zijlstra 2013-08-14 77 static __always_inline void __preempt_count_add(int val)
c2daa3bed53a811 Peter Zijlstra 2013-08-14 78 {
b3ca1c10d7b32fd Christoph Lameter 2014-04-07 @79 raw_cpu_add_4(__preempt_count, val);
c2daa3bed53a811 Peter Zijlstra 2013-08-14 80 }
c2daa3bed53a811 Peter Zijlstra 2013-08-14 81
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
On 23/07/2020 23:11, Nicholas Piggin wrote:
> Excerpts from Peter Zijlstra's message of July 23, 2020 9:40 pm:
>> On Thu, Jul 23, 2020 at 08:56:14PM +1000, Nicholas Piggin wrote:
>>
>>> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
>>> index 3a0db7b0b46e..35060be09073 100644
>>> --- a/arch/powerpc/include/asm/hw_irq.h
>>> +++ b/arch/powerpc/include/asm/hw_irq.h
>>> @@ -200,17 +200,14 @@ static inline bool arch_irqs_disabled(void)
>>> #define powerpc_local_irq_pmu_save(flags) \
>>> do { \
>>> raw_local_irq_pmu_save(flags); \
>>> - trace_hardirqs_off(); \
>>> + if (!raw_irqs_disabled_flags(flags)) \
>>> + trace_hardirqs_off(); \
>>> } while(0)
>>> #define powerpc_local_irq_pmu_restore(flags) \
>>> do { \
>>> - if (raw_irqs_disabled_flags(flags)) { \
>>> - raw_local_irq_pmu_restore(flags); \
>>> - trace_hardirqs_off(); \
>>> - } else { \
>>> + if (!raw_irqs_disabled_flags(flags)) \
>>> trace_hardirqs_on(); \
>>> - raw_local_irq_pmu_restore(flags); \
>>> - } \
>>> + raw_local_irq_pmu_restore(flags); \
>>> } while(0)
>>
>> You shouldn't be calling lockdep from NMI context!
>
> After this patch it doesn't.
>
> trace_hardirqs_on/off implementation appears to expect to be called in NMI
> context though, for some reason.
>
>> That is, I recently
>> added suport for that on x86:
>>
>> https://lkml.kernel.org/r/[email protected]
>> https://lkml.kernel.org/r/[email protected]
>>
>> But you need to be very careful on how you order things, as you can see
>> the above relies on preempt_count() already having been incremented with
>> NMI_MASK.
>
> Hmm. My patch seems simpler.
And your patches fix my error while Peter's do not:
IRQs not enabled as expected
WARNING: CPU: 0 PID: 1377 at /home/aik/p/kernel/kernel/softirq.c:169
__local_bh_enable_ip+0x118/0x190
>
> I don't know this stuff very well, I don't really understand what your patch
> enables for x86 but at least it shouldn't be incompatible with this one
> AFAIKS.
>
> Thanks,
> Nick
>
--
Alexey
Excerpts from Alexey Kardashevskiy's message of July 24, 2020 2:16 pm:
>
>
> On 23/07/2020 23:11, Nicholas Piggin wrote:
>> Excerpts from Peter Zijlstra's message of July 23, 2020 9:40 pm:
>>> On Thu, Jul 23, 2020 at 08:56:14PM +1000, Nicholas Piggin wrote:
>>>
>>>> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
>>>> index 3a0db7b0b46e..35060be09073 100644
>>>> --- a/arch/powerpc/include/asm/hw_irq.h
>>>> +++ b/arch/powerpc/include/asm/hw_irq.h
>>>> @@ -200,17 +200,14 @@ static inline bool arch_irqs_disabled(void)
>>>> #define powerpc_local_irq_pmu_save(flags) \
>>>> do { \
>>>> raw_local_irq_pmu_save(flags); \
>>>> - trace_hardirqs_off(); \
>>>> + if (!raw_irqs_disabled_flags(flags)) \
>>>> + trace_hardirqs_off(); \
>>>> } while(0)
>>>> #define powerpc_local_irq_pmu_restore(flags) \
>>>> do { \
>>>> - if (raw_irqs_disabled_flags(flags)) { \
>>>> - raw_local_irq_pmu_restore(flags); \
>>>> - trace_hardirqs_off(); \
>>>> - } else { \
>>>> + if (!raw_irqs_disabled_flags(flags)) \
>>>> trace_hardirqs_on(); \
>>>> - raw_local_irq_pmu_restore(flags); \
>>>> - } \
>>>> + raw_local_irq_pmu_restore(flags); \
>>>> } while(0)
>>>
>>> You shouldn't be calling lockdep from NMI context!
>>
>> After this patch it doesn't.
>>
>> trace_hardirqs_on/off implementation appears to expect to be called in NMI
>> context though, for some reason.
>>
>>> That is, I recently
>>> added suport for that on x86:
>>>
>>> https://lkml.kernel.org/r/[email protected]
>>> https://lkml.kernel.org/r/[email protected]
>>>
>>> But you need to be very careful on how you order things, as you can see
>>> the above relies on preempt_count() already having been incremented with
>>> NMI_MASK.
>>
>> Hmm. My patch seems simpler.
>
> And your patches fix my error while Peter's do not:
>
>
> IRQs not enabled as expected
> WARNING: CPU: 0 PID: 1377 at /home/aik/p/kernel/kernel/softirq.c:169
> __local_bh_enable_ip+0x118/0x190
I think they would have needed some powerpc bits as well. But I don't
see a reason we can't merge my patches, at least they fix this case and
don't seem to make things worse in any way.
Thanks,
Nick
> On 24-Jul-2020, at 9:46 AM, Alexey Kardashevskiy <[email protected]> wrote:
>
>
>
> On 23/07/2020 23:11, Nicholas Piggin wrote:
>> Excerpts from Peter Zijlstra's message of July 23, 2020 9:40 pm:
>>> On Thu, Jul 23, 2020 at 08:56:14PM +1000, Nicholas Piggin wrote:
>>>
>>>> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
>>>> index 3a0db7b0b46e..35060be09073 100644
>>>> --- a/arch/powerpc/include/asm/hw_irq.h
>>>> +++ b/arch/powerpc/include/asm/hw_irq.h
>>>> @@ -200,17 +200,14 @@ static inline bool arch_irqs_disabled(void)
>>>> #define powerpc_local_irq_pmu_save(flags) \
>>>> do { \
>>>> raw_local_irq_pmu_save(flags); \
>>>> - trace_hardirqs_off(); \
>>>> + if (!raw_irqs_disabled_flags(flags)) \
>>>> + trace_hardirqs_off(); \
>>>> } while(0)
>>>> #define powerpc_local_irq_pmu_restore(flags) \
>>>> do { \
>>>> - if (raw_irqs_disabled_flags(flags)) { \
>>>> - raw_local_irq_pmu_restore(flags); \
>>>> - trace_hardirqs_off(); \
>>>> - } else { \
>>>> + if (!raw_irqs_disabled_flags(flags)) \
>>>> trace_hardirqs_on(); \
>>>> - raw_local_irq_pmu_restore(flags); \
>>>> - } \
>>>> + raw_local_irq_pmu_restore(flags); \
>>>> } while(0)
>>>
>>> You shouldn't be calling lockdep from NMI context!
>>
>> After this patch it doesn't.
>>
>> trace_hardirqs_on/off implementation appears to expect to be called in NMI
>> context though, for some reason.
>>
>>> That is, I recently
>>> added suport for that on x86:
>>>
>>> https://lkml.kernel.org/r/[email protected]
>>> https://lkml.kernel.org/r/[email protected]
>>>
>>> But you need to be very careful on how you order things, as you can see
>>> the above relies on preempt_count() already having been incremented with
>>> NMI_MASK.
>>
>> Hmm. My patch seems simpler.
>
> And your patches fix my error while Peter's do not:
>
>
> IRQs not enabled as expected
> WARNING: CPU: 0 PID: 1377 at /home/aik/p/kernel/kernel/softirq.c:169
> __local_bh_enable_ip+0x118/0x190
Hi Nicholas, Alexey
I was able to reproduce the warning which Alexey reported using perf_fuzzer test suite.
With the patch provided by Nick, I don’t see the issue anymore. This patch fixes the
warnings I got with perf fuzzer run.
Thanks Nick for the fix.
Tested-by: Athira Rajeev<[email protected]>
>
>
>>
>> I don't know this stuff very well, I don't really understand what your patch
>> enables for x86 but at least it shouldn't be incompatible with this one
>> AFAIKS.
>>
>> Thanks,
>> Nick
>>
>
> --
> Alexey
On Thu, Jul 23, 2020 at 08:56:14PM +1000, Nicholas Piggin wrote:
> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
> index 3a0db7b0b46e..35060be09073 100644
> --- a/arch/powerpc/include/asm/hw_irq.h
> +++ b/arch/powerpc/include/asm/hw_irq.h
> @@ -200,17 +200,14 @@ static inline bool arch_irqs_disabled(void)
> #define powerpc_local_irq_pmu_save(flags) \
> do { \
> raw_local_irq_pmu_save(flags); \
> - trace_hardirqs_off(); \
> + if (!raw_irqs_disabled_flags(flags)) \
> + trace_hardirqs_off(); \
> } while(0)
So one problem with the above is something like this:
raw_local_irq_save();
<NMI>
powerpc_local_irq_pmu_save();
that would now no longer call into tracing/lockdep at all. As a
consequence, lockdep and tracing would show the NMI ran with IRQs
enabled, which is exceptionally weird..
Similar problem with:
raw_local_irq_disable();
local_irq_save()
Now, most architectures today seem to do what x86 also did:
<NMI>
trace_hardirqs_off()
...
if (irqs_unmasked(regs))
trace_hardirqs_on()
</NMI>
Which is 'funny' when it interleaves like:
local_irq_disable();
...
local_irq_enable()
trace_hardirqs_on();
<NMI/>
raw_local_irq_enable();
Because then it will undo the trace_hardirqs_on() we just did. With the
result that both tracing and lockdep will see a hardirqs-disable without
a matching enable, while the hardware state is enabled.
Which is exactly the state Alexey seems to have ran into.
Now, x86, and at least arm64 call nmi_enter() before
trace_hardirqs_off(), but AFAICT Power never did that, and that's part
of the problem. nmi_enter() does lockdep_off() and that _used_ to also
kill IRQ tracking.
Now, my patch changed that, it makes IRQ tracking not respect
lockdep_off(). And that exposed x86 (and everybody else :/) to the same
problem you have.
And this is why I made x86 look at software state in NMIs. Because then
it all works out. For bonus points, trace_hardirqs_*() also has some
do-it-once logic for tracing.
Anyway, it's Saturday evening, time for a beer. I'll stare at this more
later.
Excerpts from Peter Zijlstra's message of July 26, 2020 6:26 am:
> On Thu, Jul 23, 2020 at 08:56:14PM +1000, Nicholas Piggin wrote:
>> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
>> index 3a0db7b0b46e..35060be09073 100644
>> --- a/arch/powerpc/include/asm/hw_irq.h
>> +++ b/arch/powerpc/include/asm/hw_irq.h
>> @@ -200,17 +200,14 @@ static inline bool arch_irqs_disabled(void)
>> #define powerpc_local_irq_pmu_save(flags) \
>> do { \
>> raw_local_irq_pmu_save(flags); \
>> - trace_hardirqs_off(); \
>> + if (!raw_irqs_disabled_flags(flags)) \
>> + trace_hardirqs_off(); \
>> } while(0)
>
> So one problem with the above is something like this:
>
> raw_local_irq_save();
> <NMI>
> powerpc_local_irq_pmu_save();
>
> that would now no longer call into tracing/lockdep at all. As a
> consequence, lockdep and tracing would show the NMI ran with IRQs
> enabled, which is exceptionally weird..
powerpc perf NMIs will trace_hardirqs_off() if they were enabled,
and trace_hardirqs_on() at exit in that case too.
Machine check and system reset (like x86's "nmi") don't, but that's
deliberate to avoid any tracing in those cases which often causes
more problems than it's worth (and we have flags to prevent ftrace,
etc too for those cases).
> Similar problem with:
>
> raw_local_irq_disable();
> local_irq_save()
>
> Now, most architectures today seem to do what x86 also did:
>
> <NMI>
> trace_hardirqs_off()
> ...
> if (irqs_unmasked(regs))
> trace_hardirqs_on()
> </NMI>
>
> Which is 'funny' when it interleaves like:
>
> local_irq_disable();
> ...
> local_irq_enable()
> trace_hardirqs_on();
> <NMI/>
> raw_local_irq_enable();
>
> Because then it will undo the trace_hardirqs_on() we just did. With the
> result that both tracing and lockdep will see a hardirqs-disable without
> a matching enable, while the hardware state is enabled.
Seems like an arch problem -- why not disable if it was enabled only?
I guess the local_irq tracing calls are a mess so maybe they copied
those.
> Which is exactly the state Alexey seems to have ran into.
No his was what I said, the interruptee's trace_hardirqs_on() in
local_irq_enable getting lost because the NMI's local_irq_disable
always disables, but the enable doesn't re-enable.
It's all just weird asymmetrical special case hacks AFAIKS, the
code should just be symmetric and lockdep handle it's own weirdness.
>
> Now, x86, and at least arm64 call nmi_enter() before
> trace_hardirqs_off(), but AFAICT Power never did that, and that's part
> of the problem. nmi_enter() does lockdep_off() and that _used_ to also
> kill IRQ tracking.
Power does do nmi_enter -- __perf_event_interrupt()
nmi = perf_intr_is_nmi(regs);
if (nmi)
nmi_enter();
else
irq_enter();
Thanks,
Nick
> Now, my patch changed that, it makes IRQ tracking not respect
> lockdep_off(). And that exposed x86 (and everybody else :/) to the same
> problem you have.
>
> And this is why I made x86 look at software state in NMIs. Because then
> it all works out. For bonus points, trace_hardirqs_*() also has some
> do-it-once logic for tracing.
>
>
>
> Anyway, it's Saturday evening, time for a beer. I'll stare at this more
> later.
>
On 24/07/2020 15:59, Nicholas Piggin wrote:
> Excerpts from Alexey Kardashevskiy's message of July 24, 2020 2:16 pm:
>>
>>
>> On 23/07/2020 23:11, Nicholas Piggin wrote:
>>> Excerpts from Peter Zijlstra's message of July 23, 2020 9:40 pm:
>>>> On Thu, Jul 23, 2020 at 08:56:14PM +1000, Nicholas Piggin wrote:
>>>>
>>>>> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
>>>>> index 3a0db7b0b46e..35060be09073 100644
>>>>> --- a/arch/powerpc/include/asm/hw_irq.h
>>>>> +++ b/arch/powerpc/include/asm/hw_irq.h
>>>>> @@ -200,17 +200,14 @@ static inline bool arch_irqs_disabled(void)
>>>>> #define powerpc_local_irq_pmu_save(flags) \
>>>>> do { \
>>>>> raw_local_irq_pmu_save(flags); \
>>>>> - trace_hardirqs_off(); \
>>>>> + if (!raw_irqs_disabled_flags(flags)) \
>>>>> + trace_hardirqs_off(); \
>>>>> } while(0)
>>>>> #define powerpc_local_irq_pmu_restore(flags) \
>>>>> do { \
>>>>> - if (raw_irqs_disabled_flags(flags)) { \
>>>>> - raw_local_irq_pmu_restore(flags); \
>>>>> - trace_hardirqs_off(); \
>>>>> - } else { \
>>>>> + if (!raw_irqs_disabled_flags(flags)) \
>>>>> trace_hardirqs_on(); \
>>>>> - raw_local_irq_pmu_restore(flags); \
>>>>> - } \
>>>>> + raw_local_irq_pmu_restore(flags); \
>>>>> } while(0)
>>>>
>>>> You shouldn't be calling lockdep from NMI context!
>>>
>>> After this patch it doesn't.
>>>
>>> trace_hardirqs_on/off implementation appears to expect to be called in NMI
>>> context though, for some reason.
>>>
>>>> That is, I recently
>>>> added suport for that on x86:
>>>>
>>>> https://lkml.kernel.org/r/[email protected]
>>>> https://lkml.kernel.org/r/[email protected]
>>>>
>>>> But you need to be very careful on how you order things, as you can see
>>>> the above relies on preempt_count() already having been incremented with
>>>> NMI_MASK.
>>>
>>> Hmm. My patch seems simpler.
>>
>> And your patches fix my error while Peter's do not:
>>
>>
>> IRQs not enabled as expected
>> WARNING: CPU: 0 PID: 1377 at /home/aik/p/kernel/kernel/softirq.c:169
>> __local_bh_enable_ip+0x118/0x190
>
> I think they would have needed some powerpc bits as well.
True, there is quite a lot to repeat of what x86 does, I was in a hurry
and did not think it through :)
> But I don't
> see a reason we can't merge my patches, at least they fix this case and
> don't seem to make things worse in any way.
True. Or we could keep these lockdep_stats::redundant_softirqs_on/etc
and make powerpc_local_irq_pmu_restore()/local_irq_restore() call
trace_hardirqs_on() always and let lockdep do reference counting, may be?
--
Alexey
On Sun, Jul 26, 2020 at 02:14:34PM +1000, Nicholas Piggin wrote:
> > Now, x86, and at least arm64 call nmi_enter() before
> > trace_hardirqs_off(), but AFAICT Power never did that, and that's part
> > of the problem. nmi_enter() does lockdep_off() and that _used_ to also
> > kill IRQ tracking.
>
> Power does do nmi_enter -- __perf_event_interrupt()
>
> nmi = perf_intr_is_nmi(regs);
> if (nmi)
> nmi_enter();
> else
> irq_enter();
That's _waaaay_ too late, you've done the trace_hardirqs_{off,on} in
arch/powerpc/kernel/exceptions-64e.S, you need to _first_ do nmi_enter()
and _then_ trace_hardirqs_off(), or rather, that's still broken, but
then we get into the details of entry ordering.
On Sun, Jul 26, 2020 at 02:14:34PM +1000, Nicholas Piggin wrote:
> Excerpts from Peter Zijlstra's message of July 26, 2020 6:26 am:
> > Which is 'funny' when it interleaves like:
> >
> > local_irq_disable();
> > ...
> > local_irq_enable()
> > trace_hardirqs_on();
> > <NMI/>
> > raw_local_irq_enable();
> >
> > Because then it will undo the trace_hardirqs_on() we just did. With the
> > result that both tracing and lockdep will see a hardirqs-disable without
> > a matching enable, while the hardware state is enabled.
>
> Seems like an arch problem -- why not disable if it was enabled only?
> I guess the local_irq tracing calls are a mess so maybe they copied
> those.
Because, as I wrote earlier, then we can miss updating software state.
So your proposal has:
raw_local_irq_disable()
<NMI>
if (!arch_irqs_disabled(regs->flags) // false
trace_hardirqs_off();
// tracing/lockdep still think IRQs are enabled
// hardware IRQ state is disabled.
With the current code we have:
local_irq_enable()
trace_hardirqs_on();
<NMI>
trace_hardirqs_off();
...
if (!arch_irqs_disabled(regs->flags)) // false
trace_hardirqs_on();
</NMI>
// and now the NMI disabled software state again
// while we're about to enable the hardware state
raw_local_irq_enable();
> > Which is exactly the state Alexey seems to have ran into.
>
> No his was what I said, the interruptee's trace_hardirqs_on() in
> local_irq_enable getting lost because the NMI's local_irq_disable
> always disables, but the enable doesn't re-enable.
That's _exactly_ the case above. It doesn't re-enable because hardirqs
are actually still disabled. You _cannot_ rely on hardirq state for
NMIs, that'll get you wrong state.
> It's all just weird asymmetrical special case hacks AFAIKS, the
> code should just be symmetric and lockdep handle it's own weirdness.
It's for non-maskable exceptions/interrupts, because there the hardware
and software state changes non-atomically. For maskable interrupts doing
the software state transitions inside the disabled region makes perfect
sense, because that keeps it atomic.
Excerpts from [email protected]'s message of July 26, 2020 10:11 pm:
> On Sun, Jul 26, 2020 at 02:14:34PM +1000, Nicholas Piggin wrote:
>> Excerpts from Peter Zijlstra's message of July 26, 2020 6:26 am:
>
>> > Which is 'funny' when it interleaves like:
>> >
>> > local_irq_disable();
>> > ...
>> > local_irq_enable()
>> > trace_hardirqs_on();
>> > <NMI/>
>> > raw_local_irq_enable();
>> >
>> > Because then it will undo the trace_hardirqs_on() we just did. With the
>> > result that both tracing and lockdep will see a hardirqs-disable without
>> > a matching enable, while the hardware state is enabled.
>>
>> Seems like an arch problem -- why not disable if it was enabled only?
>> I guess the local_irq tracing calls are a mess so maybe they copied
>> those.
>
> Because, as I wrote earlier, then we can miss updating software state.
> So your proposal has:
>
> raw_local_irq_disable()
> <NMI>
> if (!arch_irqs_disabled(regs->flags) // false
> trace_hardirqs_off();
>
> // tracing/lockdep still think IRQs are enabled
> // hardware IRQ state is disabled.
... and then lockdep_nmi_enter can disable IRQs if they were enabled?
The only reason it's done this way as opposed to a much simple counter
increment/decrement AFAIKS is to avoid some overhead of calling
trace_hardirqs_on/off (which seems a bit dubious but let's go with it).
In that case the lockdep_nmi_enter code is the right spot to clean up
that gap vs NMIs. I guess there's an argument that arch_nmi_enter could
do it. I don't see the problem with fixing it up here though, this is a
slow path so it doesn't matter if we have some more logic for it.
Thanks,
Nick
On 23/07/2020 20:56, Nicholas Piggin wrote:
> With the previous patch, lockdep hardirq state changes should not be
> redundant. Softirq state changes already follow that pattern.
>
> So warn on unexpected enable-when-enabled or disable-when-disabled
> conditions, to catch possible errors or sloppy patterns that could
> lead to similar bad behavior due to NMIs etc.
This one does not apply anymore as Peter's changes went in already but
this one is not really a fix so I can live with that. Did 1/2 go
anywhere? Thanks,
>
> Signed-off-by: Nicholas Piggin <[email protected]>
> ---
> kernel/locking/lockdep.c | 80 +++++++++++++-----------------
> kernel/locking/lockdep_internals.h | 4 --
> kernel/locking/lockdep_proc.c | 10 +---
> 3 files changed, 35 insertions(+), 59 deletions(-)
>
> diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
> index 29a8de4c50b9..138458fb2234 100644
> --- a/kernel/locking/lockdep.c
> +++ b/kernel/locking/lockdep.c
> @@ -3649,15 +3649,8 @@ void lockdep_hardirqs_on_prepare(unsigned long ip)
> if (unlikely(!debug_locks || current->lockdep_recursion))
> return;
>
> - if (unlikely(current->hardirqs_enabled)) {
> - /*
> - * Neither irq nor preemption are disabled here
> - * so this is racy by nature but losing one hit
> - * in a stat is not a big deal.
> - */
> - __debug_atomic_inc(redundant_hardirqs_on);
> + if (DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled))
> return;
> - }
>
> /*
> * We're enabling irqs and according to our state above irqs weren't
> @@ -3695,15 +3688,8 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
> if (unlikely(!debug_locks || curr->lockdep_recursion))
> return;
>
> - if (curr->hardirqs_enabled) {
> - /*
> - * Neither irq nor preemption are disabled here
> - * so this is racy by nature but losing one hit
> - * in a stat is not a big deal.
> - */
> - __debug_atomic_inc(redundant_hardirqs_on);
> + if (DEBUG_LOCKS_WARN_ON(curr->hardirqs_enabled))
> return;
> - }
>
> /*
> * We're enabling irqs and according to our state above irqs weren't
> @@ -3738,6 +3724,9 @@ void noinstr lockdep_hardirqs_off(unsigned long ip)
> if (unlikely(!debug_locks || curr->lockdep_recursion))
> return;
>
> + if (DEBUG_LOCKS_WARN_ON(!curr->hardirqs_enabled))
> + return;
> +
> /*
> * So we're supposed to get called after you mask local IRQs, but for
> * some reason the hardware doesn't quite think you did a proper job.
> @@ -3745,17 +3734,13 @@ void noinstr lockdep_hardirqs_off(unsigned long ip)
> if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
> return;
>
> - if (curr->hardirqs_enabled) {
> - /*
> - * We have done an ON -> OFF transition:
> - */
> - curr->hardirqs_enabled = 0;
> - curr->hardirq_disable_ip = ip;
> - curr->hardirq_disable_event = ++curr->irq_events;
> - debug_atomic_inc(hardirqs_off_events);
> - } else {
> - debug_atomic_inc(redundant_hardirqs_off);
> - }
> + /*
> + * We have done an ON -> OFF transition:
> + */
> + curr->hardirqs_enabled = 0;
> + curr->hardirq_disable_ip = ip;
> + curr->hardirq_disable_event = ++curr->irq_events;
> + debug_atomic_inc(hardirqs_off_events);
> }
> EXPORT_SYMBOL_GPL(lockdep_hardirqs_off);
>
> @@ -3769,6 +3754,9 @@ void lockdep_softirqs_on(unsigned long ip)
> if (unlikely(!debug_locks || current->lockdep_recursion))
> return;
>
> + if (DEBUG_LOCKS_WARN_ON(curr->softirqs_enabled))
> + return;
> +
> /*
> * We fancy IRQs being disabled here, see softirq.c, avoids
> * funny state and nesting things.
> @@ -3776,11 +3764,6 @@ void lockdep_softirqs_on(unsigned long ip)
> if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
> return;
>
> - if (curr->softirqs_enabled) {
> - debug_atomic_inc(redundant_softirqs_on);
> - return;
> - }
> -
> current->lockdep_recursion++;
> /*
> * We'll do an OFF -> ON transition:
> @@ -3809,26 +3792,26 @@ void lockdep_softirqs_off(unsigned long ip)
> if (unlikely(!debug_locks || current->lockdep_recursion))
> return;
>
> + if (DEBUG_LOCKS_WARN_ON(!curr->softirqs_enabled))
> + return;
> +
> /*
> * We fancy IRQs being disabled here, see softirq.c
> */
> if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
> return;
>
> - if (curr->softirqs_enabled) {
> - /*
> - * We have done an ON -> OFF transition:
> - */
> - curr->softirqs_enabled = 0;
> - curr->softirq_disable_ip = ip;
> - curr->softirq_disable_event = ++curr->irq_events;
> - debug_atomic_inc(softirqs_off_events);
> - /*
> - * Whoops, we wanted softirqs off, so why aren't they?
> - */
> - DEBUG_LOCKS_WARN_ON(!softirq_count());
> - } else
> - debug_atomic_inc(redundant_softirqs_off);
> + /*
> + * We have done an ON -> OFF transition:
> + */
> + curr->softirqs_enabled = 0;
> + curr->softirq_disable_ip = ip;
> + curr->softirq_disable_event = ++curr->irq_events;
> + debug_atomic_inc(softirqs_off_events);
> + /*
> + * Whoops, we wanted softirqs off, so why aren't they?
> + */
> + DEBUG_LOCKS_WARN_ON(!softirq_count());
> }
>
> static int
> @@ -5684,6 +5667,11 @@ void __init lockdep_init(void)
>
> printk(" per task-struct memory footprint: %zu bytes\n",
> sizeof(((struct task_struct *)NULL)->held_locks));
> +
> + WARN_ON(irqs_disabled());
> +
> + current->hardirqs_enabled = 1;
> + current->softirqs_enabled = 1;
> }
>
> static void
> diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
> index baca699b94e9..6dd8b1f06dc4 100644
> --- a/kernel/locking/lockdep_internals.h
> +++ b/kernel/locking/lockdep_internals.h
> @@ -180,12 +180,8 @@ struct lockdep_stats {
> unsigned int chain_lookup_misses;
> unsigned long hardirqs_on_events;
> unsigned long hardirqs_off_events;
> - unsigned long redundant_hardirqs_on;
> - unsigned long redundant_hardirqs_off;
> unsigned long softirqs_on_events;
> unsigned long softirqs_off_events;
> - unsigned long redundant_softirqs_on;
> - unsigned long redundant_softirqs_off;
> int nr_unused_locks;
> unsigned int nr_redundant_checks;
> unsigned int nr_redundant;
> diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
> index 5525cd3ba0c8..98f204220ed9 100644
> --- a/kernel/locking/lockdep_proc.c
> +++ b/kernel/locking/lockdep_proc.c
> @@ -172,12 +172,8 @@ static void lockdep_stats_debug_show(struct seq_file *m)
> #ifdef CONFIG_DEBUG_LOCKDEP
> unsigned long long hi1 = debug_atomic_read(hardirqs_on_events),
> hi2 = debug_atomic_read(hardirqs_off_events),
> - hr1 = debug_atomic_read(redundant_hardirqs_on),
> - hr2 = debug_atomic_read(redundant_hardirqs_off),
> si1 = debug_atomic_read(softirqs_on_events),
> - si2 = debug_atomic_read(softirqs_off_events),
> - sr1 = debug_atomic_read(redundant_softirqs_on),
> - sr2 = debug_atomic_read(redundant_softirqs_off);
> + si2 = debug_atomic_read(softirqs_off_events);
>
> seq_printf(m, " chain lookup misses: %11llu\n",
> debug_atomic_read(chain_lookup_misses));
> @@ -196,12 +192,8 @@ static void lockdep_stats_debug_show(struct seq_file *m)
>
> seq_printf(m, " hardirq on events: %11llu\n", hi1);
> seq_printf(m, " hardirq off events: %11llu\n", hi2);
> - seq_printf(m, " redundant hardirq ons: %11llu\n", hr1);
> - seq_printf(m, " redundant hardirq offs: %11llu\n", hr2);
> seq_printf(m, " softirq on events: %11llu\n", si1);
> seq_printf(m, " softirq off events: %11llu\n", si2);
> - seq_printf(m, " redundant softirq ons: %11llu\n", sr1);
> - seq_printf(m, " redundant softirq offs: %11llu\n", sr2);
> #endif
> }
>
>
--
Alexey
What's wrong with something like this?
AFAICT there's no reason to actually try and add IRQ tracing here, it's
just a hand full of instructions at the most.
---
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 3a0db7b0b46e..6be22c1838e2 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -196,33 +196,6 @@ static inline bool arch_irqs_disabled(void)
arch_local_irq_restore(flags); \
} while(0)
-#ifdef CONFIG_TRACE_IRQFLAGS
-#define powerpc_local_irq_pmu_save(flags) \
- do { \
- raw_local_irq_pmu_save(flags); \
- trace_hardirqs_off(); \
- } while(0)
-#define powerpc_local_irq_pmu_restore(flags) \
- do { \
- if (raw_irqs_disabled_flags(flags)) { \
- raw_local_irq_pmu_restore(flags); \
- trace_hardirqs_off(); \
- } else { \
- trace_hardirqs_on(); \
- raw_local_irq_pmu_restore(flags); \
- } \
- } while(0)
-#else
-#define powerpc_local_irq_pmu_save(flags) \
- do { \
- raw_local_irq_pmu_save(flags); \
- } while(0)
-#define powerpc_local_irq_pmu_restore(flags) \
- do { \
- raw_local_irq_pmu_restore(flags); \
- } while (0)
-#endif /* CONFIG_TRACE_IRQFLAGS */
-
#endif /* CONFIG_PPC_BOOK3S */
#ifdef CONFIG_PPC_BOOK3E
diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h
index bc4bd19b7fc2..b357a35672b1 100644
--- a/arch/powerpc/include/asm/local.h
+++ b/arch/powerpc/include/asm/local.h
@@ -32,9 +32,9 @@ static __inline__ void local_##op(long i, local_t *l) \
{ \
unsigned long flags; \
\
- powerpc_local_irq_pmu_save(flags); \
+ raw_powerpc_local_irq_pmu_save(flags); \
l->v c_op i; \
- powerpc_local_irq_pmu_restore(flags); \
+ raw_powerpc_local_irq_pmu_restore(flags); \
}
#define LOCAL_OP_RETURN(op, c_op) \
@@ -43,9 +43,9 @@ static __inline__ long local_##op##_return(long a, local_t *l) \
long t; \
unsigned long flags; \
\
- powerpc_local_irq_pmu_save(flags); \
+ raw_powerpc_local_irq_pmu_save(flags); \
t = (l->v c_op a); \
- powerpc_local_irq_pmu_restore(flags); \
+ raw_powerpc_local_irq_pmu_restore(flags); \
\
return t; \
}
@@ -81,11 +81,11 @@ static __inline__ long local_cmpxchg(local_t *l, long o, long n)
long t;
unsigned long flags;
- powerpc_local_irq_pmu_save(flags);
+ raw_powerpc_local_irq_pmu_save(flags);
t = l->v;
if (t == o)
l->v = n;
- powerpc_local_irq_pmu_restore(flags);
+ raw_powerpc_local_irq_pmu_restore(flags);
return t;
}
@@ -95,10 +95,10 @@ static __inline__ long local_xchg(local_t *l, long n)
long t;
unsigned long flags;
- powerpc_local_irq_pmu_save(flags);
+ raw_powerpc_local_irq_pmu_save(flags);
t = l->v;
l->v = n;
- powerpc_local_irq_pmu_restore(flags);
+ raw_powerpc_local_irq_pmu_restore(flags);
return t;
}
@@ -117,12 +117,12 @@ static __inline__ int local_add_unless(local_t *l, long a, long u)
unsigned long flags;
int ret = 0;
- powerpc_local_irq_pmu_save(flags);
+ raw_powerpc_local_irq_pmu_save(flags);
if (l->v != u) {
l->v += a;
ret = 1;
}
- powerpc_local_irq_pmu_restore(flags);
+ raw_powerpc_local_irq_pmu_restore(flags);
return ret;
}
Excerpts from [email protected]'s message of August 7, 2020 9:11 pm:
>
> What's wrong with something like this?
>
> AFAICT there's no reason to actually try and add IRQ tracing here, it's
> just a hand full of instructions at the most.
Because we may want to use that in other places as well, so it would
be nice to have tracing.
Hmm... also, I thought NMI context was free to call local_irq_save/restore
anyway so the bug would still be there in those cases?
Thanks,
Nick
>
> ---
>
> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
> index 3a0db7b0b46e..6be22c1838e2 100644
> --- a/arch/powerpc/include/asm/hw_irq.h
> +++ b/arch/powerpc/include/asm/hw_irq.h
> @@ -196,33 +196,6 @@ static inline bool arch_irqs_disabled(void)
> arch_local_irq_restore(flags); \
> } while(0)
>
> -#ifdef CONFIG_TRACE_IRQFLAGS
> -#define powerpc_local_irq_pmu_save(flags) \
> - do { \
> - raw_local_irq_pmu_save(flags); \
> - trace_hardirqs_off(); \
> - } while(0)
> -#define powerpc_local_irq_pmu_restore(flags) \
> - do { \
> - if (raw_irqs_disabled_flags(flags)) { \
> - raw_local_irq_pmu_restore(flags); \
> - trace_hardirqs_off(); \
> - } else { \
> - trace_hardirqs_on(); \
> - raw_local_irq_pmu_restore(flags); \
> - } \
> - } while(0)
> -#else
> -#define powerpc_local_irq_pmu_save(flags) \
> - do { \
> - raw_local_irq_pmu_save(flags); \
> - } while(0)
> -#define powerpc_local_irq_pmu_restore(flags) \
> - do { \
> - raw_local_irq_pmu_restore(flags); \
> - } while (0)
> -#endif /* CONFIG_TRACE_IRQFLAGS */
> -
> #endif /* CONFIG_PPC_BOOK3S */
>
> #ifdef CONFIG_PPC_BOOK3E
> diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h
> index bc4bd19b7fc2..b357a35672b1 100644
> --- a/arch/powerpc/include/asm/local.h
> +++ b/arch/powerpc/include/asm/local.h
> @@ -32,9 +32,9 @@ static __inline__ void local_##op(long i, local_t *l) \
> { \
> unsigned long flags; \
> \
> - powerpc_local_irq_pmu_save(flags); \
> + raw_powerpc_local_irq_pmu_save(flags); \
> l->v c_op i; \
> - powerpc_local_irq_pmu_restore(flags); \
> + raw_powerpc_local_irq_pmu_restore(flags); \
> }
>
> #define LOCAL_OP_RETURN(op, c_op) \
> @@ -43,9 +43,9 @@ static __inline__ long local_##op##_return(long a, local_t *l) \
> long t; \
> unsigned long flags; \
> \
> - powerpc_local_irq_pmu_save(flags); \
> + raw_powerpc_local_irq_pmu_save(flags); \
> t = (l->v c_op a); \
> - powerpc_local_irq_pmu_restore(flags); \
> + raw_powerpc_local_irq_pmu_restore(flags); \
> \
> return t; \
> }
> @@ -81,11 +81,11 @@ static __inline__ long local_cmpxchg(local_t *l, long o, long n)
> long t;
> unsigned long flags;
>
> - powerpc_local_irq_pmu_save(flags);
> + raw_powerpc_local_irq_pmu_save(flags);
> t = l->v;
> if (t == o)
> l->v = n;
> - powerpc_local_irq_pmu_restore(flags);
> + raw_powerpc_local_irq_pmu_restore(flags);
>
> return t;
> }
> @@ -95,10 +95,10 @@ static __inline__ long local_xchg(local_t *l, long n)
> long t;
> unsigned long flags;
>
> - powerpc_local_irq_pmu_save(flags);
> + raw_powerpc_local_irq_pmu_save(flags);
> t = l->v;
> l->v = n;
> - powerpc_local_irq_pmu_restore(flags);
> + raw_powerpc_local_irq_pmu_restore(flags);
>
> return t;
> }
> @@ -117,12 +117,12 @@ static __inline__ int local_add_unless(local_t *l, long a, long u)
> unsigned long flags;
> int ret = 0;
>
> - powerpc_local_irq_pmu_save(flags);
> + raw_powerpc_local_irq_pmu_save(flags);
> if (l->v != u) {
> l->v += a;
> ret = 1;
> }
> - powerpc_local_irq_pmu_restore(flags);
> + raw_powerpc_local_irq_pmu_restore(flags);
>
> return ret;
> }
>
On Wed, Aug 12, 2020 at 06:18:28PM +1000, Nicholas Piggin wrote:
> Excerpts from [email protected]'s message of August 7, 2020 9:11 pm:
> >
> > What's wrong with something like this?
> >
> > AFAICT there's no reason to actually try and add IRQ tracing here, it's
> > just a hand full of instructions at the most.
>
> Because we may want to use that in other places as well, so it would
> be nice to have tracing.
>
> Hmm... also, I thought NMI context was free to call local_irq_save/restore
> anyway so the bug would still be there in those cases?
NMI code has in_nmi() true, in which case the IRQ tracing is disabled
(except for x86 which has CONFIG_TRACE_IRQFLAGS_NMI).
Excerpts from [email protected]'s message of August 12, 2020 8:35 pm:
> On Wed, Aug 12, 2020 at 06:18:28PM +1000, Nicholas Piggin wrote:
>> Excerpts from [email protected]'s message of August 7, 2020 9:11 pm:
>> >
>> > What's wrong with something like this?
>> >
>> > AFAICT there's no reason to actually try and add IRQ tracing here, it's
>> > just a hand full of instructions at the most.
>>
>> Because we may want to use that in other places as well, so it would
>> be nice to have tracing.
>>
>> Hmm... also, I thought NMI context was free to call local_irq_save/restore
>> anyway so the bug would still be there in those cases?
>
> NMI code has in_nmi() true, in which case the IRQ tracing is disabled
> (except for x86 which has CONFIG_TRACE_IRQFLAGS_NMI).
>
That doesn't help. It doesn't fix the lockdep irq state going out of
synch with the actual irq state. The code which triggered this with the
special powerpc irq disable has in_nmi() true as well.
Thanks,
Nick
On Tue, Aug 18, 2020 at 05:22:33PM +1000, Nicholas Piggin wrote:
> Excerpts from [email protected]'s message of August 12, 2020 8:35 pm:
> > On Wed, Aug 12, 2020 at 06:18:28PM +1000, Nicholas Piggin wrote:
> >> Excerpts from [email protected]'s message of August 7, 2020 9:11 pm:
> >> >
> >> > What's wrong with something like this?
> >> >
> >> > AFAICT there's no reason to actually try and add IRQ tracing here, it's
> >> > just a hand full of instructions at the most.
> >>
> >> Because we may want to use that in other places as well, so it would
> >> be nice to have tracing.
> >>
> >> Hmm... also, I thought NMI context was free to call local_irq_save/restore
> >> anyway so the bug would still be there in those cases?
> >
> > NMI code has in_nmi() true, in which case the IRQ tracing is disabled
> > (except for x86 which has CONFIG_TRACE_IRQFLAGS_NMI).
> >
>
> That doesn't help. It doesn't fix the lockdep irq state going out of
> synch with the actual irq state. The code which triggered this with the
> special powerpc irq disable has in_nmi() true as well.
Urgh, you're talking about using lockdep_assert_irqs*() from NMI
context?
If not, I'm afraid I might've lost the plot a little on what exact
failure case we're talking about.
Excerpts from [email protected]'s message of August 19, 2020 1:41 am:
> On Tue, Aug 18, 2020 at 05:22:33PM +1000, Nicholas Piggin wrote:
>> Excerpts from [email protected]'s message of August 12, 2020 8:35 pm:
>> > On Wed, Aug 12, 2020 at 06:18:28PM +1000, Nicholas Piggin wrote:
>> >> Excerpts from [email protected]'s message of August 7, 2020 9:11 pm:
>> >> >
>> >> > What's wrong with something like this?
>> >> >
>> >> > AFAICT there's no reason to actually try and add IRQ tracing here, it's
>> >> > just a hand full of instructions at the most.
>> >>
>> >> Because we may want to use that in other places as well, so it would
>> >> be nice to have tracing.
>> >>
>> >> Hmm... also, I thought NMI context was free to call local_irq_save/restore
>> >> anyway so the bug would still be there in those cases?
>> >
>> > NMI code has in_nmi() true, in which case the IRQ tracing is disabled
>> > (except for x86 which has CONFIG_TRACE_IRQFLAGS_NMI).
>> >
>>
>> That doesn't help. It doesn't fix the lockdep irq state going out of
>> synch with the actual irq state. The code which triggered this with the
>> special powerpc irq disable has in_nmi() true as well.
>
> Urgh, you're talking about using lockdep_assert_irqs*() from NMI
> context?
>
> If not, I'm afraid I might've lost the plot a little on what exact
> failure case we're talking about.
>
Hm, I may have been a bit confused actually. Since your Fix
TRACE_IRQFLAGS vs NMIs patch it might now work.
I'm worried powerpc disables trace irqs trace_hardirqs_off()
before nmi_enter() might still be a problem, but not sure
actually. Alexey did you end up re-testing with Peter's patch
or current upstream?
Thanks,
Nick
On 19/08/2020 09:54, Nicholas Piggin wrote:
> Excerpts from [email protected]'s message of August 19, 2020 1:41 am:
>> On Tue, Aug 18, 2020 at 05:22:33PM +1000, Nicholas Piggin wrote:
>>> Excerpts from [email protected]'s message of August 12, 2020 8:35 pm:
>>>> On Wed, Aug 12, 2020 at 06:18:28PM +1000, Nicholas Piggin wrote:
>>>>> Excerpts from [email protected]'s message of August 7, 2020 9:11 pm:
>>>>>>
>>>>>> What's wrong with something like this?
>>>>>>
>>>>>> AFAICT there's no reason to actually try and add IRQ tracing here, it's
>>>>>> just a hand full of instructions at the most.
>>>>>
>>>>> Because we may want to use that in other places as well, so it would
>>>>> be nice to have tracing.
>>>>>
>>>>> Hmm... also, I thought NMI context was free to call local_irq_save/restore
>>>>> anyway so the bug would still be there in those cases?
>>>>
>>>> NMI code has in_nmi() true, in which case the IRQ tracing is disabled
>>>> (except for x86 which has CONFIG_TRACE_IRQFLAGS_NMI).
>>>>
>>>
>>> That doesn't help. It doesn't fix the lockdep irq state going out of
>>> synch with the actual irq state. The code which triggered this with the
>>> special powerpc irq disable has in_nmi() true as well.
>>
>> Urgh, you're talking about using lockdep_assert_irqs*() from NMI
>> context?
>>
>> If not, I'm afraid I might've lost the plot a little on what exact
>> failure case we're talking about.
>>
>
> Hm, I may have been a bit confused actually. Since your Fix
> TRACE_IRQFLAGS vs NMIs patch it might now work.
>
> I'm worried powerpc disables trace irqs trace_hardirqs_off()
> before nmi_enter() might still be a problem, but not sure
> actually. Alexey did you end up re-testing with Peter's patch
The one above in the thread which replaces powerpc_local_irq_pmu_save()
with
raw_powerpc_local_irq_pmu_save()? It did not compile as there is no
raw_powerpc_local_irq_pmu_save() so I may be missing something here.
I applied the patch on top of the current upstream and replaced
raw_powerpc_local_irq_pmu_save() with raw_local_irq_pmu_save() (which I
think was the intention) but I still see the issue.
> or current upstream?
The upstream 18445bf405cb (13 hours old) also shows the problem. Yours
1/2 still fixes it.
>
> Thanks,
> Nick
>
--
Alexey
On Wed, Aug 19, 2020 at 08:39:13PM +1000, Alexey Kardashevskiy wrote:
> > or current upstream?
>
> The upstream 18445bf405cb (13 hours old) also shows the problem. Yours
> 1/2 still fixes it.
Afaict that just reduces the window.
Isn't the problem that:
arch/powerpc/kernel/exceptions-64e.S
START_EXCEPTION(perfmon);
NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x260)
INTS_DISABLE
# RECONCILE_IRQ_STATE
# TRACE_DISABLE_INTS
# TRACE_WITH_FRAME_BUFFER(trace_hardirqs_off)
#
# but we haven't done nmi_enter() yet... whoopsy
CHECK_NAPPING()
addi r3,r1,STACK_FRAME_OVERHEAD
bl performance_monitor_exception
# perf_irq()
# perf_event_interrupt
# __perf_event_interrupt
# nmi_enter()
That is, afaict your entry code is buggered.
On Wed, Aug 19, 2020 at 05:32:50PM +0200, [email protected] wrote:
> On Wed, Aug 19, 2020 at 08:39:13PM +1000, Alexey Kardashevskiy wrote:
>
> > > or current upstream?
> >
> > The upstream 18445bf405cb (13 hours old) also shows the problem. Yours
> > 1/2 still fixes it.
>
> Afaict that just reduces the window.
>
> Isn't the problem that:
>
> arch/powerpc/kernel/exceptions-64e.S
>
> START_EXCEPTION(perfmon);
> NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
> PROLOG_ADDITION_NONE)
> EXCEPTION_COMMON(0x260)
> INTS_DISABLE
> # RECONCILE_IRQ_STATE
> # TRACE_DISABLE_INTS
> # TRACE_WITH_FRAME_BUFFER(trace_hardirqs_off)
> #
> # but we haven't done nmi_enter() yet... whoopsy
>
> CHECK_NAPPING()
> addi r3,r1,STACK_FRAME_OVERHEAD
> bl performance_monitor_exception
> # perf_irq()
> # perf_event_interrupt
> # __perf_event_interrupt
> # nmi_enter()
>
>
>
> That is, afaict your entry code is buggered.
That is, patch 1/2 doesn't change the case:
local_irq_enable()
trace_hardirqs_on()
<NMI>
trace_hardirqs_off()
...
if (regs_irqs_disabled(regs)) // false
trace_hardirqs_on();
</NMI>
raw_local_irq_enable()
Where local_irq_enable() has done trace_hardirqs_on() and the NMI hits
and undoes it, but doesn't re-do it because the hardware state is still
disabled.
What's supposed to happen is:
<NMI>
nmi_enter()
trace_hardirqs_off() // no-op, because in_nmi() (or previously because lockdep_off())
...
</NMI>
The following commit has been merged into the locking/core branch of tip:
Commit-ID: 044d0d6de9f50192f9697583504a382347ee95ca
Gitweb: https://git.kernel.org/tip/044d0d6de9f50192f9697583504a382347ee95ca
Author: Nicholas Piggin <[email protected]>
AuthorDate: Thu, 23 Jul 2020 20:56:14 +10:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Wed, 26 Aug 2020 12:41:56 +02:00
lockdep: Only trace IRQ edges
Problem:
raw_local_irq_save(); // software state on
local_irq_save(); // software state off
...
local_irq_restore(); // software state still off, because we don't enable IRQs
raw_local_irq_restore(); // software state still off, *whoopsie*
existing instances:
- lock_acquire()
raw_local_irq_save()
__lock_acquire()
arch_spin_lock(&graph_lock)
pv_wait() := kvm_wait() (same or worse for Xen/HyperV)
local_irq_save()
- trace_clock_global()
raw_local_irq_save()
arch_spin_lock()
pv_wait() := kvm_wait()
local_irq_save()
- apic_retrigger_irq()
raw_local_irq_save()
apic->send_IPI() := default_send_IPI_single_phys()
local_irq_save()
Possible solutions:
A) make it work by enabling the tracing inside raw_*()
B) make it work by keeping tracing disabled inside raw_*()
C) call it broken and clean it up now
Now, given that the only reason to use the raw_* variant is because you don't
want tracing. Therefore A) seems like a weird option (although it can be done).
C) is tempting, but OTOH it ends up converting a _lot_ of code to raw just
because there is one raw user, this strips the validation/tracing off for all
the other users.
So we pick B) and declare any code that ends up doing:
raw_local_irq_save()
local_irq_save()
lockdep_assert_irqs_disabled();
broken. AFAICT this problem has existed forever, the only reason it came
up is because commit: 859d069ee1dd ("lockdep: Prepare for NMI IRQ
state tracking") changed IRQ tracing vs lockdep recursion and the
first instance is fairly common, the other cases hardly ever happen.
Signed-off-by: Nicholas Piggin <[email protected]>
[rewrote changelog]
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Steven Rostedt (VMware) <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Acked-by: Rafael J. Wysocki <[email protected]>
Tested-by: Marco Elver <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/powerpc/include/asm/hw_irq.h | 11 ++++-------
include/linux/irqflags.h | 15 +++++++--------
2 files changed, 11 insertions(+), 15 deletions(-)
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 3a0db7b..35060be 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -200,17 +200,14 @@ static inline bool arch_irqs_disabled(void)
#define powerpc_local_irq_pmu_save(flags) \
do { \
raw_local_irq_pmu_save(flags); \
- trace_hardirqs_off(); \
+ if (!raw_irqs_disabled_flags(flags)) \
+ trace_hardirqs_off(); \
} while(0)
#define powerpc_local_irq_pmu_restore(flags) \
do { \
- if (raw_irqs_disabled_flags(flags)) { \
- raw_local_irq_pmu_restore(flags); \
- trace_hardirqs_off(); \
- } else { \
+ if (!raw_irqs_disabled_flags(flags)) \
trace_hardirqs_on(); \
- raw_local_irq_pmu_restore(flags); \
- } \
+ raw_local_irq_pmu_restore(flags); \
} while(0)
#else
#define powerpc_local_irq_pmu_save(flags) \
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 00d553d..3ed4e87 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -191,25 +191,24 @@ do { \
#define local_irq_disable() \
do { \
+ bool was_disabled = raw_irqs_disabled();\
raw_local_irq_disable(); \
- trace_hardirqs_off(); \
+ if (!was_disabled) \
+ trace_hardirqs_off(); \
} while (0)
#define local_irq_save(flags) \
do { \
raw_local_irq_save(flags); \
- trace_hardirqs_off(); \
+ if (!raw_irqs_disabled_flags(flags)) \
+ trace_hardirqs_off(); \
} while (0)
#define local_irq_restore(flags) \
do { \
- if (raw_irqs_disabled_flags(flags)) { \
- raw_local_irq_restore(flags); \
- trace_hardirqs_off(); \
- } else { \
+ if (!raw_irqs_disabled_flags(flags)) \
trace_hardirqs_on(); \
- raw_local_irq_restore(flags); \
- } \
+ raw_local_irq_restore(flags); \
} while (0)
#define safe_halt() \