2020-06-23 08:41:39

by Peter Zijlstra

Subject: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

Currently all IRQ-tracking state is in task_struct; this means that
task_struct needs to be defined before we use it.

Especially for lockdep_assert_irq*() this can lead to header-hell.

Move the hardirq state into per-cpu variables to avoid the task_struct
dependency.
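
[ Editor's note: an illustrative sketch of what the change buys. With the
  state in per-cpu variables, a header can use the IRQ asserts knowing only
  the per-cpu declarations, without pulling in the task_struct definition
  from <linux/sched.h>. The helper below is hypothetical: ]

#include <linux/lockdep.h>

/* Hypothetical consumer, for illustration only. */
static inline void example_poke_device(void)
{
	/* Compiles against the per-cpu state alone; no task_struct needed. */
	lockdep_assert_irqs_disabled();
}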

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
include/linux/irqflags.h | 19 ++++++++++++-------
include/linux/lockdep.h | 34 ++++++++++++++++++----------------
include/linux/sched.h | 2 --
kernel/fork.c | 4 +---
kernel/locking/lockdep.c | 30 +++++++++++++++---------------
kernel/softirq.c | 6 ++++++
6 files changed, 52 insertions(+), 43 deletions(-)

--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -14,6 +14,7 @@

#include <linux/typecheck.h>
#include <asm/irqflags.h>
+#include <asm/percpu.h>

/* Currently lockdep_softirqs_on/off is used only by lockdep */
#ifdef CONFIG_PROVE_LOCKING
@@ -31,18 +32,22 @@
#endif

#ifdef CONFIG_TRACE_IRQFLAGS
+
+DECLARE_PER_CPU(int, hardirqs_enabled);
+DECLARE_PER_CPU(int, hardirq_context);
+
extern void trace_hardirqs_on_prepare(void);
extern void trace_hardirqs_off_finish(void);
extern void trace_hardirqs_on(void);
extern void trace_hardirqs_off(void);
-# define lockdep_hardirq_context(p) ((p)->hardirq_context)
+# define lockdep_hardirq_context(p) (this_cpu_read(hardirq_context))
# define lockdep_softirq_context(p) ((p)->softirq_context)
-# define lockdep_hardirqs_enabled(p) ((p)->hardirqs_enabled)
+# define lockdep_hardirqs_enabled(p) (this_cpu_read(hardirqs_enabled))
# define lockdep_softirqs_enabled(p) ((p)->softirqs_enabled)
-# define lockdep_hardirq_enter() \
-do { \
- if (!current->hardirq_context++) \
- current->hardirq_threaded = 0; \
+# define lockdep_hardirq_enter() \
+do { \
+ if (this_cpu_inc_return(hardirq_context) == 1) \
+ current->hardirq_threaded = 0; \
} while (0)
# define lockdep_hardirq_threaded() \
do { \
@@ -50,7 +55,7 @@ do { \
} while (0)
# define lockdep_hardirq_exit() \
do { \
- current->hardirq_context--; \
+ this_cpu_dec(hardirq_context); \
} while (0)
# define lockdep_softirq_enter() \
do { \
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -20,6 +20,7 @@ extern int lock_stat;
#define MAX_LOCKDEP_SUBCLASSES 8UL

#include <linux/types.h>
+#include <asm/percpu.h>

enum lockdep_wait_type {
LD_WAIT_INV = 0, /* not checked, catch all */
@@ -703,28 +704,29 @@ do { \
lock_release(&(lock)->dep_map, _THIS_IP_); \
} while (0)

-#define lockdep_assert_irqs_enabled() do { \
- WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- !current->hardirqs_enabled, \
- "IRQs not enabled as expected\n"); \
- } while (0)
+DECLARE_PER_CPU(int, hardirqs_enabled);
+DECLARE_PER_CPU(int, hardirq_context);

-#define lockdep_assert_irqs_disabled() do { \
- WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- current->hardirqs_enabled, \
- "IRQs not disabled as expected\n"); \
- } while (0)
+#define lockdep_assert_irqs_enabled() \
+do { \
+ WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled)); \
+} while (0)

-#define lockdep_assert_in_irq() do { \
- WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- !current->hardirq_context, \
- "Not in hardirq as expected\n"); \
- } while (0)
+#define lockdep_assert_irqs_disabled() \
+do { \
+ WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled)); \
+} while (0)
+
+#define lockdep_assert_in_irq() \
+do { \
+ WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context)); \
+} while (0)

#else
# define might_lock(lock) do { } while (0)
# define might_lock_read(lock) do { } while (0)
# define might_lock_nested(lock, subclass) do { } while (0)
+
# define lockdep_assert_irqs_enabled() do { } while (0)
# define lockdep_assert_irqs_disabled() do { } while (0)
# define lockdep_assert_in_irq() do { } while (0)
@@ -734,7 +736,7 @@ do { \

# define lockdep_assert_RT_in_threaded_ctx() do { \
WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- current->hardirq_context && \
+ lockdep_hardirq_context(current) && \
!(current->hardirq_threaded || current->irq_config), \
"Not in threaded context on PREEMPT_RT as expected\n"); \
} while (0)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -991,8 +991,6 @@ struct task_struct {
unsigned long hardirq_disable_ip;
unsigned int hardirq_enable_event;
unsigned int hardirq_disable_event;
- int hardirqs_enabled;
- int hardirq_context;
u64 hardirq_chain_key;
unsigned long softirq_disable_ip;
unsigned long softirq_enable_ip;
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1954,8 +1954,8 @@ static __latent_entropy struct task_stru

rt_mutex_init_task(p);

+ lockdep_assert_irqs_enabled();
#ifdef CONFIG_PROVE_LOCKING
- DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
retval = -EAGAIN;
@@ -2036,7 +2036,6 @@ static __latent_entropy struct task_stru
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
p->irq_events = 0;
- p->hardirqs_enabled = 0;
p->hardirq_enable_ip = 0;
p->hardirq_enable_event = 0;
p->hardirq_disable_ip = _THIS_IP_;
@@ -2046,7 +2045,6 @@ static __latent_entropy struct task_stru
p->softirq_enable_event = 0;
p->softirq_disable_ip = 0;
p->softirq_disable_event = 0;
- p->hardirq_context = 0;
p->softirq_context = 0;
#endif

--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2062,9 +2062,9 @@ print_bad_irq_dependency(struct task_str
pr_warn("-----------------------------------------------------\n");
pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
curr->comm, task_pid_nr(curr),
- curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
+ lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
- curr->hardirqs_enabled,
+ lockdep_hardirqs_enabled(curr),
curr->softirqs_enabled);
print_lock(next);

@@ -3658,7 +3658,7 @@ void lockdep_hardirqs_on_prepare(unsigne
if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
return;

- if (unlikely(current->hardirqs_enabled)) {
+ if (unlikely(lockdep_hardirqs_enabled(current))) {
/*
* Neither irq nor preemption are disabled here
* so this is racy by nature but losing one hit
@@ -3686,7 +3686,7 @@ void lockdep_hardirqs_on_prepare(unsigne
* Can't allow enabling interrupts while in an interrupt handler,
* that's general bad form and such. Recursion, limited stack etc..
*/
- if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
+ if (DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context(current)))
return;

current->hardirq_chain_key = current->curr_chain_key;
@@ -3724,7 +3724,7 @@ void noinstr lockdep_hardirqs_on(unsigne
if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
return;

- if (curr->hardirqs_enabled) {
+ if (lockdep_hardirqs_enabled(curr)) {
/*
* Neither irq nor preemption are disabled here
* so this is racy by nature but losing one hit
@@ -3751,7 +3751,7 @@ void noinstr lockdep_hardirqs_on(unsigne

skip_checks:
/* we'll do an OFF -> ON transition: */
- curr->hardirqs_enabled = 1;
+ this_cpu_write(hardirqs_enabled, 1);
curr->hardirq_enable_ip = ip;
curr->hardirq_enable_event = ++curr->irq_events;
debug_atomic_inc(hardirqs_on_events);
@@ -3783,11 +3783,11 @@ void noinstr lockdep_hardirqs_off(unsign
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;

- if (curr->hardirqs_enabled) {
+ if (lockdep_hardirqs_enabled(curr)) {
/*
* We have done an ON -> OFF transition:
*/
- curr->hardirqs_enabled = 0;
+ this_cpu_write(hardirqs_enabled, 0);
curr->hardirq_disable_ip = ip;
curr->hardirq_disable_event = ++curr->irq_events;
debug_atomic_inc(hardirqs_off_events);
@@ -3832,7 +3832,7 @@ void lockdep_softirqs_on(unsigned long i
* usage bit for all held locks, if hardirqs are
* enabled too:
*/
- if (curr->hardirqs_enabled)
+ if (lockdep_hardirqs_enabled(curr))
mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ);
lockdep_recursion_finish();
}
@@ -3881,7 +3881,7 @@ mark_usage(struct task_struct *curr, str
*/
if (!hlock->trylock) {
if (hlock->read) {
- if (curr->hardirq_context)
+ if (lockdep_hardirq_context(curr))
if (!mark_lock(curr, hlock,
LOCK_USED_IN_HARDIRQ_READ))
return 0;
@@ -3890,7 +3890,7 @@ mark_usage(struct task_struct *curr, str
LOCK_USED_IN_SOFTIRQ_READ))
return 0;
} else {
- if (curr->hardirq_context)
+ if (lockdep_hardirq_context(curr))
if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ))
return 0;
if (curr->softirq_context)
@@ -3928,7 +3928,7 @@ mark_usage(struct task_struct *curr, str

static inline unsigned int task_irq_context(struct task_struct *task)
{
- return LOCK_CHAIN_HARDIRQ_CONTEXT * !!task->hardirq_context +
+ return LOCK_CHAIN_HARDIRQ_CONTEXT * !!lockdep_hardirq_context(task) +
LOCK_CHAIN_SOFTIRQ_CONTEXT * !!task->softirq_context;
}

@@ -4021,7 +4021,7 @@ static inline short task_wait_context(st
* Set appropriate wait type for the context; for IRQs we have to take
* into account force_irqthread as that is implied by PREEMPT_RT.
*/
- if (curr->hardirq_context) {
+ if (lockdep_hardirq_context(curr)) {
/*
* Check if force_irqthreads will run us threaded.
*/
@@ -4864,11 +4864,11 @@ static void check_flags(unsigned long fl
return;

if (irqs_disabled_flags(flags)) {
- if (DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled)) {
+ if (DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled(current))) {
printk("possible reason: unannotated irqs-off.\n");
}
} else {
- if (DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled)) {
+ if (DEBUG_LOCKS_WARN_ON(!lockdep_hardirqs_enabled(current))) {
printk("possible reason: unannotated irqs-on.\n");
}
}
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -107,6 +107,12 @@ static bool ksoftirqd_running(unsigned l
* where hardirqs are disabled legitimately:
*/
#ifdef CONFIG_TRACE_IRQFLAGS
+
+DEFINE_PER_CPU(int, hardirqs_enabled);
+DEFINE_PER_CPU(int, hardirq_context);
+EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
+EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
+
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
unsigned long flags;



2020-06-23 15:03:26

by Ahmed S. Darwish

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Tue, Jun 23, 2020 at 10:36:52AM +0200, Peter Zijlstra wrote:
...
> -#define lockdep_assert_irqs_disabled() do { \
> - WARN_ONCE(debug_locks && !current->lockdep_recursion && \
> - current->hardirqs_enabled, \
> - "IRQs not disabled as expected\n"); \
> - } while (0)
> +#define lockdep_assert_irqs_enabled() \
> +do { \
> + WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled)); \
> +} while (0)
>

Can we add a small comment on top of lockdep_off(), stating that lockdep
IRQ tracking will still be kept after a lockdep_off call?

thanks,

--
Ahmed S. Darwish
Linutronix GmbH
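
[ Editor's note: a sketch of the kind of comment being requested, assuming
  the behaviour this series introduces (IRQ state tracking continuing past
  lockdep_off()); no such comment was actually proposed in the thread: ]

/*
 * lockdep_off() only suppresses lock dependency tracking for the current
 * task. Hardirq state is per-cpu and trace_hardirqs_on()/off() keep
 * updating it regardless, so the IRQ-tracking asserts stay live.
 */
void lockdep_off(void);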

2020-06-23 15:28:04

by Peter Zijlstra

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Tue, Jun 23, 2020 at 05:00:31PM +0200, Ahmed S. Darwish wrote:
> On Tue, Jun 23, 2020 at 10:36:52AM +0200, Peter Zijlstra wrote:
> ...
> > -#define lockdep_assert_irqs_disabled() do { \
> > - WARN_ONCE(debug_locks && !current->lockdep_recursion && \
> > - current->hardirqs_enabled, \
> > - "IRQs not disabled as expected\n"); \
> > - } while (0)
> > +#define lockdep_assert_irqs_enabled() \
> > +do { \
> > + WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled)); \
> > +} while (0)
> >
>
> Can we add a small comment on top of lockdep_off(), stating that lockdep
> IRQ tracking will still be kept after a lockdep_off call?

That would only legitimize lockdep_off(). The only comment I want to put
on that is: "if you use this, you're doing it wrong".

2020-06-23 16:16:00

by Ahmed S. Darwish

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Tue, Jun 23, 2020 at 05:24:50PM +0200, Peter Zijlstra wrote:
> On Tue, Jun 23, 2020 at 05:00:31PM +0200, Ahmed S. Darwish wrote:
> > On Tue, Jun 23, 2020 at 10:36:52AM +0200, Peter Zijlstra wrote:
> > ...
> > > -#define lockdep_assert_irqs_disabled() do { \
> > > - WARN_ONCE(debug_locks && !current->lockdep_recursion && \
> > > - current->hardirqs_enabled, \
> > > - "IRQs not disabled as expected\n"); \
> > > - } while (0)
> > > +#define lockdep_assert_irqs_enabled() \
> > > +do { \
> > > + WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled)); \
> > > +} while (0)
> > >
> >
> > Can we add a small comment on top of lockdep_off(), stating that lockdep
> > IRQ tracking will still be kept after a lockdep_off call?
>
> That would only legitimize lockdep_off(). The only comment I want to put
> on that is: "if you use this, you're doing it wrong".
>

Well, freshly merged code is using it. For example, KCSAN:

=> f1bc96210c6a ("kcsan: Make KCSAN compatible with lockdep")
=> kernel/kcsan/report.c:

void kcsan_report(...)
{
...
/*
* With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
* we do not turn off lockdep here; this could happen due to recursion
* into lockdep via KCSAN if we detect a race in utilities used by
* lockdep.
*/
lockdep_off();
...
}

thanks,

--
Ahmed S. Darwish
Linutronix GmbH

2020-06-23 16:39:16

by Peter Zijlstra

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Tue, Jun 23, 2020 at 06:13:21PM +0200, Ahmed S. Darwish wrote:
> Well, freshly merged code is using it. For example, KCSAN:
>
> => f1bc96210c6a ("kcsan: Make KCSAN compatible with lockdep")
> => kernel/kcsan/report.c:
>
> void kcsan_report(...)
> {
> ...
> /*
> * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
> * we do not turn off lockdep here; this could happen due to recursion
> * into lockdep via KCSAN if we detect a race in utilities used by
> * lockdep.
> */
> lockdep_off();
> ...
> }

Marco, do you remember what exactly happened there? Because I'm about to
wreck that. That is, I'm going to make TRACE_IRQFLAGS ignore
lockdep_off().

2020-06-23 18:03:05

by Marco Elver

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Tue, Jun 23, 2020 at 06:37PM +0200, Peter Zijlstra wrote:
> On Tue, Jun 23, 2020 at 06:13:21PM +0200, Ahmed S. Darwish wrote:
> > Well, freshly merged code is using it. For example, KCSAN:
> >
> > => f1bc96210c6a ("kcsan: Make KCSAN compatible with lockdep")
> > => kernel/kcsan/report.c:
> >
> > void kcsan_report(...)
> > {
> > ...
> > /*
> > * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
> > * we do not turn off lockdep here; this could happen due to recursion
> > * into lockdep via KCSAN if we detect a race in utilities used by
> > * lockdep.
> > */
> > lockdep_off();
> > ...
> > }
>
> Marco, do you remember what exactly happened there? Because I'm about to
> wreck that. That is, I'm going to make TRACE_IRQFLAGS ignore
> lockdep_off().

Yeah, I was trying to squash any kind of recursion:

lockdep -> other libs ->
-> KCSAN
-> print report
-> dump stack, printk and friends
-> lockdep -> other libs
-> KCSAN ...

Some history:

* Initial patch to fix:
https://lore.kernel.org/lkml/[email protected]/

* KCSAN+lockdep+ftrace:
https://lore.kernel.org/lkml/[email protected]/

lockdep now has KCSAN_SANITIZE := n, but we still need to ensure that
there are no paths out of lockdep, or the IRQ flags tracing code, that
might lead through other libs, through KCSAN, libs used to generate a
report, and back to lockdep.

I never quite figured out the exact trace that led to corruption, but
avoiding any kind of potential for recursion was the only thing that
would avoid the check_flags() warnings.

Thanks,
-- Marco

2020-06-23 18:14:28

by Peter Zijlstra

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Tue, Jun 23, 2020 at 07:59:57PM +0200, Marco Elver wrote:
> On Tue, Jun 23, 2020 at 06:37PM +0200, Peter Zijlstra wrote:
> > On Tue, Jun 23, 2020 at 06:13:21PM +0200, Ahmed S. Darwish wrote:
> > > Well, freshly merged code is using it. For example, KCSAN:
> > >
> > > => f1bc96210c6a ("kcsan: Make KCSAN compatible with lockdep")
> > > => kernel/kcsan/report.c:
> > >
> > > void kcsan_report(...)
> > > {
> > > ...
> > > /*
> > > * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
> > > * we do not turn off lockdep here; this could happen due to recursion
> > > * into lockdep via KCSAN if we detect a race in utilities used by
> > > * lockdep.
> > > */
> > > lockdep_off();
> > > ...
> > > }
> >
> > Marco, do you remember what exactly happened there? Because I'm about to
> > wreck that. That is, I'm going to make TRACE_IRQFLAGS ignore
> > lockdep_off().
>
> Yeah, I was trying to squash any kind of recursion:
>
> lockdep -> other libs ->
> -> KCSAN
> -> print report
> -> dump stack, printk and friends
> -> lockdep -> other libs
> -> KCSAN ...
>
> Some history:
>
> * Initial patch to fix:
> https://lore.kernel.org/lkml/[email protected]/

That patch is weird; just :=n on lockdep.c should've cured that, the
rest is massive overkill.

> * KCSAN+lockdep+ftrace:
> https://lore.kernel.org/lkml/[email protected]/

That doesn't really have anything useful..

> lockdep now has KCSAN_SANITIZE := n, but we still need to ensure that
> there are no paths out of lockdep, or the IRQ flags tracing code, that
> might lead through other libs, through KCSAN, libs used to generate a
> report, and back to lockdep.
>
> I never quite figured out the exact trace that led to corruption, but
> avoiding any kind of potential for recursion was the only thing that
> would avoid the check_flags() warnings.

Fair enough; I'll rip it all up and boot a KCSAN kernel, see what if
anything happens.

2020-06-23 18:42:52

by Marco Elver

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Tue, 23 Jun 2020 at 20:13, Peter Zijlstra <[email protected]> wrote:
>
> On Tue, Jun 23, 2020 at 07:59:57PM +0200, Marco Elver wrote:
> > On Tue, Jun 23, 2020 at 06:37PM +0200, Peter Zijlstra wrote:
> > > On Tue, Jun 23, 2020 at 06:13:21PM +0200, Ahmed S. Darwish wrote:
> > > > Well, freshly merged code is using it. For example, KCSAN:
> > > >
> > > > => f1bc96210c6a ("kcsan: Make KCSAN compatible with lockdep")
> > > > => kernel/kcsan/report.c:
> > > >
> > > > void kcsan_report(...)
> > > > {
> > > > ...
> > > > /*
> > > > * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
> > > > * we do not turn off lockdep here; this could happen due to recursion
> > > > * into lockdep via KCSAN if we detect a race in utilities used by
> > > > * lockdep.
> > > > */
> > > > lockdep_off();
> > > > ...
> > > > }
> > >
> > > Marco, do you remember what exactly happened there? Because I'm about to
> > > wreck that. That is, I'm going to make TRACE_IRQFLAGS ignore
> > > lockdep_off().
> >
> > Yeah, I was trying to squash any kind of recursion:
> >
> > lockdep -> other libs ->
> > -> KCSAN
> > -> print report
> > -> dump stack, printk and friends
> > -> lockdep -> other libs
> > -> KCSAN ...
> >
> > Some history:
> >
> > * Initial patch to fix:
> > https://lore.kernel.org/lkml/[email protected]/
>
> That patch is weird; just :=n on lockdep.c should've cured that, the
> rest is massive overkill.
>
> > * KCSAN+lockdep+ftrace:
> > https://lore.kernel.org/lkml/[email protected]/
>
> That doesn't really have anything useful..
>
> > lockdep now has KCSAN_SANITIZE := n, but we still need to ensure that
> > there are no paths out of lockdep, or the IRQ flags tracing code, that
> > might lead through other libs, through KCSAN, libs used to generate a
> > report, and back to lockdep.
> >
> > I never quite figured out the exact trace that led to corruption, but
> > avoiding any kind of potential for recursion was the only thing that
> > would avoid the check_flags() warnings.
>
> Fair enough; I'll rip it all up and boot a KCSAN kernel, see what if
> anything happens.

Thanks!

This was happening with Qian Cai's (Cc'd) test cases. If the kernel or
this patch changed things around so this doesn't happen anymore
regardless, then I don't see a problem.

Thanks,
-- Marco

2020-06-23 19:17:29

by Marco Elver

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Tue, Jun 23, 2020 at 08:39PM +0200, Marco Elver wrote:
> On Tue, 23 Jun 2020 at 20:13, Peter Zijlstra <[email protected]> wrote:
> >
> > On Tue, Jun 23, 2020 at 07:59:57PM +0200, Marco Elver wrote:
> > > On Tue, Jun 23, 2020 at 06:37PM +0200, Peter Zijlstra wrote:
> > > > On Tue, Jun 23, 2020 at 06:13:21PM +0200, Ahmed S. Darwish wrote:
> > > > > Well, freshly merged code is using it. For example, KCSAN:
> > > > >
> > > > > => f1bc96210c6a ("kcsan: Make KCSAN compatible with lockdep")
> > > > > => kernel/kcsan/report.c:
> > > > >
> > > > > void kcsan_report(...)
> > > > > {
> > > > > ...
> > > > > /*
> > > > > * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
> > > > > * we do not turn off lockdep here; this could happen due to recursion
> > > > > * into lockdep via KCSAN if we detect a race in utilities used by
> > > > > * lockdep.
> > > > > */
> > > > > lockdep_off();
> > > > > ...
> > > > > }
> > > >
> > > > Marco, do you remember what exactly happened there? Because I'm about to
> > > > wreck that. That is, I'm going to make TRACE_IRQFLAGS ignore
> > > > lockdep_off().
> > >
> > > Yeah, I was trying to squash any kind of recursion:
> > >
> > > lockdep -> other libs ->
> > > -> KCSAN
> > > -> print report
> > > -> dump stack, printk and friends
> > > -> lockdep -> other libs
> > > -> KCSAN ...
> > >
> > > Some history:
> > >
> > > * Initial patch to fix:
> > > https://lore.kernel.org/lkml/[email protected]/
> >
> > That patch is weird; just :=n on lockdep.c should've cured that, the
> > rest is massive overkill.
> >
> > > * KCSAN+lockdep+ftrace:
> > > https://lore.kernel.org/lkml/[email protected]/
> >
> > That doesn't really have anything useful..
> >
> > > lockdep now has KCSAN_SANITIZE := n, but we still need to ensure that
> > > there are no paths out of lockdep, or the IRQ flags tracing code, that
> > > might lead through other libs, through KCSAN, libs used to generate a
> > > report, and back to lockdep.
> > >
> > > I never quite figured out the exact trace that led to corruption, but
> > > avoiding any kind of potential for recursion was the only thing that
> > > would avoid the check_flags() warnings.
> >
> > Fair enough; I'll rip it all up and boot a KCSAN kernel, see what if
> > anything happens.
>
> Thanks!
>
> This was happening with Qian Cai's (Cc'd) test cases. If the kernel or
> this patch changed things around so this doesn't happen anymore
> regardless, then I don't see a problem.

I see the below report when I boot with your branch + KCSAN and
PROVE_LOCKING. config attached. Trying to make sense of what's
happening.

Thanks,
-- Marco

------ >8 ------

[ 10.182354] ------------[ cut here ]------------
[ 10.183058] WARNING: CPU: 7 PID: 136 at kernel/locking/lockdep.c:398 lockdep_hardirqs_on_prepare+0x1c6/0x270
[ 10.184347] Modules linked in:
[ 10.184771] CPU: 7 PID: 136 Comm: systemd-journal Not tainted 5.8.0-rc1+ #3
[ 10.185706] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
[ 10.186821] RIP: 0010:lockdep_hardirqs_on_prepare+0x1c6/0x270
[ 10.187594] Code: 75 28 65 48 8b 04 25 28 00 00 00 48 3b 44 24 08 0f 85 b9 00 00 00 48 83 c4 10 5b 41 5e 41 5f c3 65 48 ff 05 d4 24 4e 75 eb d8 <0f> 0b 90 41 c7 86 c4 08 00 00 00 00 00 00 eb c8 e8 65 09 71 01 85
[ 10.190203] RSP: 0018:ffffa7ee802b7848 EFLAGS: 00010017
[ 10.190989] RAX: 0000000000000001 RBX: ffff955e92a34ab0 RCX: 0000000000000001
[ 10.192053] RDX: 0000000000000006 RSI: ffff955e92a34a88 RDI: ffff955e92a341c0
[ 10.193117] RBP: ffffa7ee802b7be8 R08: 0000000000000000 R09: 0000ffffffffffff
[ 10.194186] R10: 0000ffffffffffff R11: 0000ffff8d07e268 R12: 0000000000000001
[ 10.195249] R13: ffffffff8e41bb10 R14: ffff955e92a341c0 R15: 0000000000000001
[ 10.196312] FS: 00007fd6862aa8c0(0000) GS:ffff955e9fd80000(0000) knlGS:0000000000000000
[ 10.197513] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 10.198373] CR2: 00007fd6837dd000 CR3: 0000000812acc001 CR4: 0000000000760ee0
[ 10.199436] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 10.200494] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 10.201554] PKRU: 55555554
[ 10.201967] Call Trace:
[ 10.202348] ? _raw_spin_unlock_irqrestore+0x40/0x70
[ 10.203093] trace_hardirqs_on+0x56/0x60 <----- enter IRQ flags tracing code?
[ 10.203686] _raw_spin_unlock_irqrestore+0x40/0x70 <----- take report_lock
[ 10.204406] prepare_report+0x11f/0x150
[ 10.204986] kcsan_report+0xca/0x6c0 <----- generating a KCSAN report
[ 10.205529] ? __this_cpu_preempt_check+0x18/0x20
[ 10.206236] ? validate_chain+0x104/0x3320
[ 10.206853] ? __lock_acquire+0x9b2/0x10b0
[ 10.207472] ? __lock_acquire+0x9b2/0x10b0
[ 10.208091] ? lock_acquire+0xd3/0x2c0
[ 10.208665] ? kernfs_iop_permission+0x3f/0x170
[ 10.209347] ? lockref_get_not_dead+0x14/0x60
[ 10.210012] ? kernfs_iop_permission+0x3f/0x170
[ 10.210698] ? __mutex_trylock_or_owner+0x9a/0xf0
[ 10.211404] ? kernfs_iop_permission+0x3f/0x170
[ 10.212083] ? __mutex_lock+0x226/0x710
[ 10.212669] kcsan_found_watchpoint+0xe5/0x110
[ 10.213334] kernfs_iop_permission+0x72/0x170
[ 10.214000] ? kernfs_evict_inode+0x40/0x40
[ 10.214632] link_path_walk+0x667/0x840
[ 10.215215] path_lookupat+0x7b/0x4b0
[ 10.215774] filename_lookup+0xe9/0x330
[ 10.216355] ? getname_flags+0x7a/0x370
[ 10.216939] ? __might_fault+0x5/0x10
[ 10.217495] ? strncpy_from_user+0x190/0x240
[ 10.218149] user_path_at_empty+0x3b/0x50
[ 10.218757] do_faccessat+0x1e8/0x430
[ 10.219313] __x64_sys_access+0x33/0x40
[ 10.219896] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 10.220679] do_syscall_64+0x65/0xa0
[ 10.221221] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 10.221987] RIP: 0033:0x7fd6855679c7
[ 10.222528] Code: Bad RIP value.
[ 10.223010] RSP: 002b:00007ffcca601df8 EFLAGS: 00000246 ORIG_RAX: 0000000000000015
[ 10.224123] RAX: ffffffffffffffda RBX: 00007ffcca601e80 RCX: 00007fd6855679c7
[ 10.225173] RDX: 00302e33303a3030 RSI: 0000000000000000 RDI: 00007ffcca601e00
[ 10.226229] RBP: 00007ffcca602f20 R08: 000000000000c0c0 R09: 0000000000000120
[ 10.227283] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffcca602f40
[ 10.228339] R13: 000055878dd137c8 R14: 0000000000000018 R15: 00007ffcca601e00
[ 10.229400] irq event stamp: 63012
[ 10.229922] hardirqs last enabled at (63011): [<ffffffff8adf9f68>] step_into+0x328/0xd20
[ 10.231130] hardirqs last disabled at (63012): [<ffffffff8c24f99e>] _raw_spin_lock_irqsave+0x2e/0x80
[ 10.232415] softirqs last enabled at (62514): [<ffffffff8c400fd2>] asm_call_on_stack+0x12/0x20
[ 10.233564] softirqs last disabled at (62327): [<ffffffff8c400fd2>] asm_call_on_stack+0x12/0x20
[ 10.234718] ---[ end trace 696b39497a6233dc ]---


Attachments:
.config (131.80 kB)

2020-06-23 19:45:23

by Peter Zijlstra

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Tue, Jun 23, 2020 at 09:13:35PM +0200, Marco Elver wrote:
> I see the below report when I boot with your branch + KCSAN and
> PROVE_LOCKING. config attached. Trying to make sense of what's
> happening.

Ah, I was still playing with tip/master + PROVE_LOCKING + KCSAN and
slowly removing parts of that annotation patch to see what would come
unstuck.

I think I just hit a genuine but unavoidable lockdep report on
report_lock.

> ------ >8 ------
>
> [ 10.182354] ------------[ cut here ]------------
> [ 10.183058] WARNING: CPU: 7 PID: 136 at kernel/locking/lockdep.c:398 lockdep_hardirqs_on_prepare+0x1c6/0x270
> [ 10.184347] Modules linked in:
> [ 10.184771] CPU: 7 PID: 136 Comm: systemd-journal Not tainted 5.8.0-rc1+ #3
> [ 10.185706] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
> [ 10.186821] RIP: 0010:lockdep_hardirqs_on_prepare+0x1c6/0x270
> [ 10.187594] Code: 75 28 65 48 8b 04 25 28 00 00 00 48 3b 44 24 08 0f 85 b9 00 00 00 48 83 c4 10 5b 41 5e 41 5f c3 65 48 ff 05 d4 24 4e 75 eb d8 <0f> 0b 90 41 c7 86 c4 08 00 00 00 00 00 00 eb c8 e8 65 09 71 01 85
> [ 10.190203] RSP: 0018:ffffa7ee802b7848 EFLAGS: 00010017
> [ 10.190989] RAX: 0000000000000001 RBX: ffff955e92a34ab0 RCX: 0000000000000001
> [ 10.192053] RDX: 0000000000000006 RSI: ffff955e92a34a88 RDI: ffff955e92a341c0
> [ 10.193117] RBP: ffffa7ee802b7be8 R08: 0000000000000000 R09: 0000ffffffffffff
> [ 10.194186] R10: 0000ffffffffffff R11: 0000ffff8d07e268 R12: 0000000000000001
> [ 10.195249] R13: ffffffff8e41bb10 R14: ffff955e92a341c0 R15: 0000000000000001
> [ 10.196312] FS: 00007fd6862aa8c0(0000) GS:ffff955e9fd80000(0000) knlGS:0000000000000000
> [ 10.197513] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 10.198373] CR2: 00007fd6837dd000 CR3: 0000000812acc001 CR4: 0000000000760ee0
> [ 10.199436] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 10.200494] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 10.201554] PKRU: 55555554
> [ 10.201967] Call Trace:
> [ 10.202348] ? _raw_spin_unlock_irqrestore+0x40/0x70
> [ 10.203093] trace_hardirqs_on+0x56/0x60 <----- enter IRQ flags tracing code?
> [ 10.203686] _raw_spin_unlock_irqrestore+0x40/0x70 <----- take report_lock
> [ 10.204406] prepare_report+0x11f/0x150
> [ 10.204986] kcsan_report+0xca/0x6c0 <----- generating a KCSAN report
> [ 10.212669] kcsan_found_watchpoint+0xe5/0x110

That appears to be warning about a lockdep_recursion underflow, weird.
I'll go stare at it.


2020-06-23 20:27:24

by Peter Zijlstra

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Tue, Jun 23, 2020 at 08:12:32PM +0200, Peter Zijlstra wrote:
> Fair enough; I'll rip it all up and boot a KCSAN kernel, see what if
> anything happens.

OK, so the below patch doesn't seem to have any nasty recursion issues
here. The only 'problem' is that lockdep now sees report_lock can cause
deadlocks.

It is completely right about it too, but I don't suspect there's much we
can do about it, it's pretty much the standard printk() with scheduler
locks held report.

---
diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index 15f67949d11e..732623c30359 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -397,8 +397,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
}

if (!kcsan_interrupt_watcher)
- /* Use raw to avoid lockdep recursion via IRQ flags tracing. */
- raw_local_irq_save(irq_flags);
+ local_irq_save(irq_flags);

watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write);
if (watchpoint == NULL) {
@@ -539,7 +538,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS);
out_unlock:
if (!kcsan_interrupt_watcher)
- raw_local_irq_restore(irq_flags);
+ local_irq_restore(irq_flags);
out:
user_access_restore(ua_flags);
}
diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
index ac5f8345bae9..ef31c1d2dac3 100644
--- a/kernel/kcsan/report.c
+++ b/kernel/kcsan/report.c
@@ -605,14 +605,6 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type,
if (WARN_ON(watchpoint_idx < 0 || watchpoint_idx >= ARRAY_SIZE(other_infos)))
goto out;

- /*
- * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
- * we do not turn off lockdep here; this could happen due to recursion
- * into lockdep via KCSAN if we detect a race in utilities used by
- * lockdep.
- */
- lockdep_off();
-
if (prepare_report(&flags, type, &ai, other_info)) {
/*
* Never report if value_change is FALSE, only if we it is
@@ -628,7 +620,6 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type,
release_report(&flags, other_info);
}

- lockdep_on();
out:
kcsan_enable_current();
}

2020-06-23 21:11:06

by Peter Zijlstra

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Tue, Jun 23, 2020 at 10:24:04PM +0200, Peter Zijlstra wrote:
> On Tue, Jun 23, 2020 at 08:12:32PM +0200, Peter Zijlstra wrote:
> > Fair enough; I'll rip it all up and boot a KCSAN kernel, see what if
> > anything happens.
>
> OK, so the below patch doesn't seem to have any nasty recursion issues
> here. The only 'problem' is that lockdep now sees report_lock can cause
> deadlocks.
>
> It is completely right about it too, but I don't suspect there's much we
> can do about it, it's pretty much the standard printk() with scheduler
> locks held report.

Just for giggles I added the below and that works fine too. Right until
the report_lock deadlock splat of course, thereafter lockdep is
disabled.

diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
index ac5f8345bae9..a011cf0a1611 100644
--- a/kernel/kcsan/report.c
+++ b/kernel/kcsan/report.c
@@ -459,6 +459,8 @@ static void set_other_info_task_blocking(unsigned long *flags,
*/
int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt);

+ lockdep_assert_held(&report_lock);
+
other_info->task = current;
do {
if (is_running) {
@@ -495,6 +497,8 @@ static void set_other_info_task_blocking(unsigned long *flags,
other_info->task == current);
if (is_running)
set_current_state(TASK_RUNNING);
+
+ lockdep_assert_held(&report_lock);
}

/* Populate @other_info; requires that the provided @other_info not in use. */

2020-06-23 21:42:11

by Peter Zijlstra

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Tue, Jun 23, 2020 at 09:13:35PM +0200, Marco Elver wrote:
> [ 10.182354] ------------[ cut here ]------------
> [ 10.183058] WARNING: CPU: 7 PID: 136 at kernel/locking/lockdep.c:398 lockdep_hardirqs_on_prepare+0x1c6/0x270
> [ 10.184347] Modules linked in:
> [ 10.184771] CPU: 7 PID: 136 Comm: systemd-journal Not tainted 5.8.0-rc1+ #3
> [ 10.185706] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
> [ 10.186821] RIP: 0010:lockdep_hardirqs_on_prepare+0x1c6/0x270
> [ 10.187594] Code: 75 28 65 48 8b 04 25 28 00 00 00 48 3b 44 24 08 0f 85 b9 00 00 00 48 83 c4 10 5b 41 5e 41 5f c3 65 48 ff 05 d4 24 4e 75 eb d8 <0f> 0b 90 41 c7 86 c4 08 00 00 00 00 00 00 eb c8 e8 65 09 71 01 85
> [ 10.190203] RSP: 0018:ffffa7ee802b7848 EFLAGS: 00010017
> [ 10.190989] RAX: 0000000000000001 RBX: ffff955e92a34ab0 RCX: 0000000000000001
> [ 10.192053] RDX: 0000000000000006 RSI: ffff955e92a34a88 RDI: ffff955e92a341c0
> [ 10.193117] RBP: ffffa7ee802b7be8 R08: 0000000000000000 R09: 0000ffffffffffff
> [ 10.194186] R10: 0000ffffffffffff R11: 0000ffff8d07e268 R12: 0000000000000001
> [ 10.195249] R13: ffffffff8e41bb10 R14: ffff955e92a341c0 R15: 0000000000000001
> [ 10.196312] FS: 00007fd6862aa8c0(0000) GS:ffff955e9fd80000(0000) knlGS:0000000000000000
> [ 10.197513] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 10.198373] CR2: 00007fd6837dd000 CR3: 0000000812acc001 CR4: 0000000000760ee0
> [ 10.199436] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 10.200494] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 10.201554] PKRU: 55555554
> [ 10.201967] Call Trace:
> [ 10.202348] ? _raw_spin_unlock_irqrestore+0x40/0x70
> [ 10.203093] trace_hardirqs_on+0x56/0x60 <----- enter IRQ flags tracing code?
> [ 10.203686] _raw_spin_unlock_irqrestore+0x40/0x70 <----- take report_lock
> [ 10.204406] prepare_report+0x11f/0x150
> [ 10.204986] kcsan_report+0xca/0x6c0 <----- generating a KCSAN report

Oh, duh.. that's because lockdep_off() ;-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index ab7571c1a1f5..c9ea05edce25 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -395,7 +395,7 @@ void lockdep_init_task(struct task_struct *task)

static __always_inline void lockdep_recursion_finish(void)
{
- if (WARN_ON_ONCE(--current->lockdep_recursion))
+ if (WARN_ON_ONCE((--current->lockdep_recursion) & LOCKDEP_RECURSION_MASK))
current->lockdep_recursion = 0;
}
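
[ Editor's note: for context, a paraphrase of the relevant definitions in
  kernel/locking/lockdep.c of that era. lockdep_off()/lockdep_on() bias
  lockdep_recursion by LOCKDEP_OFF, above the low recursion-count bits, so
  with KCSAN's lockdep_off() in effect the unmasked post-decrement test
  above sees the bias and warns; masking with LOCKDEP_RECURSION_MASK
  checks only the actual recursion count: ]

#define LOCKDEP_RECURSION_BITS	16
#define LOCKDEP_OFF		(1U << LOCKDEP_RECURSION_BITS)
#define LOCKDEP_RECURSION_MASK	(LOCKDEP_OFF - 1)

void lockdep_off(void)
{
	current->lockdep_recursion += LOCKDEP_OFF;
}

void lockdep_on(void)
{
	current->lockdep_recursion -= LOCKDEP_OFF;
}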


2020-06-24 09:06:15

by Peter Zijlstra

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Tue, Jun 23, 2020 at 10:24:04PM +0200, Peter Zijlstra wrote:
> On Tue, Jun 23, 2020 at 08:12:32PM +0200, Peter Zijlstra wrote:
> > Fair enough; I'll rip it all up and boot a KCSAN kernel, see what if
> > anything happens.
>
> OK, so the below patch doesn't seem to have any nasty recursion issues
> here. The only 'problem' is that lockdep now sees report_lock can cause
> deadlocks.
>
> It is completely right about it too, but I don't suspect there's much we
> can do about it, it's pretty much the standard printk() with scheduler
> locks held report.

So I've been getting tons and tons of this:

[ 60.471348] ==================================================================
[ 60.479427] BUG: KCSAN: data-race in __rcu_read_lock / __rcu_read_unlock
[ 60.486909]
[ 60.488572] write (marked) to 0xffff88840fff1cf0 of 4 bytes by interrupt on cpu 1:
[ 60.497026] __rcu_read_lock+0x37/0x60
[ 60.501214] cpuacct_account_field+0x1b/0x170
[ 60.506081] task_group_account_field+0x32/0x160
[ 60.511238] account_system_time+0xe6/0x110
[ 60.515912] update_process_times+0x1d/0xd0
[ 60.520585] tick_sched_timer+0xfc/0x180
[ 60.524967] __hrtimer_run_queues+0x271/0x440
[ 60.529832] hrtimer_interrupt+0x222/0x670
[ 60.534409] __sysvec_apic_timer_interrupt+0xb3/0x1a0
[ 60.540052] asm_call_on_stack+0x12/0x20
[ 60.544434] sysvec_apic_timer_interrupt+0xba/0x130
[ 60.549882] asm_sysvec_apic_timer_interrupt+0x12/0x20
[ 60.555621] delay_tsc+0x7d/0xe0
[ 60.559226] kcsan_setup_watchpoint+0x292/0x4e0
[ 60.564284] __rcu_read_unlock+0x73/0x2c0
[ 60.568763] __unlock_page_memcg+0xda/0xf0
[ 60.573338] unlock_page_memcg+0x32/0x40
[ 60.577721] page_remove_rmap+0x5c/0x200
[ 60.582104] unmap_page_range+0x83c/0xc10
[ 60.586582] unmap_single_vma+0xb0/0x150
[ 60.590963] unmap_vmas+0x81/0xe0
[ 60.594663] exit_mmap+0x135/0x2b0
[ 60.598464] __mmput+0x21/0x150
[ 60.601970] mmput+0x2a/0x30
[ 60.605176] exit_mm+0x2fc/0x350
[ 60.608780] do_exit+0x372/0xff0
[ 60.612385] do_group_exit+0x139/0x140
[ 60.616571] __do_sys_exit_group+0xb/0x10
[ 60.621048] __se_sys_exit_group+0xa/0x10
[ 60.625524] __x64_sys_exit_group+0x1b/0x20
[ 60.630189] do_syscall_64+0x6c/0xe0
[ 60.634182] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 60.639820]
[ 60.641485] read to 0xffff88840fff1cf0 of 4 bytes by task 2430 on cpu 1:
[ 60.648969] __rcu_read_unlock+0x73/0x2c0
[ 60.653446] __unlock_page_memcg+0xda/0xf0
[ 60.658019] unlock_page_memcg+0x32/0x40
[ 60.662400] page_remove_rmap+0x5c/0x200
[ 60.666782] unmap_page_range+0x83c/0xc10
[ 60.671259] unmap_single_vma+0xb0/0x150
[ 60.675641] unmap_vmas+0x81/0xe0
[ 60.679341] exit_mmap+0x135/0x2b0
[ 60.683141] __mmput+0x21/0x150
[ 60.686647] mmput+0x2a/0x30
[ 60.689853] exit_mm+0x2fc/0x350
[ 60.693458] do_exit+0x372/0xff0
[ 60.697062] do_group_exit+0x139/0x140
[ 60.701248] __do_sys_exit_group+0xb/0x10
[ 60.705724] __se_sys_exit_group+0xa/0x10
[ 60.710201] __x64_sys_exit_group+0x1b/0x20
[ 60.714872] do_syscall_64+0x6c/0xe0
[ 60.718864] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 60.724503]
[ 60.726156] Reported by Kernel Concurrency Sanitizer on:
[ 60.732089] CPU: 1 PID: 2430 Comm: sshd Not tainted 5.8.0-rc2-00186-gb4ee11fe08b3-dirty #303
[ 60.741510] Hardware name: Intel Corporation S2600GZ/S2600GZ, BIOS SE5C600.86B.02.02.0002.122320131210 12/23/2013
[ 60.752957] ==================================================================

And I figured a quick way to get rid of that would be something like the
below, seeing how volatile gets auto annotated... but that doesn't seem
to actually work.

What am I missing?



diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 352223664ebd..b08861118e1a 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -351,17 +351,17 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)

static void rcu_preempt_read_enter(void)
{
- current->rcu_read_lock_nesting++;
+ (*(volatile int *)&current->rcu_read_lock_nesting)++;
}

static int rcu_preempt_read_exit(void)
{
- return --current->rcu_read_lock_nesting;
+ return --(*(volatile int *)&current->rcu_read_lock_nesting);
}

static void rcu_preempt_depth_set(int val)
{
- current->rcu_read_lock_nesting = val;
+ WRITE_ONCE(current->rcu_read_lock_nesting, val);
}

/*

2020-06-24 10:18:52

by Marco Elver

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Wed, 24 Jun 2020 at 11:01, Peter Zijlstra <[email protected]> wrote:
>
> On Tue, Jun 23, 2020 at 10:24:04PM +0200, Peter Zijlstra wrote:
> > On Tue, Jun 23, 2020 at 08:12:32PM +0200, Peter Zijlstra wrote:
> > > Fair enough; I'll rip it all up and boot a KCSAN kernel, see what if
> > > anything happens.
> >
> > OK, so the below patch doesn't seem to have any nasty recursion issues
> > here. The only 'problem' is that lockdep now sees report_lock can cause
> > deadlocks.
> >
> > It is completely right about it too, but I don't suspect there's much we
> > can do about it, it's pretty much the standard printk() with scheduler
> > locks held report.
>
> So I've been getting tons and tons of this:
>
> [ 60.471348] ==================================================================
> [ 60.479427] BUG: KCSAN: data-race in __rcu_read_lock / __rcu_read_unlock
> [ 60.486909]
> [ 60.488572] write (marked) to 0xffff88840fff1cf0 of 4 bytes by interrupt on cpu 1:
> [ 60.497026] __rcu_read_lock+0x37/0x60
> [ 60.501214] cpuacct_account_field+0x1b/0x170
> [ 60.506081] task_group_account_field+0x32/0x160
> [ 60.511238] account_system_time+0xe6/0x110
> [ 60.515912] update_process_times+0x1d/0xd0
> [ 60.520585] tick_sched_timer+0xfc/0x180
> [ 60.524967] __hrtimer_run_queues+0x271/0x440
> [ 60.529832] hrtimer_interrupt+0x222/0x670
> [ 60.534409] __sysvec_apic_timer_interrupt+0xb3/0x1a0
> [ 60.540052] asm_call_on_stack+0x12/0x20
> [ 60.544434] sysvec_apic_timer_interrupt+0xba/0x130
> [ 60.549882] asm_sysvec_apic_timer_interrupt+0x12/0x20
> [ 60.555621] delay_tsc+0x7d/0xe0
> [ 60.559226] kcsan_setup_watchpoint+0x292/0x4e0
> [ 60.564284] __rcu_read_unlock+0x73/0x2c0
> [ 60.568763] __unlock_page_memcg+0xda/0xf0
> [ 60.573338] unlock_page_memcg+0x32/0x40
> [ 60.577721] page_remove_rmap+0x5c/0x200
> [ 60.582104] unmap_page_range+0x83c/0xc10
> [ 60.586582] unmap_single_vma+0xb0/0x150
> [ 60.590963] unmap_vmas+0x81/0xe0
> [ 60.594663] exit_mmap+0x135/0x2b0
> [ 60.598464] __mmput+0x21/0x150
> [ 60.601970] mmput+0x2a/0x30
> [ 60.605176] exit_mm+0x2fc/0x350
> [ 60.608780] do_exit+0x372/0xff0
> [ 60.612385] do_group_exit+0x139/0x140
> [ 60.616571] __do_sys_exit_group+0xb/0x10
> [ 60.621048] __se_sys_exit_group+0xa/0x10
> [ 60.625524] __x64_sys_exit_group+0x1b/0x20
> [ 60.630189] do_syscall_64+0x6c/0xe0
> [ 60.634182] entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [ 60.639820]
> [ 60.641485] read to 0xffff88840fff1cf0 of 4 bytes by task 2430 on cpu 1:
> [ 60.648969] __rcu_read_unlock+0x73/0x2c0
> [ 60.653446] __unlock_page_memcg+0xda/0xf0
> [ 60.658019] unlock_page_memcg+0x32/0x40
> [ 60.662400] page_remove_rmap+0x5c/0x200
> [ 60.666782] unmap_page_range+0x83c/0xc10
> [ 60.671259] unmap_single_vma+0xb0/0x150
> [ 60.675641] unmap_vmas+0x81/0xe0
> [ 60.679341] exit_mmap+0x135/0x2b0
> [ 60.683141] __mmput+0x21/0x150
> [ 60.686647] mmput+0x2a/0x30
> [ 60.689853] exit_mm+0x2fc/0x350
> [ 60.693458] do_exit+0x372/0xff0
> [ 60.697062] do_group_exit+0x139/0x140
> [ 60.701248] __do_sys_exit_group+0xb/0x10
> [ 60.705724] __se_sys_exit_group+0xa/0x10
> [ 60.710201] __x64_sys_exit_group+0x1b/0x20
> [ 60.714872] do_syscall_64+0x6c/0xe0
> [ 60.718864] entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [ 60.724503]
> [ 60.726156] Reported by Kernel Concurrency Sanitizer on:
> [ 60.732089] CPU: 1 PID: 2430 Comm: sshd Not tainted 5.8.0-rc2-00186-gb4ee11fe08b3-dirty #303
> [ 60.741510] Hardware name: Intel Corporation S2600GZ/S2600GZ, BIOS SE5C600.86B.02.02.0002.122320131210 12/23/2013
> [ 60.752957] ==================================================================
>
> And I figured a quick way to get rid of that would be something like the
> below, seeing how volatile gets auto annotated... but that doesn't seem
> to actually work.
>
> What am I missing?

There's one more in include/linux/rcupdate.h. I suggested this at some point:

https://lore.kernel.org/lkml/[email protected]/

To avoid volatiles as I don't think they are needed here.

[ Still testing your other patches for KCSAN, will send another reply there. ]

Thanks,
-- Marco

2020-06-24 11:35:56

by Marco Elver

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Tue, Jun 23, 2020 at 10:24PM +0200, Peter Zijlstra wrote:
> On Tue, Jun 23, 2020 at 08:12:32PM +0200, Peter Zijlstra wrote:
> > Fair enough; I'll rip it all up and boot a KCSAN kernel, see what if
> > anything happens.
>
> OK, so the below patch doesn't seem to have any nasty recursion issues
> here. The only 'problem' is that lockdep now sees report_lock can cause
> deadlocks.

Thanks, using non-raw now makes sense.

> It is completely right about it too, but I don't suspect there's much we
> can do about it, it's pretty much the standard printk() with scheduler
> locks held report.

Right, I think we just have to tolerate the potential risk of deadlock
until there is a way to make all the code that prints in print_report()
scheduler-safe (that includes stack_trace_print()).

Based on your suggested change to core.c, how about the below patch?
Anything we've missed? If you think it's reasonable, please carry it
with the IRQ state tracking changes.

As far as I can tell there are no more warnings together with the other
patch you sent to add '& LOCKDEP_RECURSION_MASK'.

Thanks,
-- Marco

------ >8 ------

From: Marco Elver <[email protected]>
Date: Wed, 24 Jun 2020 11:23:22 +0200
Subject: [PATCH] kcsan: Make KCSAN compatible with new IRQ state tracking

The new IRQ state tracking code does not honor lockdep_off(), and as
such we should again permit tracing by using non-raw functions in
core.c. Update the lockdep_off() comment in report.c, to reflect the
fact there is still a potential risk of deadlock due to using printk()
from scheduler code.

Suggested-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Marco Elver <[email protected]>
---
kernel/kcsan/core.c | 5 ++---
kernel/kcsan/report.c | 9 +++++----
2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index 15f67949d11e..732623c30359 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -397,8 +397,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
}

if (!kcsan_interrupt_watcher)
- /* Use raw to avoid lockdep recursion via IRQ flags tracing. */
- raw_local_irq_save(irq_flags);
+ local_irq_save(irq_flags);

watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write);
if (watchpoint == NULL) {
@@ -539,7 +538,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS);
out_unlock:
if (!kcsan_interrupt_watcher)
- raw_local_irq_restore(irq_flags);
+ local_irq_restore(irq_flags);
out:
user_access_restore(ua_flags);
}
diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
index ac5f8345bae9..6b2fb1a6d8cd 100644
--- a/kernel/kcsan/report.c
+++ b/kernel/kcsan/report.c
@@ -606,10 +606,11 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type,
goto out;

/*
- * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
- * we do not turn off lockdep here; this could happen due to recursion
- * into lockdep via KCSAN if we detect a race in utilities used by
- * lockdep.
+ * Because we may generate reports when we're in scheduler code, the use
+ * of printk() could deadlock. Until such time that all printing code
+ * called in print_report() is scheduler-safe, accept the risk, and just
+ * get our message out. As such, also disable lockdep to hide the
+ * warning, and avoid disabling lockdep for the rest of the kernel.
*/
lockdep_off();

--
2.27.0.111.gc72c7da667-goog

2020-06-24 12:36:20

by Peter Zijlstra

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Wed, Jun 24, 2020 at 12:17:56PM +0200, Marco Elver wrote:
> On Wed, 24 Jun 2020 at 11:01, Peter Zijlstra <[email protected]> wrote:

> > And I figured a quick way to get rid of that would be something like the
> > below, seeing how volatile gets auto annotated... but that doesn't seem
> > to actually work.
> >
> > What am I missing?
>
> There's one more in include/linux/rcupdate.h. I suggested this at some point:
>
> https://lore.kernel.org/lkml/[email protected]/
>
> To avoid volatiles as I don't think they are needed here.

Urgghh.. local_t is very expensive for this. The current code is
actually fine, even on load-store architectures. Using local_t will only
result in it being more expensive for no gain.

I'll go put data_race() around it.
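
[ Editor's note: a sketch of what "put data_race() around it" could look
  like for the rcu_preempt_read_{enter,exit}() helpers quoted above.
  data_race() tells KCSAN the access is an intentional, tolerated race,
  without imposing volatile semantics on the compiler; the annotation
  that eventually landed may differ: ]

static void rcu_preempt_read_enter(void)
{
	/* Races only with this task's own interrupt handlers; the plain
	 * RMW is fine per the discussion, so just hide it from KCSAN. */
	data_race(current->rcu_read_lock_nesting++);
}

static int rcu_preempt_read_exit(void)
{
	return data_race(--current->rcu_read_lock_nesting);
}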

2020-06-24 15:20:20

by Peter Zijlstra

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Wed, Jun 24, 2020 at 01:32:46PM +0200, Marco Elver wrote:
> From: Marco Elver <[email protected]>
> Date: Wed, 24 Jun 2020 11:23:22 +0200
> Subject: [PATCH] kcsan: Make KCSAN compatible with new IRQ state tracking
>
> The new IRQ state tracking code does not honor lockdep_off(), and as
> such we should again permit tracing by using non-raw functions in
> core.c. Update the lockdep_off() comment in report.c, to reflect the
> fact there is still a potential risk of deadlock due to using printk()
> from scheduler code.
>
> Suggested-by: Peter Zijlstra (Intel) <[email protected]>
> Signed-off-by: Marco Elver <[email protected]>

Thanks!

I've put this in front of the series at hand. I'll wait a little while
longer for arch people to give feedback on their header patches before I
stuff the lot into tip/locking/core.

2020-06-30 06:00:57

by Ahmed S. Darwish

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

Peter Zijlstra wrote:

...

> -#define lockdep_assert_irqs_disabled() do { \
> - WARN_ONCE(debug_locks && !current->lockdep_recursion && \
> - current->hardirqs_enabled, \
> - "IRQs not disabled as expected\n"); \
> - } while (0)

...

> +#define lockdep_assert_irqs_disabled() \
> +do { \
> + WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled)); \
> +} while (0)

I think it would be nice to keep the "IRQs not disabled as expected"
message. It makes the lockdep splat much more readable.

This is similarly the case for the v3 lockdep preemption macros:

https://lkml.kernel.org/r/[email protected]

I did not add a message there though, to stay in sync with the IRQ macros above.

Thanks,

--
Ahmed S. Darwish
Linutronix GmbH

2020-06-30 09:42:25

by Peter Zijlstra

Subject: Re: [PATCH v4 7/8] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

On Tue, Jun 30, 2020 at 07:59:39AM +0200, Ahmed S. Darwish wrote:
> Peter Zijlstra wrote:
>
> ...
>
> > -#define lockdep_assert_irqs_disabled() do { \
> > - WARN_ONCE(debug_locks && !current->lockdep_recursion && \
> > - current->hardirqs_enabled, \
> > - "IRQs not disabled as expected\n"); \
> > - } while (0)
>
> ...
>
> > +#define lockdep_assert_irqs_disabled() \
> > +do { \
> > + WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled)); \
> > +} while (0)
>
> I think it would be nice to keep the "IRQs not disabled as expected"
> message. It makes the lockdep splat much more readable.
>
> This is similarly the case for the v3 lockdep preemption macros:
>
> https://lkml.kernel.org/r/[email protected]
>
> I did not add a message there though, to stay in sync with the IRQ macros above.

Hurmph.. the file:line output of a splat is usually all I look at; also,
__WARN_printf() generates such atrocious crap code that I try to not use
it.

I suppose I should do a __WARN_str() or something, but then people are
unlikely to want to use that, too much variation etc. :/

Cursed if you do, cursed if you don't.
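
[ Editor's note: a sketch of the variant under discussion -- the new
  per-cpu test combined with the old descriptive message, at the
  __WARN_printf() code-generation cost Peter objects to; not what was
  merged: ]

#define lockdep_assert_irqs_disabled()					\
do {									\
	WARN_ONCE(debug_locks && this_cpu_read(hardirqs_enabled),	\
		  "IRQs not disabled as expected\n");			\
} while (0)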

Subject: [tip: locking/core] kcsan: Make KCSAN compatible with new IRQ state tracking

The following commit has been merged into the locking/core branch of tip:

Commit-ID: 248591f5d257a19c1cba9ab9da3536bfbc2f0149
Gitweb: https://git.kernel.org/tip/248591f5d257a19c1cba9ab9da3536bfbc2f0149
Author: Marco Elver <[email protected]>
AuthorDate: Wed, 24 Jun 2020 13:32:46 +02:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Fri, 10 Jul 2020 12:00:00 +02:00

kcsan: Make KCSAN compatible with new IRQ state tracking

The new IRQ state tracking code does not honor lockdep_off(), and as
such we should again permit tracing by using non-raw functions in
core.c. Update the lockdep_off() comment in report.c, to reflect the
fact there is still a potential risk of deadlock due to using printk()
from scheduler code.

Suggested-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Marco Elver <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Ingo Molnar <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
kernel/kcsan/core.c | 5 ++---
kernel/kcsan/report.c | 9 +++++----
2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index 15f6794..732623c 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -397,8 +397,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
}

if (!kcsan_interrupt_watcher)
- /* Use raw to avoid lockdep recursion via IRQ flags tracing. */
- raw_local_irq_save(irq_flags);
+ local_irq_save(irq_flags);

watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write);
if (watchpoint == NULL) {
@@ -539,7 +538,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS);
out_unlock:
if (!kcsan_interrupt_watcher)
- raw_local_irq_restore(irq_flags);
+ local_irq_restore(irq_flags);
out:
user_access_restore(ua_flags);
}
diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
index ac5f834..6b2fb1a 100644
--- a/kernel/kcsan/report.c
+++ b/kernel/kcsan/report.c
@@ -606,10 +606,11 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type,
goto out;

/*
- * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
- * we do not turn off lockdep here; this could happen due to recursion
- * into lockdep via KCSAN if we detect a race in utilities used by
- * lockdep.
+ * Because we may generate reports when we're in scheduler code, the use
+ * of printk() could deadlock. Until such time that all printing code
+ * called in print_report() is scheduler-safe, accept the risk, and just
+ * get our message out. As such, also disable lockdep to hide the
+ * warning, and avoid disabling lockdep for the rest of the kernel.
*/
lockdep_off();

Subject: [tip: locking/core] lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

The following commit has been merged into the locking/core branch of tip:

Commit-ID: a21ee6055c30ce68c4e201c6496f0ed2a1936230
Gitweb: https://git.kernel.org/tip/a21ee6055c30ce68c4e201c6496f0ed2a1936230
Author: Peter Zijlstra <[email protected]>
AuthorDate: Mon, 25 May 2020 12:22:41 +02:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Fri, 10 Jul 2020 12:00:02 +02:00

lockdep: Change hardirq{s_enabled,_context} to per-cpu variables

Currently all IRQ-tracking state is in task_struct; this means that
task_struct needs to be defined before we use it.

Especially for lockdep_assert_irq*() this can lead to header-hell.

Move the hardirq state into per-cpu variables to avoid the task_struct
dependency.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Ingo Molnar <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
include/linux/irqflags.h | 19 ++++++++++++-------
include/linux/lockdep.h | 34 ++++++++++++++++++----------------
include/linux/sched.h | 2 --
kernel/fork.c | 4 +---
kernel/locking/lockdep.c | 30 +++++++++++++++---------------
kernel/softirq.c | 6 ++++++
6 files changed, 52 insertions(+), 43 deletions(-)

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 6384d28..255444f 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -14,6 +14,7 @@

#include <linux/typecheck.h>
#include <asm/irqflags.h>
+#include <asm/percpu.h>

/* Currently lockdep_softirqs_on/off is used only by lockdep */
#ifdef CONFIG_PROVE_LOCKING
@@ -31,18 +32,22 @@
#endif

#ifdef CONFIG_TRACE_IRQFLAGS
+
+DECLARE_PER_CPU(int, hardirqs_enabled);
+DECLARE_PER_CPU(int, hardirq_context);
+
extern void trace_hardirqs_on_prepare(void);
extern void trace_hardirqs_off_finish(void);
extern void trace_hardirqs_on(void);
extern void trace_hardirqs_off(void);
-# define lockdep_hardirq_context(p) ((p)->hardirq_context)
+# define lockdep_hardirq_context(p) (this_cpu_read(hardirq_context))
# define lockdep_softirq_context(p) ((p)->softirq_context)
-# define lockdep_hardirqs_enabled(p) ((p)->hardirqs_enabled)
+# define lockdep_hardirqs_enabled(p) (this_cpu_read(hardirqs_enabled))
# define lockdep_softirqs_enabled(p) ((p)->softirqs_enabled)
-# define lockdep_hardirq_enter() \
-do { \
- if (!current->hardirq_context++) \
- current->hardirq_threaded = 0; \
+# define lockdep_hardirq_enter() \
+do { \
+ if (this_cpu_inc_return(hardirq_context) == 1) \
+ current->hardirq_threaded = 0; \
} while (0)
# define lockdep_hardirq_threaded() \
do { \
@@ -50,7 +55,7 @@ do { \
} while (0)
# define lockdep_hardirq_exit() \
do { \
- current->hardirq_context--; \
+ this_cpu_dec(hardirq_context); \
} while (0)
# define lockdep_softirq_enter() \
do { \
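
A subtlety in the irqflags.h hunk above: lockdep_hardirq_enter() used to
detect the outermost entry with a post-increment test
(!current->hardirq_context++); the per-cpu replacement uses
this_cpu_inc_return(), which returns the incremented value and therefore
reads 1 exactly on the first, outermost entry. The idiom in isolation,
with hypothetical names:

#include <linux/percpu.h>

static DEFINE_PER_CPU(int, ctx);	/* nesting depth, illustrative */

static void ctx_enter(void)
{
	if (this_cpu_inc_return(ctx) == 1) {
		/* outermost entry: one-time setup goes here */
	}
}

static void ctx_exit(void)
{
	this_cpu_dec(ctx);
}
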
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 3b73cf8..be6cb17 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -11,6 +11,7 @@
#define __LINUX_LOCKDEP_H

#include <linux/lockdep_types.h>
+#include <asm/percpu.h>

struct task_struct;

@@ -529,28 +530,29 @@ do { \
lock_release(&(lock)->dep_map, _THIS_IP_); \
} while (0)

-#define lockdep_assert_irqs_enabled() do { \
- WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- !current->hardirqs_enabled, \
- "IRQs not enabled as expected\n"); \
- } while (0)
+DECLARE_PER_CPU(int, hardirqs_enabled);
+DECLARE_PER_CPU(int, hardirq_context);

-#define lockdep_assert_irqs_disabled() do { \
- WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- current->hardirqs_enabled, \
- "IRQs not disabled as expected\n"); \
- } while (0)
+#define lockdep_assert_irqs_enabled() \
+do { \
+ WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled)); \
+} while (0)

-#define lockdep_assert_in_irq() do { \
- WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- !current->hardirq_context, \
- "Not in hardirq as expected\n"); \
- } while (0)
+#define lockdep_assert_irqs_disabled() \
+do { \
+ WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled)); \
+} while (0)
+
+#define lockdep_assert_in_irq() \
+do { \
+ WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context)); \
+} while (0)

#else
# define might_lock(lock) do { } while (0)
# define might_lock_read(lock) do { } while (0)
# define might_lock_nested(lock, subclass) do { } while (0)
+
# define lockdep_assert_irqs_enabled() do { } while (0)
# define lockdep_assert_irqs_disabled() do { } while (0)
# define lockdep_assert_in_irq() do { } while (0)
@@ -560,7 +562,7 @@ do { \

# define lockdep_assert_RT_in_threaded_ctx() do { \
WARN_ONCE(debug_locks && !current->lockdep_recursion && \
- current->hardirq_context && \
+ lockdep_hardirq_context(current) && \
!(current->hardirq_threaded || current->irq_config), \
"Not in threaded context on PREEMPT_RT as expected\n"); \
} while (0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 692e327..3903a95 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -990,8 +990,6 @@ struct task_struct {
unsigned long hardirq_disable_ip;
unsigned int hardirq_enable_event;
unsigned int hardirq_disable_event;
- int hardirqs_enabled;
- int hardirq_context;
u64 hardirq_chain_key;
unsigned long softirq_disable_ip;
unsigned long softirq_enable_ip;
diff --git a/kernel/fork.c b/kernel/fork.c
index efc5493..70d9d0a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1954,8 +1954,8 @@ static __latent_entropy struct task_struct *copy_process(

rt_mutex_init_task(p);

+ lockdep_assert_irqs_enabled();
#ifdef CONFIG_PROVE_LOCKING
- DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
retval = -EAGAIN;
@@ -2036,7 +2036,6 @@ static __latent_entropy struct task_struct *copy_process(
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
p->irq_events = 0;
- p->hardirqs_enabled = 0;
p->hardirq_enable_ip = 0;
p->hardirq_enable_event = 0;
p->hardirq_disable_ip = _THIS_IP_;
@@ -2046,7 +2045,6 @@ static __latent_entropy struct task_struct *copy_process(
p->softirq_enable_event = 0;
p->softirq_disable_ip = 0;
p->softirq_disable_event = 0;
- p->hardirq_context = 0;
p->softirq_context = 0;
#endif

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index d595623..ab4ffbe 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2062,9 +2062,9 @@ print_bad_irq_dependency(struct task_struct *curr,
pr_warn("-----------------------------------------------------\n");
pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
curr->comm, task_pid_nr(curr),
- curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
+ lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
- curr->hardirqs_enabled,
+ lockdep_hardirqs_enabled(curr),
curr->softirqs_enabled);
print_lock(next);

@@ -3658,7 +3658,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip)
if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
return;

- if (unlikely(current->hardirqs_enabled)) {
+ if (unlikely(lockdep_hardirqs_enabled(current))) {
/*
* Neither irq nor preemption are disabled here
* so this is racy by nature but losing one hit
@@ -3686,7 +3686,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip)
* Can't allow enabling interrupts while in an interrupt handler,
* that's general bad form and such. Recursion, limited stack etc..
*/
- if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
+ if (DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context(current)))
return;

current->hardirq_chain_key = current->curr_chain_key;
@@ -3724,7 +3724,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)
if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK))
return;

- if (curr->hardirqs_enabled) {
+ if (lockdep_hardirqs_enabled(curr)) {
/*
* Neither irq nor preemption are disabled here
* so this is racy by nature but losing one hit
@@ -3751,7 +3751,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip)

skip_checks:
/* we'll do an OFF -> ON transition: */
- curr->hardirqs_enabled = 1;
+ this_cpu_write(hardirqs_enabled, 1);
curr->hardirq_enable_ip = ip;
curr->hardirq_enable_event = ++curr->irq_events;
debug_atomic_inc(hardirqs_on_events);
@@ -3783,11 +3783,11 @@ void noinstr lockdep_hardirqs_off(unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;

- if (curr->hardirqs_enabled) {
+ if (lockdep_hardirqs_enabled(curr)) {
/*
* We have done an ON -> OFF transition:
*/
- curr->hardirqs_enabled = 0;
+ this_cpu_write(hardirqs_enabled, 0);
curr->hardirq_disable_ip = ip;
curr->hardirq_disable_event = ++curr->irq_events;
debug_atomic_inc(hardirqs_off_events);
@@ -3832,7 +3832,7 @@ void lockdep_softirqs_on(unsigned long ip)
* usage bit for all held locks, if hardirqs are
* enabled too:
*/
- if (curr->hardirqs_enabled)
+ if (lockdep_hardirqs_enabled(curr))
mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ);
lockdep_recursion_finish();
}
@@ -3881,7 +3881,7 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
*/
if (!hlock->trylock) {
if (hlock->read) {
- if (curr->hardirq_context)
+ if (lockdep_hardirq_context(curr))
if (!mark_lock(curr, hlock,
LOCK_USED_IN_HARDIRQ_READ))
return 0;
@@ -3890,7 +3890,7 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
LOCK_USED_IN_SOFTIRQ_READ))
return 0;
} else {
- if (curr->hardirq_context)
+ if (lockdep_hardirq_context(curr))
if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ))
return 0;
if (curr->softirq_context)
@@ -3928,7 +3928,7 @@ lock_used:

static inline unsigned int task_irq_context(struct task_struct *task)
{
- return LOCK_CHAIN_HARDIRQ_CONTEXT * !!task->hardirq_context +
+ return LOCK_CHAIN_HARDIRQ_CONTEXT * !!lockdep_hardirq_context(task) +
LOCK_CHAIN_SOFTIRQ_CONTEXT * !!task->softirq_context;
}

@@ -4021,7 +4021,7 @@ static inline short task_wait_context(struct task_struct *curr)
* Set appropriate wait type for the context; for IRQs we have to take
* into account force_irqthread as that is implied by PREEMPT_RT.
*/
- if (curr->hardirq_context) {
+ if (lockdep_hardirq_context(curr)) {
/*
* Check if force_irqthreads will run us threaded.
*/
@@ -4864,11 +4864,11 @@ static void check_flags(unsigned long flags)
return;

if (irqs_disabled_flags(flags)) {
- if (DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled)) {
+ if (DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled(current))) {
printk("possible reason: unannotated irqs-off.\n");
}
} else {
- if (DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled)) {
+ if (DEBUG_LOCKS_WARN_ON(!lockdep_hardirqs_enabled(current))) {
printk("possible reason: unannotated irqs-on.\n");
}
}
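
check_flags() above is lockdep's self-consistency check: the per-cpu
book-keeping must agree with the real flag state in both directions. A
stripped-down sketch of that cross-check (names as in the diff):

	if (irqs_disabled_flags(flags)) {
		/* hardware off, tracking on: unannotated irqs-off */
		DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled(current));
	} else {
		/* hardware on, tracking off: unannotated irqs-on */
		DEBUG_LOCKS_WARN_ON(!lockdep_hardirqs_enabled(current));
	}
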
diff --git a/kernel/softirq.c b/kernel/softirq.c
index c4201b7..342c53f 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -107,6 +107,12 @@ static bool ksoftirqd_running(unsigned long pending)
* where hardirqs are disabled legitimately:
*/
#ifdef CONFIG_TRACE_IRQFLAGS
+
+DEFINE_PER_CPU(int, hardirqs_enabled);
+DEFINE_PER_CPU(int, hardirq_context);
+EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
+EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
+
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
unsigned long flags;