Reschedule vector tracepoints may be called while the CPU is in the idle state.
This triggers the lockdep "suspicious RCU usage" warning shown below.
So, add rcu_irq_enter/exit() to smp_trace_reschedule_interrupt().
[ 50.720557] Testing event reschedule_exit:
[ 50.721349]
[ 50.721502] ===============================
[ 50.721835] [ INFO: suspicious RCU usage. ]
[ 50.722169] 3.10.0-rc6-00004-gcf910e8 #190 Not tainted
[ 50.722582] -------------------------------
[ 50.722915] /c/kernel-tests/src/linux/arch/x86/include/asm/trace/irq_vectors.h:50 suspicious rcu_dereference_check() usage!
[ 50.723770]
[ 50.723770] other info that might help us debug this:
[ 50.723770]
[ 50.724385]
[ 50.724385] RCU used illegally from idle CPU!
[ 50.724385] rcu_scheduler_active = 1, debug_locks = 0
[ 50.725232] RCU used illegally from extended quiescent state!
[ 50.725690] no locks held by swapper/0/0.
[ 50.726010]
[ 50.726010] stack backtrace:
[ 50.726359] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.10.0-rc6-00004-gcf910e8 #190
[ 50.726965] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[ 50.727417] 00000001 00000001 79c53f04 798bd9f9 79c53f2c 79077a70 79b412c6 79b41fd1
[ 50.728159] 00000001 00000000 79c5ef8c 87147c58 00000000 79c55800 79c53f38 79010b65
[ 50.728849] 79c52000 79c53f7c 798c720e 79c52000 79c5ef8c 00000004 00000000 79c55800
[ 50.729532] Call Trace:
[ 50.729730] [<798bd9f9>] dump_stack+0x16/0x18
[ 50.730072] [<79077a70>] lockdep_rcu_suspicious+0xf2/0xfa
[ 50.730498] [<79010b65>] smp_trace_reschedule_interrupt+0x1c8/0x1d0
[ 50.730979] [<798c720e>] trace_reschedule_interrupt+0x36/0x3c
[ 50.731214] [<7901875f>] ? native_safe_halt+0x5/0x7
[ 50.731214] [<790085cc>] default_idle+0xb1/0x1e2
[ 50.731214] [<79008d05>] arch_cpu_idle+0xe/0x10
[ 50.731214] [<79069ddf>] cpu_startup_entry+0x1e4/0x2c3
[ 50.731214] [<798adb34>] rest_init+0x12c/0x132
[ 50.731214] [<798ada08>] ? __read_lock_failed+0x14/0x14
[ 50.731214] [<79d309e4>] start_kernel+0x38d/0x393
[ 50.731214] [<79d30489>] ? repair_env_string+0x51/0x51
[ 50.731214] [<79d302c3>] i386_start_kernel+0x79/0x7d
[ 50.771947] OK
[ 50.772099] Testing event reschedule_entry: OK
Signed-off-by: Seiji Aguchi <[email protected]>
---
arch/x86/kernel/smp.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index f4fe0b8..b959056 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -268,9 +268,11 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
void smp_trace_reschedule_interrupt(struct pt_regs *regs)
{
ack_APIC_irq();
+ rcu_irq_enter();
trace_reschedule_entry(RESCHEDULE_VECTOR);
__smp_reschedule_interrupt();
trace_reschedule_exit(RESCHEDULE_VECTOR);
+ rcu_irq_exit();
/*
* KVM uses this interrupt to force a cpu out of guest mode
*/
--
1.7.1
Any comments?
> -----Original Message-----
> From: [email protected] [mailto:[email protected]] On Behalf Of Seiji Aguchi
> Sent: Monday, June 24, 2013 4:21 PM
> To: [email protected]; [email protected]; [email protected]; [email protected]
> Cc: [email protected]; [email protected]
> Subject: [PATCH -tip] x86,trace: Add rcu_irq_enter/exit() in smp_trace_reschedule_interrupt()
>
> Reschedule vector tracepoints may be called in cpu idle state.
> This causes lockdep check warning below.
> So, add rcu_irq_enter/exit() to smp_trace_reschedule_interrupt().
>
> [ 50.720557] Testing event reschedule_exit:
> [ 50.721349]
> [ 50.721502] ===============================
> [ 50.721835] [ INFO: suspicious RCU usage. ]
> [ 50.722169] 3.10.0-rc6-00004-gcf910e8 #190 Not tainted
> [ 50.722582] -------------------------------
> [ 50.722915] /c/kernel-tests/src/linux/arch/x86/include/asm/trace/irq_vectors.h:50 suspicious rcu_dereference_check() usage!
> [ 50.723770]
> [ 50.723770] other info that might help us debug this:
> [ 50.723770]
> [ 50.724385]
> [ 50.724385] RCU used illegally from idle CPU!
> [ 50.724385] rcu_scheduler_active = 1, debug_locks = 0
> [ 50.725232] RCU used illegally from extended quiescent state!
> [ 50.725690] no locks held by swapper/0/0.
> [ 50.726010]
> [ 50.726010] stack backtrace:
> [ 50.726359] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.10.0-rc6-00004-gcf910e8 #190
> [ 50.726965] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
>
> [ 50.727417] 00000001 00000001 79c53f04 798bd9f9 79c53f2c 79077a70 79b412c6 79b41fd1
> [ 50.728159] 00000001 00000000 79c5ef8c 87147c58 00000000 79c55800 79c53f38 79010b65
> [ 50.728849] 79c52000 79c53f7c 798c720e 79c52000 79c5ef8c 00000004 00000000 79c55800
> [ 50.729532] Call Trace:
> [ 50.729730] [<798bd9f9>] dump_stack+0x16/0x18
> [ 50.730072] [<79077a70>] lockdep_rcu_suspicious+0xf2/0xfa
> [ 50.730498] [<79010b65>] smp_trace_reschedule_interrupt+0x1c8/0x1d0
> [ 50.730979] [<798c720e>] trace_reschedule_interrupt+0x36/0x3c
> [ 50.731214] [<7901875f>] ? native_safe_halt+0x5/0x7
> [ 50.731214] [<790085cc>] default_idle+0xb1/0x1e2
> [ 50.731214] [<79008d05>] arch_cpu_idle+0xe/0x10
> [ 50.731214] [<79069ddf>] cpu_startup_entry+0x1e4/0x2c3
> [ 50.731214] [<798adb34>] rest_init+0x12c/0x132
> [ 50.731214] [<798ada08>] ? __read_lock_failed+0x14/0x14
> [ 50.731214] [<79d309e4>] start_kernel+0x38d/0x393
> [ 50.731214] [<79d30489>] ? repair_env_string+0x51/0x51
> [ 50.731214] [<79d302c3>] i386_start_kernel+0x79/0x7d
> [ 50.771947] OK
> [ 50.772099] Testing event reschedule_entry: OK
>
> Signed-off-by: Seiji Aguchi <[email protected]>
> ---
> arch/x86/kernel/smp.c | 2 ++
> 1 files changed, 2 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
> index f4fe0b8..b959056 100644
> --- a/arch/x86/kernel/smp.c
> +++ b/arch/x86/kernel/smp.c
> @@ -268,9 +268,11 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
> void smp_trace_reschedule_interrupt(struct pt_regs *regs)
> {
> ack_APIC_irq();
> + rcu_irq_enter();
> trace_reschedule_entry(RESCHEDULE_VECTOR);
> __smp_reschedule_interrupt();
> trace_reschedule_exit(RESCHEDULE_VECTOR);
> + rcu_irq_exit();
> /*
> * KVM uses this interrupt to force a cpu out of guest mode
> */
> --
> 1.7.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
????{.n?+???????+%?????ݶ??w??{.n?+????{??G?????{ay?ʇڙ?,j??f???h?????????z_??(?階?ݢj"???m??????G????????????&???~???iO???z??v?^?m????????????I?
[ Added Peter Z. and Paul ]
On Mon, 2013-06-24 at 16:21 -0400, Seiji Aguchi wrote:
> Reschedule vector tracepoints may be called in cpu idle state.
> This causes lockdep check warning below.
> So, add rcu_irq_enter/exit() to smp_trace_reschedule_interrupt().
>
> [ 50.720557] Testing event reschedule_exit:
> [ 50.721349]
> [ 50.721502] ===============================
> [ 50.721835] [ INFO: suspicious RCU usage. ]
> [ 50.722169] 3.10.0-rc6-00004-gcf910e8 #190 Not tainted
> [ 50.722582] -------------------------------
> [ 50.722915] /c/kernel-tests/src/linux/arch/x86/include/asm/trace/irq_vectors.h:50 suspicious rcu_dereference_check() usage!
> [ 50.723770]
> [ 50.723770] other info that might help us debug this:
> [ 50.723770]
> [ 50.724385]
> [ 50.724385] RCU used illegally from idle CPU!
> [ 50.724385] rcu_scheduler_active = 1, debug_locks = 0
> [ 50.725232] RCU used illegally from extended quiescent state!
> [ 50.725690] no locks held by swapper/0/0.
> [ 50.726010]
> [ 50.726010] stack backtrace:
> [ 50.726359] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.10.0-rc6-00004-gcf910e8 #190
> [ 50.726965] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
>
> [ 50.727417] 00000001 00000001 79c53f04 798bd9f9 79c53f2c 79077a70 79b412c6 79b41fd1
> [ 50.728159] 00000001 00000000 79c5ef8c 87147c58 00000000 79c55800 79c53f38 79010b65
> [ 50.728849] 79c52000 79c53f7c 798c720e 79c52000 79c5ef8c 00000004 00000000 79c55800
> [ 50.729532] Call Trace:
> [ 50.729730] [<798bd9f9>] dump_stack+0x16/0x18
> [ 50.730072] [<79077a70>] lockdep_rcu_suspicious+0xf2/0xfa
> [ 50.730498] [<79010b65>] smp_trace_reschedule_interrupt+0x1c8/0x1d0
> [ 50.730979] [<798c720e>] trace_reschedule_interrupt+0x36/0x3c
> [ 50.731214] [<7901875f>] ? native_safe_halt+0x5/0x7
> [ 50.731214] [<790085cc>] default_idle+0xb1/0x1e2
> [ 50.731214] [<79008d05>] arch_cpu_idle+0xe/0x10
> [ 50.731214] [<79069ddf>] cpu_startup_entry+0x1e4/0x2c3
> [ 50.731214] [<798adb34>] rest_init+0x12c/0x132
> [ 50.731214] [<798ada08>] ? __read_lock_failed+0x14/0x14
> [ 50.731214] [<79d309e4>] start_kernel+0x38d/0x393
> [ 50.731214] [<79d30489>] ? repair_env_string+0x51/0x51
> [ 50.731214] [<79d302c3>] i386_start_kernel+0x79/0x7d
> [ 50.771947] OK
> [ 50.772099] Testing event reschedule_entry: OK
>
> Signed-off-by: Seiji Aguchi <[email protected]>
> ---
> arch/x86/kernel/smp.c | 2 ++
> 1 files changed, 2 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
> index f4fe0b8..b959056 100644
> --- a/arch/x86/kernel/smp.c
> +++ b/arch/x86/kernel/smp.c
> @@ -268,9 +268,11 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
> void smp_trace_reschedule_interrupt(struct pt_regs *regs)
> {
> ack_APIC_irq();
> + rcu_irq_enter();
> trace_reschedule_entry(RESCHEDULE_VECTOR);
> __smp_reschedule_interrupt();
> trace_reschedule_exit(RESCHEDULE_VECTOR);
> + rcu_irq_exit();
The question is, should we add normal irq_enter/exit here? As that
should be OK to nest. There's a comment in scheduler_ipi():
/*
* Not all reschedule IPI handlers call irq_enter/irq_exit, since
* traditionally all their work was done from the interrupt return
* path. Now that we actually do some work, we need to make sure
* we do call them.
*
* Some archs already do call them, luckily irq_enter/exit nest
* properly.
*
* Arguably we should visit all archs and update all handlers,
* however a fair share of IPIs are still resched only so this would
* somewhat pessimize the simple resched case.
*/
just before it calls irq_enter(). It seems that not calling irq_enter() for
the reschedule IPI is more of a legacy thing. The comment also states
that it's OK for an arch to call irq_enter() before calling scheduler_ipi(),
since the calls nest. I wonder if we should invest time in fixing all archs and
removing this irq_enter()? But that's out of scope for this change.
Either way, the tracepoint requires RCU, but for accuracy it also
requires irq_enter() (tracepoints record the irq context). Thus, the
tracepoint interrupt handler should be calling irq_enter() and not
rcu_irq_enter() (irq_enter() itself calls rcu_irq_enter()).
-- Steve
> /*
> * KVM uses this interrupt to force a cpu out of guest mode
> */
> > diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
> > index f4fe0b8..b959056 100644
> > --- a/arch/x86/kernel/smp.c
> > +++ b/arch/x86/kernel/smp.c
> > @@ -268,9 +268,11 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
> > void smp_trace_reschedule_interrupt(struct pt_regs *regs)
> > {
> > ack_APIC_irq();
> > + rcu_irq_enter();
> > trace_reschedule_entry(RESCHEDULE_VECTOR);
> > __smp_reschedule_interrupt();
> > trace_reschedule_exit(RESCHEDULE_VECTOR);
> > + rcu_irq_exit();
>
> The question is, should we add normal irq_enter/exit here? As that
> should be OK to nest. There's a comment in scheduler_ipi():
> Either way, the tracepoint requires rcu but for accuracy it also
> requires irq_enter() (tracepoints record the irq context), thus, the
> tracepoint interrupt handler should be calling irq_enter() and not
> rcu_irq_enter() (irq_enter() calls rcu_irq_enter())
OK.
So, both ack_APIC_irq() and irq_enter() are needed.
That entry sequence can be shared with the smp_call_function_interrupt() and
smp_call_function_single_interrupt() handlers, like this.
I will update the patch.
arch/x86/kernel/smp.c | 23 ++++++++++++-----------
1 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index f4fe0b8..732b618 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -265,23 +265,24 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
*/
}
-void smp_trace_reschedule_interrupt(struct pt_regs *regs)
+static inline void smp_entering_irq(void)
{
ack_APIC_irq();
+ irq_enter();
+}
+
+void smp_trace_reschedule_interrupt(struct pt_regs *regs)
+{
+ smp_entering_irq();
trace_reschedule_entry(RESCHEDULE_VECTOR);
__smp_reschedule_interrupt();
trace_reschedule_exit(RESCHEDULE_VECTOR);
+ exiting_irq();
/*
* KVM uses this interrupt to force a cpu out of guest mode
*/
}
-static inline void call_function_entering_irq(void)
-{
- ack_APIC_irq();
- irq_enter();
-}
-
static inline void __smp_call_function_interrupt(void)
{
generic_smp_call_function_interrupt();
@@ -290,14 +291,14 @@ static inline void __smp_call_function_interrupt(void)
void smp_call_function_interrupt(struct pt_regs *regs)
{
- call_function_entering_irq();
+ smp_entering_irq();
__smp_call_function_interrupt();
exiting_irq();
}
void smp_trace_call_function_interrupt(struct pt_regs *regs)
{
- call_function_entering_irq();
+ smp_entering_irq();
trace_call_function_entry(CALL_FUNCTION_VECTOR);
__smp_call_function_interrupt();
trace_call_function_exit(CALL_FUNCTION_VECTOR);
@@ -312,14 +313,14 @@ static inline void __smp_call_function_single_interrupt(void)
void smp_call_function_single_interrupt(struct pt_regs *regs)
{
- call_function_entering_irq();
+ smp_entering_irq();
__smp_call_function_single_interrupt();
exiting_irq();
}
void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
{
- call_function_entering_irq();
+ smp_entering_irq();
trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
__smp_call_function_single_interrupt();
trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
--
1.7.1
????{.n?+???????+%?????ݶ??w??{.n?+????{??G?????{ay?ʇڙ?,j??f???h?????????z_??(?階?ݢj"???m??????G????????????&???~???iO???z??v?^?m????????????I?
On Fri, Jun 28, 2013 at 10:21:43AM -0400, Steven Rostedt wrote:
> [ Added Peter Z. and Paul ]
> > void smp_trace_reschedule_interrupt(struct pt_regs *regs)
> > {
> > ack_APIC_irq();
> > + rcu_irq_enter();
> > trace_reschedule_entry(RESCHEDULE_VECTOR);
> > __smp_reschedule_interrupt();
> > trace_reschedule_exit(RESCHEDULE_VECTOR);
> > + rcu_irq_exit();
>
> The question is, should we add normal irq_enter/exit here? As that
> should be OK to nest. There's a comment in scheduler_ipi():
>
> /*
> * Not all reschedule IPI handlers call irq_enter/irq_exit, since
> * traditionally all their work was done from the interrupt return
> * path. Now that we actually do some work, we need to make sure
> * we do call them.
> *
> * Some archs already do call them, luckily irq_enter/exit nest
> * properly.
> *
> * Arguably we should visit all archs and update all handlers,
> * however a fair share of IPIs are still resched only so this would
> * somewhat pessimize the simple resched case.
> */
>
> just before it calls irq_enter(). Seems that not calling irq_enter() for
> the reschedule ipi interrupt is more of a legacy thing. It also states
> that its OK for an arch to call irq_enter() before calling this as it
> can nest. I wonder if we should invest time in fixing all archs and
> remove this irq_enter? But that's out of scope for this change.
That comment also states why I never did the arch sweep; doing
irq_enter()/irq_exit() for the pure empty reschedule interrupt makes it
more expensive.
Back when I introduced scheduler_ipi() I measured the number of pure
resched interrupts (no scheduler_ipi() content, pure interrupt return
path work) vs actually doing something in scheduler_ipi() and found a
significant number of interrupts were 'pure'.
Things might have changed; but you'd better remeasure if you want to go
sweep the arch tree.
On Fri, 2013-06-28 at 15:13 +0000, Seiji Aguchi wrote:
> OK.
> So, ack_APIC_irq() and irq_enter() are needed.
> It can be shared with smp_call_function() and smp_call_function_single() like this.
> I will update the patch.
>
> arch/x86/kernel/smp.c | 23 ++++++++++++-----------
> 1 files changed, 12 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
> index f4fe0b8..732b618 100644
> --- a/arch/x86/kernel/smp.c
> +++ b/arch/x86/kernel/smp.c
> @@ -265,23 +265,24 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
> */
> }
>
> -void smp_trace_reschedule_interrupt(struct pt_regs *regs)
> +static inline void smp_entering_irq(void)
> {
> ack_APIC_irq();
> + irq_enter();
> +}
> +
> +void smp_trace_reschedule_interrupt(struct pt_regs *regs)
> +{
Sure, but we should probably add a comment here explaining why
smp_trace_reschedule_interrupt() is different from
smp_reschedule_interrupt().
/*
* Need to call irq_enter() before calling the trace point.
* __smp_reschedule_interrupt() calls irq_enter/exit() too (in
* scheduler_ipi()). This is OK, since those functions are allowed
* to nest.
*/
-- Steve
> + smp_entering_irq();
> trace_reschedule_entry(RESCHEDULE_VECTOR);
> __smp_reschedule_interrupt();
> trace_reschedule_exit(RESCHEDULE_VECTOR);
> + exiting_irq();
> /*
> * KVM uses this interrupt to force a cpu out of guest mode
> */
> }
>
> -static inline void call_function_entering_irq(void)
> -{
> - ack_APIC_irq();
> - irq_enter();
> -}
> -
> static inline void __smp_call_function_interrupt(void)
> {
> generic_smp_call_function_interrupt();
> @@ -290,14 +291,14 @@ static inline void __smp_call_function_interrupt(void)
>
> void smp_call_function_interrupt(struct pt_regs *regs)
> {
> - call_function_entering_irq();
> + smp_entering_irq();
> __smp_call_function_interrupt();
> exiting_irq();
> }
>
> void smp_trace_call_function_interrupt(struct pt_regs *regs)
> {
> - call_function_entering_irq();
> + smp_entering_irq();
> trace_call_function_entry(CALL_FUNCTION_VECTOR);
> __smp_call_function_interrupt();
> trace_call_function_exit(CALL_FUNCTION_VECTOR);
> @@ -312,14 +313,14 @@ static inline void __smp_call_function_single_interrupt(void)
>
> void smp_call_function_single_interrupt(struct pt_regs *regs)
> {
> - call_function_entering_irq();
> + smp_entering_irq();
> __smp_call_function_single_interrupt();
> exiting_irq();
> }
>
> void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
> {
> - call_function_entering_irq();
> + smp_entering_irq();
> trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
> __smp_call_function_single_interrupt();
> trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
On Fri, 2013-06-28 at 17:19 +0200, Peter Zijlstra wrote:
> Things might have changed; but you'd better remeasure if you want to go
> sweep the arch tree.
Fair enough.
-- Steve
> > +void smp_trace_reschedule_interrupt(struct pt_regs *regs)
> > +{
>
> Sure, but we should probably add a comment here to why
> smp_trace_reschedule_interrupt() is different than
> smp_reschedule_interrupt().
>
> /*
> * Need to call irq_enter() before calling the trace point.
> * __smp_reschedule_interrupt() calls irq_enter/exit() too (in
> * scheduler_ipi(). This is OK, since those functions are allowed
> * to nest.
> */
>
> -- Steve
I will add the comment.
Thanks.
Seiji
????{.n?+???????+%?????ݶ??w??{.n?+????{??G?????{ay?ʇڙ?,j??f???h?????????z_??(?階?ݢj"???m??????G????????????&???~???iO???z??v?^?m????????????I?