2010-08-09 05:13:21

by Jason Wessel

[permalink] [raw]
Subject: [RFC PATCH 0/2] RCU stall handler for kernel debugger

The purpose of this short series is to allow the kernel debugger to
reset the rcu stall timer when restarting the normal kernel execution
so as to prevent the rcu stall timer from issuing NMIs to print stack
traces that should be ignored because the kernel went away for a
while.

---
Jason Wessel (2):
debug_core: move all watch dog syncs to a single function
rcu,debug_core: allow the kernel debugger to reset the rcu stall timer

include/linux/rcupdate.h | 8 ++++++++
kernel/debug/debug_core.c | 17 +++++++++++------
kernel/rcutree.c | 9 +++++++++
3 files changed, 28 insertions(+), 6 deletions(-)


2010-08-09 05:13:32

by Jason Wessel

[permalink] [raw]
Subject: [RFC PATCH 2/2] rcu,debug_core: allow the kernel debugger to reset the rcu stall timer

When returning from the kernel debugger allow a reset of the rcu
jiffies_stall value to prevent the rcu stall detector from sending NMI
events which stack dumps on all the cpus in the system.

Signed-off-by: Jason Wessel <[email protected]>
CC: Dipankar Sarma <[email protected]>
CC: Paul E. McKenney <[email protected]>
CC: Ingo Molnar <[email protected]>
---
include/linux/rcupdate.h | 8 ++++++++
kernel/debug/debug_core.c | 2 ++
kernel/rcutree.c | 9 +++++++++
3 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9fbc54a..abd3ab6 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -599,4 +599,12 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
#define rcu_dereference_index_check(p, c) \
__rcu_dereference_index_check((p), (c))

+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+extern void rcu_cpu_stall_reset(void);
+#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+static inline void rcu_cpu_stall_reset(void)
+{
+}
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
#endif /* __LINUX_RCUPDATE_H */
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index e4d6819..1600e90 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -47,6 +47,7 @@
#include <linux/pid.h>
#include <linux/smp.h>
#include <linux/mm.h>
+#include <linux/rcupdate.h>

#include <asm/cacheflush.h>
#include <asm/byteorder.h>
@@ -474,6 +475,7 @@ static void dbg_touch_watchdogs(void)
{
touch_softlockup_watchdog_sync();
clocksource_touch_watchdog();
+ rcu_cpu_stall_reset();
}

static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d5bc439..209b755 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -532,6 +532,9 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)

if (rcu_cpu_stall_panicking)
return;
+ /* Gracefully handle a watch dog reset when jiffies_stall == 0 */
+ if (!rsp->jiffies_stall)
+ return;
delta = jiffies - rsp->jiffies_stall;
rnp = rdp->mynode;
if ((rnp->qsmask & rdp->grpmask) && delta >= 0) {
@@ -561,6 +564,12 @@ static void __init check_cpu_stall_init(void)
atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
}

+void rcu_cpu_stall_reset(void)
+{
+ rcu_sched_state.jiffies_stall = 0;
+ rcu_bh_state.jiffies_stall = 0;
+}
+
#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */

static void record_gp_stall_check_time(struct rcu_state *rsp)
--
1.6.3.3

2010-08-09 05:13:58

by Jason Wessel

[permalink] [raw]
Subject: [RFC PATCH 1/2] debug_core: move all watch dog syncs to a single function

Move the various clock and watch dog syncs to a single function in
advance of adding another sync for the rcu stall detector.

Signed-off-by: Jason Wessel <[email protected]>
---
kernel/debug/debug_core.c | 15 +++++++++------
1 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 3c2d497..e4d6819 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -470,6 +470,12 @@ static void dbg_cpu_switch(int cpu, int next_cpu)
kgdb_info[next_cpu].exception_state |= DCPU_NEXT_MASTER;
}

+static void dbg_touch_watchdogs(void)
+{
+ touch_softlockup_watchdog_sync();
+ clocksource_touch_watchdog();
+}
+
static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs)
{
unsigned long flags;
@@ -523,8 +529,7 @@ return_normal:
if (trace_on)
tracing_on();
atomic_dec(&cpu_in_kgdb[cpu]);
- touch_softlockup_watchdog_sync();
- clocksource_touch_watchdog();
+ dbg_touch_watchdogs();
local_irq_restore(flags);
return 0;
}
@@ -541,8 +546,7 @@ return_normal:
(kgdb_info[cpu].task &&
kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
atomic_set(&kgdb_active, -1);
- touch_softlockup_watchdog_sync();
- clocksource_touch_watchdog();
+ dbg_touch_watchdogs();
local_irq_restore(flags);

goto acquirelock;
@@ -659,8 +663,7 @@ kgdb_restore:
tracing_on();
/* Free kgdb_active */
atomic_set(&kgdb_active, -1);
- touch_softlockup_watchdog_sync();
- clocksource_touch_watchdog();
+ dbg_touch_watchdogs();
local_irq_restore(flags);

return kgdb_info[cpu].ret_state;
--
1.6.3.3

2010-08-09 17:44:19

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [RFC PATCH 2/2] rcu,debug_core: allow the kernel debugger to reset the rcu stall timer

On Mon, Aug 09, 2010 at 12:12:12AM -0500, Jason Wessel wrote:
> When returning from the kernel debugger allow a reset of the rcu
> jiffies_stall value to prevent the rcu stall detector from sending NMI
> events which stack dumps on all the cpus in the system.

Thank you for forwarding this!

A couple of questions below.

> Signed-off-by: Jason Wessel <[email protected]>
> CC: Dipankar Sarma <[email protected]>
> CC: Paul E. McKenney <[email protected]>
> CC: Ingo Molnar <[email protected]>
> ---
> include/linux/rcupdate.h | 8 ++++++++
> kernel/debug/debug_core.c | 2 ++
> kernel/rcutree.c | 9 +++++++++
> 3 files changed, 19 insertions(+), 0 deletions(-)
>
> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> index 9fbc54a..abd3ab6 100644
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@ -599,4 +599,12 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
> #define rcu_dereference_index_check(p, c) \
> __rcu_dereference_index_check((p), (c))
>
> +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> +extern void rcu_cpu_stall_reset(void);
> +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> +static inline void rcu_cpu_stall_reset(void)
> +{
> +}
> +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> +
> #endif /* __LINUX_RCUPDATE_H */
> diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
> index e4d6819..1600e90 100644
> --- a/kernel/debug/debug_core.c
> +++ b/kernel/debug/debug_core.c
> @@ -47,6 +47,7 @@
> #include <linux/pid.h>
> #include <linux/smp.h>
> #include <linux/mm.h>
> +#include <linux/rcupdate.h>
>
> #include <asm/cacheflush.h>
> #include <asm/byteorder.h>
> @@ -474,6 +475,7 @@ static void dbg_touch_watchdogs(void)
> {
> touch_softlockup_watchdog_sync();
> clocksource_touch_watchdog();
> + rcu_cpu_stall_reset();
> }
>
> static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs)
> diff --git a/kernel/rcutree.c b/kernel/rcutree.c
> index d5bc439..209b755 100644
> --- a/kernel/rcutree.c
> +++ b/kernel/rcutree.c
> @@ -532,6 +532,9 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
>
> if (rcu_cpu_stall_panicking)
> return;
> + /* Gracefully handle a watch dog reset when jiffies_stall == 0 */
> + if (!rsp->jiffies_stall)
> + return;
> delta = jiffies - rsp->jiffies_stall;
> rnp = rdp->mynode;
> if ((rnp->qsmask & rdp->grpmask) && delta >= 0) {
> @@ -561,6 +564,12 @@ static void __init check_cpu_stall_init(void)
> atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
> }
>
> +void rcu_cpu_stall_reset(void)
> +{
> + rcu_sched_state.jiffies_stall = 0;
> + rcu_bh_state.jiffies_stall = 0;
> +}
> +

OK, so you are suppressing RCU CPU stall warnings for rcu_sched and
rcu_bh, but not for preemptible RCU. I believe that you want all of
them covered.

I have a number of recent patches that allow RCU CPU stall warnings to
be suppressed, one of which allows them to be suppressed using sysfs.
Would that work for you, or do you need an in-kernel interface?

If you do need an in-kernel interface, I could export (and probably
rename) rcu_panic(), which is a static in 2.6.35. This assumes that you
never want to re-enable RCU CPU stall warnings once you suppress them,
which is what your patch appears to do.

So, if I export a suppress_rcu_cpu_stall() function that permanently
disabled RCU CPU stall warnings, would that work for you? (They could
be manually re-enabled via sysfs.)

> #else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
>
> static void record_gp_stall_check_time(struct rcu_state *rsp)
> --
> 1.6.3.3
>

2010-08-09 18:27:04

by Jason Wessel

[permalink] [raw]
Subject: Re: [RFC PATCH 2/2] rcu,debug_core: allow the kernel debugger to reset the rcu stall timer

On 08/09/2010 12:43 PM, Paul E. McKenney wrote:
> On Mon, Aug 09, 2010 at 12:12:12AM -0500, Jason Wessel wrote:
>
>> +void rcu_cpu_stall_reset(void)
>> +{
>> + rcu_sched_state.jiffies_stall = 0;
>> + rcu_bh_state.jiffies_stall = 0;
>> +}
>> +
>
> OK, so you are suppressing RCU CPU stall warnings for rcu_sched and
> rcu_bh, but not for preemptible RCU. I believe that you want all of
> them covered.
>

What is the state variable for the preemptible RCU I had not hit a
warning in my testing so I must needs some more test cases. :-)

> I have a number of recent patches that allow RCU CPU stall warnings to
> be suppressed, one of which allows them to be suppressed using sysfs.
> Would that work for you, or do you need an in-kernel interface?
>

We need an in-kernel interface for sure.

> If you do need an in-kernel interface, I could export (and probably
> rename) rcu_panic(), which is a static in 2.6.35. This assumes that you
> never want to re-enable RCU CPU stall warnings once you suppress them,
> which is what your patch appears to do.
>
> So, if I export a suppress_rcu_cpu_stall() function that permanently
> disabled RCU CPU stall warnings, would that work for you? (They could
> be manually re-enabled via sysfs.)
>

This is an RFC patch for a reason. The intent behind the interface is
to allow for one stall check cycle to go by after resuming kernel
execution and after that the normal rules are in play. Code flow
wise, it looked like the easiest thing to do was set the jiffies_stall
value to zero and then exit when the. The patch I created was
supposed to only ignore one stall cycle.

Here is the pseudo code.

/* before restarting kernel execution zero out the jiffies_stall value.

__rcu_pending() {

check_cpu_stall(); <- Here we check if the stall val is set to zero
and just return
/* do all normal work */

}

In the normal flow of things rc_start_gp() will ultimately call
record_gp_stall_check_time which updates the jiffies_stall back to non
zero and the stall accounting is back in play.


Jason.

2010-08-09 19:01:51

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [RFC PATCH 2/2] rcu,debug_core: allow the kernel debugger to reset the rcu stall timer

On Mon, Aug 09, 2010 at 01:26:19PM -0500, Jason Wessel wrote:
> On 08/09/2010 12:43 PM, Paul E. McKenney wrote:
> > On Mon, Aug 09, 2010 at 12:12:12AM -0500, Jason Wessel wrote:
> >
> >> +void rcu_cpu_stall_reset(void)
> >> +{
> >> + rcu_sched_state.jiffies_stall = 0;
> >> + rcu_bh_state.jiffies_stall = 0;
> >> +}
> >> +
> >
> > OK, so you are suppressing RCU CPU stall warnings for rcu_sched and
> > rcu_bh, but not for preemptible RCU. I believe that you want all of
> > them covered.
>
> What is the state variable for the preemptible RCU I had not hit a
> warning in my testing so I must needs some more test cases. :-)

Well, you won't hit preemptible RCU unless you set TREE_PREEMPT_RCU. ;-)

> > I have a number of recent patches that allow RCU CPU stall warnings to
> > be suppressed, one of which allows them to be suppressed using sysfs.
> > Would that work for you, or do you need an in-kernel interface?
>
> We need an in-kernel interface for sure.

OK, good to know.

> > If you do need an in-kernel interface, I could export (and probably
> > rename) rcu_panic(), which is a static in 2.6.35. This assumes that you
> > never want to re-enable RCU CPU stall warnings once you suppress them,
> > which is what your patch appears to do.
> >
> > So, if I export a suppress_rcu_cpu_stall() function that permanently
> > disabled RCU CPU stall warnings, would that work for you? (They could
> > be manually re-enabled via sysfs.)
>
> This is an RFC patch for a reason. The intent behind the interface is
> to allow for one stall check cycle to go by after resuming kernel
> execution and after that the normal rules are in play. Code flow
> wise, it looked like the easiest thing to do was set the jiffies_stall
> value to zero and then exit when the. The patch I created was
> supposed to only ignore one stall cycle.
>
> Here is the pseudo code.
>
> /* before restarting kernel execution zero out the jiffies_stall value.
>
> __rcu_pending() {
>
> check_cpu_stall(); <- Here we check if the stall val is set to zero
> and just return
> /* do all normal work */
>
> }
>
> In the normal flow of things rc_start_gp() will ultimately call
> record_gp_stall_check_time which updates the jiffies_stall back to non
> zero and the stall accounting is back in play.

Ah, I get it now. Just out of curiosity, why not set the various
->jiffies_stall fields to jiffies + RCU_SECONDS_TILL_STALL_CHECK?
Is the value of jiffies likely to advance a lot after you call
rcu_cpu_stall_reset(), perhaps due to the system trying to catch up with
the passage of time?

Thanx, Paul

2010-08-11 04:02:16

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [RFC PATCH 2/2] rcu,debug_core: allow the kernel debugger to reset the rcu stall timer

On Mon, Aug 09, 2010 at 12:01:42PM -0700, Paul E. McKenney wrote:
> On Mon, Aug 09, 2010 at 01:26:19PM -0500, Jason Wessel wrote:
> > On 08/09/2010 12:43 PM, Paul E. McKenney wrote:
> > > On Mon, Aug 09, 2010 at 12:12:12AM -0500, Jason Wessel wrote:
> > >
> > >> +void rcu_cpu_stall_reset(void)
> > >> +{
> > >> + rcu_sched_state.jiffies_stall = 0;
> > >> + rcu_bh_state.jiffies_stall = 0;
> > >> +}
> > >> +
> > >
> > > OK, so you are suppressing RCU CPU stall warnings for rcu_sched and
> > > rcu_bh, but not for preemptible RCU. I believe that you want all of
> > > them covered.
> >
> > What is the state variable for the preemptible RCU I had not hit a
> > warning in my testing so I must needs some more test cases. :-)
>
> Well, you won't hit preemptible RCU unless you set TREE_PREEMPT_RCU. ;-)
>
> > > I have a number of recent patches that allow RCU CPU stall warnings to
> > > be suppressed, one of which allows them to be suppressed using sysfs.
> > > Would that work for you, or do you need an in-kernel interface?
> >
> > We need an in-kernel interface for sure.
>
> OK, good to know.
>
> > > If you do need an in-kernel interface, I could export (and probably
> > > rename) rcu_panic(), which is a static in 2.6.35. This assumes that you
> > > never want to re-enable RCU CPU stall warnings once you suppress them,
> > > which is what your patch appears to do.
> > >
> > > So, if I export a suppress_rcu_cpu_stall() function that permanently
> > > disabled RCU CPU stall warnings, would that work for you? (They could
> > > be manually re-enabled via sysfs.)
> >
> > This is an RFC patch for a reason. The intent behind the interface is
> > to allow for one stall check cycle to go by after resuming kernel
> > execution and after that the normal rules are in play. Code flow
> > wise, it looked like the easiest thing to do was set the jiffies_stall
> > value to zero and then exit when the. The patch I created was
> > supposed to only ignore one stall cycle.
> >
> > Here is the pseudo code.
> >
> > /* before restarting kernel execution zero out the jiffies_stall value.
> >
> > __rcu_pending() {
> >
> > check_cpu_stall(); <- Here we check if the stall val is set to zero
> > and just return
> > /* do all normal work */
> >
> > }
> >
> > In the normal flow of things rc_start_gp() will ultimately call
> > record_gp_stall_check_time which updates the jiffies_stall back to non
> > zero and the stall accounting is back in play.
>
> Ah, I get it now. Just out of curiosity, why not set the various
> ->jiffies_stall fields to jiffies + RCU_SECONDS_TILL_STALL_CHECK?
> Is the value of jiffies likely to advance a lot after you call
> rcu_cpu_stall_reset(), perhaps due to the system trying to catch up with
> the passage of time?

Here is an initial patch. Untested, probably doesn't even compile.
It is against my -rcu tree:

git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-2.6-rcu.git rcu/testing

Thoughts?

Thanx, Paul

commit 1a745a8467f285e17ded055699ecc557d1b1893e
Author: Paul E. McKenney <[email protected]>
Date: Tue Aug 10 14:28:53 2010 -0700

rcu: permit suppressing current grace period's CPU stall warnings

When using a kernel debugger, a long sojourn in the debugger can get
you lots of RCU CPU stall warnings once you resume. This might not be
helpful, especially if you are using the system console. This patch
therefore allows RCU CPU stall warnings to be suppressed, but only for
the duration of the current set of grace periods.

Requested-by: Jason Wessel <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 4cc5eba..3fa1797 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -178,6 +178,10 @@ static inline void rcu_sched_force_quiescent_state(void)
{
}

+static inline void rcu_cpu_stall_reset(void)
+{
+}
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC

extern int rcu_scheduler_active __read_mostly;
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index c13b85d..0726809 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -36,6 +36,7 @@ extern void rcu_sched_qs(int cpu);
extern void rcu_bh_qs(int cpu);
extern void rcu_note_context_switch(int cpu);
extern int rcu_needs_cpu(int cpu);
+extern void rcu_cpu_stall_reset(void);

#ifdef CONFIG_TREE_PREEMPT_RCU

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ff21411..42140a8 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -565,6 +565,22 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
return NOTIFY_DONE;
}

+/**
+ * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
+ *
+ * Set the stall-warning timeout way off into the future, thus preventing
+ * any RCU CPU stall-warning messages from appearing in the current set of
+ * RCU grace periods.
+ *
+ * The caller must disable hard irqs.
+ */
+void rcu_cpu_stall_reset(void)
+{
+ rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
+ rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
+ rcu_preempt_stall_reset();
+}
+
static struct notifier_block rcu_panic_block = {
.notifier_call = rcu_panic,
};
@@ -584,6 +600,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
{
}

+void rcu_cpu_stall_reset(void)
+{
+}
+
static void __init check_cpu_stall_init(void)
{
}
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index bb4d086..7abd439 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -372,6 +372,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
static void rcu_print_detail_task_stall(struct rcu_state *rsp);
static void rcu_print_task_stall(struct rcu_node *rnp);
+static void rcu_preempt_stall_reset(void);
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 63bb771..561410f 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -417,6 +417,16 @@ static void rcu_print_task_stall(struct rcu_node *rnp)
}
}

+/*
+ * Suppress preemptible RCU's CPU stall warnings by pushing the
+ * time of the next stall-warning message comfortably far into the
+ * future.
+ */
+static void rcu_preempt_stall_reset(void)
+{
+ rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2;
+}
+
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */

/*
@@ -867,6 +877,14 @@ static void rcu_print_task_stall(struct rcu_node *rnp)
{
}

+/*
+ * Because preemptible RCU does not exist, there is no need to suppress
+ * its CPU stall warnings.
+ */
+static void rcu_preempt_stall_reset(void)
+{
+}
+
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */

/*

2010-08-11 14:07:33

by Jason Wessel

[permalink] [raw]
Subject: Re: [RFC PATCH 2/2] rcu,debug_core: allow the kernel debugger to reset the rcu stall timer

On 08/10/2010 11:01 PM, Paul E. McKenney wrote:
> On Mon, Aug 09, 2010 at 12:01:42PM -0700, Paul E. McKenney wrote:
>
> Here is an initial patch. Untested, probably doesn't even compile.
> It is against my -rcu tree:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-2.6-rcu.git rcu/testing
>
> Thoughts?
>


Paul, I tried this out and code wise it looks fine to me. I performed
a compile and regression test with the various kernel config options
to cover this new code.

Thanks for fixing the rcu_preempt case that my original patch missed.
Because you used the same name for the function it was an easy drop in
replacement. :-)

Signed-off-by: Jason Wessel <[email protected]>

Cheers,
Jason.

2010-08-11 16:32:29

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [RFC PATCH 2/2] rcu,debug_core: allow the kernel debugger to reset the rcu stall timer

On Wed, Aug 11, 2010 at 09:07:05AM -0500, Jason Wessel wrote:
> On 08/10/2010 11:01 PM, Paul E. McKenney wrote:
> > On Mon, Aug 09, 2010 at 12:01:42PM -0700, Paul E. McKenney wrote:
> >
> > Here is an initial patch. Untested, probably doesn't even compile.
> > It is against my -rcu tree:
> >
> > git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-2.6-rcu.git rcu/testing
> >
> > Thoughts?
>
> Paul, I tried this out and code wise it looks fine to me. I performed
> a compile and regression test with the various kernel config options
> to cover this new code.

Very good, I have added your Tested-by.

> Thanks for fixing the rcu_preempt case that my original patch missed.
> Because you used the same name for the function it was an easy drop in
> replacement. :-)

No problem! As to keeping your name, don't worry, I won't let that
happen again. ;-)

> Signed-off-by: Jason Wessel <[email protected]>

Ah, yes -- I started from your patch, so I should have kept your
Signed-off-by. My apologies for failing to do so. I have added it
back in, please see below.

I have this commit queued for 2.6.37.

Thanx, Paul

commit e7732983e72865c30754b8be70482e6e8842a945
Author: Paul E. McKenney <[email protected]>
Date: Tue Aug 10 14:28:53 2010 -0700

rcu: permit suppressing current grace period's CPU stall warnings

When using a kernel debugger, a long sojourn in the debugger can get
you lots of RCU CPU stall warnings once you resume. This might not be
helpful, especially if you are using the system console. This patch
therefore allows RCU CPU stall warnings to be suppressed, but only for
the duration of the current set of grace periods.

This differs from Jason's original patch in that it adds support for
tiny RCU and preemptible RCU, and uses a slightly different method for
suppressing the RCU CPU stall warning messages.

Signed-off-by: Jason Wessel <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
Tested-by: Jason Wessel <[email protected]>

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 4cc5eba..3fa1797 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -178,6 +178,10 @@ static inline void rcu_sched_force_quiescent_state(void)
{
}

+static inline void rcu_cpu_stall_reset(void)
+{
+}
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC

extern int rcu_scheduler_active __read_mostly;
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index c13b85d..0726809 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -36,6 +36,7 @@ extern void rcu_sched_qs(int cpu);
extern void rcu_bh_qs(int cpu);
extern void rcu_note_context_switch(int cpu);
extern int rcu_needs_cpu(int cpu);
+extern void rcu_cpu_stall_reset(void);

#ifdef CONFIG_TREE_PREEMPT_RCU

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ff21411..42140a8 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -565,6 +565,22 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
return NOTIFY_DONE;
}

+/**
+ * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
+ *
+ * Set the stall-warning timeout way off into the future, thus preventing
+ * any RCU CPU stall-warning messages from appearing in the current set of
+ * RCU grace periods.
+ *
+ * The caller must disable hard irqs.
+ */
+void rcu_cpu_stall_reset(void)
+{
+ rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
+ rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
+ rcu_preempt_stall_reset();
+}
+
static struct notifier_block rcu_panic_block = {
.notifier_call = rcu_panic,
};
@@ -584,6 +600,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
{
}

+void rcu_cpu_stall_reset(void)
+{
+}
+
static void __init check_cpu_stall_init(void)
{
}
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index bb4d086..7abd439 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -372,6 +372,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
static void rcu_print_detail_task_stall(struct rcu_state *rsp);
static void rcu_print_task_stall(struct rcu_node *rnp);
+static void rcu_preempt_stall_reset(void);
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 63bb771..561410f 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -417,6 +417,16 @@ static void rcu_print_task_stall(struct rcu_node *rnp)
}
}

+/*
+ * Suppress preemptible RCU's CPU stall warnings by pushing the
+ * time of the next stall-warning message comfortably far into the
+ * future.
+ */
+static void rcu_preempt_stall_reset(void)
+{
+ rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2;
+}
+
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */

/*
@@ -867,6 +877,14 @@ static void rcu_print_task_stall(struct rcu_node *rnp)
{
}

+/*
+ * Because preemptible RCU does not exist, there is no need to suppress
+ * its CPU stall warnings.
+ */
+static void rcu_preempt_stall_reset(void)
+{
+}
+
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */

/*

2018-06-19 21:19:23

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [RFC PATCH 2/2] rcu,debug_core: allow the kernel debugger to reset the rcu stall timer

On Mon, Aug 09, 2010 at 12:12:12AM -0500, Jason Wessel wrote:
> When returning from the kernel debugger allow a reset of the rcu
> jiffies_stall value to prevent the rcu stall detector from sending NMI
> events which stack dumps on all the cpus in the system.

Not sure where the 2010 date came from, but it almost fooled me into
deleting your emails unread. ;-)

> Signed-off-by: Jason Wessel <[email protected]>
> CC: Dipankar Sarma <[email protected]>
> CC: Paul E. McKenney <[email protected]>
> CC: Ingo Molnar <[email protected]>
> ---
> include/linux/rcupdate.h | 8 ++++++++
> kernel/debug/debug_core.c | 2 ++
> kernel/rcutree.c | 9 +++++++++
> 3 files changed, 19 insertions(+), 0 deletions(-)
>
> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> index 9fbc54a..abd3ab6 100644
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@ -599,4 +599,12 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
> #define rcu_dereference_index_check(p, c) \
> __rcu_dereference_index_check((p), (c))
>
> +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> +extern void rcu_cpu_stall_reset(void);
> +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> +static inline void rcu_cpu_stall_reset(void)
> +{
> +}
> +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> +
> #endif /* __LINUX_RCUPDATE_H */
> diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
> index e4d6819..1600e90 100644
> --- a/kernel/debug/debug_core.c
> +++ b/kernel/debug/debug_core.c
> @@ -47,6 +47,7 @@
> #include <linux/pid.h>
> #include <linux/smp.h>
> #include <linux/mm.h>
> +#include <linux/rcupdate.h>
>
> #include <asm/cacheflush.h>
> #include <asm/byteorder.h>
> @@ -474,6 +475,7 @@ static void dbg_touch_watchdogs(void)
> {
> touch_softlockup_watchdog_sync();
> clocksource_touch_watchdog();
> + rcu_cpu_stall_reset();
> }
>
> static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs)
> diff --git a/kernel/rcutree.c b/kernel/rcutree.c
> index d5bc439..209b755 100644
> --- a/kernel/rcutree.c
> +++ b/kernel/rcutree.c
> @@ -532,6 +532,9 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
>
> if (rcu_cpu_stall_panicking)
> return;
> + /* Gracefully handle a watch dog reset when jiffies_stall == 0 */
> + if (!rsp->jiffies_stall)
> + return;

Why not just use the existing rcu_cpu_stall_reset()? It sets the next
stall a long way into the future, like 2 billion jiffies on 32-bit
systems.

> delta = jiffies - rsp->jiffies_stall;
> rnp = rdp->mynode;
> if ((rnp->qsmask & rdp->grpmask) && delta >= 0) {
> @@ -561,6 +564,12 @@ static void __init check_cpu_stall_init(void)
> atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
> }
>
> +void rcu_cpu_stall_reset(void)
> +{
> + rcu_sched_state.jiffies_stall = 0;
> + rcu_bh_state.jiffies_stall = 0;

This should get you a compiler warning given the existing
rcu_cpu_stall_reset(). It also fails to do anything about
rcu_preempt_state on PREEMPT=y kernels.

What happens if you just remove the rcutree.c changes from your
series and test with the result?

Thanx, Paul

> +}
> +
> #else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
>
> static void record_gp_stall_check_time(struct rcu_state *rsp)
> --
> 1.6.3.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>


2018-06-20 09:58:09

by Daniel Thompson

[permalink] [raw]
Subject: Re: [Kgdb-bugreport] [RFC PATCH 1/2] debug_core: move all watch dog syncs to a single function

On Mon, Aug 09, 2010 at 12:12:11AM -0500, Jason Wessel wrote:
> Move the various clock and watch dog syncs to a single function in
> advance of adding another sync for the rcu stall detector.
>
> Signed-off-by: Jason Wessel <[email protected]>

Small but sensible tidy up IMHO (whether or not patch 2 is altered).

Reviewed-by: Daniel Thompson <[email protected]>


> ---
> kernel/debug/debug_core.c | 15 +++++++++------
> 1 files changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
> index 3c2d497..e4d6819 100644
> --- a/kernel/debug/debug_core.c
> +++ b/kernel/debug/debug_core.c
> @@ -470,6 +470,12 @@ static void dbg_cpu_switch(int cpu, int next_cpu)
> kgdb_info[next_cpu].exception_state |= DCPU_NEXT_MASTER;
> }
>
> +static void dbg_touch_watchdogs(void)
> +{
> + touch_softlockup_watchdog_sync();
> + clocksource_touch_watchdog();
> +}
> +
> static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs)
> {
> unsigned long flags;
> @@ -523,8 +529,7 @@ return_normal:
> if (trace_on)
> tracing_on();
> atomic_dec(&cpu_in_kgdb[cpu]);
> - touch_softlockup_watchdog_sync();
> - clocksource_touch_watchdog();
> + dbg_touch_watchdogs();
> local_irq_restore(flags);
> return 0;
> }
> @@ -541,8 +546,7 @@ return_normal:
> (kgdb_info[cpu].task &&
> kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
> atomic_set(&kgdb_active, -1);
> - touch_softlockup_watchdog_sync();
> - clocksource_touch_watchdog();
> + dbg_touch_watchdogs();
> local_irq_restore(flags);
>
> goto acquirelock;
> @@ -659,8 +663,7 @@ kgdb_restore:
> tracing_on();
> /* Free kgdb_active */
> atomic_set(&kgdb_active, -1);
> - touch_softlockup_watchdog_sync();
> - clocksource_touch_watchdog();
> + dbg_touch_watchdogs();
> local_irq_restore(flags);
>
> return kgdb_info[cpu].ret_state;
> --
> 1.6.3.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
>
> ------------------------------------------------------------------------------
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, Slashdot.org! http://sdm.link/slashdot
> _______________________________________________
> Kgdb-bugreport mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/kgdb-bugreport