Hello!
This series contains torture-test and torture-test-scripting updates
for v6.5:
1. Add long_hold to adjust lock-hold delays.
2. Correct name of use_softirq module parameter.
3. rcu/rcuscale: Move rcu_scale_*() after kfree_scale_cleanup(),
courtesy of Qiuxu Zhuo.
4. rcu/rcuscale: Stop kfree_scale_thread thread(s) after unloading
rcuscale, courtesy of Qiuxu Zhuo.
5. doc/rcutorture: Add description of rcutorture.stall_cpu_block,
courtesy of Zqiang.
6. Remove duplicated argument -enable-kvm for ppc64, courtesy of
Zhouyi Zhou.
Thanx, Paul
------------------------------------------------------------------------
b/Documentation/admin-guide/kernel-parameters.txt | 12
b/kernel/locking/locktorture.c | 51 +-
b/kernel/rcu/rcuscale.c | 194 +++++-----
b/tools/testing/selftests/rcutorture/bin/functions.sh | 2
b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot | 2
b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot | 2
kernel/rcu/rcuscale.c | 5
7 files changed, 137 insertions(+), 131 deletions(-)
This commit adds a long_hold module parameter to allow testing diagnostics
for excessive lock-hold times. Also adjust torture_param() invocations
for longer line length while in the area.
Signed-off-by: Paul E. McKenney <[email protected]>
---
kernel/locking/locktorture.c | 51 ++++++++++++++++--------------------
1 file changed, 22 insertions(+), 29 deletions(-)
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 153ddc4c47ef..949d3deae506 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -33,24 +33,19 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <[email protected]>");
-torture_param(int, nwriters_stress, -1,
- "Number of write-locking stress-test threads");
-torture_param(int, nreaders_stress, -1,
- "Number of read-locking stress-test threads");
+torture_param(int, nwriters_stress, -1, "Number of write-locking stress-test threads");
+torture_param(int, nreaders_stress, -1, "Number of read-locking stress-test threads");
+torture_param(int, long_hold, 100, "Do occasional long hold of lock (ms), 0=disable");
torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
-torture_param(int, onoff_interval, 0,
- "Time between CPU hotplugs (s), 0=disable");
-torture_param(int, shuffle_interval, 3,
- "Number of jiffies between shuffles, 0=disable");
+torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (s), 0=disable");
+torture_param(int, shuffle_interval, 3, "Number of jiffies between shuffles, 0=disable");
torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable.");
-torture_param(int, stat_interval, 60,
- "Number of seconds between stats printk()s");
+torture_param(int, stat_interval, 60, "Number of seconds between stats printk()s");
torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
torture_param(int, rt_boost, 2,
- "Do periodic rt-boost. 0=Disable, 1=Only for rt_mutex, 2=For all lock types.");
+ "Do periodic rt-boost. 0=Disable, 1=Only for rt_mutex, 2=For all lock types.");
torture_param(int, rt_boost_factor, 50, "A factor determining how often rt-boost happens.");
-torture_param(int, verbose, 1,
- "Enable verbose debugging printk()s");
+torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
torture_param(int, nested_locks, 0, "Number of nested locks (max = 8)");
/* Going much higher trips "BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!" errors */
#define MAX_NESTED_LOCKS 8
@@ -120,7 +115,7 @@ static int torture_lock_busted_write_lock(int tid __maybe_unused)
static void torture_lock_busted_write_delay(struct torture_random_state *trsp)
{
- const unsigned long longdelay_ms = 100;
+ const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
/* We want a long delay occasionally to force massive contention. */
if (!(torture_random(trsp) %
@@ -198,16 +193,18 @@ __acquires(torture_spinlock)
static void torture_spin_lock_write_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 2;
- const unsigned long longdelay_ms = 100;
+ const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
+ unsigned long j;
/* We want a short delay mostly to emulate likely code, and
* we want a long delay occasionally to force massive contention.
*/
- if (!(torture_random(trsp) %
- (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
+ if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 2000 * longdelay_ms))) {
+ j = jiffies;
mdelay(longdelay_ms);
- if (!(torture_random(trsp) %
- (cxt.nrealwriters_stress * 2 * shortdelay_us)))
+ pr_alert("%s: delay = %lu jiffies.\n", __func__, jiffies - j);
+ }
+ if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 200 * shortdelay_us)))
udelay(shortdelay_us);
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
torture_preempt_schedule(); /* Allow test to be preempted. */
@@ -322,7 +319,7 @@ __acquires(torture_rwlock)
static void torture_rwlock_write_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 2;
- const unsigned long longdelay_ms = 100;
+ const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
/* We want a short delay mostly to emulate likely code, and
* we want a long delay occasionally to force massive contention.
@@ -455,14 +452,12 @@ __acquires(torture_mutex)
static void torture_mutex_delay(struct torture_random_state *trsp)
{
- const unsigned long longdelay_ms = 100;
+ const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
/* We want a long delay occasionally to force massive contention. */
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms * 5);
- else
- mdelay(longdelay_ms / 5);
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
torture_preempt_schedule(); /* Allow test to be preempted. */
}
@@ -630,7 +625,7 @@ __acquires(torture_rtmutex)
static void torture_rtmutex_delay(struct torture_random_state *trsp)
{
const unsigned long shortdelay_us = 2;
- const unsigned long longdelay_ms = 100;
+ const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
/*
* We want a short delay mostly to emulate likely code, and
@@ -640,7 +635,7 @@ static void torture_rtmutex_delay(struct torture_random_state *trsp)
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms);
if (!(torture_random(trsp) %
- (cxt.nrealwriters_stress * 2 * shortdelay_us)))
+ (cxt.nrealwriters_stress * 200 * shortdelay_us)))
udelay(shortdelay_us);
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
torture_preempt_schedule(); /* Allow test to be preempted. */
@@ -695,14 +690,12 @@ __acquires(torture_rwsem)
static void torture_rwsem_write_delay(struct torture_random_state *trsp)
{
- const unsigned long longdelay_ms = 100;
+ const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX;
/* We want a long delay occasionally to force massive contention. */
if (!(torture_random(trsp) %
(cxt.nrealwriters_stress * 2000 * longdelay_ms)))
mdelay(longdelay_ms * 10);
- else
- mdelay(longdelay_ms / 10);
if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
torture_preempt_schedule(); /* Allow test to be preempted. */
}
@@ -848,8 +841,8 @@ static int lock_torture_writer(void *arg)
lwsp->n_lock_acquired++;
}
- cxt.cur_ops->write_delay(&rand);
if (!skip_main_lock) {
+ cxt.cur_ops->write_delay(&rand);
lock_is_write_held = false;
WRITE_ONCE(last_lock_release, jiffies);
cxt.cur_ops->writeunlock(tid);
--
2.40.1
From: Zqiang <[email protected]>
If you build a kernel with CONFIG_PREEMPTION=n and CONFIG_PREEMPT_COUNT=y,
then run the rcutorture tests specifying stalls as follows:
runqemu kvm slirp nographic qemuparams="-m 1024 -smp 4" \
bootparams="console=ttyS0 rcutorture.stall_cpu=30 \
rcutorture.stall_no_softlockup=1 rcutorture.stall_cpu_block=1" -d
The tests will produce the following splat:
[ 10.841071] rcu-torture: rcu_torture_stall begin CPU stall
[ 10.841073] rcu_torture_stall start on CPU 3.
[ 10.841077] BUG: scheduling while atomic: rcu_torture_sta/66/0x0000000
....
[ 10.841108] Call Trace:
[ 10.841110] <TASK>
[ 10.841112] dump_stack_lvl+0x64/0xb0
[ 10.841118] dump_stack+0x10/0x20
[ 10.841121] __schedule_bug+0x8b/0xb0
[ 10.841126] __schedule+0x2172/0x2940
[ 10.841157] schedule+0x9b/0x150
[ 10.841160] schedule_timeout+0x2e8/0x4f0
[ 10.841192] schedule_timeout_uninterruptible+0x47/0x50
[ 10.841195] rcu_torture_stall+0x2e8/0x300
[ 10.841199] kthread+0x175/0x1a0
[ 10.841206] ret_from_fork+0x2c/0x50
This is because the rcutorture.stall_cpu_block=1 module parameter causes
rcu_torture_stall() to invoke schedule_timeout_uninterruptible() within
an RCU read-side critical section. This in turn results in a quiescent
state (which prevents the stall) and a sleep in an atomic context (which
produces the above splat).
Although this code is operating as designed, the design has proven to
be counterintuitive to many. This commit therefore updates the description
in kernel-parameters.txt accordingly.
Signed-off-by: Zqiang <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 9e5bab29685f..eaffe0f8771d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5087,8 +5087,16 @@
rcutorture.stall_cpu_block= [KNL]
Sleep while stalling if set. This will result
- in warnings from preemptible RCU in addition
- to any other stall-related activity.
+ in warnings from preemptible RCU in addition to
+ any other stall-related activity. Note that
+ in kernels built with CONFIG_PREEMPTION=n and
+ CONFIG_PREEMPT_COUNT=y, this parameter will
+ cause the CPU to pass through a quiescent state.
+ Any such quiescent states will suppress RCU CPU
+ stall warnings, but the time-based sleep will
+ also result in scheduling-while-atomic splats.
+ Which might or might not be what you want.
+
rcutorture.stall_cpu_holdoff= [KNL]
Time to wait (s) after boot before inducing stall.
--
2.40.1
From: Zhouyi Zhou <[email protected]>
The qemu argument -enable-kvm is duplicated because the qemu_args bash
variable in kvm-test-1-run.sh already provides it. This commit
therefore removes the ppc64-specific copy in functions.sh.
Signed-off-by: Zhouyi Zhou <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
tools/testing/selftests/rcutorture/bin/functions.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index b52d5069563c..48b9147e8c91 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -250,7 +250,7 @@ identify_qemu_args () {
echo -machine virt,gic-version=host -cpu host
;;
qemu-system-ppc64)
- echo -enable-kvm -M pseries -nodefaults
+ echo -M pseries -nodefaults
echo -device spapr-vscsi
if test -n "$TORTURE_QEMU_INTERACTIVE" -a -n "$TORTURE_QEMU_MAC"
then
--
2.40.1
On Wed, May 10, 2023 at 10:13 AM Paul E. McKenney <[email protected]> wrote:
>
> From: Zhouyi Zhou <[email protected]>
>
> The qemu argument -enable-kvm is duplicated because the qemu_args bash
> variable in kvm-test-1-run.sh has already provides it. This commit
drop the has.
- Joel
> therefore removes the ppc64-specific copy in functions.sh.
>
> Signed-off-by: Zhouyi Zhou <[email protected]>
> Signed-off-by: Paul E. McKenney <[email protected]>
> ---
> tools/testing/selftests/rcutorture/bin/functions.sh | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
> index b52d5069563c..48b9147e8c91 100644
> --- a/tools/testing/selftests/rcutorture/bin/functions.sh
> +++ b/tools/testing/selftests/rcutorture/bin/functions.sh
> @@ -250,7 +250,7 @@ identify_qemu_args () {
> echo -machine virt,gic-version=host -cpu host
> ;;
> qemu-system-ppc64)
> - echo -enable-kvm -M pseries -nodefaults
> + echo -M pseries -nodefaults
> echo -device spapr-vscsi
> if test -n "$TORTURE_QEMU_INTERACTIVE" -a -n "$TORTURE_QEMU_MAC"
> then
> --
> 2.40.1
>
On Wed, May 10, 2023 at 10:12 AM Paul E. McKenney <[email protected]> wrote:
>
> From: Zqiang <[email protected]>
>
> If you build a kernel with CONFIG_PREEMPTION=n and CONFIG_PREEMPT_COUNT=y,
> then run the rcutorture tests specifying stalls as follows:
>
> runqemu kvm slirp nographic qemuparams="-m 1024 -smp 4" \
> bootparams="console=ttyS0 rcutorture.stall_cpu=30 \
> rcutorture.stall_no_softlockup=1 rcutorture.stall_cpu_block=1" -d
>
> The tests will produce the following splat:
>
> [ 10.841071] rcu-torture: rcu_torture_stall begin CPU stall
> [ 10.841073] rcu_torture_stall start on CPU 3.
> [ 10.841077] BUG: scheduling while atomic: rcu_torture_sta/66/0x0000000
> ....
> [ 10.841108] Call Trace:
> [ 10.841110] <TASK>
> [ 10.841112] dump_stack_lvl+0x64/0xb0
> [ 10.841118] dump_stack+0x10/0x20
> [ 10.841121] __schedule_bug+0x8b/0xb0
> [ 10.841126] __schedule+0x2172/0x2940
> [ 10.841157] schedule+0x9b/0x150
> [ 10.841160] schedule_timeout+0x2e8/0x4f0
> [ 10.841192] schedule_timeout_uninterruptible+0x47/0x50
> [ 10.841195] rcu_torture_stall+0x2e8/0x300
> [ 10.841199] kthread+0x175/0x1a0
> [ 10.841206] ret_from_fork+0x2c/0x50
Another way to get rid of the warning would be to replace the
cur_ops->readlock() with rcu_read_lock(). Though perhaps that will not
test whether the particular RCU flavor under testing is capable of
causing a stall :-).
> rcutorture.stall_cpu_block= [KNL]
> Sleep while stalling if set. This will result
> - in warnings from preemptible RCU in addition
> - to any other stall-related activity.
> + in warnings from preemptible RCU in addition to
> + any other stall-related activity. Note that
> + in kernels built with CONFIG_PREEMPTION=n and
> + CONFIG_PREEMPT_COUNT=y, this parameter will
> + cause the CPU to pass through a quiescent state.
> + Any such quiescent states will suppress RCU CPU
> + stall warnings, but the time-based sleep will
> + also result in scheduling-while-atomic splats.
Could change last part to "but may also result in
scheduling-while-atomic splats as preemption might be disabled for
certain RCU flavors in order to cause the stall".
> + Which might or might not be what you want.
> +
Suggest drop this line ;-).
- Joel
> rcutorture.stall_cpu_holdoff= [KNL]
> Time to wait (s) after boot before inducing stall.
> --
> 2.40.1
>
On Wed, May 10, 2023 at 10:12 AM Paul E. McKenney <[email protected]> wrote:
>
> Hello!
>
> This series contains torture-test and torture-test-scripting updates
> for v6.5:
>
> 1. Add long_hold to adjust lock-hold delays.
>
> 2. Correct name of use_softirq module parameter.
>
> 3. rcu/rcuscale: Move rcu_scale_*() after kfree_scale_cleanup(),
> courtesy of Qiuxu Zhuo.
>
> 4. rcu/rcuscale: Stop kfree_scale_thread thread(s) after unloading
> rcuscale, courtesy of Qiuxu Zhuo.
>
> 5. doc/rcutorture: Add description of rcutorture.stall_cpu_block,
> courtesy of Zqiang.
>
> 6. Remove duplicated argument -enable-kvm for ppc64, courtesy of
> Zhouyi Zhou.
Other than the small nits I mentioned, this series LGTM. Feel free to add:
Reviewed-by: Joel Fernandes (Google) <[email protected]>
- Joel
>
> Thanx, Paul
>
> ------------------------------------------------------------------------
>
> b/Documentation/admin-guide/kernel-parameters.txt | 12
> b/kernel/locking/locktorture.c | 51 +-
> b/kernel/rcu/rcuscale.c | 194 +++++-----
> b/tools/testing/selftests/rcutorture/bin/functions.sh | 2
> b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot | 2
> b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot | 2
> kernel/rcu/rcuscale.c | 5
> 7 files changed, 137 insertions(+), 131 deletions(-)
On Thu, May 11, 2023 at 1:26 PM Joel Fernandes <[email protected]> wrote:
>
> On Wed, May 10, 2023 at 10:13 AM Paul E. McKenney <[email protected]> wrote:
> >
> > From: Zhouyi Zhou <[email protected]>
> >
> > The qemu argument -enable-kvm is duplicated because the qemu_args bash
> > variable in kvm-test-1-run.sh has already provides it. This commit
>
> drop the has.
Thanks, Joel, for the fix ;-)
Should I resend the patch, or would Paul do me the favor instead? ;-)
Thank you all
Zhouyi
>
> - Joel
>
> > therefore removes the ppc64-specific copy in functions.sh.
> >
> > Signed-off-by: Zhouyi Zhou <[email protected]>
> > Signed-off-by: Paul E. McKenney <[email protected]>
> > ---
> > tools/testing/selftests/rcutorture/bin/functions.sh | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
> > index b52d5069563c..48b9147e8c91 100644
> > --- a/tools/testing/selftests/rcutorture/bin/functions.sh
> > +++ b/tools/testing/selftests/rcutorture/bin/functions.sh
> > @@ -250,7 +250,7 @@ identify_qemu_args () {
> > echo -machine virt,gic-version=host -cpu host
> > ;;
> > qemu-system-ppc64)
> > - echo -enable-kvm -M pseries -nodefaults
> > + echo -M pseries -nodefaults
> > echo -device spapr-vscsi
> > if test -n "$TORTURE_QEMU_INTERACTIVE" -a -n "$TORTURE_QEMU_MAC"
> > then
> > --
> > 2.40.1
> >
On Thu, May 11, 2023 at 02:18:57PM +0800, Zhouyi Zhou wrote:
> On Thu, May 11, 2023 at 1:26 PM Joel Fernandes <[email protected]> wrote:
> >
> > On Wed, May 10, 2023 at 10:13 AM Paul E. McKenney <[email protected]> wrote:
> > >
> > > From: Zhouyi Zhou <[email protected]>
> > >
> > > The qemu argument -enable-kvm is duplicated because the qemu_args bash
> > > variable in kvm-test-1-run.sh has already provides it. This commit
> >
> > drop the has.
Good eyes, and thank you!
> Thank Joel for the fix ;-)
> Should I resend the patch, or would Paul do me the favor instead ;-)
>
> Thank you all
I will do it on my next rebase. ;-)
Thanx, Paul
> Zhouyi
> >
> > - Joel
> >
> > > therefore removes the ppc64-specific copy in functions.sh.
> > >
> > > Signed-off-by: Zhouyi Zhou <[email protected]>
> > > Signed-off-by: Paul E. McKenney <[email protected]>
> > > ---
> > > tools/testing/selftests/rcutorture/bin/functions.sh | 2 +-
> > > 1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
> > > index b52d5069563c..48b9147e8c91 100644
> > > --- a/tools/testing/selftests/rcutorture/bin/functions.sh
> > > +++ b/tools/testing/selftests/rcutorture/bin/functions.sh
> > > @@ -250,7 +250,7 @@ identify_qemu_args () {
> > > echo -machine virt,gic-version=host -cpu host
> > > ;;
> > > qemu-system-ppc64)
> > > - echo -enable-kvm -M pseries -nodefaults
> > > + echo -M pseries -nodefaults
> > > echo -device spapr-vscsi
> > > if test -n "$TORTURE_QEMU_INTERACTIVE" -a -n "$TORTURE_QEMU_MAC"
> > > then
> > > --
> > > 2.40.1
> > >
On Wed, May 10, 2023 at 10:48:21PM -0700, Joel Fernandes wrote:
> On Wed, May 10, 2023 at 10:12 AM Paul E. McKenney <[email protected]> wrote:
> >
> > Hello!
> >
> > This series contains torture-test and torture-test-scripting updates
> > for v6.5:
> >
> > 1. Add long_hold to adjust lock-hold delays.
> >
> > 2. Correct name of use_softirq module parameter.
> >
> > 3. rcu/rcuscale: Move rcu_scale_*() after kfree_scale_cleanup(),
> > courtesy of Qiuxu Zhuo.
> >
> > 4. rcu/rcuscale: Stop kfree_scale_thread thread(s) after unloading
> > rcuscale, courtesy of Qiuxu Zhuo.
> >
> > 5. doc/rcutorture: Add description of rcutorture.stall_cpu_block,
> > courtesy of Zqiang.
> >
> > 6. Remove duplicated argument -enable-kvm for ppc64, courtesy of
> > Zhouyi Zhou.
>
> Other than the small nits I mentioned, this series LGTM. Feel free to add:
>
> Reviewed-by: Joel Fernandes (Google) <[email protected]>
I will apply this to 1-4 and 6 on my next rebase, and to 5 once we come to
agreement.
Thanx, Paul
> - Joel
>
>
> >
> > Thanx, Paul
> >
> > ------------------------------------------------------------------------
> >
> > b/Documentation/admin-guide/kernel-parameters.txt | 12
> > b/kernel/locking/locktorture.c | 51 +-
> > b/kernel/rcu/rcuscale.c | 194 +++++-----
> > b/tools/testing/selftests/rcutorture/bin/functions.sh | 2
> > b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot | 2
> > b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot | 2
> > kernel/rcu/rcuscale.c | 5
> > 7 files changed, 137 insertions(+), 131 deletions(-)
On Wed, May 10, 2023 at 10:47:36PM -0700, Joel Fernandes wrote:
> On Wed, May 10, 2023 at 10:12 AM Paul E. McKenney <[email protected]> wrote:
> >
> > From: Zqiang <[email protected]>
> >
> > If you build a kernel with CONFIG_PREEMPTION=n and CONFIG_PREEMPT_COUNT=y,
> > then run the rcutorture tests specifying stalls as follows:
> >
> > runqemu kvm slirp nographic qemuparams="-m 1024 -smp 4" \
> > bootparams="console=ttyS0 rcutorture.stall_cpu=30 \
> > rcutorture.stall_no_softlockup=1 rcutorture.stall_cpu_block=1" -d
> >
> > The tests will produce the following splat:
> >
> > [ 10.841071] rcu-torture: rcu_torture_stall begin CPU stall
> > [ 10.841073] rcu_torture_stall start on CPU 3.
> > [ 10.841077] BUG: scheduling while atomic: rcu_torture_sta/66/0x0000000
> > ....
> > [ 10.841108] Call Trace:
> > [ 10.841110] <TASK>
> > [ 10.841112] dump_stack_lvl+0x64/0xb0
> > [ 10.841118] dump_stack+0x10/0x20
> > [ 10.841121] __schedule_bug+0x8b/0xb0
> > [ 10.841126] __schedule+0x2172/0x2940
> > [ 10.841157] schedule+0x9b/0x150
> > [ 10.841160] schedule_timeout+0x2e8/0x4f0
> > [ 10.841192] schedule_timeout_uninterruptible+0x47/0x50
> > [ 10.841195] rcu_torture_stall+0x2e8/0x300
> > [ 10.841199] kthread+0x175/0x1a0
> > [ 10.841206] ret_from_fork+0x2c/0x50
>
> Another way to get rid of the warning would be to replace the
> cur_ops->readlock() with rcu_read_lock(). Though perhaps that will not
> test whether the particular RCU flavor under testing is capable of
> causing a stall :-).
Exactly!
> > rcutorture.stall_cpu_block= [KNL]
> > Sleep while stalling if set. This will result
> > - in warnings from preemptible RCU in addition
> > - to any other stall-related activity.
> > + in warnings from preemptible RCU in addition to
> > + any other stall-related activity. Note that
> > + in kernels built with CONFIG_PREEMPTION=n and
> > + CONFIG_PREEMPT_COUNT=y, this parameter will
> > + cause the CPU to pass through a quiescent state.
> > + Any such quiescent states will suppress RCU CPU
> > + stall warnings, but the time-based sleep will
> > + also result in scheduling-while-atomic splats.
>
> Could change last part to "but may also result in
> scheduling-while-atomic splats as preemption might be disabled for
> certain RCU flavors in order to cause the stall".
Is that needed given the earlier "in kernels built with
CONFIG_PREEMPTION=n and CONFIG_PREEMPT_COUNT=y"?
> > + Which might or might not be what you want.
> > +
>
> Suggest drop this line ;-).
OK, I will bite. ;-)
What is your concern with this line?
Thanx, Paul
> - Joel
>
> > rcutorture.stall_cpu_holdoff= [KNL]
> > Time to wait (s) after boot before inducing stall.
> > --
> > 2.40.1
> >
On Thu, May 11, 2023 at 11:11 AM Paul E. McKenney <[email protected]> wrote:
>
> On Wed, May 10, 2023 at 10:47:36PM -0700, Joel Fernandes wrote:
> > On Wed, May 10, 2023 at 10:12 AM Paul E. McKenney <[email protected]> wrote:
> > >
> > > From: Zqiang <[email protected]>
> > >
> > > If you build a kernel with CONFIG_PREEMPTION=n and CONFIG_PREEMPT_COUNT=y,
> > > then run the rcutorture tests specifying stalls as follows:
> > >
> > > runqemu kvm slirp nographic qemuparams="-m 1024 -smp 4" \
> > > bootparams="console=ttyS0 rcutorture.stall_cpu=30 \
> > > rcutorture.stall_no_softlockup=1 rcutorture.stall_cpu_block=1" -d
> > >
> > > The tests will produce the following splat:
> > >
> > > [ 10.841071] rcu-torture: rcu_torture_stall begin CPU stall
> > > [ 10.841073] rcu_torture_stall start on CPU 3.
> > > [ 10.841077] BUG: scheduling while atomic: rcu_torture_sta/66/0x0000000
> > > ....
> > > [ 10.841108] Call Trace:
> > > [ 10.841110] <TASK>
> > > [ 10.841112] dump_stack_lvl+0x64/0xb0
> > > [ 10.841118] dump_stack+0x10/0x20
> > > [ 10.841121] __schedule_bug+0x8b/0xb0
> > > [ 10.841126] __schedule+0x2172/0x2940
> > > [ 10.841157] schedule+0x9b/0x150
> > > [ 10.841160] schedule_timeout+0x2e8/0x4f0
> > > [ 10.841192] schedule_timeout_uninterruptible+0x47/0x50
> > > [ 10.841195] rcu_torture_stall+0x2e8/0x300
> > > [ 10.841199] kthread+0x175/0x1a0
> > > [ 10.841206] ret_from_fork+0x2c/0x50
> >
> > Another way to get rid of the warning would be to replace the
> > cur_ops->readlock() with rcu_read_lock(). Though perhaps that will not
> > test whether the particular RCU flavor under testing is capable of
> > causing a stall :-).
>
> Exactly!
>
> > > rcutorture.stall_cpu_block= [KNL]
> > > Sleep while stalling if set. This will result
> > > - in warnings from preemptible RCU in addition
> > > - to any other stall-related activity.
> > > + in warnings from preemptible RCU in addition to
> > > + any other stall-related activity. Note that
> > > + in kernels built with CONFIG_PREEMPTION=n and
> > > + CONFIG_PREEMPT_COUNT=y, this parameter will
> > > + cause the CPU to pass through a quiescent state.
> > > + Any such quiescent states will suppress RCU CPU
> > > + stall warnings, but the time-based sleep will
> > > + also result in scheduling-while-atomic splats.
> >
> > Could change last part to "but may also result in
> > scheduling-while-atomic splats as preemption might be disabled for
> > certain RCU flavors in order to cause the stall".
>
> Is that needed given the earlier "in kernels built with
> CONFIG_PREEMPTION=n and CONFIG_PREEMPT_COUNT=y"?
Hmm, I guess it is not clear to the reader, without reading the code, why
preempt got disabled. So I would add that last part I mentioned, but I
am OK either way; it is just a suggestion.
>
> > > + Which might or might not be what you want.
> > > +
> >
> > Suggest drop this line ;-).
>
> OK, I will bite. ;-)
>
> What is your concern with this line?
It is not needed IMO.
thanks,
- Joel
> > > rcutorture.stall_cpu_holdoff= [KNL]
> > > Time to wait (s) after boot before inducing stall.
> > > --
> > > 2.40.1
> > >
On Thu, May 11, 2023 at 10:00:18PM -0700, Joel Fernandes wrote:
> On Thu, May 11, 2023 at 11:11 AM Paul E. McKenney <[email protected]> wrote:
> >
> > On Wed, May 10, 2023 at 10:47:36PM -0700, Joel Fernandes wrote:
> > > On Wed, May 10, 2023 at 10:12 AM Paul E. McKenney <[email protected]> wrote:
> > > >
> > > > From: Zqiang <[email protected]>
> > > >
> > > > If you build a kernel with CONFIG_PREEMPTION=n and CONFIG_PREEMPT_COUNT=y,
> > > > then run the rcutorture tests specifying stalls as follows:
> > > >
> > > > runqemu kvm slirp nographic qemuparams="-m 1024 -smp 4" \
> > > > bootparams="console=ttyS0 rcutorture.stall_cpu=30 \
> > > > rcutorture.stall_no_softlockup=1 rcutorture.stall_cpu_block=1" -d
> > > >
> > > > The tests will produce the following splat:
> > > >
> > > > [ 10.841071] rcu-torture: rcu_torture_stall begin CPU stall
> > > > [ 10.841073] rcu_torture_stall start on CPU 3.
> > > > [ 10.841077] BUG: scheduling while atomic: rcu_torture_sta/66/0x0000000
> > > > ....
> > > > [ 10.841108] Call Trace:
> > > > [ 10.841110] <TASK>
> > > > [ 10.841112] dump_stack_lvl+0x64/0xb0
> > > > [ 10.841118] dump_stack+0x10/0x20
> > > > [ 10.841121] __schedule_bug+0x8b/0xb0
> > > > [ 10.841126] __schedule+0x2172/0x2940
> > > > [ 10.841157] schedule+0x9b/0x150
> > > > [ 10.841160] schedule_timeout+0x2e8/0x4f0
> > > > [ 10.841192] schedule_timeout_uninterruptible+0x47/0x50
> > > > [ 10.841195] rcu_torture_stall+0x2e8/0x300
> > > > [ 10.841199] kthread+0x175/0x1a0
> > > > [ 10.841206] ret_from_fork+0x2c/0x50
> > >
> > > Another way to get rid of the warning would be to replace the
> > > cur_ops->readlock() with rcu_read_lock(). Though perhaps that will not
> > > test whether the particular RCU flavor under testing is capable of
> > > causing a stall :-).
> >
> > Exactly!
> >
> > > > rcutorture.stall_cpu_block= [KNL]
> > > > Sleep while stalling if set. This will result
> > > > - in warnings from preemptible RCU in addition
> > > > - to any other stall-related activity.
> > > > + in warnings from preemptible RCU in addition to
> > > > + any other stall-related activity. Note that
> > > > + in kernels built with CONFIG_PREEMPTION=n and
> > > > + CONFIG_PREEMPT_COUNT=y, this parameter will
> > > > + cause the CPU to pass through a quiescent state.
> > > > + Any such quiescent states will suppress RCU CPU
> > > > + stall warnings, but the time-based sleep will
> > > > + also result in scheduling-while-atomic splats.
> > >
> > > Could change last part to "but may also result in
> > > scheduling-while-atomic splats as preemption might be disabled for
> > > certain RCU flavors in order to cause the stall".
> >
> > Is that needed given the earlier "in kernels built with
> > CONFIG_PREEMPTION=n and CONFIG_PREEMPT_COUNT=y"?
>
> Hmm, I guess is not clear to the reader without code reading about why
> preempt got disabled. So I would add that last part I mentioned, but I
> am Ok either way, it is just a suggestion.
I will figure something out to more tightly tie this to the previous
CONFIG_PREEMPTION=n.
> > > > + Which might or might not be what you want.
> > > > +
> > >
> > > Suggest drop this line ;-).
> >
> > OK, I will bite. ;-)
> >
> > What is your concern with this line?
>
> It is not needed IMO.
It actually is, otherwise the various testing services complain about
getting splats. I will upgrade it to something more explicit.
Thanx, Paul
> thanks,
>
> - Joel
>
>
> > > > rcutorture.stall_cpu_holdoff= [KNL]
> > > > Time to wait (s) after boot before inducing stall.
> > > > --
> > > > 2.40.1
> > > >