Hello!
This series contains torture-test updates:
1. Add batch number to script printout to allow easier estimation
of test duration.
2. Dump stack when RCU's grace-period kthread stalls.
3. Set the scripting's default test-hang "grace period" to three
minutes, and allow this to be overridden.
4. Remove the obsolete CONFIG_RCU_USER_QS Kconfig option from
the rcutorture selftest documentation, courtesy of Yang Shi.
5. Print symbolic name for rcu_torture_writer_state, saving the
time otherwise required to manually translate from integer
to symbol.
6. Print symbolic name for ->gp_state, again saving the time
otherwise required to manually translate from integer to symbol.
7. Abbreviate console error dump, allowing the results of an
rcutorture script run to be observed with less back-scrolling.
8. Place console.log files correctly from the get-go, preventing
multiple qemu instances from outputting to the same file.
Thanx, Paul
------------------------------------------------------------------------
b/kernel/rcu/rcutorture.c | 24 +++++
b/kernel/rcu/tree.c | 22 ++++-
b/kernel/rcu/tree.h | 9 ++
b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 9 --
b/tools/testing/selftests/rcutorture/bin/kvm.sh | 22 +++--
b/tools/testing/selftests/rcutorture/bin/parse-console.sh | 41 ++++++++--
b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt | 1
b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt | 4
8 files changed, 104 insertions(+), 28 deletions(-)
Currently, the scripts print "----Start batch" at the beginning of each
batch, which does serve as a good visual delimiter between batches.
Unfortunately, if there are a lot of batches, it is hard to quickly
estimate test runtime from the output of "--dryrun sched". This commit
therefore adds a batch number, so that the beginning-of-batch output
looks like this "----Start batch 10" for the tenth batch.
Signed-off-by: Paul E. McKenney <[email protected]>
---
tools/testing/selftests/rcutorture/bin/kvm.sh | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index f6483609ebc2..013c48239a66 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -307,10 +307,10 @@ awk < $T/cfgcpu.pack \
}
# Dump out the scripting required to run one test batch.
-function dump(first, pastlast)
+function dump(first, pastlast, batchnum)
{
- print "echo ----Start batch: `date`";
- print "echo ----Start batch: `date` >> " rd "/log";
+ print "echo ----Start batch " batchnum ": `date`";
+ print "echo ----Start batch " batchnum ": `date` >> " rd "/log";
jn=1
for (j = first; j < pastlast; j++) {
builddir=KVM "/b" jn
@@ -371,25 +371,28 @@ END {
njobs = i;
nc = ncpus;
first = 0;
+ batchnum = 1;
# Each pass through the following loop considers one test.
for (i = 0; i < njobs; i++) {
if (ncpus == 0) {
# Sequential test specified, each test its own batch.
- dump(i, i + 1);
+ dump(i, i + 1, batchnum);
first = i;
+ batchnum++;
} else if (nc < cpus[i] && i != 0) {
# Out of CPUs, dump out a batch.
- dump(first, i);
+ dump(first, i, batchnum);
first = i;
nc = ncpus;
+ batchnum++;
}
# Account for the CPUs needed by the current test.
nc -= cpus[i];
}
# Dump the last batch.
if (ncpus != 0)
- dump(first, i);
+ dump(first, i, batchnum);
}' >> $T/script
cat << ___EOF___ >> $T/script
--
2.5.2
This commit increases debug information in the case where the grace-period
kthread is being prevented from running by dumping that kthread's stack.
Signed-off-by: Paul E. McKenney <[email protected]>
---
kernel/rcu/tree.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 81aa1cdc6bc9..7b78c88e19a3 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1196,12 +1196,15 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
j = jiffies;
gpa = READ_ONCE(rsp->gp_activity);
- if (j - gpa > 2 * HZ)
+ if (j - gpa > 2 * HZ) {
pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x s%d ->state=%#lx\n",
rsp->name, j - gpa,
rsp->gpnum, rsp->completed,
rsp->gp_flags, rsp->gp_state,
- rsp->gp_kthread ? rsp->gp_kthread->state : 0);
+ rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
+ if (rsp->gp_kthread)
+ sched_show_task(rsp->gp_kthread);
+ }
}
/*
--
2.5.2
The default test grace period of two minutes is insufficient in some
cases and excessive in others. This commit therefore increases the
default to three minutes, but also adds a --shutdown-grace parameter
to allow the default to be overridden.
Signed-off-by: Paul E. McKenney <[email protected]>
---
tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 4 +---
tools/testing/selftests/rcutorture/bin/kvm.sh | 7 +++++++
2 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 5236e073919d..d39273dae464 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -38,8 +38,6 @@
#
# Authors: Paul E. McKenney <[email protected]>
-grace=120
-
T=/tmp/kvm-test-1-run.sh.$$
trap 'rm -rf $T' 0
touch $T
@@ -214,7 +212,7 @@ then
else
break
fi
- if test $kruntime -ge $((seconds + grace))
+ if test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
then
echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1
kill -KILL $qemu_pid
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 013c48239a66..4a431767f77a 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -42,6 +42,7 @@ TORTURE_DEFCONFIG=defconfig
TORTURE_BOOT_IMAGE=""
TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
TORTURE_KMAKE_ARG=""
+TORTURE_SHUTDOWN_GRACE=180
TORTURE_SUITE=rcu
resdir=""
configs=""
@@ -149,6 +150,11 @@ do
resdir=$2
shift
;;
+ --shutdown-grace)
+ checkarg --shutdown-grace "(seconds)" "$#" "$2" '^[0-9]*$' '^error'
+ TORTURE_SHUTDOWN_GRACE=$2
+ shift
+ ;;
--torture)
checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\)$' '^--'
TORTURE_SUITE=$2
@@ -266,6 +272,7 @@ TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
+TORTURE_SHUTDOWN_GRACE="$TORTURE_SHUTDOWN_GRACE"; export TORTURE_SHUTDOWN_GRACE
TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE
if ! test -e $resdir
then
--
2.5.2
From: Yang Shi <[email protected]>
Commit d1ec4c34c7a9 ("rcu: Drop RCU_USER_QS in favor of NO_HZ_FULL") has
removed RCU_USER_QS from Kconfig file, so remove it from some documents
to avoid any confusion.
Signed-off-by: Yang Shi <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
---
tools/testing/selftests/rcutorture/doc/TINY_RCU.txt | 1 -
tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt | 4 ----
2 files changed, 5 deletions(-)
diff --git a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
index 9ef33a743b73..24396ae8355b 100644
--- a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
+++ b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
@@ -20,7 +20,6 @@ CONFIG_PROVE_RCU
CONFIG_NO_HZ_FULL_SYSIDLE
CONFIG_RCU_NOCB_CPU
-CONFIG_RCU_USER_QS
Meaningless for TINY_RCU.
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
index 657f3a035488..4e2b1893d40d 100644
--- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -72,10 +72,6 @@ CONFIG_RCU_TORTURE_TEST_RUNNABLE
Always used in KVM testing.
-CONFIG_RCU_USER_QS
-
- Redundant with CONFIG_NO_HZ_FULL.
-
CONFIG_PREEMPT_RCU
CONFIG_TREE_RCU
--
2.5.2
Currently, rcu_torture_writer_state is printed as an integer, which slows
debugging. This commit therefore prints a symbolic name in addition to
the integer.
Signed-off-by: Paul E. McKenney <[email protected]>
---
kernel/rcu/rcutorture.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index d89328e260df..adbb194e2b5d 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -162,6 +162,27 @@ static int rcu_torture_writer_state;
#define RTWS_SYNC 7
#define RTWS_STUTTER 8
#define RTWS_STOPPING 9
+static char * const rcu_torture_writer_state_names[] = {
+ "RTWS_FIXED_DELAY",
+ "RTWS_DELAY",
+ "RTWS_REPLACE",
+ "RTWS_DEF_FREE",
+ "RTWS_EXP_SYNC",
+ "RTWS_COND_GET",
+ "RTWS_COND_SYNC",
+ "RTWS_SYNC",
+ "RTWS_STUTTER",
+ "RTWS_STOPPING",
+};
+
+char *rcu_torture_writer_state_getname(void)
+{
+ unsigned int i = READ_ONCE(rcu_torture_writer_state);
+
+ if (i >= ARRAY_SIZE(rcu_torture_writer_state_names))
+ return "???";
+ return rcu_torture_writer_state_names[i];
+}
#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
#define RCUTORTURE_RUNNABLE_INIT 1
@@ -1307,7 +1328,8 @@ rcu_torture_stats_print(void)
rcutorture_get_gp_data(cur_ops->ttype,
&flags, &gpnum, &completed);
- pr_alert("??? Writer stall state %d g%lu c%lu f%#x\n",
+ pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x\n",
+ rcu_torture_writer_state_getname(),
rcu_torture_writer_state,
gpnum, completed, flags);
show_rcu_gp_kthreads();
--
2.5.2
Currently, ->gp_state is printed as an integer, which slows debugging.
This commit therefore prints a symbolic name in addition to the integer.
Signed-off-by: Paul E. McKenney <[email protected]>
[ Updated to fix relational operator called out by Dan Carpenter. ]
---
kernel/rcu/tree.c | 15 +++++++++++++--
kernel/rcu/tree.h | 9 +++++++++
2 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 7b78c88e19a3..491c99a73996 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1187,6 +1187,16 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
}
/*
+ * Convert a ->gp_state value to a character string.
+ */
+static char *gp_state_getname(short gs)
+{
+ if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names))
+ return "???";
+ return gp_state_names[gs];
+}
+
+/*
* Complain about starvation of grace-period kthread.
*/
static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
@@ -1197,10 +1207,11 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
j = jiffies;
gpa = READ_ONCE(rsp->gp_activity);
if (j - gpa > 2 * HZ) {
- pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x s%d ->state=%#lx\n",
+ pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx\n",
rsp->name, j - gpa,
rsp->gpnum, rsp->completed,
- rsp->gp_flags, rsp->gp_state,
+ rsp->gp_flags,
+ gp_state_getname(rsp->gp_state), rsp->gp_state,
rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
if (rsp->gp_kthread)
sched_show_task(rsp->gp_kthread);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index f32bebb6bc90..f0304988e3f8 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -544,6 +544,15 @@ struct rcu_state {
#define RCU_GP_DOING_FQS 4 /* Wait done for force-quiescent-state time. */
#define RCU_GP_CLEANUP 5 /* Grace-period cleanup started. */
#define RCU_GP_CLEANED 6 /* Grace-period cleanup complete. */
+static char * const gp_state_names[] = {
+ "RCU_GP_IDLE",
+ "RCU_GP_WAIT_GPS",
+ "RCU_GP_DONE_GPS",
+ "RCU_GP_WAIT_FQS",
+ "RCU_GP_DOING_FQS",
+ "RCU_GP_CLEANUP",
+ "RCU_GP_CLEANED",
+};
extern struct list_head rcu_struct_flavors;
--
2.5.2
Currently, the scripts print a list of warning/bug indicators from the
console.log file. This works well if there are only a few warnings or
bugs, but can be quite annoying if there is a large number. This commit
therefore prints a summary listing the number of each type of warning/bug
indicator, but only if there is at least one such indicator. The full
list is stored in the results directory at console.log.diags, which
makes it easier to find the warning/bugs in the full console.log.
Signed-off-by: Paul E. McKenney <[email protected]>
---
.../selftests/rcutorture/bin/parse-console.sh | 41 ++++++++++++++++++----
1 file changed, 35 insertions(+), 6 deletions(-)
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index d8f35cf116be..844787a0d7be 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -24,9 +24,6 @@
#
# Authors: Paul E. McKenney <[email protected]>
-T=/tmp/abat-chk-badness.sh.$$
-trap 'rm -f $T' 0
-
file="$1"
title="$2"
@@ -36,9 +33,41 @@ if grep -Pq '\x00' < $file
then
print_warning Console output contains nul bytes, old qemu still running?
fi
-egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|Stall ended before state dump start' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $T
-if test -s $T
+egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|Stall ended before state dump start' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags
+if test -s $1.diags
then
print_warning Assertion failure in $file $title
- cat $T
+ # cat $1.diags
+ summary=""
+ n_badness=`grep -c Badness $1`
+ if test "$n_badness" -ne 0
+ then
+ summary="$summary Badness: $n_badness"
+ fi
+ n_warn=`grep -v 'Warning: unable to open an initial console' $1 | egrep -c 'WARNING:|Warn'`
+ if test "$n_warn" -ne 0
+ then
+ summary="$summary Warnings: $n_warn"
+ fi
+ n_bugs=`egrep -c 'BUG|Oops:' $1`
+ if test "$n_bugs" -ne 0
+ then
+ summary="$summary Bugs: $n_bugs"
+ fi
+ n_calltrace=`grep -c 'Call Trace:' $1`
+ if test "$n_calltrace" -ne 0
+ then
+ summary="$summary Call Traces: $n_calltrace"
+ fi
+ n_lockdep=`grep -c =========== $1`
+ if test "$n_badness" -ne 0
+ then
+ summary="$summary lockdep: $n_badness"
+ fi
+ n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|Stall ended before state dump start' $1`
+ if test "$n_stalls" -ne 0
+ then
+ summary="$summary Stalls: $n_stalls"
+ fi
+ print_warning Summary: $summary
fi
--
2.5.2
Currently, the console output files ("console.log") are placed in the
build directory initially, then copied to the results directory.
One problem with this is if a qemu refuses to die in a timely fashion
after a kernel hang, it will continue to write after the next qemu
starts up, resulting in confusing output from the old instance of
qemu. This commit prevents such confusion by placing the console.log
files into the results directory to begin with, so that a given instance
of qemu is always writing only to its own console.log file.
Signed-off-by: Paul E. McKenney <[email protected]>
---
tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index d39273dae464..0f80eefb0bfd 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -150,7 +150,7 @@ fi
qemu_args="`specify_qemu_cpus "$QEMU" "$qemu_args" "$cpu_count"`"
# Generate architecture-specific and interaction-specific qemu arguments
-qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$builddir/console.log"`"
+qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$resdir/console.log"`"
# Generate qemu -append arguments
qemu_append="`identify_qemu_append "$QEMU"`"
@@ -166,7 +166,7 @@ then
touch $resdir/buildonly
exit 0
fi
-echo "NOTE: $QEMU either did not run or was interactive" > $builddir/console.log
+echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
echo $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
( $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append "$qemu_append $boot_args"; echo $? > $resdir/qemu-retval ) &
qemu_pid=$!
@@ -222,6 +222,5 @@ then
done
fi
-cp $builddir/console.log $resdir
parse-torture.sh $resdir/console.log $title
parse-console.sh $resdir/console.log $title
--
2.5.2
On Fri, Dec 04, 2015 at 04:23:46PM -0800, Paul E. McKenney wrote:
> This commit increases debug information in the case where the grace-period
> kthread is being prevented from running by dumping that kthread's stack.
That's not everything this patch does; see below.
> Signed-off-by: Paul E. McKenney <[email protected]>
> ---
> kernel/rcu/tree.c | 7 +++++--
> 1 file changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 81aa1cdc6bc9..7b78c88e19a3 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1196,12 +1196,15 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
>
> j = jiffies;
> gpa = READ_ONCE(rsp->gp_activity);
> - if (j - gpa > 2 * HZ)
> + if (j - gpa > 2 * HZ) {
> pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x s%d ->state=%#lx\n",
> rsp->name, j - gpa,
> rsp->gpnum, rsp->completed,
> rsp->gp_flags, rsp->gp_state,
> - rsp->gp_kthread ? rsp->gp_kthread->state : 0);
> + rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
The commit message doesn't explain this change. Could you please 1)
document this change in the commit message, and 2) split it into a
separate patch unless it directly relates to the other change (which
doesn't seem like the case)?
> + if (rsp->gp_kthread)
> + sched_show_task(rsp->gp_kthread);
> + }
> }
>
> /*
> --
> 2.5.2
>
On Fri, Dec 04, 2015 at 04:23:49PM -0800, Paul E. McKenney wrote:
> Currently, rcu_torture_writer_state is printed as an integer, which slows
> debugging. This commit therefore prints a symbolic name in addition to
> the integer.
>
> Signed-off-by: Paul E. McKenney <[email protected]>
> ---
> kernel/rcu/rcutorture.c | 24 +++++++++++++++++++++++-
> 1 file changed, 23 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
> index d89328e260df..adbb194e2b5d 100644
> --- a/kernel/rcu/rcutorture.c
> +++ b/kernel/rcu/rcutorture.c
> @@ -162,6 +162,27 @@ static int rcu_torture_writer_state;
> #define RTWS_SYNC 7
> #define RTWS_STUTTER 8
> #define RTWS_STOPPING 9
> +static char * const rcu_torture_writer_state_names[] = {
Shouldn't this use "static const char * const"? Also, see below.
> + "RTWS_FIXED_DELAY",
> + "RTWS_DELAY",
> + "RTWS_REPLACE",
> + "RTWS_DEF_FREE",
> + "RTWS_EXP_SYNC",
> + "RTWS_COND_GET",
> + "RTWS_COND_SYNC",
> + "RTWS_SYNC",
> + "RTWS_STUTTER",
> + "RTWS_STOPPING",
> +};
> +
> +char *rcu_torture_writer_state_getname(void)
This should return "const char *", I think.
> +{
> + unsigned int i = READ_ONCE(rcu_torture_writer_state);
> +
> + if (i >= ARRAY_SIZE(rcu_torture_writer_state_names))
> + return "???";
> + return rcu_torture_writer_state_names[i];
> +}
>
> #if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
> #define RCUTORTURE_RUNNABLE_INIT 1
> @@ -1307,7 +1328,8 @@ rcu_torture_stats_print(void)
>
> rcutorture_get_gp_data(cur_ops->ttype,
> &flags, &gpnum, &completed);
> - pr_alert("??? Writer stall state %d g%lu c%lu f%#x\n",
> + pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x\n",
> + rcu_torture_writer_state_getname(),
> rcu_torture_writer_state,
> gpnum, completed, flags);
> show_rcu_gp_kthreads();
> --
> 2.5.2
>
On Fri, Dec 04, 2015 at 04:23:50PM -0800, Paul E. McKenney wrote:
> Currently, ->gp_state is printed as an integer, which slows debugging.
> This commit therefore prints a symbolic name in addition to the integer.
>
> Signed-off-by: Paul E. McKenney <[email protected]>
> [ Updated to fix relational operator called out by Dan Carpenter. ]
> ---
> kernel/rcu/tree.c | 15 +++++++++++++--
> kernel/rcu/tree.h | 9 +++++++++
> 2 files changed, 22 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 7b78c88e19a3..491c99a73996 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1187,6 +1187,16 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
> }
>
> /*
> + * Convert a ->gp_state value to a character string.
> + */
> +static char *gp_state_getname(short gs)
Same comment about "const char *"
> +{
> + if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names))
> + return "???";
> + return gp_state_names[gs];
> +}
> +
> +/*
> * Complain about starvation of grace-period kthread.
> */
> static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
> @@ -1197,10 +1207,11 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
> j = jiffies;
> gpa = READ_ONCE(rsp->gp_activity);
> if (j - gpa > 2 * HZ) {
> - pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x s%d ->state=%#lx\n",
> + pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx\n",
> rsp->name, j - gpa,
> rsp->gpnum, rsp->completed,
> - rsp->gp_flags, rsp->gp_state,
> + rsp->gp_flags,
> + gp_state_getname(rsp->gp_state), rsp->gp_state,
> rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
> if (rsp->gp_kthread)
> sched_show_task(rsp->gp_kthread);
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index f32bebb6bc90..f0304988e3f8 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -544,6 +544,15 @@ struct rcu_state {
> #define RCU_GP_DOING_FQS 4 /* Wait done for force-quiescent-state time. */
> #define RCU_GP_CLEANUP 5 /* Grace-period cleanup started. */
> #define RCU_GP_CLEANED 6 /* Grace-period cleanup complete. */
> +static char * const gp_state_names[] = {
Same comment about "const char * const", but also, should this really go
in a header file? Do any circumstances exist where this header could
get included in more than one source file in a given build? If so,
multiple copies of this array could potentially end up in the binary, if
the compiler fails to merge them.
> + "RCU_GP_IDLE",
> + "RCU_GP_WAIT_GPS",
> + "RCU_GP_DONE_GPS",
> + "RCU_GP_WAIT_FQS",
> + "RCU_GP_DOING_FQS",
> + "RCU_GP_CLEANUP",
> + "RCU_GP_CLEANED",
> +};
>
> extern struct list_head rcu_struct_flavors;
>
> --
> 2.5.2
>
On Fri, Dec 04, 2015 at 04:23:31PM -0800, Paul E. McKenney wrote:
> Hello!
>
> This series contains torture-test updates:
>
> 1. Add batch number to script printout to allow easier estimation
> of test duration.
>
> 2. Dump stack when RCU's grace-period kthread stalls.
>
> 3. Set the scripting's default test-hang "grace period" to three
> minutes, and allow this to be overridden.
>
> 4. Remove the obsolete CONFIG_RCU_USER_QS Kconfig option from
> the rcutorture selftest documentation, courtesy of Yang Shi.
>
> 5. Print symbolic name for rcu_torture_writer_state, saving the
> time otherwise required to manually translate from integer
> to symbol.
>
> 6. Print symbolic name for ->gp_state, again saving the time
> otherwise required to manually translate from integer to symbol.
>
> 7. Abbreviate console error dump, allowing the results of an
> rcutorture script run to be observed with less back-scrolling.
>
> 8. Place console.log files correctly from the get-go, preventing
> multiple qemu instances from outputting to the same file.
I sent comments on patches 2, 5, and 6; for the rest:
Reviewed-by: Josh Triplett <[email protected]>
On Fri, Dec 04, 2015 at 04:59:48PM -0800, Josh Triplett wrote:
> On Fri, Dec 04, 2015 at 04:23:46PM -0800, Paul E. McKenney wrote:
> > This commit increases debug information in the case where the grace-period
> > kthread is being prevented from running by dumping that kthread's stack.
>
> That's not everything this patch does; see below.
>
> > Signed-off-by: Paul E. McKenney <[email protected]>
> > ---
> > kernel/rcu/tree.c | 7 +++++--
> > 1 file changed, 5 insertions(+), 2 deletions(-)
> >
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 81aa1cdc6bc9..7b78c88e19a3 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -1196,12 +1196,15 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
> >
> > j = jiffies;
> > gpa = READ_ONCE(rsp->gp_activity);
> > - if (j - gpa > 2 * HZ)
> > + if (j - gpa > 2 * HZ) {
> > pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x s%d ->state=%#lx\n",
> > rsp->name, j - gpa,
> > rsp->gpnum, rsp->completed,
> > rsp->gp_flags, rsp->gp_state,
> > - rsp->gp_kthread ? rsp->gp_kthread->state : 0);
> > + rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
>
> The commit message doesn't explain this change. Could you please 1)
> document this change in the commit message, and 2) split it into a
> separate patch unless it directly relates to the other change (which
> doesn't seem like the case)?
Good point, especially given that it took me some time to remember why
that change was necessary. It is now in a separate commit, with a commit
log stating that this disambiguates the case where there is no RCU GP
kthread from the case where there is a TASK_RUNNING RCU GP kthread.
Thanx, Paul
> > + if (rsp->gp_kthread)
> > + sched_show_task(rsp->gp_kthread);
> > + }
> > }
> >
> > /*
> > --
> > 2.5.2
> >
>
On Fri, Dec 04, 2015 at 05:02:21PM -0800, Josh Triplett wrote:
> On Fri, Dec 04, 2015 at 04:23:49PM -0800, Paul E. McKenney wrote:
> > Currently, rcu_torture_writer_state is printed as an integer, which slows
> > debugging. This commit therefore prints a symbolic name in addition to
> > the integer.
> >
> > Signed-off-by: Paul E. McKenney <[email protected]>
> > ---
> > kernel/rcu/rcutorture.c | 24 +++++++++++++++++++++++-
> > 1 file changed, 23 insertions(+), 1 deletion(-)
> >
> > diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
> > index d89328e260df..adbb194e2b5d 100644
> > --- a/kernel/rcu/rcutorture.c
> > +++ b/kernel/rcu/rcutorture.c
> > @@ -162,6 +162,27 @@ static int rcu_torture_writer_state;
> > #define RTWS_SYNC 7
> > #define RTWS_STUTTER 8
> > #define RTWS_STOPPING 9
> > +static char * const rcu_torture_writer_state_names[] = {
>
> Shouldn't this use "static const char * const"? Also, see below.
Can't hurt...
> > + "RTWS_FIXED_DELAY",
> > + "RTWS_DELAY",
> > + "RTWS_REPLACE",
> > + "RTWS_DEF_FREE",
> > + "RTWS_EXP_SYNC",
> > + "RTWS_COND_GET",
> > + "RTWS_COND_SYNC",
> > + "RTWS_SYNC",
> > + "RTWS_STUTTER",
> > + "RTWS_STOPPING",
> > +};
> > +
> > +char *rcu_torture_writer_state_getname(void)
>
> This should return "const char *", I think.
Yes, especially once I had made the earlier change. Also made this
function be static while I was at it.
Thanx, Paul
> > +{
> > + unsigned int i = READ_ONCE(rcu_torture_writer_state);
> > +
> > + if (i >= ARRAY_SIZE(rcu_torture_writer_state_names))
> > + return "???";
> > + return rcu_torture_writer_state_names[i];
> > +}
> >
> > #if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
> > #define RCUTORTURE_RUNNABLE_INIT 1
> > @@ -1307,7 +1328,8 @@ rcu_torture_stats_print(void)
> >
> > rcutorture_get_gp_data(cur_ops->ttype,
> > &flags, &gpnum, &completed);
> > - pr_alert("??? Writer stall state %d g%lu c%lu f%#x\n",
> > + pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x\n",
> > + rcu_torture_writer_state_getname(),
> > rcu_torture_writer_state,
> > gpnum, completed, flags);
> > show_rcu_gp_kthreads();
> > --
> > 2.5.2
> >
>
On Fri, Dec 04, 2015 at 05:04:21PM -0800, Josh Triplett wrote:
> On Fri, Dec 04, 2015 at 04:23:50PM -0800, Paul E. McKenney wrote:
> > Currently, ->gp_state is printed as an integer, which slows debugging.
> > This commit therefore prints a symbolic name in addition to the integer.
> >
> > Signed-off-by: Paul E. McKenney <[email protected]>
> > [ Updated to fix relational operator called out by Dan Carpenter. ]
> > ---
> > kernel/rcu/tree.c | 15 +++++++++++++--
> > kernel/rcu/tree.h | 9 +++++++++
> > 2 files changed, 22 insertions(+), 2 deletions(-)
> >
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 7b78c88e19a3..491c99a73996 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -1187,6 +1187,16 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
> > }
> >
> > /*
> > + * Convert a ->gp_state value to a character string.
> > + */
> > +static char *gp_state_getname(short gs)
>
> Same comment about "const char *"
Good point, fixed!
> > +{
> > + if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names))
> > + return "???";
> > + return gp_state_names[gs];
> > +}
> > +
> > +/*
> > * Complain about starvation of grace-period kthread.
> > */
> > static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
> > @@ -1197,10 +1207,11 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
> > j = jiffies;
> > gpa = READ_ONCE(rsp->gp_activity);
> > if (j - gpa > 2 * HZ) {
> > - pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x s%d ->state=%#lx\n",
> > + pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx\n",
> > rsp->name, j - gpa,
> > rsp->gpnum, rsp->completed,
> > - rsp->gp_flags, rsp->gp_state,
> > + rsp->gp_flags,
> > + gp_state_getname(rsp->gp_state), rsp->gp_state,
> > rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
> > if (rsp->gp_kthread)
> > sched_show_task(rsp->gp_kthread);
> > diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> > index f32bebb6bc90..f0304988e3f8 100644
> > --- a/kernel/rcu/tree.h
> > +++ b/kernel/rcu/tree.h
> > @@ -544,6 +544,15 @@ struct rcu_state {
> > #define RCU_GP_DOING_FQS 4 /* Wait done for force-quiescent-state time. */
> > #define RCU_GP_CLEANUP 5 /* Grace-period cleanup started. */
> > #define RCU_GP_CLEANED 6 /* Grace-period cleanup complete. */
> > +static char * const gp_state_names[] = {
>
> Same comment about "const char * const", but also, should this really go
> in a header file? Do any circumstances exist where this header could
> get included in more than one source file in a given build? If so,
> multiple copies of this array could potentially end up in the binary, if
> the compiler fails to merge them.
I added the const.
However, I left the array in the .h file. The issue is that the #defines
really have to be in the tree.h file. Given that, if I move the array
elsewhere, they will very likely drift out of sync. So this sounds like
a job for the compiler/linker...
Ah, but there is another approach. It turns out that tree.h in included
only from tree.c and tree_trace.c. And tree_trace.c defines a CPP symbol
RCU_TREE_NONCORE that is used elsewhere in tree.h to keep tree_trace.c's
mitts off a number of attractive but dangerous functions. So I just put
the array under "#ifndef RCU_TREE_NONCORE".
Fair enough?
Thanx, Paul
> > + "RCU_GP_IDLE",
> > + "RCU_GP_WAIT_GPS",
> > + "RCU_GP_DONE_GPS",
> > + "RCU_GP_WAIT_FQS",
> > + "RCU_GP_DOING_FQS",
> > + "RCU_GP_CLEANUP",
> > + "RCU_GP_CLEANED",
> > +};
> >
> > extern struct list_head rcu_struct_flavors;
> >
> > --
> > 2.5.2
> >
>
On Sat, Dec 05, 2015 at 05:54:36PM -0800, Paul E. McKenney wrote:
> On Fri, Dec 04, 2015 at 05:04:21PM -0800, Josh Triplett wrote:
> > On Fri, Dec 04, 2015 at 04:23:50PM -0800, Paul E. McKenney wrote:
> > > Currently, ->gp_state is printed as an integer, which slows debugging.
> > > This commit therefore prints a symbolic name in addition to the integer.
> > >
> > > Signed-off-by: Paul E. McKenney <[email protected]>
> > > [ Updated to fix relational operator called out by Dan Carpenter. ]
> > > ---
> > > kernel/rcu/tree.c | 15 +++++++++++++--
> > > kernel/rcu/tree.h | 9 +++++++++
> > > 2 files changed, 22 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > index 7b78c88e19a3..491c99a73996 100644
> > > --- a/kernel/rcu/tree.c
> > > +++ b/kernel/rcu/tree.c
> > > @@ -1187,6 +1187,16 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
> > > }
> > >
> > > /*
> > > + * Convert a ->gp_state value to a character string.
> > > + */
> > > +static char *gp_state_getname(short gs)
> >
> > Same comment about "const char *"
>
> Good point, fixed!
>
> > > +{
> > > + if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names))
> > > + return "???";
> > > + return gp_state_names[gs];
> > > +}
> > > +
> > > +/*
> > > * Complain about starvation of grace-period kthread.
> > > */
> > > static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
> > > @@ -1197,10 +1207,11 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
> > > j = jiffies;
> > > gpa = READ_ONCE(rsp->gp_activity);
> > > if (j - gpa > 2 * HZ) {
> > > - pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x s%d ->state=%#lx\n",
> > > + pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx\n",
> > > rsp->name, j - gpa,
> > > rsp->gpnum, rsp->completed,
> > > - rsp->gp_flags, rsp->gp_state,
> > > + rsp->gp_flags,
> > > + gp_state_getname(rsp->gp_state), rsp->gp_state,
> > > rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
> > > if (rsp->gp_kthread)
> > > sched_show_task(rsp->gp_kthread);
> > > diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> > > index f32bebb6bc90..f0304988e3f8 100644
> > > --- a/kernel/rcu/tree.h
> > > +++ b/kernel/rcu/tree.h
> > > @@ -544,6 +544,15 @@ struct rcu_state {
> > > #define RCU_GP_DOING_FQS 4 /* Wait done for force-quiescent-state time. */
> > > #define RCU_GP_CLEANUP 5 /* Grace-period cleanup started. */
> > > #define RCU_GP_CLEANED 6 /* Grace-period cleanup complete. */
> > > +static char * const gp_state_names[] = {
> >
> > Same comment about "const char * const", but also, should this really go
> > in a header file? Do any circumstances exist where this header could
> > get included in more than one source file in a given build? If so,
> > multiple copies of this array could potentially end up in the binary, if
> > the compiler fails to merge them.
>
> I added the const.
>
> However, I left the array in the .h file. The issue is that the #defines
> really have to be in the tree.h file. Given that, if I move the array
> elsewhere, they will very likely drift out of sync. So this sounds like
> a job for the compiler/linker...
>
> Ah, but there is another approach. It turns out that tree.h in included
> only from tree.c and tree_trace.c. And tree_trace.c defines a CPP symbol
> RCU_TREE_NONCORE that is used elsewhere in tree.h to keep tree_trace.c's
> mitts off a number of attractive but dangerous functions. So I just put
> the array under "#ifndef RCU_TREE_NONCORE".
>
> Fair enough?
Fair enough.
Reviewed-by: Josh Triplett <[email protected]>