Subject: [PATCH 0/3] crypto/rcu: suppress unnecessary CPU stall warnings

Suppress RCU CPU stall warnings during tcrypt speed tests.

Robert Elliott (3):
rcu: genericize RCU stall suppression functions
rcu: print first CPU on expedited stall line
crypto: tcrypt - suppress RCU stall warnings during speed tests

.../RCU/Design/Requirements/Requirements.rst | 6 +-
Documentation/RCU/stallwarn.rst | 19 ++-
arch/parisc/kernel/process.c | 2 +-
crypto/tcrypt.c | 113 +++++++++++++++---
drivers/tty/sysrq.c | 4 +-
include/linux/rcupdate.h | 8 +-
kernel/rcu/rcu.h | 11 +-
kernel/rcu/tree_exp.h | 22 ++--
kernel/rcu/tree_stall.h | 18 +--
kernel/rcu/update.c | 11 +-
10 files changed, 160 insertions(+), 54 deletions(-)

--
2.38.1


Subject: [PATCH 1/3] rcu: genericize RCU stall suppression functions

Convert the functions that temporarily suppress RCU CPU
stall reporting:
rcu_sysrq_start()
rcu_sysrq_end()

to more generic functions that may be called by functions
other than the SysRq handler:
rcu_suppress_start()
rcu_suppress_end()

Covert the underlying variable:
rcu_cpu_stall_suppress

to an atomic variable so multiple threads can manipulate it at the
same time, incrementing it in start() and decrementing in end().

Expose that in sysfs in a read-only
/sys/module/rcupdate/parameters/rcu_cpu_stall_suppress_dyn

Keep
/sys/module/rcupdate/parameters/rcu_cpu_stall_suppress
as writeable by userspace, but OR that into the equations rather than
directly manipulate the atomic value.

Signed-off-by: Robert Elliott <[email protected]>
---
.../RCU/Design/Requirements/Requirements.rst | 6 +++---
Documentation/RCU/stallwarn.rst | 19 +++++++++++++++----
arch/parisc/kernel/process.c | 2 +-
drivers/tty/sysrq.c | 4 ++--
include/linux/rcupdate.h | 8 ++++----
kernel/rcu/rcu.h | 11 ++++++-----
kernel/rcu/tree_stall.h | 18 ++++++++++--------
kernel/rcu/update.c | 11 ++++++++++-
8 files changed, 51 insertions(+), 28 deletions(-)

diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
index 49387d823619..5083490bb6fc 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.rst
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
@@ -1639,9 +1639,9 @@ against mishaps and misuse:
``rcupdate.rcu_cpu_stall_suppress`` to suppress the splats. This
kernel parameter may also be set via ``sysfs``. Furthermore, RCU CPU
stall warnings are counter-productive during sysrq dumps and during
- panics. RCU therefore supplies the rcu_sysrq_start() and
- rcu_sysrq_end() API members to be called before and after long
- sysrq dumps. RCU also supplies the rcu_panic() notifier that is
+ panics. RCU therefore supplies the rcu_suppress_start() and
+ rcu_suppress_end() API members to be called before and after long
+ CPU usage. RCU also supplies the rcu_panic() notifier that is
automatically invoked at the beginning of a panic to suppress further
RCU CPU stall warnings.

diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst
index e38c587067fc..9e6fba72f56d 100644
--- a/Documentation/RCU/stallwarn.rst
+++ b/Documentation/RCU/stallwarn.rst
@@ -135,13 +135,24 @@ see include/trace/events/rcu.h.
Fine-Tuning the RCU CPU Stall Detector
======================================

-The rcuupdate.rcu_cpu_stall_suppress module parameter disables RCU's
-CPU stall detector, which detects conditions that unduly delay RCU grace
-periods. This module parameter enables CPU stall detection by default,
-but may be overridden via boot-time parameter or at runtime via sysfs.
+RCU's CPU stall detector detects conditions that unduly delay RCU grace
+periods. CPU stall detection is enabled by default, but may be overridden
+via boot-time parameter or at runtime via sysfs (accessible via
+/sys/module/rcupdate/parameters).
+
+The rcupdate.rcu_cpu_stall_suppress module parameter disables RCU's
+CPU stall detector.
+
+/sys/module/rcu_cpu_stall_suppress_dyn reports an internal semaphore
+used by the RCU's CPU stall detector. Functions holding a CPU for a
+long time (e.g., sysrq printouts) increment this value before use
+and decrement it when done, so the value reports the number of
+functions currently disabling stalls.
+
The stall detector's idea of what constitutes "unduly delayed" is
controlled by a set of kernel configuration variables and cpp macros:

+
CONFIG_RCU_CPU_STALL_TIMEOUT
----------------------------

diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index c4f8374c7018..038378fe7bfa 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -126,7 +126,7 @@ void machine_power_off(void)
"Please power this system off now.");

/* prevent soft lockup/stalled CPU messages for endless loop. */
- rcu_sysrq_start();
+ rcu_suppress_start();
lockup_detector_soft_poweroff();
for (;;);
}
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index d2b2720db6ca..81ab63a473a7 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -579,7 +579,7 @@ void __handle_sysrq(int key, bool check_mask)
orig_suppress_printk = suppress_printk;
suppress_printk = 0;

- rcu_sysrq_start();
+ rcu_suppress_start();
rcu_read_lock();
/*
* Raise the apparent loglevel to maximum so that the sysrq header
@@ -623,7 +623,7 @@ void __handle_sysrq(int key, bool check_mask)
console_loglevel = orig_log_level;
}
rcu_read_unlock();
- rcu_sysrq_end();
+ rcu_suppress_end();

suppress_printk = orig_suppress_printk;
}
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 03abf883a281..c0d8a4aae7ff 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -131,11 +131,11 @@ static inline void rcu_init_tasks_generic(void) { }
#endif

#ifdef CONFIG_RCU_STALL_COMMON
-void rcu_sysrq_start(void);
-void rcu_sysrq_end(void);
+void rcu_suppress_start(void);
+void rcu_suppress_end(void);
#else /* #ifdef CONFIG_RCU_STALL_COMMON */
-static inline void rcu_sysrq_start(void) { }
-static inline void rcu_sysrq_end(void) { }
+static inline void rcu_suppress_start(void) { }
+static inline void rcu_suppress_end(void) { }
#endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */

#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK))
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index c5aa934de59b..a658955a1174 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -224,24 +224,25 @@ extern int rcu_cpu_stall_ftrace_dump;
extern int rcu_cpu_stall_suppress;
extern int rcu_cpu_stall_timeout;
extern int rcu_exp_cpu_stall_timeout;
+extern atomic_t rcu_cpu_stall_suppress_dyn;
int rcu_jiffies_till_stall_check(void);
int rcu_exp_jiffies_till_stall_check(void);

static inline bool rcu_stall_is_suppressed(void)
{
- return rcu_stall_is_suppressed_at_boot() || rcu_cpu_stall_suppress;
+ return rcu_stall_is_suppressed_at_boot() ||
+ rcu_cpu_stall_suppress ||
+ atomic_read(&rcu_cpu_stall_suppress_dyn);
}

#define rcu_ftrace_dump_stall_suppress() \
do { \
- if (!rcu_cpu_stall_suppress) \
- rcu_cpu_stall_suppress = 3; \
+ atomic_inc(&rcu_cpu_stall_suppress_dyn); \
} while (0)

#define rcu_ftrace_dump_stall_unsuppress() \
do { \
- if (rcu_cpu_stall_suppress == 3) \
- rcu_cpu_stall_suppress = 0; \
+ atomic_dec(&rcu_cpu_stall_suppress_dyn); \
} while (0)

#else /* #endif #ifdef CONFIG_RCU_STALL_COMMON */
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 5653560573e2..260748bc5bc8 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -103,23 +103,25 @@ bool rcu_gp_might_be_stalled(void)
return !time_before(j, READ_ONCE(rcu_state.gp_start) + d);
}

-/* Don't do RCU CPU stall warnings during long sysrq printouts. */
-void rcu_sysrq_start(void)
+/* Don't do RCU CPU stall warnings during functions holding the CPU
+ * for a long period of time such as long sysrq printouts.
+ */
+void rcu_suppress_start(void)
{
- if (!rcu_cpu_stall_suppress)
- rcu_cpu_stall_suppress = 2;
+ atomic_inc(&rcu_cpu_stall_suppress_dyn);
}
+EXPORT_SYMBOL_GPL(rcu_suppress_start);

-void rcu_sysrq_end(void)
+void rcu_suppress_end(void)
{
- if (rcu_cpu_stall_suppress == 2)
- rcu_cpu_stall_suppress = 0;
+ atomic_dec(&rcu_cpu_stall_suppress_dyn);
}
+EXPORT_SYMBOL_GPL(rcu_suppress_end);

/* Don't print RCU CPU stall warnings during a kernel panic. */
static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
{
- rcu_cpu_stall_suppress = 1;
+ atomic_inc(&rcu_cpu_stall_suppress_dyn);
return NOTIFY_DONE;
}

diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index f5e6a2f95a2a..ceee9d777384 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -501,11 +501,18 @@ EXPORT_SYMBOL_GPL(rcutorture_sched_setaffinity);
#ifdef CONFIG_RCU_STALL_COMMON
int rcu_cpu_stall_ftrace_dump __read_mostly;
module_param(rcu_cpu_stall_ftrace_dump, int, 0644);
-int rcu_cpu_stall_suppress __read_mostly; // !0 = suppress stall warnings.
+
+int rcu_cpu_stall_suppress __read_mostly; // !0 = suppress stall warnings from sysfs
EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
module_param(rcu_cpu_stall_suppress, int, 0644);
+
+atomic_t rcu_cpu_stall_suppress_dyn __read_mostly; // !0 = suppress stall warnings from functions
+EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress_dyn);
+module_param_named(rcu_cpu_stall_suppress_dyn, rcu_cpu_stall_suppress_dyn.counter, int, 0444);
+
int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
module_param(rcu_cpu_stall_timeout, int, 0644);
+
int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
module_param(rcu_exp_cpu_stall_timeout, int, 0644);
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
@@ -616,6 +623,8 @@ void __init rcupdate_announce_bootup_oddness(void)
pr_info("\tAll grace periods are expedited (rcu_expedited).\n");
if (rcu_cpu_stall_suppress)
pr_info("\tRCU CPU stall warnings suppressed (rcu_cpu_stall_suppress).\n");
+ if (atomic_read(&rcu_cpu_stall_suppress_dyn))
+ pr_info("\tDynamic RCU CPU stall warnings suppressed (rcu_cpu_stall_suppress_dyn).\n");
if (rcu_cpu_stall_timeout != CONFIG_RCU_CPU_STALL_TIMEOUT)
pr_info("\tRCU CPU stall warnings timeout set to %d (rcu_cpu_stall_timeout).\n", rcu_cpu_stall_timeout);
rcu_tasks_bootup_oddness();
--
2.38.1

Subject: [PATCH 2/3] rcu: print first CPU on expedited stall line

Include the first CPU number in the first pr_err() call reporting
an expedited stall warning.

Printing the CPU numbers with subsequent pr_cont() calls can
result in the prints being many lines away or being dropped entirely
in a busy system. This change ensures there is indication of at
least one of the CPUs with the problem with the original message.

Before (if prints are interspersed with other prints):
rcu: INFO: rcu_preempt detected expedited stalls on CPUs/tasks: {
13-....
} 32 jiffies s: 6685 root: 0x1/.

After:
rcu: INFO: rcu_preempt detected expedited stalls on CPUs/tasks: {13-....
} 32 jiffies s: 6685 root: 0x1/.

Signed-off-by: Robert Elliott <[email protected]>
---
kernel/rcu/tree_exp.h | 22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index ed6c3cce28f2..ade6a18e6c07 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -624,8 +624,6 @@ static void synchronize_rcu_expedited_wait(void)
if (rcu_stall_is_suppressed())
continue;
trace_rcu_stall_warning(rcu_state.name, TPS("ExpeditedStall"));
- pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
- rcu_state.name);
ndetected = 0;
rcu_for_each_leaf_node(rnp) {
ndetected += rcu_print_task_exp_stall(rnp);
@@ -637,11 +635,21 @@ static void synchronize_rcu_expedited_wait(void)
continue;
ndetected++;
rdp = per_cpu_ptr(&rcu_data, cpu);
- pr_cont(" %d-%c%c%c%c", cpu,
- "O."[!!cpu_online(cpu)],
- "o."[!!(rdp->grpmask & rnp->expmaskinit)],
- "N."[!!(rdp->grpmask & rnp->expmaskinitnext)],
- "D."[!!(rdp->cpu_no_qs.b.exp)]);
+ // print the prefix and the first CPU number together
+ // under heavy load, the pr_cont prints can be far away or dropped
+ if (ndetected == 1)
+ pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {%d-%c%c%c%c",
+ rcu_state.name, cpu,
+ "O."[!!cpu_online(cpu)],
+ "o."[!!(rdp->grpmask & rnp->expmaskinit)],
+ "N."[!!(rdp->grpmask & rnp->expmaskinitnext)],
+ "D."[!!(rdp->cpu_no_qs.b.exp)]);
+ else
+ pr_cont(" %d-%c%c%c%c", cpu,
+ "O."[!!cpu_online(cpu)],
+ "o."[!!(rdp->grpmask & rnp->expmaskinit)],
+ "N."[!!(rdp->grpmask & rnp->expmaskinitnext)],
+ "D."[!!(rdp->cpu_no_qs.b.exp)]);
}
}
pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
--
2.38.1

Subject: [PATCH 3/3] crypto: tcrypt - suppress RCU stall warnings during speed tests

Suppress RCU CPU stall warnings while running speed tests.

The tcrypt module is intended only for developer usage, so
RCU stalls induced by those tests are not necessarily representative
of real problems.

Speed tests need to disable interrupts or preemption to get results
that are not distorted by such interruptions. This triggers more
RCU stalls than normal invocations of the crypto functions.

If an RCU stall is triggered and reported, the time to print to the
console distorts the speed results.

Signed-off-by: Robert Elliott <[email protected]>
---
crypto/tcrypt.c | 113 ++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 94 insertions(+), 19 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 8645e72a7099..3e9e4adeef02 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -27,6 +27,7 @@
#include <linux/init.h>
#include <linux/gfp.h>
#include <linux/module.h>
+#include <linux/rcupdate.h>
#include <linux/scatterlist.h>
#include <linux/string.h>
#include <linux/moduleparam.h>
@@ -191,12 +192,16 @@ static int test_mb_aead_jiffies(struct test_mb_aead_data *data, int enc,
if (!rc)
return -ENOMEM;

+ rcu_suppress_start();
for (start = jiffies, end = start + secs * HZ, bcount = 0;
time_before(jiffies, end); bcount++) {
ret = do_mult_aead_op(data, enc, num_mb, rc, prefix);
- if (ret)
+ if (ret) {
+ rcu_suppress_end();
goto out;
+ }
}
+ rcu_suppress_end();

pr_info("%s %8d operations in %d seconds (%12llu bytes)\n",
prefix, bcount * num_mb, secs, (u64)bcount * blen * num_mb);
@@ -218,19 +223,25 @@ static int test_mb_aead_cycles(struct test_mb_aead_data *data, int enc,
return -ENOMEM;

/* Warm-up run. */
+ rcu_suppress_start();
for (i = 0; i < 4; i++) {
ret = do_mult_aead_op(data, enc, num_mb, rc, prefix);
- if (ret)
+ if (ret) {
+ rcu_suppress_end();
goto out;
+ }
}
+ rcu_suppress_end();

/* The real thing. */
for (i = 0; i < 8; i++) {
cycles_t start, end;

+ rcu_suppress_start();
start = get_cycles();
ret = do_mult_aead_op(data, enc, num_mb, rc, prefix);
end = get_cycles();
+ rcu_suppress_end();

if (ret)
goto out;
@@ -470,6 +481,7 @@ static int test_aead_jiffies(struct aead_request *req, int enc,
int bcount;
int ret;

+ rcu_suppress_start();
for (start = jiffies, end = start + secs * HZ, bcount = 0;
time_before(jiffies, end); bcount++) {
if (enc)
@@ -477,9 +489,12 @@ static int test_aead_jiffies(struct aead_request *req, int enc,
else
ret = do_one_aead_op(req, crypto_aead_decrypt(req));

- if (ret)
+ if (ret) {
+ rcu_suppress_end();
return ret;
+ }
}
+ rcu_suppress_end();

pr_info("%s %8d operations in %d seconds (%12llu bytes)\n",
prefix, bcount, secs, (u64)bcount * blen);
@@ -494,26 +509,32 @@ static int test_aead_cycles(struct aead_request *req, int enc,
int i;

/* Warm-up run. */
+ rcu_suppress_start();
for (i = 0; i < 4; i++) {
if (enc)
ret = do_one_aead_op(req, crypto_aead_encrypt(req));
else
ret = do_one_aead_op(req, crypto_aead_decrypt(req));

- if (ret)
+ if (ret) {
+ rcu_suppress_end();
goto out;
+ }
}
+ rcu_suppress_end();

/* The real thing. */
for (i = 0; i < 8; i++) {
cycles_t start, end;

+ rcu_suppress_start();
start = get_cycles();
if (enc)
ret = do_one_aead_op(req, crypto_aead_encrypt(req));
else
ret = do_one_aead_op(req, crypto_aead_decrypt(req));
end = get_cycles();
+ rcu_suppress_end();

if (ret)
goto out;
@@ -746,12 +767,16 @@ static int test_ahash_jiffies_digest(struct ahash_request *req, int blen,
int bcount;
int ret;

+ rcu_suppress_start();
for (start = jiffies, end = start + secs * HZ, bcount = 0;
time_before(jiffies, end); bcount++) {
ret = do_one_ahash_op(req, crypto_ahash_digest(req));
- if (ret)
+ if (ret) {
+ rcu_suppress_end();
return ret;
+ }
}
+ rcu_suppress_end();

pr_info("%s %6u opers/sec, %9lu bytes/sec\n",
prefix, bcount / secs, ((long)bcount * blen) / secs);
@@ -769,21 +794,29 @@ static int test_ahash_jiffies(struct ahash_request *req, int blen,
if (plen == blen)
return test_ahash_jiffies_digest(req, blen, secs, prefix);

+ rcu_suppress_start();
for (start = jiffies, end = start + secs * HZ, bcount = 0;
time_before(jiffies, end); bcount++) {
ret = do_one_ahash_op(req, crypto_ahash_init(req));
- if (ret)
+ if (ret) {
+ rcu_suppress_end();
return ret;
+ }
for (pcount = 0; pcount < blen; pcount += plen) {
ret = do_one_ahash_op(req, crypto_ahash_update(req));
- if (ret)
+ if (ret) {
+ rcu_suppress_end();
return ret;
+ }
}

ret = do_one_ahash_op(req, crypto_ahash_final(req));
- if (ret)
+ if (ret) {
+ rcu_suppress_end();
return ret;
+ }
}
+ rcu_suppress_end();

pr_info("%s %6u opers/sec, %9lu bytes/sec\n",
prefix, bcount / secs, ((long)bcount * blen) / secs);
@@ -798,23 +831,31 @@ static int test_ahash_cycles_digest(struct ahash_request *req, int blen,
int ret, i;

/* Warm-up run. */
+ rcu_suppress_start();
for (i = 0; i < 4; i++) {
ret = do_one_ahash_op(req, crypto_ahash_digest(req));
- if (ret)
+ if (ret) {
+ rcu_suppress_end();
goto out;
+ }
}
+ rcu_suppress_end();

/* The real thing. */
for (i = 0; i < 8; i++) {
cycles_t start, end;

+ rcu_suppress_start();
start = get_cycles();

ret = do_one_ahash_op(req, crypto_ahash_digest(req));
- if (ret)
+ if (ret) {
+ rcu_suppress_end();
goto out;
+ }

end = get_cycles();
+ rcu_suppress_end();

cycles += end - start;
}
@@ -839,24 +880,33 @@ static int test_ahash_cycles(struct ahash_request *req, int blen,
return test_ahash_cycles_digest(req, blen, prefix);

/* Warm-up run. */
+ rcu_suppress_start();
for (i = 0; i < 4; i++) {
ret = do_one_ahash_op(req, crypto_ahash_init(req));
- if (ret)
+ if (ret) {
+ rcu_suppress_end();
goto out;
+ }
for (pcount = 0; pcount < blen; pcount += plen) {
ret = do_one_ahash_op(req, crypto_ahash_update(req));
- if (ret)
+ if (ret) {
+ rcu_suppress_end();
goto out;
+ }
}
ret = do_one_ahash_op(req, crypto_ahash_final(req));
- if (ret)
+ if (ret) {
+ rcu_suppress_end();
goto out;
+ }
}
+ rcu_suppress_end();

/* The real thing. */
for (i = 0; i < 8; i++) {
cycles_t start, end;

+ rcu_suppress_start();
start = get_cycles();

ret = do_one_ahash_op(req, crypto_ahash_init(req));
@@ -864,14 +914,19 @@ static int test_ahash_cycles(struct ahash_request *req, int blen,
goto out;
for (pcount = 0; pcount < blen; pcount += plen) {
ret = do_one_ahash_op(req, crypto_ahash_update(req));
- if (ret)
+ if (ret) {
+ rcu_suppress_end();
goto out;
+ }
}
ret = do_one_ahash_op(req, crypto_ahash_final(req));
- if (ret)
+ if (ret) {
+ rcu_suppress_end();
goto out;
+ }

end = get_cycles();
+ rcu_suppress_end();

cycles += end - start;
}
@@ -1039,12 +1094,16 @@ static int test_mb_acipher_jiffies(struct test_mb_skcipher_data *data, int enc,
if (!rc)
return -ENOMEM;

+ rcu_suppress_start();
for (start = jiffies, end = start + secs * HZ, bcount = 0;
time_before(jiffies, end); bcount++) {
ret = do_mult_acipher_op(data, enc, num_mb, rc, prefix);
- if (ret)
+ if (ret) {
+ rcu_suppress_end();
goto out;
+ }
}
+ rcu_suppress_end();

pr_info("%s %8d operations in %d seconds (%12llu bytes)\n",
prefix, bcount * num_mb, secs, (u64)bcount * blen * num_mb);
@@ -1066,19 +1125,25 @@ static int test_mb_acipher_cycles(struct test_mb_skcipher_data *data, int enc,
return -ENOMEM;

/* Warm-up run. */
+ rcu_suppress_start();
for (i = 0; i < 4; i++) {
ret = do_mult_acipher_op(data, enc, num_mb, rc, prefix);
- if (ret)
+ if (ret) {
+ rcu_suppress_end();
goto out;
+ }
}
+ rcu_suppress_end();

/* The real thing. */
for (i = 0; i < 8; i++) {
cycles_t start, end;

+ rcu_suppress_start();
start = get_cycles();
ret = do_mult_acipher_op(data, enc, num_mb, rc, prefix);
end = get_cycles();
+ rcu_suppress_end();

if (ret)
goto out;
@@ -1270,6 +1335,7 @@ static int test_acipher_jiffies(struct skcipher_request *req, int enc,
int bcount;
int ret;

+ rcu_suppress_start();
for (start = jiffies, end = start + secs * HZ, bcount = 0;
time_before(jiffies, end); bcount++) {
if (enc)
@@ -1279,9 +1345,12 @@ static int test_acipher_jiffies(struct skcipher_request *req, int enc,
ret = do_one_acipher_op(req,
crypto_skcipher_decrypt(req));

- if (ret)
+ if (ret) {
+ rcu_suppress_end();
return ret;
+ }
}
+ rcu_suppress_end();

pr_info("%s %8d operations in %d seconds (%12llu bytes)\n",
prefix, bcount, secs, (u64)bcount * blen);
@@ -1296,6 +1365,7 @@ static int test_acipher_cycles(struct skcipher_request *req, int enc,
int i;

/* Warm-up run. */
+ rcu_suppress_start();
for (i = 0; i < 4; i++) {
if (enc)
ret = do_one_acipher_op(req,
@@ -1304,14 +1374,18 @@ static int test_acipher_cycles(struct skcipher_request *req, int enc,
ret = do_one_acipher_op(req,
crypto_skcipher_decrypt(req));

- if (ret)
+ if (ret) {
+ rcu_suppress_end();
goto out;
+ }
}
+ rcu_suppress_end();

/* The real thing. */
for (i = 0; i < 8; i++) {
cycles_t start, end;

+ rcu_suppress_start();
start = get_cycles();
if (enc)
ret = do_one_acipher_op(req,
@@ -1320,6 +1394,7 @@ static int test_acipher_cycles(struct skcipher_request *req, int enc,
ret = do_one_acipher_op(req,
crypto_skcipher_decrypt(req));
end = get_cycles();
+ rcu_suppress_end();

if (ret)
goto out;
--
2.38.1

2022-12-21 19:04:15

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH 1/3] rcu: genericize RCU stall suppression functions

On Mon, Dec 19, 2022 at 02:29:08PM -0600, Robert Elliott wrote:
> Convert the functions that temporarily suppress RCU CPU
> stall reporting:
> rcu_sysrq_start()
> rcu_sysrq_end()
>
> to more generic functions that may be called by functions
> other than the SysRq handler:
> rcu_suppress_start()
> rcu_suppress_end()
>
> Covert the underlying variable:
> rcu_cpu_stall_suppress
>
> to an atomic variable so multiple threads can manipulate it at the
> same time, incrementing it in start() and decrementing in end().
>
> Expose that in sysfs in a read-only
> /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress_dyn
>
> Keep
> /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress
> as writeable by userspace, but OR that into the equations rather than
> directly manipulate the atomic value.
>
> Signed-off-by: Robert Elliott <[email protected]>

I really like the idea of making the suppressing and unsuppressing of
RCU CPU stall warnings SMP-safe, thank you! Yes, as far as I know,
there have been no problems due to this, but accidents waiting to happen
and all that.

Some comments and questions below.

> ---
> .../RCU/Design/Requirements/Requirements.rst | 6 +++---
> Documentation/RCU/stallwarn.rst | 19 +++++++++++++++----
> arch/parisc/kernel/process.c | 2 +-
> drivers/tty/sysrq.c | 4 ++--
> include/linux/rcupdate.h | 8 ++++----
> kernel/rcu/rcu.h | 11 ++++++-----
> kernel/rcu/tree_stall.h | 18 ++++++++++--------
> kernel/rcu/update.c | 11 ++++++++++-
> 8 files changed, 51 insertions(+), 28 deletions(-)
>
> diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
> index 49387d823619..5083490bb6fc 100644
> --- a/Documentation/RCU/Design/Requirements/Requirements.rst
> +++ b/Documentation/RCU/Design/Requirements/Requirements.rst
> @@ -1639,9 +1639,9 @@ against mishaps and misuse:
> ``rcupdate.rcu_cpu_stall_suppress`` to suppress the splats. This
> kernel parameter may also be set via ``sysfs``. Furthermore, RCU CPU
> stall warnings are counter-productive during sysrq dumps and during
> - panics. RCU therefore supplies the rcu_sysrq_start() and
> - rcu_sysrq_end() API members to be called before and after long
> - sysrq dumps. RCU also supplies the rcu_panic() notifier that is
> + panics. RCU therefore supplies the rcu_suppress_start() and
> + rcu_suppress_end() API members to be called before and after long
> + CPU usage. RCU also supplies the rcu_panic() notifier that is
> automatically invoked at the beginning of a panic to suppress further
> RCU CPU stall warnings.
>
> diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst
> index e38c587067fc..9e6fba72f56d 100644
> --- a/Documentation/RCU/stallwarn.rst
> +++ b/Documentation/RCU/stallwarn.rst
> @@ -135,13 +135,24 @@ see include/trace/events/rcu.h.
> Fine-Tuning the RCU CPU Stall Detector
> ======================================
>
> -The rcuupdate.rcu_cpu_stall_suppress module parameter disables RCU's
> -CPU stall detector, which detects conditions that unduly delay RCU grace
> -periods. This module parameter enables CPU stall detection by default,
> -but may be overridden via boot-time parameter or at runtime via sysfs.
> +RCU's CPU stall detector detects conditions that unduly delay RCU grace
> +periods. CPU stall detection is enabled by default, but may be overridden
> +via boot-time parameter or at runtime via sysfs (accessible via
> +/sys/module/rcupdate/parameters).
> +
> +The rcupdate.rcu_cpu_stall_suppress module parameter disables RCU's
> +CPU stall detector.
> +
> +/sys/module/rcu_cpu_stall_suppress_dyn reports an internal semaphore

Actually an atomically updated variable as opposed to a semaphore,
correct? Replacing "an internal semaphore" with something like "a
variable" would be fine from my viewpoint.

> +used by the RCU's CPU stall detector. Functions holding a CPU for a
> +long time (e.g., sysrq printouts) increment this value before use
> +and decrement it when done, so the value reports the number of
> +functions currently disabling stalls.
> +
> The stall detector's idea of what constitutes "unduly delayed" is
> controlled by a set of kernel configuration variables and cpp macros:
>
> +
> CONFIG_RCU_CPU_STALL_TIMEOUT
> ----------------------------

And thank you for updating the documentation!

> diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
> index c4f8374c7018..038378fe7bfa 100644
> --- a/arch/parisc/kernel/process.c
> +++ b/arch/parisc/kernel/process.c
> @@ -126,7 +126,7 @@ void machine_power_off(void)
> "Please power this system off now.");
>
> /* prevent soft lockup/stalled CPU messages for endless loop. */
> - rcu_sysrq_start();
> + rcu_suppress_start();
> lockup_detector_soft_poweroff();
> for (;;);
> }
> diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
> index d2b2720db6ca..81ab63a473a7 100644
> --- a/drivers/tty/sysrq.c
> +++ b/drivers/tty/sysrq.c
> @@ -579,7 +579,7 @@ void __handle_sysrq(int key, bool check_mask)
> orig_suppress_printk = suppress_printk;
> suppress_printk = 0;
>
> - rcu_sysrq_start();
> + rcu_suppress_start();
> rcu_read_lock();
> /*
> * Raise the apparent loglevel to maximum so that the sysrq header
> @@ -623,7 +623,7 @@ void __handle_sysrq(int key, bool check_mask)
> console_loglevel = orig_log_level;
> }
> rcu_read_unlock();
> - rcu_sysrq_end();
> + rcu_suppress_end();
>
> suppress_printk = orig_suppress_printk;
> }
> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> index 03abf883a281..c0d8a4aae7ff 100644
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@ -131,11 +131,11 @@ static inline void rcu_init_tasks_generic(void) { }
> #endif
>
> #ifdef CONFIG_RCU_STALL_COMMON
> -void rcu_sysrq_start(void);
> -void rcu_sysrq_end(void);
> +void rcu_suppress_start(void);
> +void rcu_suppress_end(void);
> #else /* #ifdef CONFIG_RCU_STALL_COMMON */
> -static inline void rcu_sysrq_start(void) { }
> -static inline void rcu_sysrq_end(void) { }
> +static inline void rcu_suppress_start(void) { }
> +static inline void rcu_suppress_end(void) { }
> #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */
>
> #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK))
> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
> index c5aa934de59b..a658955a1174 100644
> --- a/kernel/rcu/rcu.h
> +++ b/kernel/rcu/rcu.h
> @@ -224,24 +224,25 @@ extern int rcu_cpu_stall_ftrace_dump;
> extern int rcu_cpu_stall_suppress;
> extern int rcu_cpu_stall_timeout;
> extern int rcu_exp_cpu_stall_timeout;
> +extern atomic_t rcu_cpu_stall_suppress_dyn;
> int rcu_jiffies_till_stall_check(void);
> int rcu_exp_jiffies_till_stall_check(void);
>
> static inline bool rcu_stall_is_suppressed(void)
> {
> - return rcu_stall_is_suppressed_at_boot() || rcu_cpu_stall_suppress;
> + return rcu_stall_is_suppressed_at_boot() ||
> + rcu_cpu_stall_suppress ||
> + atomic_read(&rcu_cpu_stall_suppress_dyn);
> }
>
> #define rcu_ftrace_dump_stall_suppress() \
> do { \
> - if (!rcu_cpu_stall_suppress) \
> - rcu_cpu_stall_suppress = 3; \

One thing we are losing here is the ability to see what is suppressing
the current stall, for example, from a crash dump. Maybe that is OK,
as I haven't needed to debug that sort of thing.

Thoughts from those who have had this debugging experience?

> + atomic_inc(&rcu_cpu_stall_suppress_dyn); \
> } while (0)
>
> #define rcu_ftrace_dump_stall_unsuppress() \
> do { \
> - if (rcu_cpu_stall_suppress == 3) \
> - rcu_cpu_stall_suppress = 0; \
> + atomic_dec(&rcu_cpu_stall_suppress_dyn); \
> } while (0)
>
> #else /* #endif #ifdef CONFIG_RCU_STALL_COMMON */
> diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
> index 5653560573e2..260748bc5bc8 100644
> --- a/kernel/rcu/tree_stall.h
> +++ b/kernel/rcu/tree_stall.h
> @@ -103,23 +103,25 @@ bool rcu_gp_might_be_stalled(void)
> return !time_before(j, READ_ONCE(rcu_state.gp_start) + d);
> }
>
> -/* Don't do RCU CPU stall warnings during long sysrq printouts. */
> -void rcu_sysrq_start(void)
> +/* Don't do RCU CPU stall warnings during functions holding the CPU
> + * for a long period of time such as long sysrq printouts.
> + */
> +void rcu_suppress_start(void)
> {
> - if (!rcu_cpu_stall_suppress)
> - rcu_cpu_stall_suppress = 2;

And the same point here.

> + atomic_inc(&rcu_cpu_stall_suppress_dyn);
> }
> +EXPORT_SYMBOL_GPL(rcu_suppress_start);

If this is being exported to modules, the question of who suppressed
the CPU stalls might at some point become more urgent.

If the problem was reproducible, I would simply attach a BPF program to
rcu_suppress_start() and rcu_suppress_end() counting the stack traces of
all callers to these functions. This might or might not make everyone
happy, though.

> -void rcu_sysrq_end(void)
> +void rcu_suppress_end(void)
> {
> - if (rcu_cpu_stall_suppress == 2)
> - rcu_cpu_stall_suppress = 0;
> + atomic_dec(&rcu_cpu_stall_suppress_dyn);
> }
> +EXPORT_SYMBOL_GPL(rcu_suppress_end);
>
> /* Don't print RCU CPU stall warnings during a kernel panic. */
> static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
> {
> - rcu_cpu_stall_suppress = 1;
> + atomic_inc(&rcu_cpu_stall_suppress_dyn);
> return NOTIFY_DONE;
> }
>
> diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> index f5e6a2f95a2a..ceee9d777384 100644
> --- a/kernel/rcu/update.c
> +++ b/kernel/rcu/update.c
> @@ -501,11 +501,18 @@ EXPORT_SYMBOL_GPL(rcutorture_sched_setaffinity);
> #ifdef CONFIG_RCU_STALL_COMMON
> int rcu_cpu_stall_ftrace_dump __read_mostly;
> module_param(rcu_cpu_stall_ftrace_dump, int, 0644);
> -int rcu_cpu_stall_suppress __read_mostly; // !0 = suppress stall warnings.
> +
> +int rcu_cpu_stall_suppress __read_mostly; // !0 = suppress stall warnings from sysfs
> EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
> module_param(rcu_cpu_stall_suppress, int, 0644);
> +
> +atomic_t rcu_cpu_stall_suppress_dyn __read_mostly; // !0 = suppress stall warnings from functions
> +EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress_dyn);
> +module_param_named(rcu_cpu_stall_suppress_dyn, rcu_cpu_stall_suppress_dyn.counter, int, 0444);

I am not seeing a valid use case for specifying an initial
value on the kernel command line. Or does this somehow prevent
rcupdate.rcu_cpu_stall_suppress_dyn from being specified on the kernel
command line?

If something like rcupdate.rcu_cpu_stall_suppress_dyn=3 can be specified
(incorrectly, in my current view) on the kernel command line, maybe
something as shown below would help?

> +
> int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
> module_param(rcu_cpu_stall_timeout, int, 0644);
> +
> int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
> module_param(rcu_exp_cpu_stall_timeout, int, 0644);
> #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
> @@ -616,6 +623,8 @@ void __init rcupdate_announce_bootup_oddness(void)
> pr_info("\tAll grace periods are expedited (rcu_expedited).\n");
> if (rcu_cpu_stall_suppress)
> pr_info("\tRCU CPU stall warnings suppressed (rcu_cpu_stall_suppress).\n");
> + if (atomic_read(&rcu_cpu_stall_suppress_dyn))
> + pr_info("\tDynamic RCU CPU stall warnings suppressed (rcu_cpu_stall_suppress_dyn).\n");

Should this instead be a WARN_ON() placed somewhere so that it won't
mess up the printing of the other parameters?

Or maybe have this code set it to back to zero, with the message
indicating that this is being done?

> if (rcu_cpu_stall_timeout != CONFIG_RCU_CPU_STALL_TIMEOUT)
> pr_info("\tRCU CPU stall warnings timeout set to %d (rcu_cpu_stall_timeout).\n", rcu_cpu_stall_timeout);
> rcu_tasks_bootup_oddness();
> --
> 2.38.1
>

2022-12-21 19:04:35

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH 2/3] rcu: print first CPU on expedited stall line

On Mon, Dec 19, 2022 at 02:29:09PM -0600, Robert Elliott wrote:
> Include the first CPU number in the first pr_err() call reporting
> an expedited stall warning.
>
> Printing the CPU numbers with subsequent pr_cont() calls can
> result in the prints being many lines away or being dropped entirely
> in a busy system. This change ensures there is indication of at
> least one of the CPUs with the problem with the original message.
>
> Before (if prints are interspersed with other prints):
> rcu: INFO: rcu_preempt detected expedited stalls on CPUs/tasks: {
> 13-....
> } 32 jiffies s: 6685 root: 0x1/.
>
> After:
> rcu: INFO: rcu_preempt detected expedited stalls on CPUs/tasks: {13-....
> } 32 jiffies s: 6685 root: 0x1/.
>
> Signed-off-by: Robert Elliott <[email protected]>

A couple of questions below.

> ---
> kernel/rcu/tree_exp.h | 22 +++++++++++++++-------
> 1 file changed, 15 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
> index ed6c3cce28f2..ade6a18e6c07 100644
> --- a/kernel/rcu/tree_exp.h
> +++ b/kernel/rcu/tree_exp.h
> @@ -624,8 +624,6 @@ static void synchronize_rcu_expedited_wait(void)
> if (rcu_stall_is_suppressed())
> continue;
> trace_rcu_stall_warning(rcu_state.name, TPS("ExpeditedStall"));
> - pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
> - rcu_state.name);
> ndetected = 0;
> rcu_for_each_leaf_node(rnp) {
> ndetected += rcu_print_task_exp_stall(rnp);
> @@ -637,11 +635,21 @@ static void synchronize_rcu_expedited_wait(void)
> continue;
> ndetected++;
> rdp = per_cpu_ptr(&rcu_data, cpu);
> - pr_cont(" %d-%c%c%c%c", cpu,
> - "O."[!!cpu_online(cpu)],
> - "o."[!!(rdp->grpmask & rnp->expmaskinit)],
> - "N."[!!(rdp->grpmask & rnp->expmaskinitnext)],
> - "D."[!!(rdp->cpu_no_qs.b.exp)]);
> + // print the prefix and the first CPU number together
> + // under heavy load, the pr_cont prints can be far away or dropped
> + if (ndetected == 1)

Is the purpose here to print the header only on the first detected task?
If so, what if there is more than one task blocking the first rcu_node
structure? Wouldn't that omit the header entirely?

> + pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {%d-%c%c%c%c",

We can of course get other console output interspersed at this point.
This might be OK in practice, but the commit log should clearly spell
out the reasons.

> + rcu_state.name, cpu,
> + "O."[!!cpu_online(cpu)],
> + "o."[!!(rdp->grpmask & rnp->expmaskinit)],
> + "N."[!!(rdp->grpmask & rnp->expmaskinitnext)],
> + "D."[!!(rdp->cpu_no_qs.b.exp)]);
> + else
> + pr_cont(" %d-%c%c%c%c", cpu,
> + "O."[!!cpu_online(cpu)],
> + "o."[!!(rdp->grpmask & rnp->expmaskinit)],
> + "N."[!!(rdp->grpmask & rnp->expmaskinitnext)],
> + "D."[!!(rdp->cpu_no_qs.b.exp)]);
> }
> }
> pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
> --
> 2.38.1
>

2022-12-30 08:41:21

by Herbert Xu

[permalink] [raw]
Subject: Re: [PATCH 3/3] crypto: tcrypt - suppress RCU stall warnings during speed tests

On Mon, Dec 19, 2022 at 02:29:10PM -0600, Robert Elliott wrote:
> Suppress RCU CPU stall warnings while running speed tests.
>
> The tcrypt module is intended only for developer usage, so
> RCU stalls induced by those tests are not necessarily representative
> of real problems.
>
> Speed tests need to disable interrupts or preemption to get results
> that are not distorted by such interruptions. This triggers more
> RCU stalls than normal invocations of the crypto functions.

Where do we disable interrupts? That would seem to break the
use of jiffies since it won't get updated anymore.

Which particular test is still triggering the RCU warnings after
fixing the hash problems that you've already identified?

Cheers,
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

2023-01-10 06:18:29

by Joel Fernandes

[permalink] [raw]
Subject: Re: [PATCH 1/3] rcu: genericize RCU stall suppression functions



> On Dec 21, 2022, at 1:56 PM, Paul E. McKenney <[email protected]> wrote:
>
> On Mon, Dec 19, 2022 at 02:29:08PM -0600, Robert Elliott wrote:
>> Convert the functions that temporarily suppress RCU CPU
>> stall reporting:
>> rcu_sysrq_start()
>> rcu_sysrq_end()
>>
>> to more generic functions that may be called by functions
>> other than the SysRq handler:
>> rcu_suppress_start()
>> rcu_suppress_end()
>>
>> Covert the underlying variable:
>> rcu_cpu_stall_suppress
>>
>> to an atomic variable so multiple threads can manipulate it at the
>> same time, incrementing it in start() and decrementing in end().
>>
>> Expose that in sysfs in a read-only
>> /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress_dyn

Robert,

What is the use of exposing this read-only node?

I find it hard to believe anyone or any tool would be reading it during a scenario where stalls are required to be dynamically suppressed.

Or maybe I missed the point of this patch as I am late to the party.

Thanks,

- J


>>
>> Keep
>> /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress
>> as writeable by userspace, but OR that into the equations rather than
>> directly manipulate the atomic value.
>>
>> Signed-off-by: Robert Elliott <[email protected]>
>
> I really like the idea of making the suppressing and unsuppressing of
> RCU CPU stall warnings SMP-safe, thank you! Yes, as far as I know,
> there have been no problems due to this, but accidents waiting to happen
> and all that.
>
> Some comments and questions below.
>
>> ---
>> .../RCU/Design/Requirements/Requirements.rst | 6 +++---
>> Documentation/RCU/stallwarn.rst | 19 +++++++++++++++----
>> arch/parisc/kernel/process.c | 2 +-
>> drivers/tty/sysrq.c | 4 ++--
>> include/linux/rcupdate.h | 8 ++++----
>> kernel/rcu/rcu.h | 11 ++++++-----
>> kernel/rcu/tree_stall.h | 18 ++++++++++--------
>> kernel/rcu/update.c | 11 ++++++++++-
>> 8 files changed, 51 insertions(+), 28 deletions(-)
>>
>> diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
>> index 49387d823619..5083490bb6fc 100644
>> --- a/Documentation/RCU/Design/Requirements/Requirements.rst
>> +++ b/Documentation/RCU/Design/Requirements/Requirements.rst
>> @@ -1639,9 +1639,9 @@ against mishaps and misuse:
>> ``rcupdate.rcu_cpu_stall_suppress`` to suppress the splats. This
>> kernel parameter may also be set via ``sysfs``. Furthermore, RCU CPU
>> stall warnings are counter-productive during sysrq dumps and during
>> - panics. RCU therefore supplies the rcu_sysrq_start() and
>> - rcu_sysrq_end() API members to be called before and after long
>> - sysrq dumps. RCU also supplies the rcu_panic() notifier that is
>> + panics. RCU therefore supplies the rcu_suppress_start() and
>> + rcu_suppress_end() API members to be called before and after long
>> + CPU usage. RCU also supplies the rcu_panic() notifier that is
>> automatically invoked at the beginning of a panic to suppress further
>> RCU CPU stall warnings.
>>
>> diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst
>> index e38c587067fc..9e6fba72f56d 100644
>> --- a/Documentation/RCU/stallwarn.rst
>> +++ b/Documentation/RCU/stallwarn.rst
>> @@ -135,13 +135,24 @@ see include/trace/events/rcu.h.
>> Fine-Tuning the RCU CPU Stall Detector
>> ======================================
>>
>> -The rcuupdate.rcu_cpu_stall_suppress module parameter disables RCU's
>> -CPU stall detector, which detects conditions that unduly delay RCU grace
>> -periods. This module parameter enables CPU stall detection by default,
>> -but may be overridden via boot-time parameter or at runtime via sysfs.
>> +RCU's CPU stall detector detects conditions that unduly delay RCU grace
>> +periods. CPU stall detection is enabled by default, but may be overridden
>> +via boot-time parameter or at runtime via sysfs (accessible via
>> +/sys/module/rcupdate/parameters).
>> +
>> +The rcupdate.rcu_cpu_stall_suppress module parameter disables RCU's
>> +CPU stall detector.
>> +
>> +/sys/module/rcu_cpu_stall_suppress_dyn reports an internal semaphore
>
> Actually an atomically updated variable as opposed to a semaphore,
> correct? Replacing "an internal semaphore" with something like "a
> variable" would be fine from my viewpoint.
>
>> +used by the RCU's CPU stall detector. Functions holding a CPU for a
>> +long time (e.g., sysrq printouts) increment this value before use
>> +and decrement it when done, so the value reports the number of
>> +functions currently disabling stalls.
>> +
>> The stall detector's idea of what constitutes "unduly delayed" is
>> controlled by a set of kernel configuration variables and cpp macros:
>>
>> +
>> CONFIG_RCU_CPU_STALL_TIMEOUT
>> ----------------------------
>
> And thank you for updating the documentation!
>
>> diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
>> index c4f8374c7018..038378fe7bfa 100644
>> --- a/arch/parisc/kernel/process.c
>> +++ b/arch/parisc/kernel/process.c
>> @@ -126,7 +126,7 @@ void machine_power_off(void)
>> "Please power this system off now.");
>>
>> /* prevent soft lockup/stalled CPU messages for endless loop. */
>> - rcu_sysrq_start();
>> + rcu_suppress_start();
>> lockup_detector_soft_poweroff();
>> for (;;);
>> }
>> diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
>> index d2b2720db6ca..81ab63a473a7 100644
>> --- a/drivers/tty/sysrq.c
>> +++ b/drivers/tty/sysrq.c
>> @@ -579,7 +579,7 @@ void __handle_sysrq(int key, bool check_mask)
>> orig_suppress_printk = suppress_printk;
>> suppress_printk = 0;
>>
>> - rcu_sysrq_start();
>> + rcu_suppress_start();
>> rcu_read_lock();
>> /*
>> * Raise the apparent loglevel to maximum so that the sysrq header
>> @@ -623,7 +623,7 @@ void __handle_sysrq(int key, bool check_mask)
>> console_loglevel = orig_log_level;
>> }
>> rcu_read_unlock();
>> - rcu_sysrq_end();
>> + rcu_suppress_end();
>>
>> suppress_printk = orig_suppress_printk;
>> }
>> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
>> index 03abf883a281..c0d8a4aae7ff 100644
>> --- a/include/linux/rcupdate.h
>> +++ b/include/linux/rcupdate.h
>> @@ -131,11 +131,11 @@ static inline void rcu_init_tasks_generic(void) { }
>> #endif
>>
>> #ifdef CONFIG_RCU_STALL_COMMON
>> -void rcu_sysrq_start(void);
>> -void rcu_sysrq_end(void);
>> +void rcu_suppress_start(void);
>> +void rcu_suppress_end(void);
>> #else /* #ifdef CONFIG_RCU_STALL_COMMON */
>> -static inline void rcu_sysrq_start(void) { }
>> -static inline void rcu_sysrq_end(void) { }
>> +static inline void rcu_suppress_start(void) { }
>> +static inline void rcu_suppress_end(void) { }
>> #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */
>>
>> #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK))
>> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
>> index c5aa934de59b..a658955a1174 100644
>> --- a/kernel/rcu/rcu.h
>> +++ b/kernel/rcu/rcu.h
>> @@ -224,24 +224,25 @@ extern int rcu_cpu_stall_ftrace_dump;
>> extern int rcu_cpu_stall_suppress;
>> extern int rcu_cpu_stall_timeout;
>> extern int rcu_exp_cpu_stall_timeout;
>> +extern atomic_t rcu_cpu_stall_suppress_dyn;
>> int rcu_jiffies_till_stall_check(void);
>> int rcu_exp_jiffies_till_stall_check(void);
>>
>> static inline bool rcu_stall_is_suppressed(void)
>> {
>> - return rcu_stall_is_suppressed_at_boot() || rcu_cpu_stall_suppress;
>> + return rcu_stall_is_suppressed_at_boot() ||
>> + rcu_cpu_stall_suppress ||
>> + atomic_read(&rcu_cpu_stall_suppress_dyn);
>> }
>>
>> #define rcu_ftrace_dump_stall_suppress() \
>> do { \
>> - if (!rcu_cpu_stall_suppress) \
>> - rcu_cpu_stall_suppress = 3; \
>
> One thing we are losing here is the ability to see what is suppressing
> the current stall, for example, from a crash dump. Maybe that is OK,
> as I haven't needed to debug that sort of thing.
>
> Thoughts from those who have had this debugging experience?
>
>> + atomic_inc(&rcu_cpu_stall_suppress_dyn); \
>> } while (0)
>>
>> #define rcu_ftrace_dump_stall_unsuppress() \
>> do { \
>> - if (rcu_cpu_stall_suppress == 3) \
>> - rcu_cpu_stall_suppress = 0; \
>> + atomic_dec(&rcu_cpu_stall_suppress_dyn); \
>> } while (0)
>>
>> #else /* #endif #ifdef CONFIG_RCU_STALL_COMMON */
>> diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
>> index 5653560573e2..260748bc5bc8 100644
>> --- a/kernel/rcu/tree_stall.h
>> +++ b/kernel/rcu/tree_stall.h
>> @@ -103,23 +103,25 @@ bool rcu_gp_might_be_stalled(void)
>> return !time_before(j, READ_ONCE(rcu_state.gp_start) + d);
>> }
>>
>> -/* Don't do RCU CPU stall warnings during long sysrq printouts. */
>> -void rcu_sysrq_start(void)
>> +/* Don't do RCU CPU stall warnings during functions holding the CPU
>> + * for a long period of time such as long sysrq printouts.
>> + */
>> +void rcu_suppress_start(void)
>> {
>> - if (!rcu_cpu_stall_suppress)
>> - rcu_cpu_stall_suppress = 2;
>
> And the same point here.
>
>> + atomic_inc(&rcu_cpu_stall_suppress_dyn);
>> }
>> +EXPORT_SYMBOL_GPL(rcu_suppress_start);
>
> If this is being exported to modules, the question of who suppressed
> the CPU stalls might at some point become more urgent.
>
> If the problem was reproducible, I would simply attach a BPF program to
> rcu_suppress_start() and rcu_suppress_end() counting the stack traces of
> all callers to these functions. This might or might not make everyone
> happy, though.
>
>> -void rcu_sysrq_end(void)
>> +void rcu_suppress_end(void)
>> {
>> - if (rcu_cpu_stall_suppress == 2)
>> - rcu_cpu_stall_suppress = 0;
>> + atomic_dec(&rcu_cpu_stall_suppress_dyn);
>> }
>> +EXPORT_SYMBOL_GPL(rcu_suppress_end);
>>
>> /* Don't print RCU CPU stall warnings during a kernel panic. */
>> static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
>> {
>> - rcu_cpu_stall_suppress = 1;
>> + atomic_inc(&rcu_cpu_stall_suppress_dyn);
>> return NOTIFY_DONE;
>> }
>>
>> diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
>> index f5e6a2f95a2a..ceee9d777384 100644
>> --- a/kernel/rcu/update.c
>> +++ b/kernel/rcu/update.c
>> @@ -501,11 +501,18 @@ EXPORT_SYMBOL_GPL(rcutorture_sched_setaffinity);
>> #ifdef CONFIG_RCU_STALL_COMMON
>> int rcu_cpu_stall_ftrace_dump __read_mostly;
>> module_param(rcu_cpu_stall_ftrace_dump, int, 0644);
>> -int rcu_cpu_stall_suppress __read_mostly; // !0 = suppress stall warnings.
>> +
>> +int rcu_cpu_stall_suppress __read_mostly; // !0 = suppress stall warnings from sysfs
>> EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
>> module_param(rcu_cpu_stall_suppress, int, 0644);
>> +
>> +atomic_t rcu_cpu_stall_suppress_dyn __read_mostly; // !0 = suppress stall warnings from functions
>> +EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress_dyn);
>> +module_param_named(rcu_cpu_stall_suppress_dyn, rcu_cpu_stall_suppress_dyn.counter, int, 0444);
>
> I am not seeing a valid use case for specifying an initial
> value on the kernel command line. Or does this somehow prevent
> rcupdate.rcu_cpu_stall_suppress_dyn from being specified on the kernel
> command line?
>
> If something like rcupdate.rcu_cpu_stall_suppress_dyn=3 can be specified
> (incorrectly, in my current view) on the kernel command line, maybe
> something as shown below would help?
>
>> +
>> int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
>> module_param(rcu_cpu_stall_timeout, int, 0644);
>> +
>> int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
>> module_param(rcu_exp_cpu_stall_timeout, int, 0644);
>> #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
>> @@ -616,6 +623,8 @@ void __init rcupdate_announce_bootup_oddness(void)
>> pr_info("\tAll grace periods are expedited (rcu_expedited).\n");
>> if (rcu_cpu_stall_suppress)
>> pr_info("\tRCU CPU stall warnings suppressed (rcu_cpu_stall_suppress).\n");
>> + if (atomic_read(&rcu_cpu_stall_suppress_dyn))
>> + pr_info("\tDynamic RCU CPU stall warnings suppressed (rcu_cpu_stall_suppress_dyn).\n");
>
> Should this instead be a WARN_ON() placed somewhere so that it won't
> mess up the printing of the other parameters?
>
> Or maybe have this code set it to back to zero, with the message
> indicating that this is being done?
>
>> if (rcu_cpu_stall_timeout != CONFIG_RCU_CPU_STALL_TIMEOUT)
>> pr_info("\tRCU CPU stall warnings timeout set to %d (rcu_cpu_stall_timeout).\n", rcu_cpu_stall_timeout);
>> rcu_tasks_bootup_oddness();
>> --
>> 2.38.1
>>

2023-01-10 06:20:22

by Joel Fernandes

[permalink] [raw]
Subject: Re: [PATCH 1/3] rcu: genericize RCU stall suppression functions



> On Jan 10, 2023, at 1:07 AM, Joel Fernandes <[email protected]> wrote:
>
> 
>
>>> On Dec 21, 2022, at 1:56 PM, Paul E. McKenney <[email protected]> wrote:
>>>
>>> On Mon, Dec 19, 2022 at 02:29:08PM -0600, Robert Elliott wrote:
>>> Convert the functions that temporarily suppress RCU CPU
>>> stall reporting:
>>> rcu_sysrq_start()
>>> rcu_sysrq_end()
>>>
>>> to more generic functions that may be called by functions
>>> other than the SysRq handler:
>>> rcu_suppress_start()
>>> rcu_suppress_end()
>>>
>>> Covert the underlying variable:
>>> rcu_cpu_stall_suppress
>>>
>>> to an atomic variable so multiple threads can manipulate it at the
>>> same time, incrementing it in start() and decrementing in end().
>>>
>>> Expose that in sysfs in a read-only
>>> /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress_dyn
>
> Robert,
>
> What is the use of exposing this read-only node?
>
> I find it hard to believe anyone or any tool would be reading it during a scenario where stalls are required to be dynamically suppressed.
>
> Or maybe I missed the point of this patch as I am late to the party.

I was hasty to not be clear — I have no problem with the idea of dynamic suppression, just missing the point about the read only infomercial via sysfs node. More kernel memory wasted in sysfs however small :-).

Thanks,

- J

>
> Thanks,
>
> - J
>
>
>>>
>>> Keep
>>> /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress
>>> as writeable by userspace, but OR that into the equations rather than
>>> directly manipulate the atomic value.
>>>
>>> Signed-off-by: Robert Elliott <[email protected]>
>>
>> I really like the idea of making the suppressing and unsuppressing of
>> RCU CPU stall warnings SMP-safe, thank you! Yes, as far as I know,
>> there have been no problems due to this, but accidents waiting to happen
>> and all that.
>>
>> Some comments and questions below.
>>
>>> ---
>>> .../RCU/Design/Requirements/Requirements.rst | 6 +++---
>>> Documentation/RCU/stallwarn.rst | 19 +++++++++++++++----
>>> arch/parisc/kernel/process.c | 2 +-
>>> drivers/tty/sysrq.c | 4 ++--
>>> include/linux/rcupdate.h | 8 ++++----
>>> kernel/rcu/rcu.h | 11 ++++++-----
>>> kernel/rcu/tree_stall.h | 18 ++++++++++--------
>>> kernel/rcu/update.c | 11 ++++++++++-
>>> 8 files changed, 51 insertions(+), 28 deletions(-)
>>>
>>> diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
>>> index 49387d823619..5083490bb6fc 100644
>>> --- a/Documentation/RCU/Design/Requirements/Requirements.rst
>>> +++ b/Documentation/RCU/Design/Requirements/Requirements.rst
>>> @@ -1639,9 +1639,9 @@ against mishaps and misuse:
>>> ``rcupdate.rcu_cpu_stall_suppress`` to suppress the splats. This
>>> kernel parameter may also be set via ``sysfs``. Furthermore, RCU CPU
>>> stall warnings are counter-productive during sysrq dumps and during
>>> - panics. RCU therefore supplies the rcu_sysrq_start() and
>>> - rcu_sysrq_end() API members to be called before and after long
>>> - sysrq dumps. RCU also supplies the rcu_panic() notifier that is
>>> + panics. RCU therefore supplies the rcu_suppress_start() and
>>> + rcu_suppress_end() API members to be called before and after long
>>> + CPU usage. RCU also supplies the rcu_panic() notifier that is
>>> automatically invoked at the beginning of a panic to suppress further
>>> RCU CPU stall warnings.
>>>
>>> diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst
>>> index e38c587067fc..9e6fba72f56d 100644
>>> --- a/Documentation/RCU/stallwarn.rst
>>> +++ b/Documentation/RCU/stallwarn.rst
>>> @@ -135,13 +135,24 @@ see include/trace/events/rcu.h.
>>> Fine-Tuning the RCU CPU Stall Detector
>>> ======================================
>>>
>>> -The rcuupdate.rcu_cpu_stall_suppress module parameter disables RCU's
>>> -CPU stall detector, which detects conditions that unduly delay RCU grace
>>> -periods. This module parameter enables CPU stall detection by default,
>>> -but may be overridden via boot-time parameter or at runtime via sysfs.
>>> +RCU's CPU stall detector detects conditions that unduly delay RCU grace
>>> +periods. CPU stall detection is enabled by default, but may be overridden
>>> +via boot-time parameter or at runtime via sysfs (accessible via
>>> +/sys/module/rcupdate/parameters).
>>> +
>>> +The rcupdate.rcu_cpu_stall_suppress module parameter disables RCU's
>>> +CPU stall detector.
>>> +
>>> +/sys/module/rcu_cpu_stall_suppress_dyn reports an internal semaphore
>>
>> Actually an atomically updated variable as opposed to a semaphore,
>> correct? Replacing "an internal semaphore" with something like "a
>> variable" would be fine from my viewpoint.
>>
>>> +used by the RCU's CPU stall detector. Functions holding a CPU for a
>>> +long time (e.g., sysrq printouts) increment this value before use
>>> +and decrement it when done, so the value reports the number of
>>> +functions currently disabling stalls.
>>> +
>>> The stall detector's idea of what constitutes "unduly delayed" is
>>> controlled by a set of kernel configuration variables and cpp macros:
>>>
>>> +
>>> CONFIG_RCU_CPU_STALL_TIMEOUT
>>> ----------------------------
>>
>> And thank you for updating the documentation!
>>
>>> diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
>>> index c4f8374c7018..038378fe7bfa 100644
>>> --- a/arch/parisc/kernel/process.c
>>> +++ b/arch/parisc/kernel/process.c
>>> @@ -126,7 +126,7 @@ void machine_power_off(void)
>>> "Please power this system off now.");
>>>
>>> /* prevent soft lockup/stalled CPU messages for endless loop. */
>>> - rcu_sysrq_start();
>>> + rcu_suppress_start();
>>> lockup_detector_soft_poweroff();
>>> for (;;);
>>> }
>>> diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
>>> index d2b2720db6ca..81ab63a473a7 100644
>>> --- a/drivers/tty/sysrq.c
>>> +++ b/drivers/tty/sysrq.c
>>> @@ -579,7 +579,7 @@ void __handle_sysrq(int key, bool check_mask)
>>> orig_suppress_printk = suppress_printk;
>>> suppress_printk = 0;
>>>
>>> - rcu_sysrq_start();
>>> + rcu_suppress_start();
>>> rcu_read_lock();
>>> /*
>>> * Raise the apparent loglevel to maximum so that the sysrq header
>>> @@ -623,7 +623,7 @@ void __handle_sysrq(int key, bool check_mask)
>>> console_loglevel = orig_log_level;
>>> }
>>> rcu_read_unlock();
>>> - rcu_sysrq_end();
>>> + rcu_suppress_end();
>>>
>>> suppress_printk = orig_suppress_printk;
>>> }
>>> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
>>> index 03abf883a281..c0d8a4aae7ff 100644
>>> --- a/include/linux/rcupdate.h
>>> +++ b/include/linux/rcupdate.h
>>> @@ -131,11 +131,11 @@ static inline void rcu_init_tasks_generic(void) { }
>>> #endif
>>>
>>> #ifdef CONFIG_RCU_STALL_COMMON
>>> -void rcu_sysrq_start(void);
>>> -void rcu_sysrq_end(void);
>>> +void rcu_suppress_start(void);
>>> +void rcu_suppress_end(void);
>>> #else /* #ifdef CONFIG_RCU_STALL_COMMON */
>>> -static inline void rcu_sysrq_start(void) { }
>>> -static inline void rcu_sysrq_end(void) { }
>>> +static inline void rcu_suppress_start(void) { }
>>> +static inline void rcu_suppress_end(void) { }
>>> #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */
>>>
>>> #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK))
>>> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
>>> index c5aa934de59b..a658955a1174 100644
>>> --- a/kernel/rcu/rcu.h
>>> +++ b/kernel/rcu/rcu.h
>>> @@ -224,24 +224,25 @@ extern int rcu_cpu_stall_ftrace_dump;
>>> extern int rcu_cpu_stall_suppress;
>>> extern int rcu_cpu_stall_timeout;
>>> extern int rcu_exp_cpu_stall_timeout;
>>> +extern atomic_t rcu_cpu_stall_suppress_dyn;
>>> int rcu_jiffies_till_stall_check(void);
>>> int rcu_exp_jiffies_till_stall_check(void);
>>>
>>> static inline bool rcu_stall_is_suppressed(void)
>>> {
>>> - return rcu_stall_is_suppressed_at_boot() || rcu_cpu_stall_suppress;
>>> + return rcu_stall_is_suppressed_at_boot() ||
>>> + rcu_cpu_stall_suppress ||
>>> + atomic_read(&rcu_cpu_stall_suppress_dyn);
>>> }
>>>
>>> #define rcu_ftrace_dump_stall_suppress() \
>>> do { \
>>> - if (!rcu_cpu_stall_suppress) \
>>> - rcu_cpu_stall_suppress = 3; \
>>
>> One thing we are losing here is the ability to see what is suppressing
>> the current stall, for example, from a crash dump. Maybe that is OK,
>> as I haven't needed to debug that sort of thing.
>>
>> Thoughts from those who have had this debugging experience?
>>
>>> + atomic_inc(&rcu_cpu_stall_suppress_dyn); \
>>> } while (0)
>>>
>>> #define rcu_ftrace_dump_stall_unsuppress() \
>>> do { \
>>> - if (rcu_cpu_stall_suppress == 3) \
>>> - rcu_cpu_stall_suppress = 0; \
>>> + atomic_dec(&rcu_cpu_stall_suppress_dyn); \
>>> } while (0)
>>>
>>> #else /* #endif #ifdef CONFIG_RCU_STALL_COMMON */
>>> diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
>>> index 5653560573e2..260748bc5bc8 100644
>>> --- a/kernel/rcu/tree_stall.h
>>> +++ b/kernel/rcu/tree_stall.h
>>> @@ -103,23 +103,25 @@ bool rcu_gp_might_be_stalled(void)
>>> return !time_before(j, READ_ONCE(rcu_state.gp_start) + d);
>>> }
>>>
>>> -/* Don't do RCU CPU stall warnings during long sysrq printouts. */
>>> -void rcu_sysrq_start(void)
>>> +/* Don't do RCU CPU stall warnings during functions holding the CPU
>>> + * for a long period of time such as long sysrq printouts.
>>> + */
>>> +void rcu_suppress_start(void)
>>> {
>>> - if (!rcu_cpu_stall_suppress)
>>> - rcu_cpu_stall_suppress = 2;
>>
>> And the same point here.
>>
>>> + atomic_inc(&rcu_cpu_stall_suppress_dyn);
>>> }
>>> +EXPORT_SYMBOL_GPL(rcu_suppress_start);
>>
>> If this is being exported to modules, the question of who suppressed
>> the CPU stalls might at some point become more urgent.
>>
>> If the problem was reproducible, I would simply attach a BPF program to
>> rcu_suppress_start() and rcu_suppress_end() counting the stack traces of
>> all callers to these functions. This might or might not make everyone
>> happy, though.
>>
>>> -void rcu_sysrq_end(void)
>>> +void rcu_suppress_end(void)
>>> {
>>> - if (rcu_cpu_stall_suppress == 2)
>>> - rcu_cpu_stall_suppress = 0;
>>> + atomic_dec(&rcu_cpu_stall_suppress_dyn);
>>> }
>>> +EXPORT_SYMBOL_GPL(rcu_suppress_end);
>>>
>>> /* Don't print RCU CPU stall warnings during a kernel panic. */
>>> static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
>>> {
>>> - rcu_cpu_stall_suppress = 1;
>>> + atomic_inc(&rcu_cpu_stall_suppress_dyn);
>>> return NOTIFY_DONE;
>>> }
>>>
>>> diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
>>> index f5e6a2f95a2a..ceee9d777384 100644
>>> --- a/kernel/rcu/update.c
>>> +++ b/kernel/rcu/update.c
>>> @@ -501,11 +501,18 @@ EXPORT_SYMBOL_GPL(rcutorture_sched_setaffinity);
>>> #ifdef CONFIG_RCU_STALL_COMMON
>>> int rcu_cpu_stall_ftrace_dump __read_mostly;
>>> module_param(rcu_cpu_stall_ftrace_dump, int, 0644);
>>> -int rcu_cpu_stall_suppress __read_mostly; // !0 = suppress stall warnings.
>>> +
>>> +int rcu_cpu_stall_suppress __read_mostly; // !0 = suppress stall warnings from sysfs
>>> EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
>>> module_param(rcu_cpu_stall_suppress, int, 0644);
>>> +
>>> +atomic_t rcu_cpu_stall_suppress_dyn __read_mostly; // !0 = suppress stall warnings from functions
>>> +EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress_dyn);
>>> +module_param_named(rcu_cpu_stall_suppress_dyn, rcu_cpu_stall_suppress_dyn.counter, int, 0444);
>>
>> I am not seeing a valid use case for specifying an initial
>> value on the kernel command line. Or does this somehow prevent
>> rcupdate.rcu_cpu_stall_suppress_dyn from being specified on the kernel
>> command line?
>>
>> If something like rcupdate.rcu_cpu_stall_suppress_dyn=3 can be specified
>> (incorrectly, in my current view) on the kernel command line, maybe
>> something as shown below would help?
>>
>>> +
>>> int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
>>> module_param(rcu_cpu_stall_timeout, int, 0644);
>>> +
>>> int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
>>> module_param(rcu_exp_cpu_stall_timeout, int, 0644);
>>> #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
>>> @@ -616,6 +623,8 @@ void __init rcupdate_announce_bootup_oddness(void)
>>> pr_info("\tAll grace periods are expedited (rcu_expedited).\n");
>>> if (rcu_cpu_stall_suppress)
>>> pr_info("\tRCU CPU stall warnings suppressed (rcu_cpu_stall_suppress).\n");
>>> + if (atomic_read(&rcu_cpu_stall_suppress_dyn))
>>> + pr_info("\tDynamic RCU CPU stall warnings suppressed (rcu_cpu_stall_suppress_dyn).\n");
>>
>> Should this instead be a WARN_ON() placed somewhere so that it won't
>> mess up the printing of the other parameters?
>>
>> Or maybe have this code set it to back to zero, with the message
>> indicating that this is being done?
>>
>>> if (rcu_cpu_stall_timeout != CONFIG_RCU_CPU_STALL_TIMEOUT)
>>> pr_info("\tRCU CPU stall warnings timeout set to %d (rcu_cpu_stall_timeout).\n", rcu_cpu_stall_timeout);
>>> rcu_tasks_bootup_oddness();
>>> --
>>> 2.38.1
>>>