Hello!
This series removes CSD-lock diagnostics that were once very useful
but which have not seen much action since. It also adjusts Kconfig and
kernel-boot-parameter setup.
1. locking/csd_lock: Add Kconfig option for csd_debug default.
2. locking/csd_lock: Remove added data from CSD lock debugging.
3. locking/csd_lock: Remove per-CPU data indirection from CSD
lock debugging.
4. kernel/smp: Make csdlock_debug= resettable.
Thanx, Paul
------------------------------------------------------------------------
Documentation/admin-guide/kernel-parameters.txt | 17 -
b/Documentation/admin-guide/kernel-parameters.txt | 6
b/kernel/smp.c | 2
b/lib/Kconfig.debug | 9
kernel/smp.c | 260 ++--------------------
5 files changed, 47 insertions(+), 247 deletions(-)
It is currently possible to set the csdlock_debug_enabled static
branch, but not to reset it. This is an issue when several different
entities supply kernel boot parameters and also for kernels built with
CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT=y.
Therefore, make the csdlock_debug=0 kernel boot parameter turn off
debugging. Last one wins!
Reported-by: Jes Sorensen <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 13 +++++++------
kernel/smp.c | 11 ++++++++---
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b15198a85acb..5f2ec4b0f927 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -889,12 +889,13 @@
cs89x0_media= [HW,NET]
Format: { rj45 | aui | bnc }
- csdlock_debug= [KNL] Enable debug add-ons of cross-CPU function call
- handling. When switched on, additional debug data is
- printed to the console in case a hanging CPU is
- detected, and that CPU is pinged again in order to try
- to resolve the hang situation. The default value of
- this option depends on the CSD_LOCK_WAIT_DEBUG_DEFAULT
+ csdlock_debug= [KNL] Enable or disable debug add-ons of cross-CPU
+ function call handling. When switched on,
+ additional debug data is printed to the console
+ in case a hanging CPU is detected, and that
+ CPU is pinged again in order to try to resolve
+ the hang situation. The default value of this
+ option depends on the CSD_LOCK_WAIT_DEBUG_DEFAULT
Kconfig option.
dasd= [HW,NET]
diff --git a/kernel/smp.c b/kernel/smp.c
index 7a85bcddd9dc..298ba7570621 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -116,11 +116,16 @@ static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug
*/
static int __init csdlock_debug(char *str)
{
+ int ret;
unsigned int val = 0;
- get_option(&str, &val);
- if (val)
- static_branch_enable(&csdlock_debug_enabled);
+ ret = get_option(&str, &val);
+ if (ret) {
+ if (val)
+ static_branch_enable(&csdlock_debug_enabled);
+ else
+ static_branch_disable(&csdlock_debug_enabled);
+ }
return 1;
}
--
2.40.0.rc2
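A minimal user-space sketch of the resulting "last one wins" behavior
(illustration only: the command line, helper, and variable below are
stand-ins for the kernel's get_option()/static-branch handling, not the
actual implementation):

/*
 * Illustrative sketch: repeated csdlock_debug= occurrences are applied
 * in order, so the last value on the command line decides the final
 * state of the (simulated) static key.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static bool csdlock_debug_enabled;	/* stands in for the static key */

static void parse_csdlock_debug(const char *val)
{
	/* stands in for get_option() + static_branch_{enable,disable}() */
	csdlock_debug_enabled = (atoi(val) != 0);
}

int main(void)
{
	/* hypothetical command line assembled by two different entities */
	const char *cmdline[] = { "csdlock_debug=1", "quiet", "csdlock_debug=0" };
	size_t i;

	for (i = 0; i < sizeof(cmdline) / sizeof(cmdline[0]); i++) {
		if (!strncmp(cmdline[i], "csdlock_debug=", 14))
			parse_csdlock_debug(cmdline[i] + 14);
	}

	printf("csdlock_debug finally %s\n",
	       csdlock_debug_enabled ? "enabled" : "disabled");
	return 0;
}

Here the final csdlock_debug=0 wins and debugging ends up disabled,
which is the behavior the patch above introduces.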
The csd_debug kernel parameter works well, but is inconvenient in cases
where it is more closely associated with boot loaders or automation than
with a particular kernel version or release. Therefore, provide a new
CSD_LOCK_WAIT_DEBUG_DEFAULT Kconfig option that defaults csd_debug to
1 when selected and 0 otherwise, with the latter being the default.
Signed-off-by: Paul E. McKenney <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Ingo Molnar <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 6 ++++--
kernel/smp.c | 2 +-
lib/Kconfig.debug | 9 +++++++++
3 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6221a1d057dd..ce70777f5999 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -893,8 +893,10 @@
handling. When switched on, additional debug data is
printed to the console in case a hanging CPU is
detected, and that CPU is pinged again in order to try
- to resolve the hang situation.
- 0: disable csdlock debugging (default)
+ to resolve the hang situation. The default value of
+ this option depends on the CSD_LOCK_WAIT_DEBUG_DEFAULT
+ Kconfig option.
+ 0: disable csdlock debugging
1: enable basic csdlock debugging (minor impact)
ext: enable extended csdlock debugging (more impact,
but more data)
diff --git a/kernel/smp.c b/kernel/smp.c
index 06a413987a14..e2d558f5cef8 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -158,7 +158,7 @@ void __init call_function_init(void)
#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
-static DEFINE_STATIC_KEY_FALSE(csdlock_debug_enabled);
+static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug_enabled);
static DEFINE_STATIC_KEY_FALSE(csdlock_debug_extended);
static int __init csdlock_debug(char *str)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c8b379e2e9ad..e1b160a0474d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1480,6 +1480,15 @@ config CSD_LOCK_WAIT_DEBUG
include the IPI handler function currently executing (if any)
and relevant stack traces.
+config CSD_LOCK_WAIT_DEBUG_DEFAULT
+ bool "Default csd_lock_wait() debugging on at boot time"
+ depends on CSD_LOCK_WAIT_DEBUG
+ depends on 64BIT
+ default n
+ help
+ This option causes the csdlock_debug= kernel boot parameter to
+ default to 1 (basic debugging) instead of 0 (no debugging).
+
endmenu # lock debugging
config TRACE_IRQFLAGS
--
2.40.0.rc2
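The DEFINE_STATIC_KEY_MAYBE() change above simply makes the Kconfig
symbol pick the initial state of the csdlock_debug_enabled key, which
a csdlock_debug= boot parameter can still override. A rough user-space
sketch of that idea (CSD_DEBUG_DEFAULT and the code below are
illustrative stand-ins, not the kernel macros):

/*
 * Sketch only: build with -DCSD_DEBUG_DEFAULT=1 to mimic setting
 * CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT=y; leave it unset to mimic the
 * Kconfig default of n.  A later csdlock_debug= boot parameter can
 * still override whichever default was compiled in.
 */
#include <stdbool.h>
#include <stdio.h>

#ifndef CSD_DEBUG_DEFAULT
#define CSD_DEBUG_DEFAULT 0	/* mimics "default n" */
#endif

/* stands in for DEFINE_STATIC_KEY_MAYBE(CONFIG_..., csdlock_debug_enabled) */
static bool csdlock_debug_enabled = CSD_DEBUG_DEFAULT;

int main(int argc, char **argv)
{
	/* argv[1] plays the role of an optional csdlock_debug= override */
	if (argc > 1)
		csdlock_debug_enabled = (argv[1][0] != '0');

	printf("csdlock_debug: %s\n", csdlock_debug_enabled ? "on" : "off");
	return 0;
}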
The diagnostics added by this commit were extremely useful in one instance:
a5aabace5fb8 ("locking/csd_lock: Add more data to CSD lock debugging")
However, they have not seen much action since, and there have been some
concerns expressed that the complexity is not worth the benefit.
Therefore, manually revert the following preparatory commit:
de7b09ef658d ("locking/csd_lock: Prepare more CSD lock debugging")
Signed-off-by: Paul E. McKenney <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
---
kernel/smp.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/kernel/smp.c b/kernel/smp.c
index 038d666f327b..7a85bcddd9dc 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -31,12 +31,8 @@
#define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)
-struct cfd_percpu {
- call_single_data_t csd;
-};
-
struct call_function_data {
- struct cfd_percpu __percpu *pcpu;
+ call_single_data_t __percpu *csd;
cpumask_var_t cpumask;
cpumask_var_t cpumask_ipi;
};
@@ -59,8 +55,8 @@ int smpcfd_prepare_cpu(unsigned int cpu)
free_cpumask_var(cfd->cpumask);
return -ENOMEM;
}
- cfd->pcpu = alloc_percpu(struct cfd_percpu);
- if (!cfd->pcpu) {
+ cfd->csd = alloc_percpu(call_single_data_t);
+ if (!cfd->csd) {
free_cpumask_var(cfd->cpumask);
free_cpumask_var(cfd->cpumask_ipi);
return -ENOMEM;
@@ -75,7 +71,7 @@ int smpcfd_dead_cpu(unsigned int cpu)
free_cpumask_var(cfd->cpumask);
free_cpumask_var(cfd->cpumask_ipi);
- free_percpu(cfd->pcpu);
+ free_percpu(cfd->csd);
return 0;
}
@@ -730,7 +726,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
cpumask_clear(cfd->cpumask_ipi);
for_each_cpu(cpu, cfd->cpumask) {
- call_single_data_t *csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd;
+ call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
if (cond_func && !cond_func(cpu, info))
continue;
@@ -774,7 +770,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
for_each_cpu(cpu, cfd->cpumask) {
call_single_data_t *csd;
- csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd;
+ csd = per_cpu_ptr(cfd->csd, cpu);
csd_lock_wait(csd);
}
}
--
2.40.0.rc2
The diagnostics added by this commit were extremely useful in one instance:
a5aabace5fb8 ("locking/csd_lock: Add more data to CSD lock debugging")
However, they have not seen much action since, and there have been some
concerns expressed that the complexity is not worth the benefit.
Therefore, manually revert this commit, but leave a comment telling
people where to find these diagnostics.
[ paulmck: Apply Juergen Gross feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Jonathan Corbet <[email protected]>
---
.../admin-guide/kernel-parameters.txt | 4 -
kernel/smp.c | 233 +-----------------
2 files changed, 12 insertions(+), 225 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index ce70777f5999..b15198a85acb 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -896,10 +896,6 @@
to resolve the hang situation. The default value of
this option depends on the CSD_LOCK_WAIT_DEBUG_DEFAULT
Kconfig option.
- 0: disable csdlock debugging
- 1: enable basic csdlock debugging (minor impact)
- ext: enable extended csdlock debugging (more impact,
- but more data)
dasd= [HW,NET]
See header of drivers/s390/block/dasd_devmap.c.
diff --git a/kernel/smp.c b/kernel/smp.c
index e2d558f5cef8..038d666f327b 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -31,59 +31,8 @@
#define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)
-#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
-union cfd_seq_cnt {
- u64 val;
- struct {
- u64 src:16;
- u64 dst:16;
-#define CFD_SEQ_NOCPU 0xffff
- u64 type:4;
-#define CFD_SEQ_QUEUE 0
-#define CFD_SEQ_IPI 1
-#define CFD_SEQ_NOIPI 2
-#define CFD_SEQ_PING 3
-#define CFD_SEQ_PINGED 4
-#define CFD_SEQ_HANDLE 5
-#define CFD_SEQ_DEQUEUE 6
-#define CFD_SEQ_IDLE 7
-#define CFD_SEQ_GOTIPI 8
-#define CFD_SEQ_HDLEND 9
- u64 cnt:28;
- } u;
-};
-
-static char *seq_type[] = {
- [CFD_SEQ_QUEUE] = "queue",
- [CFD_SEQ_IPI] = "ipi",
- [CFD_SEQ_NOIPI] = "noipi",
- [CFD_SEQ_PING] = "ping",
- [CFD_SEQ_PINGED] = "pinged",
- [CFD_SEQ_HANDLE] = "handle",
- [CFD_SEQ_DEQUEUE] = "dequeue (src CPU 0 == empty)",
- [CFD_SEQ_IDLE] = "idle",
- [CFD_SEQ_GOTIPI] = "gotipi",
- [CFD_SEQ_HDLEND] = "hdlend (src CPU 0 == early)",
-};
-
-struct cfd_seq_local {
- u64 ping;
- u64 pinged;
- u64 handle;
- u64 dequeue;
- u64 idle;
- u64 gotipi;
- u64 hdlend;
-};
-#endif
-
struct cfd_percpu {
call_single_data_t csd;
-#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
- u64 seq_queue;
- u64 seq_ipi;
- u64 seq_noipi;
-#endif
};
struct call_function_data {
@@ -159,18 +108,21 @@ void __init call_function_init(void)
#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug_enabled);
-static DEFINE_STATIC_KEY_FALSE(csdlock_debug_extended);
+/*
+ * Parse the csdlock_debug= kernel boot parameter.
+ *
+ * If you need to restore the old "ext" value that once provided
+ * additional debugging information, reapply the following commits:
+ *
+ * de7b09ef658d ("locking/csd_lock: Prepare more CSD lock debugging")
+ * a5aabace5fb8 ("locking/csd_lock: Add more data to CSD lock debugging")
+ */
static int __init csdlock_debug(char *str)
{
unsigned int val = 0;
- if (str && !strcmp(str, "ext")) {
- val = 1;
- static_branch_enable(&csdlock_debug_extended);
- } else
- get_option(&str, &val);
-
+ get_option(&str, &val);
if (val)
static_branch_enable(&csdlock_debug_enabled);
@@ -181,36 +133,11 @@ __setup("csdlock_debug=", csdlock_debug);
static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
static DEFINE_PER_CPU(void *, cur_csd_info);
-static DEFINE_PER_CPU(struct cfd_seq_local, cfd_seq_local);
static ulong csd_lock_timeout = 5000; /* CSD lock timeout in milliseconds. */
module_param(csd_lock_timeout, ulong, 0444);
static atomic_t csd_bug_count = ATOMIC_INIT(0);
-static u64 cfd_seq;
-
-#define CFD_SEQ(s, d, t, c) \
- (union cfd_seq_cnt){ .u.src = s, .u.dst = d, .u.type = t, .u.cnt = c }
-
-static u64 cfd_seq_inc(unsigned int src, unsigned int dst, unsigned int type)
-{
- union cfd_seq_cnt new, old;
-
- new = CFD_SEQ(src, dst, type, 0);
-
- do {
- old.val = READ_ONCE(cfd_seq);
- new.u.cnt = old.u.cnt + 1;
- } while (cmpxchg(&cfd_seq, old.val, new.val) != old.val);
-
- return old.val;
-}
-
-#define cfd_seq_store(var, src, dst, type) \
- do { \
- if (static_branch_unlikely(&csdlock_debug_extended)) \
- var = cfd_seq_inc(src, dst, type); \
- } while (0)
/* Record current CSD work for current CPU, NULL to erase. */
static void __csd_lock_record(struct __call_single_data *csd)
@@ -244,80 +171,6 @@ static int csd_lock_wait_getcpu(struct __call_single_data *csd)
return -1;
}
-static void cfd_seq_data_add(u64 val, unsigned int src, unsigned int dst,
- unsigned int type, union cfd_seq_cnt *data,
- unsigned int *n_data, unsigned int now)
-{
- union cfd_seq_cnt new[2];
- unsigned int i, j, k;
-
- new[0].val = val;
- new[1] = CFD_SEQ(src, dst, type, new[0].u.cnt + 1);
-
- for (i = 0; i < 2; i++) {
- if (new[i].u.cnt <= now)
- new[i].u.cnt |= 0x80000000U;
- for (j = 0; j < *n_data; j++) {
- if (new[i].u.cnt == data[j].u.cnt) {
- /* Direct read value trumps generated one. */
- if (i == 0)
- data[j].val = new[i].val;
- break;
- }
- if (new[i].u.cnt < data[j].u.cnt) {
- for (k = *n_data; k > j; k--)
- data[k].val = data[k - 1].val;
- data[j].val = new[i].val;
- (*n_data)++;
- break;
- }
- }
- if (j == *n_data) {
- data[j].val = new[i].val;
- (*n_data)++;
- }
- }
-}
-
-static const char *csd_lock_get_type(unsigned int type)
-{
- return (type >= ARRAY_SIZE(seq_type)) ? "?" : seq_type[type];
-}
-
-static void csd_lock_print_extended(struct __call_single_data *csd, int cpu)
-{
- struct cfd_seq_local *seq = &per_cpu(cfd_seq_local, cpu);
- unsigned int srccpu = csd->node.src;
- struct call_function_data *cfd = per_cpu_ptr(&cfd_data, srccpu);
- struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu);
- unsigned int now;
- union cfd_seq_cnt data[2 * ARRAY_SIZE(seq_type)];
- unsigned int n_data = 0, i;
-
- data[0].val = READ_ONCE(cfd_seq);
- now = data[0].u.cnt;
-
- cfd_seq_data_add(pcpu->seq_queue, srccpu, cpu, CFD_SEQ_QUEUE, data, &n_data, now);
- cfd_seq_data_add(pcpu->seq_ipi, srccpu, cpu, CFD_SEQ_IPI, data, &n_data, now);
- cfd_seq_data_add(pcpu->seq_noipi, srccpu, cpu, CFD_SEQ_NOIPI, data, &n_data, now);
-
- cfd_seq_data_add(per_cpu(cfd_seq_local.ping, srccpu), srccpu, CFD_SEQ_NOCPU, CFD_SEQ_PING, data, &n_data, now);
- cfd_seq_data_add(per_cpu(cfd_seq_local.pinged, srccpu), srccpu, CFD_SEQ_NOCPU, CFD_SEQ_PINGED, data, &n_data, now);
-
- cfd_seq_data_add(seq->idle, CFD_SEQ_NOCPU, cpu, CFD_SEQ_IDLE, data, &n_data, now);
- cfd_seq_data_add(seq->gotipi, CFD_SEQ_NOCPU, cpu, CFD_SEQ_GOTIPI, data, &n_data, now);
- cfd_seq_data_add(seq->handle, CFD_SEQ_NOCPU, cpu, CFD_SEQ_HANDLE, data, &n_data, now);
- cfd_seq_data_add(seq->dequeue, CFD_SEQ_NOCPU, cpu, CFD_SEQ_DEQUEUE, data, &n_data, now);
- cfd_seq_data_add(seq->hdlend, CFD_SEQ_NOCPU, cpu, CFD_SEQ_HDLEND, data, &n_data, now);
-
- for (i = 0; i < n_data; i++) {
- pr_alert("\tcsd: cnt(%07x): %04x->%04x %s\n",
- data[i].u.cnt & ~0x80000000U, data[i].u.src,
- data[i].u.dst, csd_lock_get_type(data[i].u.type));
- }
- pr_alert("\tcsd: cnt now: %07x\n", now);
-}
-
/*
* Complain if too much time spent waiting. Note that only
* the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
@@ -368,8 +221,6 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
*bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
}
if (cpu >= 0) {
- if (static_branch_unlikely(&csdlock_debug_extended))
- csd_lock_print_extended(csd, cpu);
dump_cpu_task(cpu);
if (!cpu_cur_csd) {
pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
@@ -412,27 +263,7 @@ static __always_inline void csd_lock_wait(struct __call_single_data *csd)
smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
}
-
-static void __smp_call_single_queue_debug(int cpu, struct llist_node *node)
-{
- unsigned int this_cpu = smp_processor_id();
- struct cfd_seq_local *seq = this_cpu_ptr(&cfd_seq_local);
- struct call_function_data *cfd = this_cpu_ptr(&cfd_data);
- struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu);
-
- cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE);
- if (llist_add(node, &per_cpu(call_single_queue, cpu))) {
- cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI);
- cfd_seq_store(seq->ping, this_cpu, cpu, CFD_SEQ_PING);
- send_call_function_single_ipi(cpu);
- cfd_seq_store(seq->pinged, this_cpu, cpu, CFD_SEQ_PINGED);
- } else {
- cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI);
- }
-}
#else
-#define cfd_seq_store(var, src, dst, type)
-
static void csd_lock_record(struct __call_single_data *csd)
{
}
@@ -470,19 +301,6 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
void __smp_call_single_queue(int cpu, struct llist_node *node)
{
-#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
- if (static_branch_unlikely(&csdlock_debug_extended)) {
- unsigned int type;
-
- type = CSD_TYPE(container_of(node, call_single_data_t,
- node.llist));
- if (type == CSD_TYPE_SYNC || type == CSD_TYPE_ASYNC) {
- __smp_call_single_queue_debug(cpu, node);
- return;
- }
- }
-#endif
-
/*
* The list addition should be visible before sending the IPI
* handler locks the list to pull the entry off it because of
@@ -541,8 +359,6 @@ static int generic_exec_single(int cpu, struct __call_single_data *csd)
*/
void generic_smp_call_function_single_interrupt(void)
{
- cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->gotipi, CFD_SEQ_NOCPU,
- smp_processor_id(), CFD_SEQ_GOTIPI);
__flush_smp_call_function_queue(true);
}
@@ -570,13 +386,7 @@ static void __flush_smp_call_function_queue(bool warn_cpu_offline)
lockdep_assert_irqs_disabled();
head = this_cpu_ptr(&call_single_queue);
- cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->handle, CFD_SEQ_NOCPU,
- smp_processor_id(), CFD_SEQ_HANDLE);
entry = llist_del_all(head);
- cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->dequeue,
- /* Special meaning of source cpu: 0 == queue empty */
- entry ? CFD_SEQ_NOCPU : 0,
- smp_processor_id(), CFD_SEQ_DEQUEUE);
entry = llist_reverse_order(entry);
/* There shouldn't be any pending callbacks on an offline CPU. */
@@ -635,12 +445,8 @@ static void __flush_smp_call_function_queue(bool warn_cpu_offline)
}
}
- if (!entry) {
- cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->hdlend,
- 0, smp_processor_id(),
- CFD_SEQ_HDLEND);
+ if (!entry)
return;
- }
/*
* Second; run all !SYNC callbacks.
@@ -678,9 +484,6 @@ static void __flush_smp_call_function_queue(bool warn_cpu_offline)
*/
if (entry)
sched_ttwu_pending(entry);
-
- cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->hdlend, CFD_SEQ_NOCPU,
- smp_processor_id(), CFD_SEQ_HDLEND);
}
@@ -704,8 +507,6 @@ void flush_smp_call_function_queue(void)
if (llist_empty(this_cpu_ptr(&call_single_queue)))
return;
- cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU,
- smp_processor_id(), CFD_SEQ_IDLE);
local_irq_save(flags);
/* Get the already pending soft interrupts for RT enabled kernels */
was_pending = local_softirq_pending();
@@ -929,8 +730,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
cpumask_clear(cfd->cpumask_ipi);
for_each_cpu(cpu, cfd->cpumask) {
- struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu);
- call_single_data_t *csd = &pcpu->csd;
+ call_single_data_t *csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd;
if (cond_func && !cond_func(cpu, info))
continue;
@@ -944,20 +744,13 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
csd->node.src = smp_processor_id();
csd->node.dst = cpu;
#endif
- cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE);
if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) {
__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
nr_cpus++;
last_cpu = cpu;
-
- cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI);
- } else {
- cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI);
}
}
- cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->ping, this_cpu, CFD_SEQ_NOCPU, CFD_SEQ_PING);
-
/*
* Choose the most efficient way to send an IPI. Note that the
* number of CPUs might be zero due to concurrent changes to the
@@ -967,8 +760,6 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
send_call_function_single_ipi(last_cpu);
else if (likely(nr_cpus > 1))
arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
-
- cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->pinged, this_cpu, CFD_SEQ_NOCPU, CFD_SEQ_PINGED);
}
if (run_local && (!cond_func || cond_func(this_cpu, info))) {
--
2.40.0.rc2
On 21.03.23 01:54, Paul E. McKenney wrote:
> Hello!
>
> This series removes CSD-lock diagnostics that were once very useful
> but which have not seen much action since. It also adjusts Kconfig and
> kernel-boot-parameter setup.
>
> 1. locking/csd_lock: Add Kconfig option for csd_debug default.
>
> 2. locking/csd_lock: Remove added data from CSD lock debugging.
>
> 3. locking/csd_lock: Remove per-CPU data indirection from CSD
> lock debugging.
>
> 4. kernel/smp: Make csdlock_debug= resettable.
>
> Thanx, Paul
>
> ------------------------------------------------------------------------
>
> Documentation/admin-guide/kernel-parameters.txt | 17 -
> b/Documentation/admin-guide/kernel-parameters.txt | 6
> b/kernel/smp.c | 2
> b/lib/Kconfig.debug | 9
> kernel/smp.c | 260 ++--------------------
> 5 files changed, 47 insertions(+), 247 deletions(-)
For the series:
Acked-by: Juergen Gross <[email protected]>
Juergen
On Mon, Mar 20, 2023 at 05:54:39PM -0700, Paul E. McKenney wrote:
> Hello!
>
> This series removes CSD-lock diagnostics that were once very useful
> but which have not seen much action since. It also adjusts Kconfig and
> kernel-boot-parameter setup.
>
> 1. locking/csd_lock: Add Kconfig option for csd_debug default.
>
> 2. locking/csd_lock: Remove added data from CSD lock debugging.
>
> 3. locking/csd_lock: Remove per-CPU data indirection from CSD
> lock debugging.
>
> 4. kernel/smp: Make csdlock_debug= resettable.
>
> Thanx, Paul
>
> ------------------------------------------------------------------------
>
> Documentation/admin-guide/kernel-parameters.txt | 17 -
> b/Documentation/admin-guide/kernel-parameters.txt | 6
> b/kernel/smp.c | 2
> b/lib/Kconfig.debug | 9
> kernel/smp.c | 260 ++--------------------
> 5 files changed, 47 insertions(+), 247 deletions(-)
Yay!! How do you want to route these, should I take them through tip?
What about the rest of the thing? Your commits seem to suggest it's
still actually used -- why? Are there still more virt bugs?
On Tue, Mar 21, 2023 at 11:22:20AM +0100, Peter Zijlstra wrote:
> On Mon, Mar 20, 2023 at 05:54:39PM -0700, Paul E. McKenney wrote:
> > Hello!
> >
> > This series removes CSD-lock diagnostics that were once very useful
> > but which have not seen much action since. It also adjusts Kconfig and
> > kernel-boot-parameter setup.
> >
> > 1. locking/csd_lock: Add Kconfig option for csd_debug default.
> >
> > 2. locking/csd_lock: Remove added data from CSD lock debugging.
> >
> > 3. locking/csd_lock: Remove per-CPU data indirection from CSD
> > lock debugging.
> >
> > 4. kernel/smp: Make csdlock_debug= resettable.
> >
> > Thanx, Paul
> >
> > ------------------------------------------------------------------------
> >
> > Documentation/admin-guide/kernel-parameters.txt | 17 -
> > b/Documentation/admin-guide/kernel-parameters.txt | 6
> > b/kernel/smp.c | 2
> > b/lib/Kconfig.debug | 9
> > kernel/smp.c | 260 ++--------------------
> > 5 files changed, 47 insertions(+), 247 deletions(-)
>
> Yay!! How do you want to route these, should I take them through tip?
Either way works for me. If you take them into -tip, I will drop them
from -rcu. If you don't take them into -tip, I will send Linus a pull
request for the upcoming merge window. And if you take them at just
the wrong time, we will both send them to Linus. ;-)
Your choice!
> What about the rest of the thing? Your commits seem to suggest it's
> still actually used -- why? Are there still more virt bugs?
Thus far, no luck. I proposed ditching some of the stack traces, but
that got shot down.
These find the following issues: (1) CPU looping with interrupts
disabled. (2) CPU stuck in a longer-than-average SMI handler or other
firmware sand trap. (3) CPU fail-stopped.
In theory, we could drop the RCU CPU stall warning to five seconds and
catch this same stuff. Unfortunately, in practice, there would need to
be lots of churn from CPUs looping with preemption disabled. Which we
still get from time to time even at 21 seconds.
NMIs can be used to deal with #1, and the hard lockup detector in fact
sort of does this. But these are not helpful for #2 and #3.
So nothing yet, but I am still looking for improved diagnostics.
Thanx, Paul
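To make case (1) above concrete, here is a hypothetical kernel-context
sketch (not code from this series): a CPU looping with interrupts
disabled never services the call-function IPI, so a synchronous
cross-CPU call to it stalls in csd_lock_wait() and, with csdlock_debug
enabled, produces the hang report once csd_lock_timeout expires.

/*
 * Hypothetical illustration only.  bad_cpu_loop() runs on the
 * offending CPU and never re-enables interrupts, so it never handles
 * the call-function IPI.  poke_cpu(), running elsewhere with wait == 1,
 * then spins in csd_lock_wait() -- exactly the situation the
 * csdlock_debug diagnostics complain about.
 */
#include <linux/smp.h>
#include <linux/irqflags.h>

static void csd_noop(void *info)
{
}

/* Case (1): the offending CPU loops with interrupts off. */
static void bad_cpu_loop(void)
{
	local_irq_disable();
	for (;;)
		;	/* never re-enables interrupts, never handles IPIs */
}

/* Hangs waiting for the offending CPU to take the IPI. */
static void poke_cpu(int offending_cpu)
{
	/* wait == 1: blocks in csd_lock_wait() until the IPI is handled */
	smp_call_function_single(offending_cpu, csd_noop, NULL, 1);
}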
On Tue, Mar 21, 2023 at 08:38:50AM -0700, Paul E. McKenney wrote:
> On Tue, Mar 21, 2023 at 11:22:20AM +0100, Peter Zijlstra wrote:
> > On Mon, Mar 20, 2023 at 05:54:39PM -0700, Paul E. McKenney wrote:
> > > Hello!
> > >
> > > This series removes CSD-lock diagnostics that were once very useful
> > > but which have not seen much action since. It also adjusts Kconfig and
> > > kernel-boot-parameter setup.
> > >
> > > 1. locking/csd_lock: Add Kconfig option for csd_debug default.
> > >
> > > 2. locking/csd_lock: Remove added data from CSD lock debugging.
> > >
> > > 3. locking/csd_lock: Remove per-CPU data indirection from CSD
> > > lock debugging.
> > >
> > > 4. kernel/smp: Make csdlock_debug= resettable.
> > >
> > > Thanx, Paul
> > >
> > > ------------------------------------------------------------------------
> > >
> > > Documentation/admin-guide/kernel-parameters.txt | 17 -
> > > b/Documentation/admin-guide/kernel-parameters.txt | 6
> > > b/kernel/smp.c | 2
> > > b/lib/Kconfig.debug | 9
> > > kernel/smp.c | 260 ++--------------------
> > > 5 files changed, 47 insertions(+), 247 deletions(-)
> >
> > Yay!! How do you want to route these, should I take them through tip?
>
> Either way works for me. If you take them into -tip, I will drop them
> from -rcu. If you don't take them into -tip, I will send Linus a pull
> request for the upcoming merge window. And if you take them at just
> the wrong time, we will both send them to Linus. ;-)
>
> Your choice!
OK, since they conflict a wee bit with Valentin's IPI tracepoint stuff,
I'm probably going to take both these series, let me stomp on the
conflict and feed it to the robots to see how bad it all gets :-)
I'll let you know if/when I push them to -tip.
On Wed, Mar 22, 2023 at 10:57:19AM +0100, Peter Zijlstra wrote:
> On Tue, Mar 21, 2023 at 08:38:50AM -0700, Paul E. McKenney wrote:
> > On Tue, Mar 21, 2023 at 11:22:20AM +0100, Peter Zijlstra wrote:
> > > On Mon, Mar 20, 2023 at 05:54:39PM -0700, Paul E. McKenney wrote:
> > > > Hello!
> > > >
> > > > This series removes CSD-lock diagnostics that were once very useful
> > > > but which have not seen much action since. It also adjusts Kconfig and
> > > > kernel-boot-parameter setup.
> > > >
> > > > 1. locking/csd_lock: Add Kconfig option for csd_debug default.
> > > >
> > > > 2. locking/csd_lock: Remove added data from CSD lock debugging.
> > > >
> > > > 3. locking/csd_lock: Remove per-CPU data indirection from CSD
> > > > lock debugging.
> > > >
> > > > 4. kernel/smp: Make csdlock_debug= resettable.
> > > >
> > > > Thanx, Paul
> > > >
> > > > ------------------------------------------------------------------------
> > > >
> > > > Documentation/admin-guide/kernel-parameters.txt | 17 -
> > > > b/Documentation/admin-guide/kernel-parameters.txt | 6
> > > > b/kernel/smp.c | 2
> > > > b/lib/Kconfig.debug | 9
> > > > kernel/smp.c | 260 ++--------------------
> > > > 5 files changed, 47 insertions(+), 247 deletions(-)
> > >
> > > Yay!! How do you want to route these, should I take them through tip?
> >
> > Either way works for me. If you take them into -tip, I will drop them
> > from -rcu. If you don't take them into -tip, I will send Linus a pull
> > request for the upcoming merge window. And if you take them at just
> > the wrong time, we will both send them to Linus. ;-)
> >
> > Your choice!
>
> OK, since they conflict a wee bit with Valentin's IPI tracepoint stuff,
> I'm probably going to take both these series, let me stomp on the
> conflict and feed it to the robots to see how bad it all gets :-)
>
> I'll let you know if/when i'll push them to -tip.
Works for me! I will keep them in the meantime, until told otherwise
by tip-bot.
Thanx, Paul
The following commit has been merged into the smp/core branch of tip:
Commit-ID: 6366d062e7f97499409979f23f4107a6c45edb04
Gitweb: https://git.kernel.org/tip/6366d062e7f97499409979f23f4107a6c45edb04
Author: Paul E. McKenney <[email protected]>
AuthorDate: Mon, 20 Mar 2023 17:55:15 -07:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Fri, 24 Mar 2023 11:01:26 +01:00
locking/csd_lock: Remove per-CPU data indirection from CSD lock debugging
The diagnostics added by this commit were extremely useful in one instance:
a5aabace5fb8 ("locking/csd_lock: Add more data to CSD lock debugging")
However, they have not seen much action since, and there have been some
concerns expressed that the complexity is not worth the benefit.
Therefore, manually revert the following preparatory commit:
de7b09ef658d ("locking/csd_lock: Prepare more CSD lock debugging")
Signed-off-by: Paul E. McKenney <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Acked-by: Juergen Gross <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/smp.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/kernel/smp.c b/kernel/smp.c
index 038d666..7a85bcd 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -31,12 +31,8 @@
#define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)
-struct cfd_percpu {
- call_single_data_t csd;
-};
-
struct call_function_data {
- struct cfd_percpu __percpu *pcpu;
+ call_single_data_t __percpu *csd;
cpumask_var_t cpumask;
cpumask_var_t cpumask_ipi;
};
@@ -59,8 +55,8 @@ int smpcfd_prepare_cpu(unsigned int cpu)
free_cpumask_var(cfd->cpumask);
return -ENOMEM;
}
- cfd->pcpu = alloc_percpu(struct cfd_percpu);
- if (!cfd->pcpu) {
+ cfd->csd = alloc_percpu(call_single_data_t);
+ if (!cfd->csd) {
free_cpumask_var(cfd->cpumask);
free_cpumask_var(cfd->cpumask_ipi);
return -ENOMEM;
@@ -75,7 +71,7 @@ int smpcfd_dead_cpu(unsigned int cpu)
free_cpumask_var(cfd->cpumask);
free_cpumask_var(cfd->cpumask_ipi);
- free_percpu(cfd->pcpu);
+ free_percpu(cfd->csd);
return 0;
}
@@ -730,7 +726,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
cpumask_clear(cfd->cpumask_ipi);
for_each_cpu(cpu, cfd->cpumask) {
- call_single_data_t *csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd;
+ call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
if (cond_func && !cond_func(cpu, info))
continue;
@@ -774,7 +770,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
for_each_cpu(cpu, cfd->cpumask) {
call_single_data_t *csd;
- csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd;
+ csd = per_cpu_ptr(cfd->csd, cpu);
csd_lock_wait(csd);
}
}
The following commit has been merged into the smp/core branch of tip:
Commit-ID: c52198601695851622f361d3f16456e9fc857629
Gitweb: https://git.kernel.org/tip/c52198601695851622f361d3f16456e9fc857629
Author: Paul E. McKenney <[email protected]>
AuthorDate: Mon, 20 Mar 2023 17:55:13 -07:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Fri, 24 Mar 2023 11:01:25 +01:00
locking/csd_lock: Add Kconfig option for csd_debug default
The csd_debug kernel parameter works well, but is inconvenient in cases
where it is more closely associated with boot loaders or automation than
with a particular kernel version or release. Therefore, provide a new
CSD_LOCK_WAIT_DEBUG_DEFAULT Kconfig option that defaults csd_debug to
1 when selected and 0 otherwise, with the latter being the default.
Signed-off-by: Paul E. McKenney <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Acked-by: Juergen Gross <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
Documentation/admin-guide/kernel-parameters.txt | 6 ++++--
kernel/smp.c | 2 +-
lib/Kconfig.debug | 9 +++++++++
3 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6221a1d..ce70777 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -893,8 +893,10 @@
handling. When switched on, additional debug data is
printed to the console in case a hanging CPU is
detected, and that CPU is pinged again in order to try
- to resolve the hang situation.
- 0: disable csdlock debugging (default)
+ to resolve the hang situation. The default value of
+ this option depends on the CSD_LOCK_WAIT_DEBUG_DEFAULT
+ Kconfig option.
+ 0: disable csdlock debugging
1: enable basic csdlock debugging (minor impact)
ext: enable extended csdlock debugging (more impact,
but more data)
diff --git a/kernel/smp.c b/kernel/smp.c
index 06a4139..e2d558f 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -158,7 +158,7 @@ void __init call_function_init(void)
#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
-static DEFINE_STATIC_KEY_FALSE(csdlock_debug_enabled);
+static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug_enabled);
static DEFINE_STATIC_KEY_FALSE(csdlock_debug_extended);
static int __init csdlock_debug(char *str)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c8b379e..e1b160a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1480,6 +1480,15 @@ config CSD_LOCK_WAIT_DEBUG
include the IPI handler function currently executing (if any)
and relevant stack traces.
+config CSD_LOCK_WAIT_DEBUG_DEFAULT
+ bool "Default csd_lock_wait() debugging on at boot time"
+ depends on CSD_LOCK_WAIT_DEBUG
+ depends on 64BIT
+ default n
+ help
+ This option causes the csdlock_debug= kernel boot parameter to
+ default to 1 (basic debugging) instead of 0 (no debugging).
+
endmenu # lock debugging
config TRACE_IRQFLAGS
The following commit has been merged into the smp/core branch of tip:
Commit-ID: 203e435844734cfa503cd1755f35db2514db5cca
Gitweb: https://git.kernel.org/tip/203e435844734cfa503cd1755f35db2514db5cca
Author: Paul E. McKenney <[email protected]>
AuthorDate: Mon, 20 Mar 2023 17:55:16 -07:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Fri, 24 Mar 2023 11:01:26 +01:00
kernel/smp: Make csdlock_debug= resettable
It is currently possible to set the csdlock_debug_enabled static
branch, but not to reset it. This is an issue when several different
entities supply kernel boot parameters and also for kernels built with
CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT=y.
Therefore, make the csdlock_debug=0 kernel boot parameter turn off
debugging. Last one wins!
Reported-by: Jes Sorensen <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Acked-by: Juergen Gross <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
Documentation/admin-guide/kernel-parameters.txt | 13 +++++++------
kernel/smp.c | 11 ++++++++---
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b15198a..5f2ec4b 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -889,12 +889,13 @@
cs89x0_media= [HW,NET]
Format: { rj45 | aui | bnc }
- csdlock_debug= [KNL] Enable debug add-ons of cross-CPU function call
- handling. When switched on, additional debug data is
- printed to the console in case a hanging CPU is
- detected, and that CPU is pinged again in order to try
- to resolve the hang situation. The default value of
- this option depends on the CSD_LOCK_WAIT_DEBUG_DEFAULT
+ csdlock_debug= [KNL] Enable or disable debug add-ons of cross-CPU
+ function call handling. When switched on,
+ additional debug data is printed to the console
+ in case a hanging CPU is detected, and that
+ CPU is pinged again in order to try to resolve
+ the hang situation. The default value of this
+ option depends on the CSD_LOCK_WAIT_DEBUG_DEFAULT
Kconfig option.
dasd= [HW,NET]
diff --git a/kernel/smp.c b/kernel/smp.c
index 7a85bcd..298ba75 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -116,11 +116,16 @@ static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug
*/
static int __init csdlock_debug(char *str)
{
+ int ret;
unsigned int val = 0;
- get_option(&str, &val);
- if (val)
- static_branch_enable(&csdlock_debug_enabled);
+ ret = get_option(&str, &val);
+ if (ret) {
+ if (val)
+ static_branch_enable(&csdlock_debug_enabled);
+ else
+ static_branch_disable(&csdlock_debug_enabled);
+ }
return 1;
}
The following commit has been merged into the smp/core branch of tip:
Commit-ID: 1771257cb447a7b27a15ed9aaf332726c47fcbcf
Gitweb: https://git.kernel.org/tip/1771257cb447a7b27a15ed9aaf332726c47fcbcf
Author: Paul E. McKenney <[email protected]>
AuthorDate: Mon, 20 Mar 2023 17:55:14 -07:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Fri, 24 Mar 2023 11:01:25 +01:00
locking/csd_lock: Remove added data from CSD lock debugging
The diagnostics added by this commit were extremely useful in one instance:
a5aabace5fb8 ("locking/csd_lock: Add more data to CSD lock debugging")
However, they have not seen much action since, and there have been some
concerns expressed that the complexity is not worth the benefit.
Therefore, manually revert this commit, but leave a comment telling
people where to find these diagnostics.
[ paulmck: Apply Juergen Gross feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Acked-by: Juergen Gross <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
Documentation/admin-guide/kernel-parameters.txt | 4 +-
kernel/smp.c | 233 +---------------
2 files changed, 12 insertions(+), 225 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index ce70777..b15198a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -896,10 +896,6 @@
to resolve the hang situation. The default value of
this option depends on the CSD_LOCK_WAIT_DEBUG_DEFAULT
Kconfig option.
- 0: disable csdlock debugging
- 1: enable basic csdlock debugging (minor impact)
- ext: enable extended csdlock debugging (more impact,
- but more data)
dasd= [HW,NET]
See header of drivers/s390/block/dasd_devmap.c.
diff --git a/kernel/smp.c b/kernel/smp.c
index e2d558f..038d666 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -31,59 +31,8 @@
#define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)
-#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
-union cfd_seq_cnt {
- u64 val;
- struct {
- u64 src:16;
- u64 dst:16;
-#define CFD_SEQ_NOCPU 0xffff
- u64 type:4;
-#define CFD_SEQ_QUEUE 0
-#define CFD_SEQ_IPI 1
-#define CFD_SEQ_NOIPI 2
-#define CFD_SEQ_PING 3
-#define CFD_SEQ_PINGED 4
-#define CFD_SEQ_HANDLE 5
-#define CFD_SEQ_DEQUEUE 6
-#define CFD_SEQ_IDLE 7
-#define CFD_SEQ_GOTIPI 8
-#define CFD_SEQ_HDLEND 9
- u64 cnt:28;
- } u;
-};
-
-static char *seq_type[] = {
- [CFD_SEQ_QUEUE] = "queue",
- [CFD_SEQ_IPI] = "ipi",
- [CFD_SEQ_NOIPI] = "noipi",
- [CFD_SEQ_PING] = "ping",
- [CFD_SEQ_PINGED] = "pinged",
- [CFD_SEQ_HANDLE] = "handle",
- [CFD_SEQ_DEQUEUE] = "dequeue (src CPU 0 == empty)",
- [CFD_SEQ_IDLE] = "idle",
- [CFD_SEQ_GOTIPI] = "gotipi",
- [CFD_SEQ_HDLEND] = "hdlend (src CPU 0 == early)",
-};
-
-struct cfd_seq_local {
- u64 ping;
- u64 pinged;
- u64 handle;
- u64 dequeue;
- u64 idle;
- u64 gotipi;
- u64 hdlend;
-};
-#endif
-
struct cfd_percpu {
call_single_data_t csd;
-#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
- u64 seq_queue;
- u64 seq_ipi;
- u64 seq_noipi;
-#endif
};
struct call_function_data {
@@ -159,18 +108,21 @@ void __init call_function_init(void)
#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug_enabled);
-static DEFINE_STATIC_KEY_FALSE(csdlock_debug_extended);
+/*
+ * Parse the csdlock_debug= kernel boot parameter.
+ *
+ * If you need to restore the old "ext" value that once provided
+ * additional debugging information, reapply the following commits:
+ *
+ * de7b09ef658d ("locking/csd_lock: Prepare more CSD lock debugging")
+ * a5aabace5fb8 ("locking/csd_lock: Add more data to CSD lock debugging")
+ */
static int __init csdlock_debug(char *str)
{
unsigned int val = 0;
- if (str && !strcmp(str, "ext")) {
- val = 1;
- static_branch_enable(&csdlock_debug_extended);
- } else
- get_option(&str, &val);
-
+ get_option(&str, &val);
if (val)
static_branch_enable(&csdlock_debug_enabled);
@@ -181,36 +133,11 @@ __setup("csdlock_debug=", csdlock_debug);
static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
static DEFINE_PER_CPU(void *, cur_csd_info);
-static DEFINE_PER_CPU(struct cfd_seq_local, cfd_seq_local);
static ulong csd_lock_timeout = 5000; /* CSD lock timeout in milliseconds. */
module_param(csd_lock_timeout, ulong, 0444);
static atomic_t csd_bug_count = ATOMIC_INIT(0);
-static u64 cfd_seq;
-
-#define CFD_SEQ(s, d, t, c) \
- (union cfd_seq_cnt){ .u.src = s, .u.dst = d, .u.type = t, .u.cnt = c }
-
-static u64 cfd_seq_inc(unsigned int src, unsigned int dst, unsigned int type)
-{
- union cfd_seq_cnt new, old;
-
- new = CFD_SEQ(src, dst, type, 0);
-
- do {
- old.val = READ_ONCE(cfd_seq);
- new.u.cnt = old.u.cnt + 1;
- } while (cmpxchg(&cfd_seq, old.val, new.val) != old.val);
-
- return old.val;
-}
-
-#define cfd_seq_store(var, src, dst, type) \
- do { \
- if (static_branch_unlikely(&csdlock_debug_extended)) \
- var = cfd_seq_inc(src, dst, type); \
- } while (0)
/* Record current CSD work for current CPU, NULL to erase. */
static void __csd_lock_record(struct __call_single_data *csd)
@@ -244,80 +171,6 @@ static int csd_lock_wait_getcpu(struct __call_single_data *csd)
return -1;
}
-static void cfd_seq_data_add(u64 val, unsigned int src, unsigned int dst,
- unsigned int type, union cfd_seq_cnt *data,
- unsigned int *n_data, unsigned int now)
-{
- union cfd_seq_cnt new[2];
- unsigned int i, j, k;
-
- new[0].val = val;
- new[1] = CFD_SEQ(src, dst, type, new[0].u.cnt + 1);
-
- for (i = 0; i < 2; i++) {
- if (new[i].u.cnt <= now)
- new[i].u.cnt |= 0x80000000U;
- for (j = 0; j < *n_data; j++) {
- if (new[i].u.cnt == data[j].u.cnt) {
- /* Direct read value trumps generated one. */
- if (i == 0)
- data[j].val = new[i].val;
- break;
- }
- if (new[i].u.cnt < data[j].u.cnt) {
- for (k = *n_data; k > j; k--)
- data[k].val = data[k - 1].val;
- data[j].val = new[i].val;
- (*n_data)++;
- break;
- }
- }
- if (j == *n_data) {
- data[j].val = new[i].val;
- (*n_data)++;
- }
- }
-}
-
-static const char *csd_lock_get_type(unsigned int type)
-{
- return (type >= ARRAY_SIZE(seq_type)) ? "?" : seq_type[type];
-}
-
-static void csd_lock_print_extended(struct __call_single_data *csd, int cpu)
-{
- struct cfd_seq_local *seq = &per_cpu(cfd_seq_local, cpu);
- unsigned int srccpu = csd->node.src;
- struct call_function_data *cfd = per_cpu_ptr(&cfd_data, srccpu);
- struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu);
- unsigned int now;
- union cfd_seq_cnt data[2 * ARRAY_SIZE(seq_type)];
- unsigned int n_data = 0, i;
-
- data[0].val = READ_ONCE(cfd_seq);
- now = data[0].u.cnt;
-
- cfd_seq_data_add(pcpu->seq_queue, srccpu, cpu, CFD_SEQ_QUEUE, data, &n_data, now);
- cfd_seq_data_add(pcpu->seq_ipi, srccpu, cpu, CFD_SEQ_IPI, data, &n_data, now);
- cfd_seq_data_add(pcpu->seq_noipi, srccpu, cpu, CFD_SEQ_NOIPI, data, &n_data, now);
-
- cfd_seq_data_add(per_cpu(cfd_seq_local.ping, srccpu), srccpu, CFD_SEQ_NOCPU, CFD_SEQ_PING, data, &n_data, now);
- cfd_seq_data_add(per_cpu(cfd_seq_local.pinged, srccpu), srccpu, CFD_SEQ_NOCPU, CFD_SEQ_PINGED, data, &n_data, now);
-
- cfd_seq_data_add(seq->idle, CFD_SEQ_NOCPU, cpu, CFD_SEQ_IDLE, data, &n_data, now);
- cfd_seq_data_add(seq->gotipi, CFD_SEQ_NOCPU, cpu, CFD_SEQ_GOTIPI, data, &n_data, now);
- cfd_seq_data_add(seq->handle, CFD_SEQ_NOCPU, cpu, CFD_SEQ_HANDLE, data, &n_data, now);
- cfd_seq_data_add(seq->dequeue, CFD_SEQ_NOCPU, cpu, CFD_SEQ_DEQUEUE, data, &n_data, now);
- cfd_seq_data_add(seq->hdlend, CFD_SEQ_NOCPU, cpu, CFD_SEQ_HDLEND, data, &n_data, now);
-
- for (i = 0; i < n_data; i++) {
- pr_alert("\tcsd: cnt(%07x): %04x->%04x %s\n",
- data[i].u.cnt & ~0x80000000U, data[i].u.src,
- data[i].u.dst, csd_lock_get_type(data[i].u.type));
- }
- pr_alert("\tcsd: cnt now: %07x\n", now);
-}
-
/*
* Complain if too much time spent waiting. Note that only
* the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
@@ -368,8 +221,6 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
*bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
}
if (cpu >= 0) {
- if (static_branch_unlikely(&csdlock_debug_extended))
- csd_lock_print_extended(csd, cpu);
dump_cpu_task(cpu);
if (!cpu_cur_csd) {
pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
@@ -412,27 +263,7 @@ static __always_inline void csd_lock_wait(struct __call_single_data *csd)
smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
}
-
-static void __smp_call_single_queue_debug(int cpu, struct llist_node *node)
-{
- unsigned int this_cpu = smp_processor_id();
- struct cfd_seq_local *seq = this_cpu_ptr(&cfd_seq_local);
- struct call_function_data *cfd = this_cpu_ptr(&cfd_data);
- struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu);
-
- cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE);
- if (llist_add(node, &per_cpu(call_single_queue, cpu))) {
- cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI);
- cfd_seq_store(seq->ping, this_cpu, cpu, CFD_SEQ_PING);
- send_call_function_single_ipi(cpu);
- cfd_seq_store(seq->pinged, this_cpu, cpu, CFD_SEQ_PINGED);
- } else {
- cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI);
- }
-}
#else
-#define cfd_seq_store(var, src, dst, type)
-
static void csd_lock_record(struct __call_single_data *csd)
{
}
@@ -470,19 +301,6 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
void __smp_call_single_queue(int cpu, struct llist_node *node)
{
-#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
- if (static_branch_unlikely(&csdlock_debug_extended)) {
- unsigned int type;
-
- type = CSD_TYPE(container_of(node, call_single_data_t,
- node.llist));
- if (type == CSD_TYPE_SYNC || type == CSD_TYPE_ASYNC) {
- __smp_call_single_queue_debug(cpu, node);
- return;
- }
- }
-#endif
-
/*
* The list addition should be visible before sending the IPI
* handler locks the list to pull the entry off it because of
@@ -541,8 +359,6 @@ static int generic_exec_single(int cpu, struct __call_single_data *csd)
*/
void generic_smp_call_function_single_interrupt(void)
{
- cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->gotipi, CFD_SEQ_NOCPU,
- smp_processor_id(), CFD_SEQ_GOTIPI);
__flush_smp_call_function_queue(true);
}
@@ -570,13 +386,7 @@ static void __flush_smp_call_function_queue(bool warn_cpu_offline)
lockdep_assert_irqs_disabled();
head = this_cpu_ptr(&call_single_queue);
- cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->handle, CFD_SEQ_NOCPU,
- smp_processor_id(), CFD_SEQ_HANDLE);
entry = llist_del_all(head);
- cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->dequeue,
- /* Special meaning of source cpu: 0 == queue empty */
- entry ? CFD_SEQ_NOCPU : 0,
- smp_processor_id(), CFD_SEQ_DEQUEUE);
entry = llist_reverse_order(entry);
/* There shouldn't be any pending callbacks on an offline CPU. */
@@ -635,12 +445,8 @@ static void __flush_smp_call_function_queue(bool warn_cpu_offline)
}
}
- if (!entry) {
- cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->hdlend,
- 0, smp_processor_id(),
- CFD_SEQ_HDLEND);
+ if (!entry)
return;
- }
/*
* Second; run all !SYNC callbacks.
@@ -678,9 +484,6 @@ static void __flush_smp_call_function_queue(bool warn_cpu_offline)
*/
if (entry)
sched_ttwu_pending(entry);
-
- cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->hdlend, CFD_SEQ_NOCPU,
- smp_processor_id(), CFD_SEQ_HDLEND);
}
@@ -704,8 +507,6 @@ void flush_smp_call_function_queue(void)
if (llist_empty(this_cpu_ptr(&call_single_queue)))
return;
- cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU,
- smp_processor_id(), CFD_SEQ_IDLE);
local_irq_save(flags);
/* Get the already pending soft interrupts for RT enabled kernels */
was_pending = local_softirq_pending();
@@ -929,8 +730,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
cpumask_clear(cfd->cpumask_ipi);
for_each_cpu(cpu, cfd->cpumask) {
- struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu);
- call_single_data_t *csd = &pcpu->csd;
+ call_single_data_t *csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd;
if (cond_func && !cond_func(cpu, info))
continue;
@@ -944,20 +744,13 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
csd->node.src = smp_processor_id();
csd->node.dst = cpu;
#endif
- cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE);
if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) {
__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
nr_cpus++;
last_cpu = cpu;
-
- cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI);
- } else {
- cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI);
}
}
- cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->ping, this_cpu, CFD_SEQ_NOCPU, CFD_SEQ_PING);
-
/*
* Choose the most efficient way to send an IPI. Note that the
* number of CPUs might be zero due to concurrent changes to the
@@ -967,8 +760,6 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
send_call_function_single_ipi(last_cpu);
else if (likely(nr_cpus > 1))
arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
-
- cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->pinged, this_cpu, CFD_SEQ_NOCPU, CFD_SEQ_PINGED);
}
if (run_local && (!cond_func || cond_func(this_cpu, info))) {
On Tue, Mar 28, 2023 at 08:34:46AM -0000, tip-bot2 for Paul E. McKenney wrote:
> The following commit has been merged into the smp/core branch of tip:
>
> Commit-ID: 203e435844734cfa503cd1755f35db2514db5cca
> Gitweb: https://git.kernel.org/tip/203e435844734cfa503cd1755f35db2514db5cca
> Author: Paul E. McKenney <[email protected]>
> AuthorDate: Mon, 20 Mar 2023 17:55:16 -07:00
> Committer: Peter Zijlstra <[email protected]>
> CommitterDate: Fri, 24 Mar 2023 11:01:26 +01:00
>
> kernel/smp: Make csdlock_debug= resettable
Very good, thank you! I have removed these from the -rcu tree's "dev"
branch, and if testing goes well will be sending the new version to -next.
Thanx, Paul
> It is currently possible to set the csdlock_debug_enabled static
> branch, but not to reset it. This is an issue when several different
> entities supply kernel boot parameters and also for kernels built with
> CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT=y.
>
> Therefore, make the csdlock_debug=0 kernel boot parameter turn off
> debugging. Last one wins!
>
> Reported-by: Jes Sorensen <[email protected]>
> Signed-off-by: Paul E. McKenney <[email protected]>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> Acked-by: Juergen Gross <[email protected]>
> Link: https://lore.kernel.org/r/[email protected]
> ---
> Documentation/admin-guide/kernel-parameters.txt | 13 +++++++------
> kernel/smp.c | 11 ++++++++---
> 2 files changed, 15 insertions(+), 9 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index b15198a..5f2ec4b 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -889,12 +889,13 @@
> cs89x0_media= [HW,NET]
> Format: { rj45 | aui | bnc }
>
> - csdlock_debug= [KNL] Enable debug add-ons of cross-CPU function call
> - handling. When switched on, additional debug data is
> - printed to the console in case a hanging CPU is
> - detected, and that CPU is pinged again in order to try
> - to resolve the hang situation. The default value of
> - this option depends on the CSD_LOCK_WAIT_DEBUG_DEFAULT
> + csdlock_debug= [KNL] Enable or disable debug add-ons of cross-CPU
> + function call handling. When switched on,
> + additional debug data is printed to the console
> + in case a hanging CPU is detected, and that
> + CPU is pinged again in order to try to resolve
> + the hang situation. The default value of this
> + option depends on the CSD_LOCK_WAIT_DEBUG_DEFAULT
> Kconfig option.
>
> dasd= [HW,NET]
> diff --git a/kernel/smp.c b/kernel/smp.c
> index 7a85bcd..298ba75 100644
> --- a/kernel/smp.c
> +++ b/kernel/smp.c
> @@ -116,11 +116,16 @@ static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug
> */
> static int __init csdlock_debug(char *str)
> {
> + int ret;
> unsigned int val = 0;
>
> - get_option(&str, &val);
> - if (val)
> - static_branch_enable(&csdlock_debug_enabled);
> + ret = get_option(&str, &val);
> + if (ret) {
> + if (val)
> + static_branch_enable(&csdlock_debug_enabled);
> + else
> + static_branch_disable(&csdlock_debug_enabled);
> + }
>
> return 1;
> }