Changes since RFCv3:
- Split the patch in 2: entry/exit and queue
- Fix 'struct __call_single_data' & call_single_data_t alignment issue
- Made all TP_printk follow the same pattern
Changes since RFCv2:
- Fixed some spacing issues and trace calls
Changes since RFCv1:
- Implemented trace_csd_queue_cpu() as suggested by Valentin Schneider
- Using EVENT_CLASS in order to avoid duplication
- Introduced new helper: csd_do_func()
- Name change from smp_call_function_* to csd_function_*
- Rebased on top of torvalds/master
Leonardo Bras (2):
trace,smp: Add tracepoints around remotelly called functions
trace,smp: Add tracepoints for scheduling remotelly called functions
include/trace/events/smp.h | 72 ++++++++++++++++++++++++++++++++++++++
kernel/smp.c | 41 +++++++++++++---------
2 files changed, 96 insertions(+), 17 deletions(-)
create mode 100644 include/trace/events/smp.h
--
2.40.1
The recently added ipi_send_{cpu,cpumask} tracepoints allow finding sources
of IPIs targeting CPUs running latency-sensitive applications.
For NOHZ_FULL CPUs, all IPIs are interference, and those tracepoints are
sufficient to find them and work on getting rid of them. In some setups
however, not *all* IPIs are to be suppressed, but long-running IPI
callbacks can still be problematic.
Add a pair of tracepoints to mark the start and end of processing a CSD IPI
callback, similar to what exists for softirq, workqueue or timer callbacks.
Signed-off-by: Leonardo Bras <[email protected]>
---
include/trace/events/smp.h | 45 ++++++++++++++++++++++++++++++++++++++
kernel/smp.c | 25 ++++++++++++++++-----
2 files changed, 64 insertions(+), 6 deletions(-)
create mode 100644 include/trace/events/smp.h
diff --git a/include/trace/events/smp.h b/include/trace/events/smp.h
new file mode 100644
index 000000000000..547f536e7ecd
--- /dev/null
+++ b/include/trace/events/smp.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM smp
+
+#if !defined(_TRACE_SMP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SMP_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * Tracepoints for a function which is called as an effect of smp_call_function.*
+ */
+DECLARE_EVENT_CLASS(csd_function,
+
+ TP_PROTO(smp_call_func_t func, call_single_data_t *csd),
+
+ TP_ARGS(func, csd),
+
+ TP_STRUCT__entry(
+ __field(void *, func)
+ __field(void *, csd)
+ ),
+
+ TP_fast_assign(
+ __entry->func = func;
+ __entry->csd = csd;
+ ),
+
+ TP_printk("func=%ps, csd=%p", __entry->func, __entry->csd)
+);
+
+DEFINE_EVENT(csd_function, csd_function_entry,
+ TP_PROTO(smp_call_func_t func, call_single_data_t *csd),
+ TP_ARGS(func, csd)
+);
+
+DEFINE_EVENT(csd_function, csd_function_exit,
+ TP_PROTO(smp_call_func_t func, call_single_data_t *csd),
+ TP_ARGS(func, csd)
+);
+
+#endif /* _TRACE_SMP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/smp.c b/kernel/smp.c
index 919387be6d4e..eecdd452619c 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -27,6 +27,9 @@
#include <linux/jump_label.h>
#include <trace/events/ipi.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/smp.h>
+#undef CREATE_TRACE_POINTS
#include "smpboot.h"
#include "sched/smp.h"
@@ -121,6 +124,14 @@ send_call_function_ipi_mask(struct cpumask *mask)
arch_send_call_function_ipi_mask(mask);
}
+static __always_inline void
+csd_do_func(smp_call_func_t func, void *info, struct __call_single_data *csd)
+{
+ trace_csd_function_entry(func, csd);
+ func(info);
+ trace_csd_function_exit(func, csd);
+}
+
#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug_enabled);
@@ -375,7 +386,7 @@ static int generic_exec_single(int cpu, call_single_data_t *csd)
csd_lock_record(csd);
csd_unlock(csd);
local_irq_save(flags);
- func(info);
+ csd_do_func(func, info, csd);
csd_lock_record(NULL);
local_irq_restore(flags);
return 0;
@@ -477,7 +488,7 @@ static void __flush_smp_call_function_queue(bool warn_cpu_offline)
}
csd_lock_record(csd);
- func(info);
+ csd_do_func(func, info, csd);
csd_unlock(csd);
csd_lock_record(NULL);
} else {
@@ -508,7 +519,7 @@ static void __flush_smp_call_function_queue(bool warn_cpu_offline)
csd_lock_record(csd);
csd_unlock(csd);
- func(info);
+ csd_do_func(func, info, csd);
csd_lock_record(NULL);
} else if (type == CSD_TYPE_IRQ_WORK) {
irq_work_single(csd);
@@ -522,8 +533,10 @@ static void __flush_smp_call_function_queue(bool warn_cpu_offline)
/*
* Third; only CSD_TYPE_TTWU is left, issue those.
*/
- if (entry)
- sched_ttwu_pending(entry);
+ if (entry) {
+ csd = llist_entry(entry, typeof(*csd), node.llist);
+ csd_do_func(sched_ttwu_pending, entry, csd);
+ }
}
@@ -816,7 +829,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
unsigned long flags;
local_irq_save(flags);
- func(info);
+ csd_do_func(func, info, NULL);
local_irq_restore(flags);
}
--
2.40.1
Add a tracepoint for when a CSD is queued to a remote CPU's
call_single_queue. This allows finding exactly which CPU queued a given CSD
when looking at a csd_function_{entry,exit} event, and also enables us to
accurately measure IPI delivery time with e.g. a synthetic event:
$ echo 'hist:keys=cpu,csd.hex:ts=common_timestamp.usecs' >\
/sys/kernel/tracing/events/smp/csd_queue_cpu/trigger
$ echo 'csd_latency unsigned int dst_cpu; unsigned long csd; u64 time' >\
/sys/kernel/tracing/synthetic_events
$ echo \
'hist:keys=common_cpu,csd.hex:'\
'time=common_timestamp.usecs-$ts:'\
'onmatch(smp.csd_queue_cpu).trace(csd_latency,common_cpu,csd,$time)' >\
/sys/kernel/tracing/events/smp/csd_function_entry/trigger
$ trace-cmd record -e 'synthetic:csd_latency' hackbench
$ trace-cmd report
<...>-467 [001] 21.824263: csd_queue_cpu: cpu=0 callsite=try_to_wake_up+0x2ea func=sched_ttwu_pending csd=0xffff8880076148b8
<...>-467 [001] 21.824280: ipi_send_cpu: cpu=0 callsite=try_to_wake_up+0x2ea callback=generic_smp_call_function_single_interrupt+0x0
<...>-489 [000] 21.824299: csd_function_entry: func=sched_ttwu_pending csd=0xffff8880076148b8
<...>-489 [000] 21.824320: csd_latency: dst_cpu=0, csd=18446612682193848504, time=36
Suggested-by: Valentin Schneider <[email protected]>
Signed-off-by: Leonardo Bras <[email protected]>
---
include/trace/events/smp.h | 27 +++++++++++++++++++++++++++
kernel/smp.c | 16 +++++-----------
2 files changed, 32 insertions(+), 11 deletions(-)
diff --git a/include/trace/events/smp.h b/include/trace/events/smp.h
index 547f536e7ecd..b05259284eb0 100644
--- a/include/trace/events/smp.h
+++ b/include/trace/events/smp.h
@@ -7,6 +7,33 @@
#include <linux/tracepoint.h>
+TRACE_EVENT(csd_queue_cpu,
+
+ TP_PROTO(const unsigned int cpu,
+ unsigned long callsite,
+ smp_call_func_t func,
+ call_single_data_t *csd),
+
+ TP_ARGS(cpu, callsite, func, csd),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, cpu)
+ __field(void *, callsite)
+ __field(void *, func)
+ __field(void *, csd)
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->callsite = (void *)callsite;
+ __entry->func = func;
+ __entry->csd = csd;
+ ),
+
+ TP_printk("cpu=%u callsite=%pS func=%ps csd=%p",
+ __entry->cpu, __entry->callsite, __entry->func, __entry->csd)
+ );
+
/*
* Tracepoints for a function which is called as an effect of smp_call_function.*
*/
diff --git a/kernel/smp.c b/kernel/smp.c
index eecdd452619c..e70497f1c3c6 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -340,7 +340,7 @@ void __smp_call_single_queue(int cpu, struct llist_node *node)
* even if we haven't sent the smp_call IPI yet (e.g. the stopper
* executes migration_cpu_stop() on the remote CPU).
*/
- if (trace_ipi_send_cpu_enabled()) {
+ if (trace_csd_queue_cpu_enabled()) {
call_single_data_t *csd;
smp_call_func_t func;
@@ -348,7 +348,7 @@ void __smp_call_single_queue(int cpu, struct llist_node *node)
func = CSD_TYPE(csd) == CSD_TYPE_TTWU ?
sched_ttwu_pending : csd->func;
- trace_ipi_send_cpu(cpu, _RET_IP_, func);
+ trace_csd_queue_cpu(cpu, _RET_IP_, func, csd);
}
/*
@@ -741,7 +741,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
int cpu, last_cpu, this_cpu = smp_processor_id();
struct call_function_data *cfd;
bool wait = scf_flags & SCF_WAIT;
- int nr_cpus = 0, nr_queued = 0;
+ int nr_cpus = 0;
bool run_remote = false;
bool run_local = false;
@@ -799,21 +799,15 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
csd->node.src = smp_processor_id();
csd->node.dst = cpu;
#endif
+ trace_csd_queue_cpu(cpu, _RET_IP_, func, csd);
+
if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) {
__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
nr_cpus++;
last_cpu = cpu;
}
- nr_queued++;
}
- /*
- * Trace each smp_function_call_*() as an IPI, actual IPIs
- * will be traced with func==generic_smp_call_function_single_ipi().
- */
- if (nr_queued)
- trace_ipi_send_cpumask(cfd->cpumask, _RET_IP_, func);
-
/*
* Choose the most efficient way to send an IPI. Note that the
* number of CPUs might be zero due to concurrent changes to the
--
2.40.1
On Mon, 2023-05-15 at 15:30 -0300, Leonardo Bras wrote:
> Changes since RFCv3:
> - Split the patch in 2: entry/exit and queue
> - Fix 'struct __call_single_data' & call_single_data_t alignment issue
> - Made all TP_printk follow the same pattern
>
> Changes since RFCv2:
> - Fixed some spacing issues and trace calls
>
> Changes since RFCv1:
> - Implemented trace_csd_queue_cpu() as suggested by Valentin Schneider
> - Using EVENT_CLASS in order to avoid duplication
> - Introduced new helper: csd_do_func()
> - Name change from smp_call_function_* to csd_function_*
> - Rebased on top of torvalds/master
>
> Leonardo Bras (2):
> trace,smp: Add tracepoints around remotelly called functions
> trace,smp: Add tracepoints for scheduling remotelly called functions
>
> include/trace/events/smp.h | 72 ++++++++++++++++++++++++++++++++++++++
> kernel/smp.c | 41 +++++++++++++---------
> 2 files changed, 96 insertions(+), 17 deletions(-)
> create mode 100644 include/trace/events/smp.h
>
argh, almost sure some llvm build will output some alignment warning because
csd_do_func() has an 'struct __call_single_data' argument while the tracepoints
have call_single_data_t arguments.
Other than that, it should be fine.
I will collect other suggestions before sending a v5 with this fix.
Leo
On 15/05/23 15:30, Leonardo Bras wrote:
> Changes since RFCv3:
> - Split the patch in 2: entry/exit and queue
> - Fix 'struct __call_single_data' & call_single_data_t alignment issue
> - Made all TP_printk follow the same pattern
>
One bikeshedding nit I have is I'd prefer to see the csd_* events in a csd
trace system, to have a sane nomenclature.
Other than that and the other comment on csd_do_func(), this LGTM.
> Changes since RFCv2:
> - Fixed some spacing issues and trace calls
>
> Changes since RFCv1:
> - Implemented trace_csd_queue_cpu() as suggested by Valentin Schneider
> - Using EVENT_CLASS in order to avoid duplication
> - Introduced new helper: csd_do_func()
> - Name change from smp_call_function_* to csd_function_*
> - Rebased on top of torvalds/master
>
> Leonardo Bras (2):
> trace,smp: Add tracepoints around remotelly called functions
> trace,smp: Add tracepoints for scheduling remotelly called functions
>
> include/trace/events/smp.h | 72 ++++++++++++++++++++++++++++++++++++++
> kernel/smp.c | 41 +++++++++++++---------
> 2 files changed, 96 insertions(+), 17 deletions(-)
> create mode 100644 include/trace/events/smp.h
>
> --
> 2.40.1
On 15/05/23 15:30, Leonardo Bras wrote:
> @@ -375,7 +386,7 @@ static int generic_exec_single(int cpu, call_single_data_t *csd)
> csd_lock_record(csd);
> csd_unlock(csd);
> local_irq_save(flags);
> - func(info);
> + csd_do_func(func, info, csd);
I'd suggest making this match the local case of
smp_call_function_many_cond(), IOW pass NULL as the csd when executing
locally.
IMO this is required for postprocessing with e.g. synthetic events for CSD
delivery measurement, otherwise we'll try to match this execution with a
previous CSD enqueue.
> csd_lock_record(NULL);
> local_irq_restore(flags);
> return 0;
Hello Valentin, thanks for the feedback!
On Tue, May 30, 2023 at 7:36 AM Valentin Schneider <[email protected]> wrote:
>
> On 15/05/23 15:30, Leonardo Bras wrote:
> > @@ -375,7 +386,7 @@ static int generic_exec_single(int cpu, call_single_data_t *csd)
> > csd_lock_record(csd);
> > csd_unlock(csd);
> > local_irq_save(flags);
> > - func(info);
> > + csd_do_func(func, info, csd);
>
> I'd suggest making this match the local case of
> smp_call_function_many_cond(), IOW pass NULL as the csd when executing
> locally.
Sure, done!
>
> IMO this is required for postprocessing with e.g. synthetic events for CSD
> delivery measurement, otherwise we'll try to match this execution with a
> previous CSD enqueue.
>
> > csd_lock_record(NULL);
> > local_irq_restore(flags);
> > return 0;
>
On Tue, May 30, 2023 at 7:36 AM Valentin Schneider <[email protected]> wrote:
>
> On 15/05/23 15:30, Leonardo Bras wrote:
> > Changes since RFCv3:
> > - Split the patch in 2: entry/exit and queue
> > - Fix 'struct __call_single_data' & call_single_data_t alignment issue
> > - Made all TP_printk follow the same pattern
> >
>
> One bikeshedding nit I have is I'd prefer to see the csd_* events in a csd
> trace system, to have a sane nomenclature.
Oh, here you mean the file name? (ie: trace/events/smp.h -> trace/events/csd.h)
>
> Other than that and the other comment on csd_do_func(), this LGTM.
>
Thanks!
Other than the above question, it should be fast for me to send a v5.
Best regards,
Leo
> > Changes since RFCv2:
> > - Fixed some spacing issues and trace calls
> >
> > Changes since RFCv1:
> > - Implemented trace_csd_queue_cpu() as suggested by Valentin Schneider
> > - Using EVENT_CLASS in order to avoid duplication
> > - Introduced new helper: csd_do_func()
> > - Name change from smp_call_function_* to csd_function_*
> > - Rebased on top of torvalds/master
> >
> > Leonardo Bras (2):
> > trace,smp: Add tracepoints around remotelly called functions
> > trace,smp: Add tracepoints for scheduling remotelly called functions
> >
> > include/trace/events/smp.h | 72 ++++++++++++++++++++++++++++++++++++++
> > kernel/smp.c | 41 +++++++++++++---------
> > 2 files changed, 96 insertions(+), 17 deletions(-)
> > create mode 100644 include/trace/events/smp.h
> >
> > --
> > 2.40.1
>
On Wed, Jun 14, 2023 at 1:14 AM Leonardo Bras Soares Passos
<[email protected]> wrote:
>
> On Tue, May 30, 2023 at 7:36 AM Valentin Schneider <[email protected]> wrote:
> >
> > On 15/05/23 15:30, Leonardo Bras wrote:
> > > Changes since RFCv3:
> > > - Split the patch in 2: entry/exit and queue
> > > - Fix 'struct __call_single_data' & call_single_data_t alignment issue
> > > - Made all TP_printk follow the same pattern
> > >
> >
> > One bikeshedding nit I have is I'd prefer to see the csd_* events in a csd
> > trace system, to have a sane nomenclature.
>
> Oh, here you mean the file name? (ie: trace/events/smp.h -> trace/events/csd.h)
Or just
-#define TRACE_SYSTEM smp
+#define TRACE_SYSTEM csd
?
>
> >
> > Other than that and the other comment on csd_do_func(), this LGTM.
> >
>
> Thanks!
> Other than the above question, it should be fast for me to send a v5.
>
> Best regards,
> Leo
>
>
> > > Changes since RFCv2:
> > > - Fixed some spacing issues and trace calls
> > >
> > > Changes since RFCv1:
> > > - Implemented trace_csd_queue_cpu() as suggested by Valentin Schneider
> > > - Using EVENT_CLASS in order to avoid duplication
> > > - Introduced new helper: csd_do_func()
> > > - Name change from smp_call_function_* to csd_function_*
> > > - Rebased on top of torvalds/master
> > >
> > > Leonardo Bras (2):
> > > trace,smp: Add tracepoints around remotelly called functions
> > > trace,smp: Add tracepoints for scheduling remotelly called functions
> > >
> > > include/trace/events/smp.h | 72 ++++++++++++++++++++++++++++++++++++++
> > > kernel/smp.c | 41 +++++++++++++---------
> > > 2 files changed, 96 insertions(+), 17 deletions(-)
> > > create mode 100644 include/trace/events/smp.h
> > >
> > > --
> > > 2.40.1
> >
On 14/06/23 02:16, Leonardo Bras Soares Passos wrote:
> On Wed, Jun 14, 2023 at 1:14 AM Leonardo Bras Soares Passos
> <[email protected]> wrote:
>>
>> On Tue, May 30, 2023 at 7:36 AM Valentin Schneider <[email protected]> wrote:
>> >
>> > On 15/05/23 15:30, Leonardo Bras wrote:
>> > > Changes since RFCv3:
>> > > - Split the patch in 2: entry/exit and queue
>> > > - Fix 'struct __call_single_data' & call_single_data_t alignment issue
>> > > - Made all TP_printk follow the same pattern
>> > >
>> >
>> > One bikeshedding nit I have is I'd prefer to see the csd_* events in a csd
>> > trace system, to have a sane nomenclature.
>>
>> Oh, here you mean the file name? (ie: trace/events/smp.h -> trace/events/csd.h)
>
> Or just
> -#define TRACE_SYSTEM smp
> +#define TRACE_SYSTEM csd
> ?
>
At the very least this yes, and making the file name match that probably
wouldn't hurt.