2017-04-12 05:39:06

by Joel Fernandes

Subject: [RFC] tracing: Add support for critical section event tracing

Critical section trace events mark the start and end of a critical
section. A trace viewer such as systrace can use them to graphically
display critical sections and correlate them with latencies and
scheduling issues.

Reason for starting critical section:
Either preemption or interrupts were turned off.
Reason for ending critical section:
Both preemption and interrupts are now turned back on.

Cc: Steven Rostedt <[email protected]>
Signed-off-by: Joel Fernandes <[email protected]>
--
Here's an example of how Android's systrace will use it to show critical
sections as a Gantt chart: http://imgur.com/download/TZplEVp
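
For a rough illustration, here is a hypothetical call site (not part of
the patch) and the events each pair of calls would emit, assuming
CONFIG_TRACE_CRITICAL_SECTION_EVENTS=y:

#include <linux/irqflags.h>
#include <linux/preempt.h>

/* Hypothetical example, for illustration only. */
static void example_critical_sections(void)
{
	/* irqs off while preemption stays on: section starts */
	local_irq_disable();	/* trace_hardirqs_off() -> critical_start */
	/* ... work with interrupts disabled ... */
	local_irq_enable();	/* trace_hardirqs_on()  -> critical_end */

	/* preemption off while irqs stay on: section starts */
	preempt_disable();	/* trace_preempt_off()  -> critical_start */
	/* ... work with preemption disabled ... */
	preempt_enable();	/* trace_preempt_on()   -> critical_end */
}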
---
include/trace/events/critical.h | 55 +++++++++++++++++++++++++++++++
kernel/trace/Kconfig | 6 ++++
kernel/trace/trace_irqsoff.c | 73 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 134 insertions(+)
create mode 100644 include/trace/events/critical.h

diff --git a/include/trace/events/critical.h b/include/trace/events/critical.h
new file mode 100644
index 000000000000..bfd58dd4f48f
--- /dev/null
+++ b/include/trace/events/critical.h
@@ -0,0 +1,55 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM critical
+
+#if !defined(_TRACE_CRITICAL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CRITICAL_H
+
+#include <linux/ktime.h>
+#include <linux/tracepoint.h>
+#include <linux/string.h>
+
+TRACE_EVENT(critical_start,
+
+ TP_PROTO(unsigned long ip, unsigned long parent_ip),
+
+ TP_ARGS(ip, parent_ip),
+
+ TP_STRUCT__entry(
+ __array(char, func, 16)
+ __array(char, parent, 16)
+ ),
+
+ TP_fast_assign(
+ snprintf(__entry->func, 16, "%pf", (void *)ip);
+ snprintf(__entry->parent, 16, "%pf", (void *)parent_ip);
+ ),
+
+ TP_printk("caller=%s parent=%s\n",
+ __entry->func,
+ __entry->parent)
+);
+
+TRACE_EVENT(critical_end,
+
+ TP_PROTO(unsigned long ip, unsigned long parent_ip),
+
+ TP_ARGS(ip, parent_ip),
+
+ TP_STRUCT__entry(
+ __array(char, func, 16)
+ __array(char, parent, 16)
+ ),
+
+ TP_fast_assign(
+ snprintf(__entry->func, 16, "%pf", (void *)ip);
+ snprintf(__entry->parent, 16, "%pf", (void *)parent_ip);
+ ),
+
+ TP_printk("caller=%s parent=%s\n",
+ __entry->func,
+ __entry->parent)
+);
+#endif /* _TRACE_CRITICAL_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d5038005eb5d..986a07985360 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -208,6 +208,12 @@ config PREEMPT_TRACER
enabled. This option and the irqs-off timing option can be
used together or separately.)

+config TRACE_CRITICAL_SECTION_EVENTS
+ bool "Trace critical sections as trace events"
+ default n
+ select IRQSOFF_TRACER
+ select PREEMPT_TRACER
+
config SCHED_TRACER
bool "Scheduling Latency Tracer"
select GENERIC_TRACER
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 7758bc0617cb..a14ce6185885 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -13,6 +13,8 @@
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ftrace.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/critical.h>

#include "trace.h"

@@ -20,6 +22,7 @@ static struct trace_array *irqsoff_trace __read_mostly;
static int tracer_enabled __read_mostly;

static DEFINE_PER_CPU(int, tracing_cpu);
+static DEFINE_PER_CPU(int, tracing_events_cpu);

static DEFINE_RAW_SPINLOCK(max_trace_lock);

@@ -362,6 +365,44 @@ check_critical_timing(struct trace_array *tr,
__trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
}

+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+/*
+ * Called when either preempt or irq are turned off
+ */
+static inline void
+start_critical_event(unsigned long ip, unsigned long parent_ip)
+{
+ int cpu;
+
+ cpu = raw_smp_processor_id();
+
+ if (per_cpu(tracing_events_cpu, cpu))
+ return;
+
+ trace_critical_start(ip, parent_ip);
+
+ per_cpu(tracing_events_cpu, cpu) = 1;
+}
+
+/*
+ * Called when both preempt and irq are turned back on
+ */
+static inline void
+stop_critical_event(unsigned long ip, unsigned long parent_ip)
+{
+ int cpu;
+
+ cpu = raw_smp_processor_id();
+
+ if (unlikely(per_cpu(tracing_events_cpu, cpu)))
+ per_cpu(tracing_events_cpu, cpu) = 0;
+ else
+ return;
+
+ trace_critical_end(ip, parent_ip);
+}
+#endif
+
static inline void
start_critical_timing(unsigned long ip, unsigned long parent_ip)
{
@@ -434,6 +475,10 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
/* start and stop critical timings used for stoppage (in idle) */
void start_critical_timings(void)
{
+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+ if (irqs_disabled() || preempt_count())
+ start_critical_event(CALLER_ADDR0, CALLER_ADDR1);
+#endif
if (preempt_trace() || irq_trace())
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
@@ -441,6 +486,10 @@ EXPORT_SYMBOL_GPL(start_critical_timings);

void stop_critical_timings(void)
{
+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+ if (irqs_disabled() || preempt_count())
+ stop_critical_event(CALLER_ADDR0, CALLER_ADDR1);
+#endif
if (preempt_trace() || irq_trace())
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
@@ -483,6 +532,10 @@ inline void print_irqtrace_events(struct task_struct *curr)
*/
void trace_hardirqs_on(void)
{
+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+ if (!preempt_count())
+ stop_critical_event(CALLER_ADDR0, CALLER_ADDR1);
+#endif
if (!preempt_trace() && irq_trace())
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
@@ -490,6 +543,10 @@ EXPORT_SYMBOL(trace_hardirqs_on);

void trace_hardirqs_off(void)
{
+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+ if (!preempt_count())
+ start_critical_event(CALLER_ADDR0, CALLER_ADDR1);
+#endif
if (!preempt_trace() && irq_trace())
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
@@ -497,6 +554,10 @@ EXPORT_SYMBOL(trace_hardirqs_off);

__visible void trace_hardirqs_on_caller(unsigned long caller_addr)
{
+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+ if (!preempt_count())
+ stop_critical_event(CALLER_ADDR0, caller_addr);
+#endif
if (!preempt_trace() && irq_trace())
stop_critical_timing(CALLER_ADDR0, caller_addr);
}
@@ -504,6 +565,10 @@ EXPORT_SYMBOL(trace_hardirqs_on_caller);

__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
{
+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+ if (!preempt_count())
+ start_critical_event(CALLER_ADDR0, caller_addr);
+#endif
if (!preempt_trace() && irq_trace())
start_critical_timing(CALLER_ADDR0, caller_addr);
}
@@ -515,12 +580,20 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller);
#ifdef CONFIG_PREEMPT_TRACER
void trace_preempt_on(unsigned long a0, unsigned long a1)
{
+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+ if (!irqs_disabled())
+ stop_critical_event(a0, a1);
+#endif
if (preempt_trace() && !irq_trace())
stop_critical_timing(a0, a1);
}

void trace_preempt_off(unsigned long a0, unsigned long a1)
{
+#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
+ if (!irqs_disabled())
+ start_critical_event(a0, a1);
+#endif
if (preempt_trace() && !irq_trace())
start_critical_timing(a0, a1);
}
--
2.12.2.715.g7642488e1d-goog


2017-06-30 05:17:26

by Joel Fernandes

Subject: Re: [RFC] tracing: Add support for critical section event tracing

Hi Steven,

Did you have any comments about this patch? It was sent a while ago,
and if you can provide me your initial thoughts on it, I would
appreciate it. (Sorry to ping you about it during the busy merge
window, but I was thinking that if I can get early comments from you,
then I can get to work on it and finish well before the next merge
window.)

Regards,
Joel

On Tue, Apr 11, 2017 at 10:38 PM, Joel Fernandes <[email protected]> wrote:
> [..]

2017-06-30 12:33:16

by Steven Rostedt

Subject: Re: [RFC] tracing: Add support for critical section event tracing

On Thu, 29 Jun 2017 22:17:22 -0700
Joel Fernandes <[email protected]> wrote:

> Hi Steven,
>
> Did you have any comments about this patch? It was sent a while ago,
> and if you can provide me your initial thoughts on it, I would
> appreciate it. (Sorry to ping you about it during the busy merge
> window, but I was thinking that if I can get early comments from you,
> then I can get to work on it and finish well before the next merge
> window.)

Sorry for missing that. It's still in my INBOX with the "to look at"
marking ;-)


Actually, have you seen Tom Zanussi's patch series about histograms,
"tracing: Inter-event (e.g. latency) support"? He just recently posted
another series.

I'd like to get that in before looking at adding events into the
critical sections. But that said, I'll still take a look at your patch.

Thanks for reminding me.

-- Steve

2017-06-30 12:51:52

by Steven Rostedt

Subject: Re: [RFC] tracing: Add support for critical section event tracing

On Tue, 11 Apr 2017 22:38:51 -0700
Joel Fernandes <[email protected]> wrote:

> diff --git a/include/trace/events/critical.h b/include/trace/events/critical.h
> new file mode 100644
> index 000000000000..bfd58dd4f48f
> --- /dev/null
> +++ b/include/trace/events/critical.h
> @@ -0,0 +1,55 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM critical
> +

There should be a:

#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS

around the entire header.
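
I.e., bracket the whole file with the config option, something like
this (a sketch):

#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS

#undef TRACE_SYSTEM
#define TRACE_SYSTEM critical

/* ... the TRACE_EVENT() definitions and the define_trace.h include ... */

#endif /* CONFIG_TRACE_CRITICAL_SECTION_EVENTS */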

> +#if !defined(_TRACE_CRITICAL_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_CRITICAL_H
> +
> +#include <linux/ktime.h>
> +#include <linux/tracepoint.h>
> +#include <linux/string.h>
> +
> +TRACE_EVENT(critical_start,
> +
> + TP_PROTO(unsigned long ip, unsigned long parent_ip),
> +
> + TP_ARGS(ip, parent_ip),
> +
> + TP_STRUCT__entry(
> + __array(char, func, 16)
> + __array(char, parent, 16)

Save only the ip, not the name.

> + ),
> +
> + TP_fast_assign(
> + snprintf(__entry->func, 16, "%pf", (void *)ip);
> + snprintf(__entry->parent, 16, "%pf", (void *)parent_ip);

This is very slow to have in the fast path.

> + ),
> +
> + TP_printk("caller=%s parent=%s\n",

Put %pF in here; trace-cmd and perf know how to handle those, and then
the translation from ip to function name is done in the slow path.


> + __entry->func,
> + __entry->parent)
> +);
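
Putting those two suggestions together, the event boils down to
something like this (a sketch, untested; the field names are
illustrative):

TRACE_EVENT(critical_start,

	TP_PROTO(unsigned long ip, unsigned long parent_ip),

	TP_ARGS(ip, parent_ip),

	TP_STRUCT__entry(
		/* store raw addresses; resolving names here is slow */
		__field(unsigned long, ip)
		__field(unsigned long, parent_ip)
	),

	TP_fast_assign(
		__entry->ip = ip;
		__entry->parent_ip = parent_ip;
	),

	/* %pF resolves the symbol in the output (slow) path */
	TP_printk("caller=%pF parent=%pF",
		  (void *)__entry->ip,
		  (void *)__entry->parent_ip)
);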
> +
> +TRACE_EVENT(critical_end,
> +
> + TP_PROTO(unsigned long ip, unsigned long parent_ip),
> +
> + TP_ARGS(ip, parent_ip),
> +
> + TP_STRUCT__entry(
> + __array(char, func, 16)
> + __array(char, parent, 16)
> + ),
> +
> + TP_fast_assign(
> + snprintf(__entry->func, 16, "%pf", (void *)ip);
> + snprintf(__entry->parent, 16, "%pf", (void *)parent_ip);

Same here.

> + ),
> +
> + TP_printk("caller=%s parent=%s\n",
> + __entry->func,
> + __entry->parent)
> +);
> +#endif /* _TRACE_CRITICAL_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index d5038005eb5d..986a07985360 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -208,6 +208,12 @@ config PREEMPT_TRACER
> enabled. This option and the irqs-off timing option can be
> used together or separately.)
>
> +config TRACE_CRITICAL_SECTION_EVENTS
> + bool "Trace critical sections as trace events"
> + default n
> + select IRQSOFF_TRACER
> + select PREEMPT_TRACER

Either make TRACE_CRITICAL_SECTION_EVENTS depend on irqsoff and preempt
tracers, or change trace_irqsoff.c to be able to handle it without
those tracers enabled.

"select" should be avoided when possible.

> +
> config SCHED_TRACER
> bool "Scheduling Latency Tracer"
> select GENERIC_TRACER
> diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
> index 7758bc0617cb..a14ce6185885 100644
> --- a/kernel/trace/trace_irqsoff.c
> +++ b/kernel/trace/trace_irqsoff.c
> @@ -13,6 +13,8 @@
> #include <linux/uaccess.h>
> #include <linux/module.h>
> #include <linux/ftrace.h>
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/critical.h>
>
> #include "trace.h"
>
> @@ -20,6 +22,7 @@ static struct trace_array *irqsoff_trace __read_mostly;
> static int tracer_enabled __read_mostly;
>
> static DEFINE_PER_CPU(int, tracing_cpu);
> +static DEFINE_PER_CPU(int, tracing_events_cpu);
>
> static DEFINE_RAW_SPINLOCK(max_trace_lock);
>
> @@ -362,6 +365,44 @@ check_critical_timing(struct trace_array *tr,
> __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
> }
>
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> +/*
> + * Called when either preempt or irq are turned off
> + */
> +static inline void
> +start_critical_event(unsigned long ip, unsigned long parent_ip)
> +{
> + int cpu;
> +
> + cpu = raw_smp_processor_id();

Nuke the above and ...
> +
> + if (per_cpu(tracing_events_cpu, cpu))

use this_cpu_read() instead. It's much faster.

> + return;
> +
> + trace_critical_start(ip, parent_ip);
> +
> + per_cpu(tracing_events_cpu, cpu) = 1;

this_cpu_write(tracing_events_cpu, 1);

> +}
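
That is, the whole function becomes something like (a sketch):

static inline void
start_critical_event(unsigned long ip, unsigned long parent_ip)
{
	/* only emit one start event per critical section on this CPU */
	if (this_cpu_read(tracing_events_cpu))
		return;

	trace_critical_start(ip, parent_ip);

	this_cpu_write(tracing_events_cpu, 1);
}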
> +
> +/*
> + * Called when both preempt and irq are turned back on
> + */
> +static inline void
> +stop_critical_event(unsigned long ip, unsigned long parent_ip)
> +{
> + int cpu;
> +
> + cpu = raw_smp_processor_id();

Same here.

> +
> + if (unlikely(per_cpu(tracing_events_cpu, cpu)))
> + per_cpu(tracing_events_cpu, cpu) = 0;
> + else
> + return;
> +
> + trace_critical_end(ip, parent_ip);

Are you not worried about recursion here? There's no protection.
Wouldn't it be better to have:

if (!this_cpu_read(tracing_events_cpu))
return;

trace_critical_end(ip, parent_ip);

this_cpu_write(tracing_events_cpu, 0);

?

> +}

Add:

#else

static inline void start_critical_event(unsigned long ip,
					unsigned long pip) { }
static inline void stop_critical_event(unsigned long ip,
				       unsigned long pip) { }

static inline int trace_critical_start_enabled(void) { return 0; }
static inline int trace_critical_end_enabled(void) { return 0; }

> +#endif
> +
> static inline void
> start_critical_timing(unsigned long ip, unsigned long parent_ip)
> {
> @@ -434,6 +475,10 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
> /* start and stop critical timings used for stoppage (in idle) */
> void start_critical_timings(void)
> {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> + if (irqs_disabled() || preempt_count())
> + start_critical_event(CALLER_ADDR0, CALLER_ADDR1);
> +#endif

And replace the above with:

if (trace_critical_start_enabled())
start_critical_event(CALLER_ADDR0, CALLER_ADDR1);

to get rid of the #ifdef embedded in the code.

Also, notice the use of "trace_critical_start_enabled()". That helper
is generated automatically by the TRACE_EVENT() macro when the event is
defined, so only the #else stubs above need to be written by hand.

> if (preempt_trace() || irq_trace())
> start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
> }
> @@ -441,6 +486,10 @@ EXPORT_SYMBOL_GPL(start_critical_timings);
>
> void stop_critical_timings(void)
> {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> + if (irqs_disabled() || preempt_count())
> + stop_critical_event(CALLER_ADDR0, CALLER_ADDR1);
> +#endif

Same here, but for the stop functions.

> if (preempt_trace() || irq_trace())
> stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
> }
> @@ -483,6 +532,10 @@ inline void print_irqtrace_events(struct task_struct *curr)
> */
> void trace_hardirqs_on(void)
> {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> + if (!preempt_count())
> + stop_critical_event(CALLER_ADDR0, CALLER_ADDR1);
> +#endif

and here.

> if (!preempt_trace() && irq_trace())
> stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
> }
> @@ -490,6 +543,10 @@ EXPORT_SYMBOL(trace_hardirqs_on);
>
> void trace_hardirqs_off(void)
> {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> + if (!preempt_count())
> + start_critical_event(CALLER_ADDR0, CALLER_ADDR1);
> +#endif

and here.

> if (!preempt_trace() && irq_trace())
> start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
> }
> @@ -497,6 +554,10 @@ EXPORT_SYMBOL(trace_hardirqs_off);
>
> __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
> {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> + if (!preempt_count())
> + stop_critical_event(CALLER_ADDR0, caller_addr);
> +#endif

and here.

> if (!preempt_trace() && irq_trace())
> stop_critical_timing(CALLER_ADDR0, caller_addr);
> }
> @@ -504,6 +565,10 @@ EXPORT_SYMBOL(trace_hardirqs_on_caller);
>
> __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
> {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> + if (!preempt_count())
> + start_critical_event(CALLER_ADDR0, caller_addr);
> +#endif

and here.

> if (!preempt_trace() && irq_trace())
> start_critical_timing(CALLER_ADDR0, caller_addr);
> }
> @@ -515,12 +580,20 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller);
> #ifdef CONFIG_PREEMPT_TRACER
> void trace_preempt_on(unsigned long a0, unsigned long a1)
> {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> + if (!irqs_disabled())
> + stop_critical_event(a0, a1);
> +#endif

and here.

> if (preempt_trace() && !irq_trace())
> stop_critical_timing(a0, a1);
> }
>
> void trace_preempt_off(unsigned long a0, unsigned long a1)
> {
> +#ifdef CONFIG_TRACE_CRITICAL_SECTION_EVENTS
> + if (!irqs_disabled())
> + start_critical_event(a0, a1);
> +#endif

and here.

(that was repetitive).

-- Steve

> if (preempt_trace() && !irq_trace())
> start_critical_timing(a0, a1);
> }

2017-06-30 21:03:17

by Joel Fernandes

Subject: Re: [RFC] tracing: Add support for critical section event tracing

Hi Steven,

Thanks a lot for the comments, I agree with all of them and had a
comment about one of them:

On Fri, Jun 30, 2017 at 5:51 AM, Steven Rostedt <[email protected]> wrote:
[..]
> Are you not worried about recursion here? There's no protection.
> Wouldn't it be better to have:
>
> if (!this_cpu_read(tracing_events_cpu))
> return;
>
> trace_critical_end(ip, parent_ip);
>
> this_cpu_write(tracing_events_cpu, 0);
>
> ?
>

I tried to go over some scenarios and I think it shouldn't be a
problem, because we start the critical event only when either
interrupts are turned off while preemption is on, or preemption is
turned off while interrupts are on, and because we call the tracer
while still inside the critical section. Let me know if you have a
scenario in mind that can cause problems with this.

Anyway, I will rearrange the code like you suggested, just to be extra safe.

Thanks,
Joel

2017-06-30 21:07:51

by Steven Rostedt

Subject: Re: [RFC] tracing: Add support for critical section event tracing

On Fri, 30 Jun 2017 14:03:14 -0700
Joel Fernandes <[email protected]> wrote:

> Hi Steven,
>
> Thanks a lot for the comments, I agree with all of them and had a
> comment about one of them:
>
> On Fri, Jun 30, 2017 at 5:51 AM, Steven Rostedt <[email protected]> wrote:
> [..]
> > Are you not worried about recursion here? There's no protection.
> > Wouldn't it be better to have:
> >
> > if (!this_cpu_read(tracing_events_cpu))
> > return;
> >
> > trace_critical_end(ip, parent_ip);
> >
> > this_cpu_write(tracing_events_cpu, 0);
> >
> > ?
> >
>
> I tried to go over some scenarios and I think it shouldn't be a
> problem, because we start the critical event only when either
> interrupts are turned off while preemption is on, or preemption is
> turned off while interrupts are on, and because we call the tracer
> while still inside the critical section. Let me know if you have a
> scenario in mind that can cause problems with this.

Then may I ask what is tracing_events_cpu actually protecting?

-- Steve

2017-06-30 21:24:50

by Joel Fernandes

Subject: Re: [RFC] tracing: Add support for critical section event tracing

On Fri, Jun 30, 2017 at 2:07 PM, Steven Rostedt <[email protected]> wrote:
> On Fri, 30 Jun 2017 14:03:14 -0700
> Joel Fernandes <[email protected]> wrote:
>
>> Hi Steven,
>>
>> Thanks a lot for the comments, I agree with all of them and had a
>> comment about one of them:
>>
>> On Fri, Jun 30, 2017 at 5:51 AM, Steven Rostedt <[email protected]> wrote:
>> [..]
>> > Are you not worried about recursion here? There's no protection.
>> > Wouldn't it be better to have:
>> >
>> > if (!this_cpu_read(tracing_events_cpu))
>> > return;
>> >
>> > trace_critical_end(ip, parent_ip);
>> >
>> > this_cpu_write(tracing_events_cpu, 0);
>> >
>> > ?
>> >
>>
>> I tried to go over some scenarios and I think it shouldn't be a
>> problem, because we start the critical event only when either
>> interrupts are turned off while preemption is on, or preemption is
>> turned off while interrupts are on, and because we call the tracer
>> while still inside the critical section. Let me know if you have a
>> scenario in mind that can cause problems with this.
>
> Then may I ask what is tracing_events_cpu actually protecting?

Yes, actually I think it's not needed, considering the above. When I
was developing the code I wanted to do something similar to
start_critical_timing(), which has a tracing_cpu per-cpu variable, and
at the time I wasn't sure if it was needed, so I just added it in
anyway.

Since the case in this patch is more specific (both preemption and
irqs turned back on to mark the end of a critical section), it may not
be needed and I can drop it. I will think some more about it and let
you know if I find a scenario that says otherwise.

Thanks so much for making the patch better!!

Regards,
Joel

2017-06-30 23:21:44

by Joel Fernandes

Subject: Re: [RFC] tracing: Add support for critical section event tracing

Hi Steven,

On Jun 30, 2017 5:32 AM, "Steven Rostedt" <[email protected]> wrote:
[..]
>> Did you have any comments about this patch? It was sent a while ago,
>> and if you can provide me your initial thoughts on it, I would
>> appreciate it. (Sorry to ping you about it during the busy merge
>> window, but I was thinking that if I can get early comments from you,
>> then I can get to work on it and finish well before the next merge
>> window.)

> Sorry for missing that. It's still in my INBOX with the "to look at"
> marking ;-)

Cool, no problem :-)

> Actually, have you seen Tom Zanussi's patch series about histograms,
> "tracing: Inter-event (e.g. latency) support"? He just recently posted
> another series.

I was at his talk during ELC. I will take a look at his new patches as
well today. It's quite a big series ;-)

> I'd like to get that in before looking at adding events into the
> critical sections. But that said, I'll still take a look at your patch.

I feel, though, that the inter-event stuff is fairly independent of
the critical events. Other than the fact that the irqsoff tracer could
possibly be rewritten to use inter-events, I didn't see how the
critical-events work depends on the inter-event stuff, or blocks it as
such. Just checking if there is another reason you see that they could
conflict?

> Thanks for reminding me.

Thanks for making time for the review :-)

Regards,
Joel