These patches add critical section event tracing support.
Changes since RFC [1] are:
- Make critical events independent of the preempt/irqsoff tracer.
- Store pointers instead of symbol names to do less in the fast path.
- Other changes based on Steven's comments.
Here's an example of how Android's systrace will use it to show critical
sections as a Gantt chart: http://imgur.com/download/TZplEVp
[1] https://patchwork.kernel.org/patch/9676579/
Joel Fernandes (2):
tracing: irqsoff: Move code under tracer config options
tracing: Add support for critical section events
include/linux/ftrace.h | 2 +-
include/linux/irqflags.h | 3 +-
include/trace/events/critical.h | 45 ++++++
kernel/trace/Kconfig | 5 +
kernel/trace/Makefile | 1 +
kernel/trace/trace_irqsoff.c | 331 ++++++++++++++++++++++++++--------------
6 files changed, 269 insertions(+), 118 deletions(-)
create mode 100644 include/trace/events/critical.h
Cc: Steven Rostedt <[email protected]>
Cc: [email protected]
--
2.14.1.581.gf28d330327-goog
In preparation for adding critical section events that don't depend on
the tracers being enabled, move tracer-specific code under its config
options, and move common code that a subsequent patch will need into a
common area. This patch is code movement only.
Cc: Steven Rostedt <[email protected]>
Cc: [email protected]
Signed-off-by: Joel Fernandes <[email protected]>
---
kernel/trace/trace_irqsoff.c | 242 ++++++++++++++++++++++---------------------
1 file changed, 126 insertions(+), 116 deletions(-)
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 7758bc0617cb..edd52ee8b756 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -16,6 +16,7 @@
#include "trace.h"
+#if defined(CONFIG_IRQSOFF_TRACER) || defined(CONFIG_PREEMPT_TRACER)
static struct trace_array *irqsoff_trace __read_mostly;
static int tracer_enabled __read_mostly;
@@ -35,27 +36,6 @@ static int save_flags;
static void stop_irqsoff_tracer(struct trace_array *tr, int graph);
static int start_irqsoff_tracer(struct trace_array *tr, int graph);
-#ifdef CONFIG_PREEMPT_TRACER
-static inline int
-preempt_trace(void)
-{
- return ((trace_type & TRACER_PREEMPT_OFF) && preempt_count());
-}
-#else
-# define preempt_trace() (0)
-#endif
-
-#ifdef CONFIG_IRQSOFF_TRACER
-static inline int
-irq_trace(void)
-{
- return ((trace_type & TRACER_IRQS_OFF) &&
- irqs_disabled());
-}
-#else
-# define irq_trace() (0)
-#endif
-
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static int irqsoff_display_graph(struct trace_array *tr, int set);
# define is_graph(tr) ((tr)->trace_flags & TRACE_ITER_DISPLAY_GRAPH)
@@ -431,101 +411,6 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
atomic_dec(&data->disabled);
}
-/* start and stop critical timings used to for stoppage (in idle) */
-void start_critical_timings(void)
-{
- if (preempt_trace() || irq_trace())
- start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
-}
-EXPORT_SYMBOL_GPL(start_critical_timings);
-
-void stop_critical_timings(void)
-{
- if (preempt_trace() || irq_trace())
- stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
-}
-EXPORT_SYMBOL_GPL(stop_critical_timings);
-
-#ifdef CONFIG_IRQSOFF_TRACER
-#ifdef CONFIG_PROVE_LOCKING
-void time_hardirqs_on(unsigned long a0, unsigned long a1)
-{
- if (!preempt_trace() && irq_trace())
- stop_critical_timing(a0, a1);
-}
-
-void time_hardirqs_off(unsigned long a0, unsigned long a1)
-{
- if (!preempt_trace() && irq_trace())
- start_critical_timing(a0, a1);
-}
-
-#else /* !CONFIG_PROVE_LOCKING */
-
-/*
- * Stubs:
- */
-
-void trace_softirqs_on(unsigned long ip)
-{
-}
-
-void trace_softirqs_off(unsigned long ip)
-{
-}
-
-inline void print_irqtrace_events(struct task_struct *curr)
-{
-}
-
-/*
- * We are only interested in hardirq on/off events:
- */
-void trace_hardirqs_on(void)
-{
- if (!preempt_trace() && irq_trace())
- stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
-}
-EXPORT_SYMBOL(trace_hardirqs_on);
-
-void trace_hardirqs_off(void)
-{
- if (!preempt_trace() && irq_trace())
- start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
-}
-EXPORT_SYMBOL(trace_hardirqs_off);
-
-__visible void trace_hardirqs_on_caller(unsigned long caller_addr)
-{
- if (!preempt_trace() && irq_trace())
- stop_critical_timing(CALLER_ADDR0, caller_addr);
-}
-EXPORT_SYMBOL(trace_hardirqs_on_caller);
-
-__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
-{
- if (!preempt_trace() && irq_trace())
- start_critical_timing(CALLER_ADDR0, caller_addr);
-}
-EXPORT_SYMBOL(trace_hardirqs_off_caller);
-
-#endif /* CONFIG_PROVE_LOCKING */
-#endif /* CONFIG_IRQSOFF_TRACER */
-
-#ifdef CONFIG_PREEMPT_TRACER
-void trace_preempt_on(unsigned long a0, unsigned long a1)
-{
- if (preempt_trace() && !irq_trace())
- stop_critical_timing(a0, a1);
-}
-
-void trace_preempt_off(unsigned long a0, unsigned long a1)
-{
- if (preempt_trace() && !irq_trace())
- start_critical_timing(a0, a1);
-}
-#endif /* CONFIG_PREEMPT_TRACER */
-
#ifdef CONFIG_FUNCTION_TRACER
static bool function_enabled;
@@ -781,3 +666,128 @@ __init static int init_irqsoff_tracer(void)
return 0;
}
core_initcall(init_irqsoff_tracer);
+
+#else /* CONFIG_IRQSOFF_TRACER || CONFIG_PREEMPT_TRACER */
+
+# define start_critical_timing(x, y) do { } while (0)
+# define stop_critical_timing(x, y) do { } while (0)
+
+#endif
+
+#ifdef CONFIG_PREEMPT_TRACER
+static inline int
+preempt_trace(void)
+{
+ return ((trace_type & TRACER_PREEMPT_OFF) && preempt_count());
+}
+#else
+# define preempt_trace() (0)
+#endif
+
+#ifdef CONFIG_IRQSOFF_TRACER
+static inline int
+irq_trace(void)
+{
+ return ((trace_type & TRACER_IRQS_OFF) &&
+ irqs_disabled());
+}
+#else
+# define irq_trace() (0)
+#endif
+
+#ifdef CONFIG_PROVE_LOCKING
+#ifdef CONFIG_IRQSOFF_TRACER
+void time_hardirqs_on(unsigned long a0, unsigned long a1)
+{
+ if (!preempt_trace() && irq_trace())
+ stop_critical_timing(a0, a1);
+}
+
+void time_hardirqs_off(unsigned long a0, unsigned long a1)
+{
+ if (!preempt_trace() && irq_trace())
+ start_critical_timing(a0, a1);
+}
+#endif
+#else /* !CONFIG_PROVE_LOCKING */
+# ifdef CONFIG_IRQSOFF_TRACER
+
+/*
+ * Stubs:
+ */
+
+void trace_softirqs_on(unsigned long ip)
+{
+}
+
+void trace_softirqs_off(unsigned long ip)
+{
+}
+
+inline void print_irqtrace_events(struct task_struct *curr)
+{
+}
+
+/*
+ * We are only interested in hardirq on/off events:
+ */
+void trace_hardirqs_on(void)
+{
+ if (!preempt_trace() && irq_trace())
+ stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+}
+EXPORT_SYMBOL(trace_hardirqs_on);
+
+void trace_hardirqs_off(void)
+{
+ if (!preempt_trace() && irq_trace())
+ start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+}
+EXPORT_SYMBOL(trace_hardirqs_off);
+
+__visible void trace_hardirqs_on_caller(unsigned long caller_addr)
+{
+ if (!preempt_trace() && irq_trace())
+ stop_critical_timing(CALLER_ADDR0, caller_addr);
+}
+EXPORT_SYMBOL(trace_hardirqs_on_caller);
+
+__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
+{
+ if (!preempt_trace() && irq_trace())
+ start_critical_timing(CALLER_ADDR0, caller_addr);
+}
+EXPORT_SYMBOL(trace_hardirqs_off_caller);
+
+#endif /* CONFIG_IRQSOFF_TRACER */
+#endif /* CONFIG_PROVE_LOCKING */
+
+#ifdef CONFIG_PREEMPT_TRACER
+void trace_preempt_on(unsigned long a0, unsigned long a1)
+{
+ if (preempt_trace() && !irq_trace())
+ stop_critical_timing(a0, a1);
+}
+
+void trace_preempt_off(unsigned long a0, unsigned long a1)
+{
+ if (preempt_trace() && !irq_trace())
+ start_critical_timing(a0, a1);
+}
+#endif /* CONFIG_PREEMPT_TRACER */
+
+
+/* start and stop critical timings used to for stoppage (in idle) */
+void start_critical_timings(void)
+{
+ if (preempt_trace() || irq_trace())
+ start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+}
+EXPORT_SYMBOL_GPL(start_critical_timings);
+
+void stop_critical_timings(void)
+{
+ if (preempt_trace() || irq_trace())
+ stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+}
+EXPORT_SYMBOL_GPL(stop_critical_timings);
--
2.14.1.581.gf28d330327-goog
Critical section trace events mark the start and end of a critical
section, so that a trace viewer like systrace can display them
graphically and correlate them with latencies and scheduling issues.
Reason for starting critical section:
Either preemption or interrupts were turned off.
Reason for ending critical section:
Both preemption and interrupts are now turned back on.
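For example, an illustrative sketch of the intended behavior (the
sequence below is hypothetical, not code from this patch):
	preempt_disable();	/* first disable on this CPU: critical_start fires */
	local_irq_disable();	/* already in a critical section: no event */
	local_irq_enable();	/* preemption still disabled: no event */
	preempt_enable();	/* both enabled again: critical_stop fires */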
Cc: Steven Rostedt <[email protected]>
Cc: [email protected]
Signed-off-by: Joel Fernandes <[email protected]>
---
include/linux/ftrace.h | 2 +-
include/linux/irqflags.h | 3 +-
include/trace/events/critical.h | 45 +++++++++++++++++++
kernel/trace/Kconfig | 5 +++
kernel/trace/Makefile | 1 +
kernel/trace/trace_irqsoff.c | 97 +++++++++++++++++++++++++++++++++++++++--
6 files changed, 147 insertions(+), 6 deletions(-)
create mode 100644 include/trace/events/critical.h
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 6383115e9d2c..6020c473a198 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -742,7 +742,7 @@ static inline unsigned long get_lock_parent_ip(void)
static inline void time_hardirqs_off(unsigned long a0, unsigned long a1) { }
#endif
-#ifdef CONFIG_PREEMPT_TRACER
+#if defined(CONFIG_PREEMPT_TRACER) || defined(CONFIG_CRITICAL_SECTION_EVENTS)
extern void trace_preempt_on(unsigned long a0, unsigned long a1);
extern void trace_preempt_off(unsigned long a0, unsigned long a1);
#else
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 9bc050bc81b2..79c372fe9dcb 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -61,7 +61,8 @@ do { \
#endif
#if defined(CONFIG_IRQSOFF_TRACER) || \
- defined(CONFIG_PREEMPT_TRACER)
+ defined(CONFIG_PREEMPT_TRACER) || \
+ defined(CONFIG_CRITICAL_SECTION_EVENTS)
extern void stop_critical_timings(void);
extern void start_critical_timings(void);
#else
diff --git a/include/trace/events/critical.h b/include/trace/events/critical.h
new file mode 100644
index 000000000000..bd07a6a50252
--- /dev/null
+++ b/include/trace/events/critical.h
@@ -0,0 +1,45 @@
+#ifdef CONFIG_CRITICAL_SECTION_EVENTS
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM critical
+
+#if !defined(_TRACE_CRITICAL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CRITICAL_H
+
+#include <linux/ktime.h>
+#include <linux/tracepoint.h>
+#include <linux/string.h>
+#include <asm/sections.h>
+
+DECLARE_EVENT_CLASS(critical_template,
+
+ TP_PROTO(unsigned long ip, unsigned long parent_ip),
+
+ TP_ARGS(ip, parent_ip),
+
+ TP_STRUCT__entry(
+ __field(u32, caller_offs)
+ __field(u32, parent_offs)
+ ),
+
+ TP_fast_assign(
+ __entry->caller_offs = (u32)(ip - (unsigned long)_stext);
+ __entry->parent_offs = (u32)(parent_ip - (unsigned long)_stext);
+ ),
+
+ TP_printk("caller=%pF parent=%pF\n",
+ (void *)((unsigned long)(_stext) + __entry->caller_offs),
+ (void *)((unsigned long)(_stext) + __entry->parent_offs))
+);
+
+DEFINE_EVENT(critical_template, critical_start,
+ TP_PROTO(unsigned long ip, unsigned long parent_ip),
+ TP_ARGS(ip, parent_ip));
+
+DEFINE_EVENT(critical_template, critical_stop,
+ TP_PROTO(unsigned long ip, unsigned long parent_ip),
+ TP_ARGS(ip, parent_ip));
+#endif /* _TRACE_CRITICAL_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+#endif
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 434c840e2d82..c08b23fcae6d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -209,6 +209,11 @@ config PREEMPT_TRACER
enabled. This option and the irqs-off timing option can be
used together or separately.)
+config CRITICAL_SECTION_EVENTS
+ bool "Trace critical sections as trace events"
+ select TRACE_IRQFLAGS
+ default n
+
config SCHED_TRACER
bool "Scheduling Latency Tracer"
select GENERIC_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 90f2701d92a7..265135e35b79 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
+obj-$(CONFIG_CRITICAL_SECTION_EVENTS) += trace_irqsoff.o
obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
obj-$(CONFIG_HWLAT_TRACER) += trace_hwlat.o
obj-$(CONFIG_NOP_TRACER) += trace_nop.o
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index edd52ee8b756..7f59f41fbdc0 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -13,9 +13,69 @@
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ftrace.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/critical.h>
#include "trace.h"
+#ifdef CONFIG_CRITICAL_SECTION_EVENTS
+static DEFINE_PER_CPU(int, tracing_events_cpu);
+
+/*
+ * Called when either preempt or irq are turned off
+ */
+static inline void
+start_critical_event(unsigned long ip, unsigned long parent_ip)
+{
+ if (this_cpu_read(tracing_events_cpu))
+ return;
+
+ this_cpu_write(tracing_events_cpu, 1);
+ trace_critical_start(ip, parent_ip);
+}
+
+/*
+ * Called when both preempt and irq are turned back on
+ */
+static inline void
+stop_critical_event(unsigned long ip, unsigned long parent_ip)
+{
+ if (!this_cpu_read(tracing_events_cpu))
+ return;
+
+ trace_critical_stop(ip, parent_ip);
+ this_cpu_write(tracing_events_cpu, 0);
+}
+
+static inline int
+__trace_critical_start_enabled(void)
+{
+ if (irqs_disabled() || preempt_count())
+ return 1;
+
+ return 0;
+}
+
+static inline int
+__trace_critical_stop_enabled(void)
+{
+ if ((irqs_disabled() && !preempt_count()) ||
+ (!irqs_disabled() && preempt_count()))
+ return 1;
+
+ return 0;
+}
+#else
+
+static inline void start_critical_event(unsigned long ip,
+ unsigned long pip) { }
+static inline void stop_critical_event(unsigned long ip,
+ unsigned long pip) { }
+
+static inline int __trace_critical_start_enabled(void) { return 0; }
+static inline int __trace_critical_stop_enabled(void) { return 0; }
+#endif
+
#if defined(CONFIG_IRQSOFF_TRACER) || defined(CONFIG_PREEMPT_TRACER)
static struct trace_array *irqsoff_trace __read_mostly;
static int tracer_enabled __read_mostly;
@@ -710,7 +770,7 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1)
}
#endif
#else /* !CONFIG_PROVE_LOCKING */
-# ifdef CONFIG_IRQSOFF_TRACER
+# if defined(CONFIG_IRQSOFF_TRACER) || defined(CONFIG_CRITICAL_SECTION_EVENTS)
/*
* Stubs:
@@ -733,6 +793,9 @@ inline void print_irqtrace_events(struct task_struct *curr)
*/
void trace_hardirqs_on(void)
{
+ if (__trace_critical_stop_enabled())
+ stop_critical_event(CALLER_ADDR0, CALLER_ADDR1);
+
if (!preempt_trace() && irq_trace())
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
@@ -740,6 +803,9 @@ EXPORT_SYMBOL(trace_hardirqs_on);
void trace_hardirqs_off(void)
{
+ if (__trace_critical_start_enabled())
+ start_critical_event(CALLER_ADDR0, CALLER_ADDR1);
+
if (!preempt_trace() && irq_trace())
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
@@ -747,6 +813,9 @@ EXPORT_SYMBOL(trace_hardirqs_off);
__visible void trace_hardirqs_on_caller(unsigned long caller_addr)
{
+ if (__trace_critical_stop_enabled())
+ stop_critical_event(CALLER_ADDR0, caller_addr);
+
if (!preempt_trace() && irq_trace())
stop_critical_timing(CALLER_ADDR0, caller_addr);
}
@@ -754,32 +823,44 @@ EXPORT_SYMBOL(trace_hardirqs_on_caller);
__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
{
+ if (__trace_critical_start_enabled())
+ start_critical_event(CALLER_ADDR0, caller_addr);
+
if (!preempt_trace() && irq_trace())
start_critical_timing(CALLER_ADDR0, caller_addr);
}
EXPORT_SYMBOL(trace_hardirqs_off_caller);
-#endif /* CONFIG_IRQSOFF_TRACER */
+#endif /* CONFIG_IRQSOFF_TRACER || CONFIG_CRITICAL_SECTION_EVENTS */
#endif /* CONFIG_PROVE_LOCKING */
-#ifdef CONFIG_PREEMPT_TRACER
+#if defined(CONFIG_PREEMPT_TRACER) || defined(CONFIG_CRITICAL_SECTION_EVENTS)
void trace_preempt_on(unsigned long a0, unsigned long a1)
{
+ if (__trace_critical_stop_enabled())
+ stop_critical_event(a0, a1);
+
if (preempt_trace() && !irq_trace())
stop_critical_timing(a0, a1);
}
void trace_preempt_off(unsigned long a0, unsigned long a1)
{
+ if (__trace_critical_start_enabled())
+ start_critical_event(a0, a1);
+
if (preempt_trace() && !irq_trace())
start_critical_timing(a0, a1);
}
-#endif /* CONFIG_PREEMPT_TRACER */
+#endif /* CONFIG_PREEMPT_TRACER || CONFIG_CRITICAL_SECTION_EVENTS */
/* start and stop critical timings used to for stoppage (in idle) */
void start_critical_timings(void)
{
+ if (__trace_critical_start_enabled())
+ start_critical_event(CALLER_ADDR0, CALLER_ADDR1);
+
if (preempt_trace() || irq_trace())
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
@@ -787,6 +868,14 @@ EXPORT_SYMBOL_GPL(start_critical_timings);
void stop_critical_timings(void)
{
+ /*
+ * we have to use *_start_enabled instead of *_stop_enabled
+ * because the condition for stopping critical timings is
+ * whether a critical section was entered before entering idle
+ */
+ if (__trace_critical_start_enabled())
+ stop_critical_event(CALLER_ADDR0, CALLER_ADDR1);
+
if (preempt_trace() || irq_trace())
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
--
2.14.1.581.gf28d330327-goog
On Sun, Sep 03, 2017 at 01:50:51AM -0700, Joel Fernandes wrote:
> Critical section trace events mark the start and end of a critical
> section, so that a trace viewer like systrace can display them
> graphically and correlate them with latencies and scheduling issues.
>
> Reason for starting critical section:
> Either preemption or interrupts were turned off.
> Reason for ending critical section:
> Both preemption and interrupts are now turned back on.
Please don't use the name critical section for this. Also IRQ and
preempt already have a gazillion trace hooks, why do we need more?
Hi Peter,
On Mon, Sep 4, 2017 at 12:56 AM, Peter Zijlstra <[email protected]> wrote:
> On Sun, Sep 03, 2017 at 01:50:51AM -0700, Joel Fernandes wrote:
>> Critical section trace events mark the start and end of a critical
>> section, so that a trace viewer like systrace can display them
>> graphically and correlate them with latencies and scheduling issues.
>>
>> Reason for starting critical section:
>> Either preemption or interrupts were turned off.
>> Reason for ending critical section:
>> Both preemption and interrupts are now turned back on.
>
> Please don't use the name critical section for this.
I can change the name to something else, but at the moment I can't
think of anything better. Could you suggest a better name? Also btw,
'critical timings' is the terminology used within the irqsoff tracer
so this is in line with that.
> Also IRQ and preempt already have a gazillion trace hooks, why do we need more?
I think I didn't CC you on the cover letter (that's my bad). That's
OK, I'll point to the links below.
The goal of the patch is not to add more hooks, but to make these
events available through the trace-events mechanism, so that userspace
can enable them through the usual trace event interface and see them
alongside other trace events. This makes it possible to visually see
these events as a timeline along with other kernel and userspace
events. See the description in my cover letter [1] and a screenshot of
how the systrace trace viewer uses this [2].
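For instance, a minimal userspace sketch for enabling the events (this
assumes tracefs is mounted under /sys/kernel/debug; the "critical"
group name comes from TRACE_SYSTEM in this series):
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>
	int main(void)
	{
		const char *path =
			"/sys/kernel/debug/tracing/events/critical/enable";
		int fd = open(path, O_WRONLY);
		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* writing "1" enables both critical_start and critical_stop */
		if (write(fd, "1", 1) != 1)
			perror("write");
		close(fd);
		return 0;
	}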
Also, this work is along the same lines as what we discussed at a
recent conference about adding irqsoff and preemptoff as real trace
events, and then using synthetic events (which can combine the start
and end events into a single event) to redo the irqsoff tracer, so in
that sense it is in the right direction. Synthetic events are still
some time away from being merged, AFAIK.
thanks,
- Joel
[1] https://lkml.org/lkml/2017/9/3/15
[2] http://imgur.com/download/TZplEVp
[3] all patches:
https://patchwork.kernel.org/patch/9936047/
https://patchwork.kernel.org/patch/9936045/
On Mon, Sep 04, 2017 at 12:04:44PM -0700, Joel Fernandes wrote:
> Hi Peter,
>
> On Mon, Sep 4, 2017 at 12:56 AM, Peter Zijlstra <[email protected]> wrote:
> > On Sun, Sep 03, 2017 at 01:50:51AM -0700, Joel Fernandes wrote:
> >> Critical section trace events mark the start and end of a critical
> >> section, so that a trace viewer like systrace can display them
> >> graphically and correlate them with latencies and scheduling issues.
> >>
> >> Reason for starting critical section:
> >> Either preemption or interrupts were turned off.
> >> Reason for ending critical section:
> >> Both preemption and interrupts are now turned back on.
> >
> > Please don't use the name critical section for this.
>
> I can change the name to something else, but at the moment I can't
> think of anything better. Could you suggest a better name? Also btw,
> 'critical timings' is the terminology used within the irqsoff tracer
> so this is in line with that.
So 'critical section' is what some mis-guided people call the locked
region of a lock :-) Using it for something else is prone to cause more
confusion...
I would simply call them what they are: irq_disable,irq_enable
preempt_disable,preempt_enable.
> > Also IRQ and preempt already have a gazillion trace hooks, why do we need more?
>
> The goal of the patch is not to add more hooks, but to make these
> events available through the trace-events mechanism, so that userspace
> can enable them through the usual trace event interface and see them
> alongside other trace events.
But only when you're already building a debug kernel and we already have
these irq/preempt trace things in, right?
> This makes it possible to visually see these events as a timeline
> along with other kernel and userspace events. See the description in
> my cover letter [1] and a screenshot of how the systrace trace viewer
> uses this [2].
Yeah, for some reason these graphics things don't work for me... And
regular traces already have the irq-off and preempt-depth column, which
typically is enough. But I suppose I can see the value of allowing
explicit events for them.
> Also, this work is along the same lines as what we discussed at a
> recent conference about adding irqsoff and preemptoff as real trace
> events, and then using synthetic events (which can combine the start
> and end events into a single event) to redo the irqsoff tracer, so in
> that sense it is in the right direction. Synthetic events are still
> some time away from being merged, AFAIK.
I have no memories of that, but that sounds OK :-)
On Mon, 4 Sep 2017 21:44:26 +0200
Peter Zijlstra <[email protected]> wrote:
> > I can change the name to something else, but at the moment I can't
> > think of anything better. Could you suggest a better name? Also btw,
> > 'critical timings' is the terminology used within the irqsoff tracer
> > so this is in line with that.
>
> So 'critical section' is what some mis-guided people call the locked
> region of a lock :-) Using it for something else is prone to cause more
> confusion...
>
> I would simply call them what they are: irq_disable,irq_enable
> preempt_disable,preempt_enable.
Yes please. The "critical section" naming came from the code that was
from the latency tracer days of the real time patch (pre-ftrace). The
irqsoff tracer has the least modification from the original code, and
probably should be rewritten one of these days.
-- Steve
On Mon, Sep 4, 2017 at 4:34 PM, Steven Rostedt <[email protected]> wrote:
> On Mon, 4 Sep 2017 21:44:26 +0200
> Peter Zijlstra <[email protected]> wrote:
>
>> > I can change the name to something else, but at the moment I can't
>> > think of anything better. Could you suggest a better name? Also btw,
>> > 'critical timings' is the terminology used within the irqsoff tracer
>> > so this is in line with that.
>>
>> So 'critical section' is what some mis-guided people call the locked
>> region of a lock :-) Using it for something else is prone to cause more
>> confusion...
>>
>> I would simply call them what they are: irq_disable,irq_enable
>> preempt_disable,preempt_enable.
>
> Yes please. The "critical section" naming came from the code that was
> from the latency tracer days of the real time patch (pre-ftrace). The
> irqsoff tracer has the least modification from the original code, and
> probably should be rewritten one of these days.
Sounds good to me. For the subsystem, could you guys suggest a name? I
was thinking "atomic_section"?
Something like:
subsystem: atomic_section
events:
irqsoff_disable
irqsoff_enable
preemptoff_disable
preemptoff_enable
and additionally (to do what my patch does):
preemptirqsoff_enable
preemptirqsoff_disable
Does this sound good to you?
I did the bulk of the work today of moving code that isn't
irqsoff-tracer specific out of the irqsoff tracer into a common area,
and now I'm getting close to the point of creating the above
tracepoints and posting another series, but I wanted to get your
thoughts on the above first.
I like this new approach better because the next logical step on top
of this work can be to rewrite the irqsoff tracer using probes on the
above tracepoints (which I'll do once we can get these in).
thanks,
- Joel
On Mon, Sep 4, 2017 at 5:54 PM, Joel Fernandes <[email protected]> wrote:
> On Mon, Sep 4, 2017 at 4:34 PM, Steven Rostedt <[email protected]> wrote:
>> On Mon, 4 Sep 2017 21:44:26 +0200
>> Peter Zijlstra <[email protected]> wrote:
>>
>>> > I can change the name to something else, but at the moment I can't
>>> > think of anything better. Could you suggest a better name? Also btw,
>>> > 'critical timings' is the terminology used within the irqsoff tracer
>>> > so this is in line with that.
>>>
>>> So 'critical section' is what some mis-guided people call the locked
>>> region of a lock :-) Using it for something else is prone to cause more
>>> confusion...
>>>
>>> I would simply call them what they are: irq_disable,irq_enable
>>> preempt_disable,preempt_enable.
>>
>> Yes please. The "critical section" naming came from the code that was
>> from the latency tracer days of the real time patch (pre-ftrace). The
>> irqsoff tracer has the least modification from the original code, and
>> probably should be rewritten one of these days.
>
> Sounds good to me. For the subsystem, could you guys suggest a name? I
> was thinking "atomic_section"?
>
> Something like:
>
> subsystem: atomic_section
> events:
> irqsoff_disable
> irqsoff_enable
> preemptoff_disable
> preemptoff_enable
>
> and additionally (to do what my patch does):
> preemptirqsoff_enable
> preemptirqsoff_disable
Apologies, I meant (without the "off"):
subsystem: atomic_section
events:
irqs_disable
irqs_enable
preempt_disable
preempt_enable
and additionally (similar to what my patch does):
preemptirq_enable
preemptirq_disable
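For concreteness, here is a rough sketch of how these could look,
reusing the event class pattern from this series (the
"atomic_section_template" class name is just a placeholder):
	DEFINE_EVENT(atomic_section_template, irqs_disable,
		     TP_PROTO(unsigned long ip, unsigned long parent_ip),
		     TP_ARGS(ip, parent_ip));
	DEFINE_EVENT(atomic_section_template, irqs_enable,
		     TP_PROTO(unsigned long ip, unsigned long parent_ip),
		     TP_ARGS(ip, parent_ip));
	/*
	 * ... and likewise for preempt_disable/preempt_enable and the
	 * combined preemptirq_disable/preemptirq_enable pair.
	 */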
Could you let me know if we are in agreement about this naming?
thanks,
-Joel
On Mon, Sep 04, 2017 at 08:26:13PM -0700, Joel Fernandes wrote:
> Apologies, I meant (without the "off"):
>
> subsystem: atomic_section
> events:
> irqs_disable
> irqs_enable
> preempt_disable
> preempt_enable
>
> and additionally (similar to what my patch does):
> preemptirq_enable
> preemptirq_disable
>
What do you need the last for?
On Mon, Sep 4, 2017 at 11:52 PM, Peter Zijlstra <[email protected]> wrote:
> On Mon, Sep 04, 2017 at 08:26:13PM -0700, Joel Fernandes wrote:
>
>> Apologies, I meant (without the "off"):
>>
>> subsystem: atomic_section
>> events:
>> irqs_disable
>> irqs_enable
>> preempt_disable
>> preempt_enable
>>
>> and additionally (similar to what my patch does):
>> preemptirq_enable
>> preemptirq_disable
>>
>
> What do you need the last for?
The last 2 events above behave as follows: 'disable' means either
preempt or irq just got disabled, and 'enable' means *both* preempt
and irq are enabled again (after either one of them was disabled).
This has the advantage of not generating events when we're already in
an atomic section: for example, acquiring spinlocks in an interrupt
handler may increase the preempt count and generate 'preempt_disable'
events, but not 'preemptirq_disable' events. That reduces the spam in
the traces when all we care about is whether or not we're in an atomic
section. These events fire a lot, so to conserve space in the trace
buffer, the user may want to enable just the latter 2 events. Does
that sound OK to you?
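To illustrate the combined events with a sketch (assuming a spinlock
'lock' and the semantics described above):
	local_irq_disable();	/* atomic section begins: preemptirq_disable fires */
	spin_lock(&lock);	/* preempt_count 0 -> 1: no further event */
	spin_unlock(&lock);	/* preempt_count 1 -> 0: irqs still off, no event */
	local_irq_enable();	/* both enabled again: preemptirq_enable fires */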
thanks,
- Joel
On Tue, Sep 05, 2017 at 09:35:11AM -0700, Joel Fernandes wrote:
> On Mon, Sep 4, 2017 at 11:52 PM, Peter Zijlstra <[email protected]> wrote:
> > On Mon, Sep 04, 2017 at 08:26:13PM -0700, Joel Fernandes wrote:
> >
> >> Apologies, I meant (without the "off"):
> >>
> >> subsystem: atomic_section
> >> events:
> >> irqs_disable
> >> irqs_enable
> >> preempt_disable
> >> preempt_enable
> >>
> >> and additionally (similar to what my patch does):
> >> preemptirq_enable
> >> preemptirq_disable
> >>
> >
> > What do you need the last for?
>
> The last 2 events above behave as follows: 'disable' means either
> preempt or irq just got disabled, and 'enable' means *both* preempt
> and irq are enabled again (after either one of them was disabled).
>
> This has the advantage of not generating events when we're already in
> an atomic section: for example, acquiring spinlocks in an interrupt
> handler may increase the preempt count and generate 'preempt_disable'
> events, but not 'preemptirq_disable' events. That reduces the spam in
> the traces when all we care about is whether or not we're in an atomic
> section. These events fire a lot, so to conserve space in the trace
> buffer, the user may want to enable just the latter 2 events. Does
> that sound OK to you?
Hurm,... how about placing a filter on the other 4, such that we only
emit the event on 0<->x state transitions? IIRC tracing already has
filter bits and eBPF bits that allow something like that.
That avoids having to introduce more tracepoints and gets you the same
results.
Hi Peter,
On Wed, Sep 6, 2017 at 1:40 AM, Peter Zijlstra <[email protected]> wrote:
> On Tue, Sep 05, 2017 at 09:35:11AM -0700, Joel Fernandes wrote:
>> On Mon, Sep 4, 2017 at 11:52 PM, Peter Zijlstra <[email protected]> wrote:
>> > On Mon, Sep 04, 2017 at 08:26:13PM -0700, Joel Fernandes wrote:
>> >
>> >> Apologies, I meant (without the "off"):
>> >>
>> >> subsystem: atomic_section
>> >> events:
>> >> irqs_disable
>> >> irqs_enable
>> >> preempt_disable
>> >> preempt_enable
>> >>
>> >> and additionally (similar to what my patch does):
>> >> preemptirq_enable
>> >> preemptirq_disable
>> >>
>> >
>> > What do you need the last for?
>>
>> The last 2 events above behave as follows: 'disable' means either
>> preempt or irq just got disabled, and 'enable' means *both* preempt
>> and irq are enabled again (after either one of them was disabled).
>>
>> This has the advantage of not generating events when we're already in
>> an atomic section: for example, acquiring spinlocks in an interrupt
>> handler may increase the preempt count and generate 'preempt_disable'
>> events, but not 'preemptirq_disable' events. That reduces the spam in
>> the traces when all we care about is whether or not we're in an atomic
>> section. These events fire a lot, so to conserve space in the trace
>> buffer, the user may want to enable just the latter 2 events. Does
>> that sound OK to you?
>
> Hurm,... how about placing a filter on the other 4, such that we only
> emit the event on 0<->x state transitions? IIRC tracing already has
> filter bits and eBPF bits that allow something like that.
>
> That avoids having to introduce more tracepoints and gets you the same
> results.
Sure, that sounds fine to me. I dropped the last 2 events from the
repost of the series, since we can add the combined preempt+irq events
later using methods similar to what you're suggesting.
thanks,
-Joel