2012-11-13 20:18:38

by David Sharp

[permalink] [raw]
Subject: [PATCH v9 1/3] tracing,x86: Add a TSC trace_clock

In order to promote interoperability between userspace tracers and ftrace,
add a trace_clock that reports raw TSC values which will then be recorded
in the ring buffer. Userspace tracers that also record TSCs are then on
exactly the same time base as the kernel and events can be unambiguously
interlaced.

Tested: Enabled a tracepoint and the "tsc" trace_clock and saw very large
timestamp values.

v2:
Move arch-specific bits out of generic code.
v3:
Rename "x86-tsc", cleanups
v7:
Generic arch bits in Kbuild.

Google-Bug-Id: 6980623
Signed-off-by: David Sharp <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Acked-by: Ingo Molnar <[email protected]>
---
arch/alpha/include/asm/Kbuild | 1 +
arch/arm/include/asm/Kbuild | 1 +
arch/arm64/include/asm/Kbuild | 1 +
arch/avr32/include/asm/Kbuild | 1 +
arch/blackfin/include/asm/Kbuild | 1 +
arch/c6x/include/asm/Kbuild | 1 +
arch/cris/include/asm/Kbuild | 1 +
arch/frv/include/asm/Kbuild | 1 +
arch/h8300/include/asm/Kbuild | 1 +
arch/hexagon/include/asm/Kbuild | 1 +
arch/ia64/include/asm/Kbuild | 1 +
arch/m32r/include/asm/Kbuild | 1 +
arch/m68k/include/asm/Kbuild | 1 +
arch/microblaze/include/asm/Kbuild | 1 +
arch/mips/include/asm/Kbuild | 1 +
arch/mn10300/include/asm/Kbuild | 1 +
arch/openrisc/include/asm/Kbuild | 1 +
arch/parisc/include/asm/Kbuild | 1 +
arch/powerpc/include/asm/Kbuild | 1 +
arch/s390/include/asm/Kbuild | 1 +
arch/score/include/asm/Kbuild | 1 +
arch/sh/include/asm/Kbuild | 1 +
arch/sparc/include/asm/Kbuild | 1 +
arch/tile/include/asm/Kbuild | 1 +
arch/um/include/asm/Kbuild | 1 +
arch/unicore32/include/asm/Kbuild | 1 +
arch/x86/include/asm/trace_clock.h | 20 ++++++++++++++++++++
arch/x86/kernel/Makefile | 1 +
arch/x86/kernel/trace_clock.c | 21 +++++++++++++++++++++
arch/xtensa/include/asm/Kbuild | 1 +
include/asm-generic/trace_clock.h | 16 ++++++++++++++++
include/linux/trace_clock.h | 2 ++
kernel/trace/trace.c | 1 +
33 files changed, 88 insertions(+), 0 deletions(-)
create mode 100644 arch/x86/include/asm/trace_clock.h
create mode 100644 arch/x86/kernel/trace_clock.c
create mode 100644 include/asm-generic/trace_clock.h

diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 64ffc9e..dcfabb9 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -11,3 +11,4 @@ header-y += reg.h
header-y += regdef.h
header-y += sysinfo.h
generic-y += exec.h
+generic-y += trace_clock.h
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index f70ae17..514e398 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -31,5 +31,6 @@ generic-y += sockios.h
generic-y += termbits.h
generic-y += termios.h
generic-y += timex.h
+generic-y += trace_clock.h
generic-y += types.h
generic-y += unaligned.h
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index a581a22..6e9ca46 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -43,6 +43,7 @@ generic-y += swab.h
generic-y += termbits.h
generic-y += termios.h
generic-y += topology.h
+generic-y += trace_clock.h
generic-y += types.h
generic-y += unaligned.h
generic-y += user.h
diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild
index 4807ded..4dd4f78 100644
--- a/arch/avr32/include/asm/Kbuild
+++ b/arch/avr32/include/asm/Kbuild
@@ -1,3 +1,4 @@

generic-y += clkdev.h
generic-y += exec.h
+generic-y += trace_clock.h
diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild
index 5a0625a..27d7075 100644
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -38,6 +38,7 @@ generic-y += statfs.h
generic-y += termbits.h
generic-y += termios.h
generic-y += topology.h
+generic-y += trace_clock.h
generic-y += types.h
generic-y += ucontext.h
generic-y += unaligned.h
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 112a496..eae7b59 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -49,6 +49,7 @@ generic-y += termbits.h
generic-y += termios.h
generic-y += tlbflush.h
generic-y += topology.h
+generic-y += trace_clock.h
generic-y += types.h
generic-y += ucontext.h
generic-y += user.h
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index 6d43a95..15a122c 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -11,3 +11,4 @@ header-y += sync_serial.h
generic-y += clkdev.h
generic-y += exec.h
generic-y += module.h
+generic-y += trace_clock.h
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 4a159da..c5d7670 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -1,3 +1,4 @@

generic-y += clkdev.h
generic-y += exec.h
+generic-y += trace_clock.h
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 50bbf38..4bc8ae7 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -3,3 +3,4 @@ include include/asm-generic/Kbuild.asm
generic-y += clkdev.h
generic-y += exec.h
generic-y += module.h
+generic-y += trace_clock.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 3bfa9b3..bdb54ce 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -48,6 +48,7 @@ generic-y += stat.h
generic-y += termbits.h
generic-y += termios.h
generic-y += topology.h
+generic-y += trace_clock.h
generic-y += types.h
generic-y += ucontext.h
generic-y += unaligned.h
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index dd02f09..05b03ec 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -2,3 +2,4 @@
generic-y += clkdev.h
generic-y += exec.h
generic-y += kvm_para.h
+generic-y += trace_clock.h
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index 50bbf38..4bc8ae7 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -3,3 +3,4 @@ include include/asm-generic/Kbuild.asm
generic-y += clkdev.h
generic-y += exec.h
generic-y += module.h
+generic-y += trace_clock.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 88fa3ac..7f1949c 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -24,6 +24,7 @@ generic-y += sections.h
generic-y += siginfo.h
generic-y += statfs.h
generic-y += topology.h
+generic-y += trace_clock.h
generic-y += types.h
generic-y += word-at-a-time.h
generic-y += xor.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 8653072..2957fcc 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -3,3 +3,4 @@ include include/asm-generic/Kbuild.asm
header-y += elf.h
generic-y += clkdev.h
generic-y += exec.h
+generic-y += trace_clock.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 533053d..9b54b7a 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -1 +1,2 @@
# MIPS headers
+generic-y += trace_clock.h
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index 4a159da..c5d7670 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -1,3 +1,4 @@

generic-y += clkdev.h
generic-y += exec.h
+generic-y += trace_clock.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 78de680..8971026 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -60,6 +60,7 @@ generic-y += swab.h
generic-y += termbits.h
generic-y += termios.h
generic-y += topology.h
+generic-y += trace_clock.h
generic-y += types.h
generic-y += ucontext.h
generic-y += user.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index bac8deb..ff4c9fa 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -3,3 +3,4 @@ generic-y += word-at-a-time.h auxvec.h user.h cputime.h emergency-restart.h \
segment.h topology.h vga.h device.h percpu.h hw_irq.h mutex.h \
div64.h irq_regs.h kdebug.h kvm_para.h local64.h local.h param.h \
poll.h xor.h clkdev.h exec.h
+generic-y += trace_clock.h
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index a4fe15e..2d62b48 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -2,3 +2,4 @@

generic-y += clkdev.h
generic-y += rwsem.h
+generic-y += trace_clock.h
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 0633dc6..f313f9c 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -1,3 +1,4 @@


generic-y += clkdev.h
+generic-y += trace_clock.h
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index ec697ae..16e41fe 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -3,3 +3,4 @@ include include/asm-generic/Kbuild.asm
header-y +=

generic-y += clkdev.h
+generic-y += trace_clock.h
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 29f83be..280bea9 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -31,5 +31,6 @@ generic-y += socket.h
generic-y += statfs.h
generic-y += termbits.h
generic-y += termios.h
+generic-y += trace_clock.h
generic-y += ucontext.h
generic-y += xor.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 645a58d..e26d430 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -8,4 +8,5 @@ generic-y += local64.h
generic-y += irq_regs.h
generic-y += local.h
generic-y += module.h
+generic-y += trace_clock.h
generic-y += word-at-a-time.h
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 6948015..b17b9b8 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -34,5 +34,6 @@ generic-y += sockios.h
generic-y += statfs.h
generic-y += termbits.h
generic-y += termios.h
+generic-y += trace_clock.h
generic-y += types.h
generic-y += xor.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 0f6e7b3..b30f34a 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -2,3 +2,4 @@ generic-y += bug.h cputime.h device.h emergency-restart.h futex.h hardirq.h
generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h
generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h exec.h
generic-y += switch_to.h clkdev.h
+generic-y += trace_clock.h
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index c910c98..7be503e 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -54,6 +54,7 @@ generic-y += syscalls.h
generic-y += termbits.h
generic-y += termios.h
generic-y += topology.h
+generic-y += trace_clock.h
generic-y += types.h
generic-y += ucontext.h
generic-y += unaligned.h
diff --git a/arch/x86/include/asm/trace_clock.h b/arch/x86/include/asm/trace_clock.h
new file mode 100644
index 0000000..5c16527
--- /dev/null
+++ b/arch/x86/include/asm/trace_clock.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_X86_TRACE_CLOCK_H
+#define _ASM_X86_TRACE_CLOCK_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_X86_TSC
+
+extern u64 notrace trace_clock_x86_tsc(void);
+
+# define ARCH_TRACE_CLOCKS \
+ { trace_clock_x86_tsc, "x86-tsc" },
+
+#else /* !CONFIG_X86_TSC */
+
+#define ARCH_TRACE_CLOCKS
+
+#endif
+
+#endif /* _ASM_X86_TRACE_CLOCK_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9fd5eed..34e923a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
+obj-$(CONFIG_X86_TSC) += trace_clock.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c
new file mode 100644
index 0000000..25b9937
--- /dev/null
+++ b/arch/x86/kernel/trace_clock.c
@@ -0,0 +1,21 @@
+/*
+ * X86 trace clocks
+ */
+#include <asm/trace_clock.h>
+#include <asm/barrier.h>
+#include <asm/msr.h>
+
+/*
+ * trace_clock_x86_tsc(): A clock that is just the cycle counter.
+ *
+ * Unlike the other clocks, this is not in nanoseconds.
+ */
+u64 notrace trace_clock_x86_tsc(void)
+{
+ u64 ret;
+
+ rdtsc_barrier();
+ rdtscll(ret);
+
+ return ret;
+}
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 6d13027..095f0a2 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -25,4 +25,5 @@ generic-y += siginfo.h
generic-y += statfs.h
generic-y += termios.h
generic-y += topology.h
+generic-y += trace_clock.h
generic-y += xor.h
diff --git a/include/asm-generic/trace_clock.h b/include/asm-generic/trace_clock.h
new file mode 100644
index 0000000..6726f1b
--- /dev/null
+++ b/include/asm-generic/trace_clock.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_GENERIC_TRACE_CLOCK_H
+#define _ASM_GENERIC_TRACE_CLOCK_H
+/*
+ * Arch-specific trace clocks.
+ */
+
+/*
+ * Additional trace clocks added to the trace_clocks
+ * array in kernel/trace/trace.c
+ * None if the architecture has not defined it.
+ */
+#ifndef ARCH_TRACE_CLOCKS
+# define ARCH_TRACE_CLOCKS
+#endif
+
+#endif /* _ASM_GENERIC_TRACE_CLOCK_H */
diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h
index 4eb4902..d563f37 100644
--- a/include/linux/trace_clock.h
+++ b/include/linux/trace_clock.h
@@ -12,6 +12,8 @@
#include <linux/compiler.h>
#include <linux/types.h>

+#include <asm/trace_clock.h>
+
extern u64 notrace trace_clock_local(void);
extern u64 notrace trace_clock(void);
extern u64 notrace trace_clock_global(void);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c1434b5..0d20620 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -488,6 +488,7 @@ static struct {
{ trace_clock_local, "local" },
{ trace_clock_global, "global" },
{ trace_clock_counter, "counter" },
+ ARCH_TRACE_CLOCKS
};

int trace_clock_id;
--
1.7.7.3


2012-11-13 20:18:44

by David Sharp

[permalink] [raw]
Subject: [PATCH v9 3/3] ftrace: Show raw time stamp on stats per cpu using counter or tsc mode for trace_clock

From: Yoshihiro YUNOMAE <[email protected]>

Show raw time stamp values for stats per cpu if you choose counter or tsc mode
for trace_clock. Although a unit of tracing time stamp is nsec in local or global mode,
the units in counter and TSC mode are tracing counter and cycles respectively.

Signed-off-by: Yoshihiro YUNOMAE <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ingo Molnar <[email protected]>
Signed-off-by: David Sharp <[email protected]>
---
kernel/trace/trace.c | 23 +++++++++++++++++------
1 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d943e69..b69cc38 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4388,13 +4388,24 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
cnt = ring_buffer_bytes_cpu(tr->buffer, cpu);
trace_seq_printf(s, "bytes: %ld\n", cnt);

- t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu));
- usec_rem = do_div(t, USEC_PER_SEC);
- trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", t, usec_rem);
+ if (trace_clocks[trace_clock_id].in_ns) {
+ /* local or global for trace_clock */
+ t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu));
+ usec_rem = do_div(t, USEC_PER_SEC);
+ trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
+ t, usec_rem);
+
+ t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu));
+ usec_rem = do_div(t, USEC_PER_SEC);
+ trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
+ } else {
+ /* counter or tsc mode for trace_clock */
+ trace_seq_printf(s, "oldest event ts: %llu\n",
+ ring_buffer_oldest_event_ts(tr->buffer, cpu));

- t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu));
- usec_rem = do_div(t, USEC_PER_SEC);
- trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
+ trace_seq_printf(s, "now ts: %llu\n",
+ ring_buffer_time_stamp(tr->buffer, cpu));
+ }

cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu);
trace_seq_printf(s, "dropped events: %ld\n", cnt);
--
1.7.7.3

2012-11-13 20:18:41

by David Sharp

[permalink] [raw]
Subject: [PATCH v9 2/3] tracing: Format non-nanosec times from tsc clock without a decimal point.

With the addition of the "tsc" clock, formatting timestamps to look like
fractional seconds is misleading. Mark clocks as either in nanoseconds or
not, and format non-nanosecond timestamps as decimal integers.

Tested:
$ cd /sys/kernel/debug/tracing/
$ cat trace_clock
[local] global tsc
$ echo sched_switch > set_event
$ echo 1 > tracing_on ; sleep 0.0005 ; echo 0 > tracing_on
$ cat trace
<idle>-0 [000] 6330.555552: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 next_prio=120
sleep-29964 [000] 6330.555628: sched_switch: prev_comm=bash prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
...
$ echo 1 > options/latency-format
$ cat trace
<idle>-0 0 4104553247us+: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 next_prio=120
sleep-29964 0 4104553322us+: sched_switch: prev_comm=bash prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
...
$ echo tsc > trace_clock
$ cat trace
$ echo 1 > tracing_on ; sleep 0.0005 ; echo 0 > tracing_on
$ echo 0 > options/latency-format
$ cat trace
<idle>-0 [000] 16490053398357: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 next_prio=120
sleep-31128 [000] 16490053588518: sched_switch: prev_comm=bash prev_pid=31128 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
...
echo 1 > options/latency-format
$ cat trace
<idle>-0 0 91557653238+: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 next_prio=120
sleep-31128 0 91557843399+: sched_switch: prev_comm=bash prev_pid=31128 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
...

v2:
Move arch-specific bits out of generic code.
v4:
Fix x86_32 build due to 64-bit division.

Google-Bug-Id: 6980623
Signed-off-by: David Sharp <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
---
arch/x86/include/asm/trace_clock.h | 2 +-
include/linux/ftrace_event.h | 6 +++
kernel/trace/trace.c | 15 ++++++-
kernel/trace/trace.h | 4 --
kernel/trace/trace_output.c | 78 ++++++++++++++++++++++++-----------
5 files changed, 72 insertions(+), 33 deletions(-)

diff --git a/arch/x86/include/asm/trace_clock.h b/arch/x86/include/asm/trace_clock.h
index 5c16527..beab86c 100644
--- a/arch/x86/include/asm/trace_clock.h
+++ b/arch/x86/include/asm/trace_clock.h
@@ -9,7 +9,7 @@
extern u64 notrace trace_clock_x86_tsc(void);

# define ARCH_TRACE_CLOCKS \
- { trace_clock_x86_tsc, "x86-tsc" },
+ { trace_clock_x86_tsc, "x86-tsc", .in_ns = 0 },

#else /* !CONFIG_X86_TSC */

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index b80c8dd..a3d4895 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -86,6 +86,12 @@ struct trace_iterator {
cpumask_var_t started;
};

+enum trace_iter_flags {
+ TRACE_FILE_LAT_FMT = 1,
+ TRACE_FILE_ANNOTATE = 2,
+ TRACE_FILE_TIME_IN_NS = 4,
+};
+

struct trace_event;

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0d20620..d943e69 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -484,10 +484,11 @@ static const char *trace_options[] = {
static struct {
u64 (*func)(void);
const char *name;
+ int in_ns; /* is this clock in nanoseconds? */
} trace_clocks[] = {
- { trace_clock_local, "local" },
- { trace_clock_global, "global" },
- { trace_clock_counter, "counter" },
+ { trace_clock_local, "local", 1 },
+ { trace_clock_global, "global", 1 },
+ { trace_clock_counter, "counter", 0 },
ARCH_TRACE_CLOCKS
};

@@ -2478,6 +2479,10 @@ __tracing_open(struct inode *inode, struct file *file)
if (ring_buffer_overruns(iter->tr->buffer))
iter->iter_flags |= TRACE_FILE_ANNOTATE;

+ /* Output in nanoseconds only if we are using a clock in nanoseconds. */
+ if (trace_clocks[trace_clock_id].in_ns)
+ iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
+
/* stop the trace while dumping */
tracing_stop();

@@ -3339,6 +3344,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
if (trace_flags & TRACE_ITER_LATENCY_FMT)
iter->iter_flags |= TRACE_FILE_LAT_FMT;

+ /* Output in nanoseconds only if we are using a clock in nanoseconds. */
+ if (trace_clocks[trace_clock_id].in_ns)
+ iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
+
iter->cpu_file = cpu_file;
iter->tr = &global_trace;
mutex_init(&iter->mutex);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 55010ed..c75d798 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -406,10 +406,6 @@ void tracing_stop_sched_switch_record(void);
void tracing_start_sched_switch_record(void);
int register_tracer(struct tracer *type);
int is_tracing_stopped(void);
-enum trace_file_type {
- TRACE_FILE_LAT_FMT = 1,
- TRACE_FILE_ANNOTATE = 2,
-};

extern cpumask_var_t __read_mostly tracing_buffer_mask;

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 123b189..194d796 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -610,24 +610,54 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
return trace_print_lat_fmt(s, entry);
}

-static unsigned long preempt_mark_thresh = 100;
+static unsigned long preempt_mark_thresh_us = 100;

static int
-lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
- unsigned long rel_usecs)
+lat_print_timestamp(struct trace_iterator *iter, u64 next_ts)
{
- return trace_seq_printf(s, " %4lldus%c: ", abs_usecs,
- rel_usecs > preempt_mark_thresh ? '!' :
- rel_usecs > 1 ? '+' : ' ');
+ unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE;
+ unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS;
+ unsigned long long abs_ts = iter->ts - iter->tr->time_start;
+ unsigned long long rel_ts = next_ts - iter->ts;
+ struct trace_seq *s = &iter->seq;
+
+ if (in_ns) {
+ abs_ts = ns2usecs(abs_ts);
+ rel_ts = ns2usecs(rel_ts);
+ }
+
+ if (verbose && in_ns) {
+ unsigned long abs_usec = do_div(abs_ts, USEC_PER_MSEC);
+ unsigned long abs_msec = (unsigned long)abs_ts;
+ unsigned long rel_usec = do_div(rel_ts, USEC_PER_MSEC);
+ unsigned long rel_msec = (unsigned long)rel_ts;
+
+ return trace_seq_printf(
+ s, "[%08llx] %ld.%03ldms (+%ld.%03ldms): ",
+ ns2usecs(iter->ts),
+ abs_msec, abs_usec,
+ rel_msec, rel_usec);
+ } else if (verbose && !in_ns) {
+ return trace_seq_printf(
+ s, "[%016llx] %lld (+%lld): ",
+ iter->ts, abs_ts, rel_ts);
+ } else if (!verbose && in_ns) {
+ return trace_seq_printf(
+ s, " %4lldus%c: ",
+ abs_ts,
+ rel_ts > preempt_mark_thresh_us ? '!' :
+ rel_ts > 1 ? '+' : ' ');
+ } else { /* !verbose && !in_ns */
+ return trace_seq_printf(s, " %4lld: ", abs_ts);
+ }
}

int trace_print_context(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
struct trace_entry *entry = iter->ent;
- unsigned long long t = ns2usecs(iter->ts);
- unsigned long usec_rem = do_div(t, USEC_PER_SEC);
- unsigned long secs = (unsigned long)t;
+ unsigned long long t;
+ unsigned long secs, usec_rem;
char comm[TASK_COMM_LEN];
int ret;

@@ -644,8 +674,13 @@ int trace_print_context(struct trace_iterator *iter)
return 0;
}

- return trace_seq_printf(s, " %5lu.%06lu: ",
- secs, usec_rem);
+ if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) {
+ t = ns2usecs(iter->ts);
+ usec_rem = do_div(t, USEC_PER_SEC);
+ secs = (unsigned long)t;
+ return trace_seq_printf(s, " %5lu.%06lu: ", secs, usec_rem);
+ } else
+ return trace_seq_printf(s, " %12llu: ", iter->ts);
}

int trace_print_lat_context(struct trace_iterator *iter)
@@ -659,36 +694,29 @@ int trace_print_lat_context(struct trace_iterator *iter)
*next_entry = trace_find_next_entry(iter, NULL,
&next_ts);
unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
- unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
- unsigned long rel_usecs;

/* Restore the original ent_size */
iter->ent_size = ent_size;

if (!next_entry)
next_ts = iter->ts;
- rel_usecs = ns2usecs(next_ts - iter->ts);

if (verbose) {
char comm[TASK_COMM_LEN];

trace_find_cmdline(entry->pid, comm);

- ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]"
- " %ld.%03ldms (+%ld.%03ldms): ", comm,
- entry->pid, iter->cpu, entry->flags,
- entry->preempt_count, iter->idx,
- ns2usecs(iter->ts),
- abs_usecs / USEC_PER_MSEC,
- abs_usecs % USEC_PER_MSEC,
- rel_usecs / USEC_PER_MSEC,
- rel_usecs % USEC_PER_MSEC);
+ ret = trace_seq_printf(
+ s, "%16s %5d %3d %d %08x %08lx ",
+ comm, entry->pid, iter->cpu, entry->flags,
+ entry->preempt_count, iter->idx);
} else {
ret = lat_print_generic(s, entry, iter->cpu);
- if (ret)
- ret = lat_print_timestamp(s, abs_usecs, rel_usecs);
}

+ if (ret)
+ ret = lat_print_timestamp(iter, next_ts);
+
return ret;
}

--
1.7.7.3