2009-04-23 18:24:19

by Tim Bird

[permalink] [raw]
Subject: [PATCH] Add function graph tracer support for ARM

Add ftrace function-graph tracer support for ARM.

This includes adding code in mcount to check for
(and call) a registered function graph trace entry
routine, and adding infrastructure to the threadinfo
structure to support a return trampoline.

I wrote a separate program to scan the kernel assembly
output and validate that callframes for mcount are
generated properly.

Much of the code in arch/arm/kernel/ftrace.c was
cloned from arch/x86/kernel/ftrace.c

IRQENTRY_TEXT was added to vmlinux.lds.S (to eliminate
a compiler error on kernel/trace/trace_functions_graph.c),
although no routines were marked as __irq_entry.

FIXTHIS indicates questions or areas of uncertainty.

This works for me with a gcc 4.1.1 compiler on an
OMAP OSK board, with kernel 2.6.29.

The use of "already_here" works around trace entry nesting
(I believe due to instrumentation of either cpu_clock or
raw_smp_processor_id). Advice on the best way to deal
with this would be appreciated. (that is, whether
to use the guard variable, or just un-instrument the
offending routines).

Thanks for any feedback.

Signed-off-by: <[email protected]>
---
arch/arm/Kconfig | 1
arch/arm/include/asm/ftrace.h | 28 +++++++
arch/arm/kernel/Makefile | 3
arch/arm/kernel/entry-common.S | 29 +++++++-
arch/arm/kernel/ftrace.c | 146 +++++++++++++++++++++++++++++++++++++++++
arch/arm/kernel/vmlinux.lds.S | 1
6 files changed, 204 insertions(+), 4 deletions(-)

--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -17,6 +17,7 @@ config ARM
select HAVE_KPROBES if (!XIP_KERNEL)
select HAVE_KRETPROBES if (HAVE_KPROBES)
select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
+ select HAVE_FUNCTION_GRAPH_TRACER if (!XIP_KERNEL)
select HAVE_GENERIC_DMA_COHERENT
help
The ARM series is a line of low-power-consumption RISC chip designs
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -7,8 +7,32 @@

#ifndef __ASSEMBLY__
extern void mcount(void);
-#endif
+#endif /* __ASSEMBLY__ */

-#endif
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Stack of return addresses for functions of a thread.
+ * Used in struct thread_info
+ */
+struct ftrace_ret_stack {
+ unsigned long ret;
+ unsigned long func;
+ unsigned long long calltime;
+};
+
+/*
+ * Primary handler of a function return.
+ * It relays on ftrace_return_to_handler.
+ * Defined in entry-common.S
+ */
+extern void return_to_handler(void);
+
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

#endif /* _ASM_ARM_FTRACE */
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -4,7 +4,7 @@

AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)

-ifdef CONFIG_DYNAMIC_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_ftrace.o = -pg
endif

@@ -23,6 +23,7 @@ obj-$(CONFIG_ISA_DMA) += dma-isa.o
obj-$(CONFIG_PCI) += bios32.o isa.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o
obj-$(CONFIG_ATAGS_PROC) += atags.o
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -135,8 +135,16 @@ ENTRY(mcount)
adr r0, ftrace_stub
cmp r0, r2
bne trace
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ ldr r1, =ftrace_graph_return
+ ldr r2, [r1]
+ cmp r0, r2 @ if *ftrace_graph_return != ftrace_stub
+ bne ftrace_graph_caller
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
ldr lr, [fp, #-4] @ restore lr
- ldmia sp!, {r0-r3, pc}
+ ldmia sp!, {r0-r3, pc} @ return doing nothing

trace:
ldr r1, [fp, #-4] @ lr of instrumented routine
@@ -147,6 +155,25 @@ trace:
mov lr, r1 @ restore lr
ldmia sp!, {r0-r3, pc}

+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+ sub r0, fp, #4 @ &lr of instrumented routine (&parent)
+ mov r1, lr @ instrumented routine (func)
+ sub r1, r1, #MCOUNT_INSN_SIZE
+ bl prepare_ftrace_return
+ ldr lr, [fp, #-4] @ restore lr
+ ldmia sp!, {r0-r3, pc}
+
+ .globl return_to_handler
+return_to_handler:
+ stmdb sp!, {r0-r3}
+ bl ftrace_return_to_handler
+ mov lr, r0 @ r0 has real ret addr
+ ldmia sp!, {r0-r3}
+ mov pc, lr
+
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
#endif /* CONFIG_DYNAMIC_FTRACE */

.globl ftrace_stub
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -16,6 +16,8 @@
#include <asm/cacheflush.h>
#include <asm/ftrace.h>

+#ifdef CONFIG_DYNAMIC_FTRACE
+
#define PC_OFFSET 8
#define BL_OPCODE 0xeb000000
#define BL_OFFSET_MASK 0x00ffffff
@@ -101,3 +103,147 @@ int __init ftrace_dyn_arch_init(void *da
ftrace_mcount_set(data);
return 0;
}
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+/* Add a function return address to the trace stack on thread info.*/
+static int push_return_trace(unsigned long ret, unsigned long long time,
+ unsigned long func, int *depth)
+{
+ int index;
+
+ if (!current->ret_stack)
+ return -EBUSY;
+
+ /* The return trace stack is full */
+ if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
+ atomic_inc(&current->trace_overrun);
+ return -EBUSY;
+ }
+
+ index = ++current->curr_ret_stack;
+ barrier();
+ current->ret_stack[index].ret = ret;
+ current->ret_stack[index].func = func;
+ current->ret_stack[index].calltime = time;
+ *depth = index;
+
+ return 0;
+}
+
+/*
+ * Retrieve a function return address from the trace stack on thread info.
+ * set the address to return to in *ret
+ */
+static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
+{
+ int index;
+
+ index = current->curr_ret_stack;
+
+ if (unlikely(index < 0)) {
+ ftrace_graph_stop();
+ WARN_ON(1);
+ /* Might as well panic, otherwise we have no where to go */
+ *ret = (unsigned long)panic;
+ return;
+ }
+
+ *ret = current->ret_stack[index].ret;
+ trace->func = current->ret_stack[index].func;
+ trace->calltime = current->ret_stack[index].calltime;
+ trace->overrun = atomic_read(&current->trace_overrun);
+ trace->depth = index;
+ barrier();
+ current->curr_ret_stack--;
+}
+
+int already_here; /* starts at 0 */
+
+/*
+ * Send the trace to the ring-buffer
+ * @return the original return address
+ */
+unsigned long ftrace_return_to_handler(void)
+{
+ struct ftrace_graph_ret trace;
+ unsigned long ret;
+
+ /* guard against trace entry while handling a trace exit */
+ already_here++;
+
+ pop_return_trace(&trace, &ret);
+ trace.rettime = cpu_clock(raw_smp_processor_id());
+ ftrace_graph_return(&trace);
+
+ if (unlikely(!ret)) {
+ ftrace_graph_stop();
+ WARN_ON(1);
+ /* Might as well panic. Where else to go? */
+ ret = (unsigned long)panic;
+ }
+
+ already_here--;
+ return ret;
+}
+
+/*
+ * Hook the return address and push it in the stack of return addrs
+ * in current thread info.
+ */
+
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+{
+ unsigned long old;
+ unsigned long long calltime;
+
+ struct ftrace_graph_ent trace;
+ unsigned long return_hooker = (unsigned long)
+ &return_to_handler;
+
+ if (already_here)
+ return;
+ already_here++;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause))) {
+ already_here--;
+ return;
+ }
+
+ /* FIXTHIS - need a local_irqsave here?, (to fend off ints?) */
+
+ /* FIXTHIS - x86 protects against a fault during this operation!*/
+ old = *parent;
+ *parent = return_hooker;
+
+ if (unlikely(!__kernel_text_address(old))) {
+ ftrace_graph_stop();
+ *parent = old;
+ WARN_ON(1);
+ already_here--;
+ return;
+ }
+
+ /* FIXTHIS - trace entry appears to nest inside following */
+ calltime = cpu_clock(raw_smp_processor_id());
+
+ if (push_return_trace(old, calltime,
+ self_addr, &trace.depth) == -EBUSY) {
+ *parent = old;
+ already_here--;
+ return;
+ }
+
+ trace.func = self_addr;
+
+ /* Only trace if the calling function expects to */
+ if (!ftrace_graph_entry(&trace)) {
+ current->curr_ret_stack--;
+ *parent = old;
+ }
+
+ already_here--;
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -95,6 +95,7 @@ SECTIONS
SCHED_TEXT
LOCK_TEXT
KPROBES_TEXT
+ IRQENTRY_TEXT
#ifdef CONFIG_MMU
*(.fixup)
#endif


2009-04-23 18:39:30

by Uwe Kleine-König

[permalink] [raw]
Subject: Re: [PATCH] Add function graph tracer support for ARM

Hello,

On Thu, Apr 23, 2009 at 11:08:34AM -0700, Tim Bird wrote:
> Add ftrace function-graph tracer support for ARM.
>
> This includes adding code in mcount to check for
> (and call) a registered function graph trace entry
> routine, and adding infrastructure to the threadinfo
> structure to support a return trampoline.
>
> I wrote a separate program to scan the kernel assembly
> output and validate that callframes for mcount are
> generated properly.
>
> Much of the code in arch/arm/kernel/ftrace.c was
> cloned from arch/x86/kernel/ftrace.c
>
> IRQENTRY_TEXT was added to vmlinux.lds.S (to eliminate
> a compiler error on kernel/trace/trace_functions_graph.c),
> although no routines were marked as __irq_entry.
>
> FIXTHIS indicates questions or areas of uncertainty.
>
> This works for me with a gcc 4.1.1 compiler on an
> OMAP OSK board, with kernel 2.6.29.
>
> The use of "already_here" works around trace entry nesting
> (I believe due to instrumentation of either cpu_clock or
> raw_smp_processor_id). Advice on the best way to deal
> with this would be appreciated. (that is, whether
> to use the guard variable, or just un-instrument the
> offending routines).
>
> Thanks for any feedback.
>
> Signed-off-by: <[email protected]>
According to Documentation/SubmittingPatches you need to provide your
real name in the S-o-b line.

For the lazy of us, can you point me^Uus to some documentation how to
use the graph tracer?

> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -17,6 +17,7 @@ config ARM
> select HAVE_KPROBES if (!XIP_KERNEL)
> select HAVE_KRETPROBES if (HAVE_KPROBES)
> select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
> + select HAVE_FUNCTION_GRAPH_TRACER if (!XIP_KERNEL)
> select HAVE_GENERIC_DMA_COHERENT
> help
> The ARM series is a line of low-power-consumption RISC chip designs
> --- a/arch/arm/include/asm/ftrace.h
> +++ b/arch/arm/include/asm/ftrace.h
> @@ -7,8 +7,32 @@
>
> #ifndef __ASSEMBLY__
> extern void mcount(void);
> -#endif
> +#endif /* __ASSEMBLY__ */
IMHO this belongs into a "clean up first patch"

> -#endif
> +#endif /* CONFIG_FUNCTION_TRACER */
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +
> +#ifndef __ASSEMBLY__
> +
> +/*
> + * Stack of return addresses for functions of a thread.
> + * Used in struct thread_info
> + */
> +struct ftrace_ret_stack {
> + unsigned long ret;
> + unsigned long func;
> + unsigned long long calltime;
> +};
> +
> +/*
> + * Primary handler of a function return.
> + * It relays on ftrace_return_to_handler.
> + * Defined in entry-common.S
> + */
> +extern void return_to_handler(void);
AFAIK extern doesn't change anything for function declarations. I
wouldn't write it myself, and not argue if you considered it
important/correct/whatever.

> +#endif /* __ASSEMBLY__ */
> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
>
> #endif /* _ASM_ARM_FTRACE */
> --- a/arch/arm/kernel/Makefile
> +++ b/arch/arm/kernel/Makefile
> @@ -4,7 +4,7 @@
>
> AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
>
> -ifdef CONFIG_DYNAMIC_FTRACE
> +ifdef CONFIG_FUNCTION_TRACER
> CFLAGS_REMOVE_ftrace.o = -pg
> endif
Why not simply remove the ifdef? CFLAGS_REMOVE_ftrace.o shouldn't hurt
if ftrace.o isn't compiled.

> @@ -23,6 +23,7 @@ obj-$(CONFIG_ISA_DMA) += dma-isa.o
> obj-$(CONFIG_PCI) += bios32.o isa.o
> obj-$(CONFIG_SMP) += smp.o
> obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
> +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
Is there a reason that CONFIG_DYNAMIC_FTRACE and
CONFIG_FUNCTION_GRAPH_TRACER share the same source file?

> obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
> obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o
> obj-$(CONFIG_ATAGS_PROC) += atags.o
> --- a/arch/arm/kernel/entry-common.S
> +++ b/arch/arm/kernel/entry-common.S
> @@ -135,8 +135,16 @@ ENTRY(mcount)
> adr r0, ftrace_stub
> cmp r0, r2
> bne trace
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> + ldr r1, =ftrace_graph_return
> + ldr r2, [r1]
> + cmp r0, r2 @ if *ftrace_graph_return != ftrace_stub
> + bne ftrace_graph_caller
> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> +
> ldr lr, [fp, #-4] @ restore lr
> - ldmia sp!, {r0-r3, pc}
> + ldmia sp!, {r0-r3, pc} @ return doing nothing
If ftrace_trace_function != ftrace_stub then ftrace_graph_caller isn't
called. Is this correct?

> trace:
> ldr r1, [fp, #-4] @ lr of instrumented routine
> @@ -147,6 +155,25 @@ trace:
> mov lr, r1 @ restore lr
> ldmia sp!, {r0-r3, pc}
>
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +ENTRY(ftrace_graph_caller)
> + sub r0, fp, #4 @ &lr of instrumented routine (&parent)
> + mov r1, lr @ instrumented routine (func)
> + sub r1, r1, #MCOUNT_INSN_SIZE
> + bl prepare_ftrace_return
> + ldr lr, [fp, #-4] @ restore lr
> + ldmia sp!, {r0-r3, pc}
> +
> + .globl return_to_handler
> +return_to_handler:
> + stmdb sp!, {r0-r3}
> + bl ftrace_return_to_handler
> + mov lr, r0 @ r0 has real ret addr
> + ldmia sp!, {r0-r3}
> + mov pc, lr
> +
> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> +
> #endif /* CONFIG_DYNAMIC_FTRACE */
>
> .globl ftrace_stub
> --- a/arch/arm/kernel/ftrace.c
> +++ b/arch/arm/kernel/ftrace.c
> @@ -16,6 +16,8 @@
> #include <asm/cacheflush.h>
> #include <asm/ftrace.h>
>
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +
> #define PC_OFFSET 8
> #define BL_OPCODE 0xeb000000
> #define BL_OFFSET_MASK 0x00ffffff
> @@ -101,3 +103,147 @@ int __init ftrace_dyn_arch_init(void *da
> ftrace_mcount_set(data);
> return 0;
> }
> +
> +#endif /* CONFIG_DYNAMIC_FTRACE */
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +
> +/* Add a function return address to the trace stack on thread info.*/
nitpick: add a space between "." and "*/" please.

> +static int push_return_trace(unsigned long ret, unsigned long long time,
> + unsigned long func, int *depth)
> +{
> + int index;
> +
> + if (!current->ret_stack)
> + return -EBUSY;
> +
> + /* The return trace stack is full */
> + if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
> + atomic_inc(&current->trace_overrun);
> + return -EBUSY;
> + }
> +
> + index = ++current->curr_ret_stack;
> + barrier();
> + current->ret_stack[index].ret = ret;
> + current->ret_stack[index].func = func;
> + current->ret_stack[index].calltime = time;
> + *depth = index;
> +
> + return 0;
> +}
> +
> +/*
> + * Retrieve a function return address from the trace stack on thread info.
> + * set the address to return to in *ret
> + */
> +static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
> +{
> + int index;
> +
> + index = current->curr_ret_stack;
> +
> + if (unlikely(index < 0)) {
> + ftrace_graph_stop();
> + WARN_ON(1);
> + /* Might as well panic, otherwise we have no where to go */
> + *ret = (unsigned long)panic;
> + return;
> + }
> +
> + *ret = current->ret_stack[index].ret;
> + trace->func = current->ret_stack[index].func;
> + trace->calltime = current->ret_stack[index].calltime;
> + trace->overrun = atomic_read(&current->trace_overrun);
> + trace->depth = index;
> + barrier();
> + current->curr_ret_stack--;
> +}
> +
> +int already_here; /* starts at 0 */
> +
> +/*
> + * Send the trace to the ring-buffer
> + * @return the original return address
> + */
> +unsigned long ftrace_return_to_handler(void)
> +{
> + struct ftrace_graph_ret trace;
> + unsigned long ret;
> +
> + /* guard against trace entry while handling a trace exit */
> + already_here++;
> +
> + pop_return_trace(&trace, &ret);
> + trace.rettime = cpu_clock(raw_smp_processor_id());
> + ftrace_graph_return(&trace);
> +
> + if (unlikely(!ret)) {
> + ftrace_graph_stop();
> + WARN_ON(1);
> + /* Might as well panic. Where else to go? */
> + ret = (unsigned long)panic;
> + }
> +
> + already_here--;
> + return ret;
> +}
> +
> +/*
> + * Hook the return address and push it in the stack of return addrs
> + * in current thread info.
> + */
> +
> +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
> +{
> + unsigned long old;
> + unsigned long long calltime;
> +
> + struct ftrace_graph_ent trace;
> + unsigned long return_hooker = (unsigned long)
> + &return_to_handler;
> +
> + if (already_here)
> + return;
> + already_here++;
> +
> + if (unlikely(atomic_read(&current->tracing_graph_pause))) {
> + already_here--;
> + return;
> + }
> +
> + /* FIXTHIS - need a local_irqsave here?, (to fend off ints?) */
> +
> + /* FIXTHIS - x86 protects against a fault during this operation!*/
> + old = *parent;
> + *parent = return_hooker;
> +
> + if (unlikely(!__kernel_text_address(old))) {
> + ftrace_graph_stop();
> + *parent = old;
> + WARN_ON(1);
> + already_here--;
> + return;
> + }
> +
> + /* FIXTHIS - trace entry appears to nest inside following */
> + calltime = cpu_clock(raw_smp_processor_id());
> +
> + if (push_return_trace(old, calltime,
> + self_addr, &trace.depth) == -EBUSY) {
> + *parent = old;
> + already_here--;
> + return;
> + }
> +
> + trace.func = self_addr;
> +
> + /* Only trace if the calling function expects to */
> + if (!ftrace_graph_entry(&trace)) {
> + current->curr_ret_stack--;
> + *parent = old;
> + }
> +
> + already_here--;
> +}
> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> --- a/arch/arm/kernel/vmlinux.lds.S
> +++ b/arch/arm/kernel/vmlinux.lds.S
> @@ -95,6 +95,7 @@ SECTIONS
> SCHED_TEXT
> LOCK_TEXT
> KPROBES_TEXT
> + IRQENTRY_TEXT
> #ifdef CONFIG_MMU
> *(.fixup)
> #endif
Is this only needed for graph tracer? If not this should be broken out.

Best regards
Uwe

--
Pengutronix e.K. | Uwe Kleine-König |
Industrial Linux Solutions | http://www.pengutronix.de/ |

2009-04-23 19:20:39

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH] Add function graph tracer support for ARM

Hi Tim,


On Thu, Apr 23, 2009 at 11:08:34AM -0700, Tim Bird wrote:
> Add ftrace function-graph tracer support for ARM.
>
> This includes adding code in mcount to check for
> (and call) a registered function graph trace entry
> routine, and adding infrastructure to the threadinfo
> structure to support a return trampoline.
>
> I wrote a separate program to scan the kernel assembly
> output and validate that callframes for mcount are
> generated properly.
>
> Much of the code in arch/arm/kernel/ftrace.c was
> cloned from arch/x86/kernel/ftrace.c
>
> IRQENTRY_TEXT was added to vmlinux.lds.S (to eliminate
> a compiler error on kernel/trace/trace_functions_graph.c),
> although no routines were marked as __irq_entry.
>
> FIXTHIS indicates questions or areas of uncertainty.
>
> This works for me with a gcc 4.1.1 compiler on an
> OMAP OSK board, with kernel 2.6.29.


There have been a lot of changes on the function graph tracer
since 2.6.29 :)

I don't know which tree would be the best to rebase this work on,
whether ARM or -tip. But anyway it should really be based against
a 2.6.30 development tree.



>
> The use of "already_here" works around trace entry nesting
> (I believe due to instrumentation of either cpu_clock or
> raw_smp_processor_id). Advice on the best way to deal
> with this would be appreciated. (that is, whether
> to use the guard variable, or just un-instrument the
> offending routines).
> Thanks for any feedback.
>
> Signed-off-by: <[email protected]>
> ---
> arch/arm/Kconfig | 1
> arch/arm/include/asm/ftrace.h | 28 +++++++
> arch/arm/kernel/Makefile | 3
> arch/arm/kernel/entry-common.S | 29 +++++++-
> arch/arm/kernel/ftrace.c | 146 +++++++++++++++++++++++++++++++++++++++++
> arch/arm/kernel/vmlinux.lds.S | 1
> 6 files changed, 204 insertions(+), 4 deletions(-)
>
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -17,6 +17,7 @@ config ARM
> select HAVE_KPROBES if (!XIP_KERNEL)
> select HAVE_KRETPROBES if (HAVE_KPROBES)
> select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
> + select HAVE_FUNCTION_GRAPH_TRACER if (!XIP_KERNEL)
> select HAVE_GENERIC_DMA_COHERENT
> help
> The ARM series is a line of low-power-consumption RISC chip designs
> --- a/arch/arm/include/asm/ftrace.h
> +++ b/arch/arm/include/asm/ftrace.h
> @@ -7,8 +7,32 @@
>
> #ifndef __ASSEMBLY__
> extern void mcount(void);
> -#endif
> +#endif /* __ASSEMBLY__ */
>
> -#endif
> +#endif /* CONFIG_FUNCTION_TRACER */
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +
> +#ifndef __ASSEMBLY__
> +
> +/*
> + * Stack of return addresses for functions of a thread.
> + * Used in struct thread_info
> + */
> +struct ftrace_ret_stack {
> + unsigned long ret;
> + unsigned long func;
> + unsigned long long calltime;
> +};


This one is now on <linux/ftrace.h>



> +
> +/*
> + * Primary handler of a function return.
> + * It relays on ftrace_return_to_handler.
> + * Defined in entry-common.S
> + */
> +extern void return_to_handler(void);
> +
> +#endif /* __ASSEMBLY__ */
> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
>
> #endif /* _ASM_ARM_FTRACE */
> --- a/arch/arm/kernel/Makefile
> +++ b/arch/arm/kernel/Makefile
> @@ -4,7 +4,7 @@
>
> AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
>
> -ifdef CONFIG_DYNAMIC_FTRACE
> +ifdef CONFIG_FUNCTION_TRACER
> CFLAGS_REMOVE_ftrace.o = -pg
> endif
>
> @@ -23,6 +23,7 @@ obj-$(CONFIG_ISA_DMA) += dma-isa.o
> obj-$(CONFIG_PCI) += bios32.o isa.o
> obj-$(CONFIG_SMP) += smp.o
> obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
> +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
> obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
> obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o
> obj-$(CONFIG_ATAGS_PROC) += atags.o
> --- a/arch/arm/kernel/entry-common.S
> +++ b/arch/arm/kernel/entry-common.S
> @@ -135,8 +135,16 @@ ENTRY(mcount)
> adr r0, ftrace_stub
> cmp r0, r2
> bne trace
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> + ldr r1, =ftrace_graph_return
> + ldr r2, [r1]
> + cmp r0, r2 @ if *ftrace_graph_return != ftrace_stub
> + bne ftrace_graph_caller
> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> +
> ldr lr, [fp, #-4] @ restore lr
> - ldmia sp!, {r0-r3, pc}
> + ldmia sp!, {r0-r3, pc} @ return doing nothing
>
> trace:
> ldr r1, [fp, #-4] @ lr of instrumented routine
> @@ -147,6 +155,25 @@ trace:
> mov lr, r1 @ restore lr
> ldmia sp!, {r0-r3, pc}
>
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +ENTRY(ftrace_graph_caller)
> + sub r0, fp, #4 @ &lr of instrumented routine (&parent)
> + mov r1, lr @ instrumented routine (func)
> + sub r1, r1, #MCOUNT_INSN_SIZE
> + bl prepare_ftrace_return
> + ldr lr, [fp, #-4] @ restore lr
> + ldmia sp!, {r0-r3, pc}
> +
> + .globl return_to_handler
> +return_to_handler:
> + stmdb sp!, {r0-r3}
> + bl ftrace_return_to_handler
> + mov lr, r0 @ r0 has real ret addr
> + ldmia sp!, {r0-r3}
> + mov pc, lr



The assembly part looks good.



> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> +
> #endif /* CONFIG_DYNAMIC_FTRACE */
>
> .globl ftrace_stub
> --- a/arch/arm/kernel/ftrace.c
> +++ b/arch/arm/kernel/ftrace.c
> @@ -16,6 +16,8 @@
> #include <asm/cacheflush.h>
> #include <asm/ftrace.h>
>
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +
> #define PC_OFFSET 8
> #define BL_OPCODE 0xeb000000
> #define BL_OFFSET_MASK 0x00ffffff
> @@ -101,3 +103,147 @@ int __init ftrace_dyn_arch_init(void *da
> ftrace_mcount_set(data);
> return 0;
> }
> +
> +#endif /* CONFIG_DYNAMIC_FTRACE */
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +
> +/* Add a function return address to the trace stack on thread info.*/
> +static int push_return_trace(unsigned long ret, unsigned long long time,
> + unsigned long func, int *depth)
> +{
> + int index;
> +
> + if (!current->ret_stack)
> + return -EBUSY;
> +
> + /* The return trace stack is full */
> + if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
> + atomic_inc(&current->trace_overrun);
> + return -EBUSY;
> + }
> +
> + index = ++current->curr_ret_stack;
> + barrier();
> + current->ret_stack[index].ret = ret;
> + current->ret_stack[index].func = func;
> + current->ret_stack[index].calltime = time;
> + *depth = index;
> +
> + return 0;
> +}


This part has been moved as generic code in kernel/trace/trace_function_graph.c



> +
> +/*
> + * Retrieve a function return address from the trace stack on thread info.
> + * set the address to return to in *ret
> + */
> +static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
> +{
> + int index;
> +
> + index = current->curr_ret_stack;
> +
> + if (unlikely(index < 0)) {
> + ftrace_graph_stop();
> + WARN_ON(1);
> + /* Might as well panic, otherwise we have no where to go */
> + *ret = (unsigned long)panic;
> + return;
> + }
> +
> + *ret = current->ret_stack[index].ret;
> + trace->func = current->ret_stack[index].func;
> + trace->calltime = current->ret_stack[index].calltime;
> + trace->overrun = atomic_read(&current->trace_overrun);
> + trace->depth = index;
> + barrier();
> + current->curr_ret_stack--;
> +}



Ditto.


> +int already_here; /* starts at 0 */
> +
> +/*
> + * Send the trace to the ring-buffer
> + * @return the original return address
> + */
> +unsigned long ftrace_return_to_handler(void)
> +{



Ditto.


> + struct ftrace_graph_ret trace;
> + unsigned long ret;
> +
> + /* guard against trace entry while handling a trace exit */
> + already_here++;



I don't understand the purpose of this variable. Also it's not atomic,
you will rapidly run into an imbalance of its value due to concurrent
inc/decrementations.



> +
> + pop_return_trace(&trace, &ret);
> + trace.rettime = cpu_clock(raw_smp_processor_id());
> + ftrace_graph_return(&trace);
> +
> + if (unlikely(!ret)) {
> + ftrace_graph_stop();
> + WARN_ON(1);
> + /* Might as well panic. Where else to go? */
> + ret = (unsigned long)panic;
> + }
> +
> + already_here--;
> + return ret;
> +}
> +
> +/*
> + * Hook the return address and push it in the stack of return addrs
> + * in current thread info.
> + */
> +
> +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
> +{
> + unsigned long old;
> + unsigned long long calltime;
> +
> + struct ftrace_graph_ent trace;
> + unsigned long return_hooker = (unsigned long)
> + &return_to_handler;
> +
> + if (already_here)
> + return;
> + already_here++;



I really don't understand why you want this.
Not only doesn't it protect anything because it's not atomic
but moreover this function is supposed to be reentrant.

It's perfectly safe against irq, preemption.
It's also not reentrant but safe against NMI if you pick the
protection we have in 2.6.30:

if (unlikely(in_nmi()))
return;

Hmm, btw I should move this protection to a higher level,
we should be able to trace NMI in some conditions.


> +
> + if (unlikely(atomic_read(&current->tracing_graph_pause))) {
> + already_here--;
> + return;
> + }
> +
> + /* FIXTHIS - need a local_irqsave here?, (to fend off ints?) */


No you don't need to protect against irq here.
This is not racy because an irq will push its return adress to the task stack
(in struct task_struct) and on return it will pop its return address.
Then the stack of return addresses will be left intact for the current interrupted
traced function.


> + /* FIXTHIS - x86 protects against a fault during this operation!*/
> + old = *parent;
> + *parent = return_hooker;



Yeah we protect against fault for that. But we never had any fault problem
actually. But it's better to keep it, it doesn't impact performances and
it can help to debug possible future issues in the function graph tracer itself.


> +
> + if (unlikely(!__kernel_text_address(old))) {
> + ftrace_graph_stop();
> + *parent = old;
> + WARN_ON(1);
> + already_here--;
> + return;
> + }


We removed this check. It impacts performances and we haven't seen
any warnings from this place.


> +
> + /* FIXTHIS - trace entry appears to nest inside following */
> + calltime = cpu_clock(raw_smp_processor_id());



Now we are using the local_clock from kernel/trace/trace_clock.c
It's just a wrapping to sched_clock() and sched_clock() is faster
though a bit less accurate.



> + if (push_return_trace(old, calltime,
> + self_addr, &trace.depth) == -EBUSY) {
> + *parent = old;
> + already_here--;
> + return;
> + }
> +
> + trace.func = self_addr;
> +
> + /* Only trace if the calling function expects to */
> + if (!ftrace_graph_entry(&trace)) {
> + current->curr_ret_stack--;
> + *parent = old;
> + }
> +
> + already_here--;
> +}
> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> --- a/arch/arm/kernel/vmlinux.lds.S
> +++ b/arch/arm/kernel/vmlinux.lds.S
> @@ -95,6 +95,7 @@ SECTIONS
> SCHED_TEXT
> LOCK_TEXT
> KPROBES_TEXT
> + IRQENTRY_TEXT
> #ifdef CONFIG_MMU
> *(.fixup)
> #endif



But that's a good beginning. It's great that you've made it work on
Arm.

It would be very nice if you could rebase it against latest -tip
or Arm though I can't tell you which one would be the best.

-tip is the most up to date with tracing, and Arm is more
up to date with...heh Arm :-)

I guess Ingo and Russell could answer you on that.

Thanks for this work!

Frederic.

2009-04-23 22:05:44

by Tim Bird

[permalink] [raw]
Subject: Re: [PATCH] Add function graph tracer support for ARM

Uwe � wrote:
>> Signed-off-by: <[email protected]>
> According to Documentation/SubmittingPatches you need to provide your
> real name in the S-o-b line.

OK -that's embarrassing - I'll fix this.
scripts/checkpatch.pl didn't catch this. I may look at adding
something to checkpatch.pl to catch empty names.

> For the lazy of us, can you point me^Uus to some documentation how to
> use the graph tracer?

It looks like Documentation/ftrace.txt is missing anything about
function graph tracing.

Should I add a section?

Here are some quick steps:

$ mount -t debugfs none /debug
$ cd /debug/tracing/
$ cat available_tracers
function_graph function sched_switch nop
$ echo function_graph >current_tracer
$ cat trace
# tracer: function_graph
#
# CPU OVERHEAD/DURATION FUNCTION CALLS
# | | | | | | |
------------------------------------------
0) --1 => events/-5
------------------------------------------

0) | activate_task() {
0) | enqueue_task() {
0) | enqueue_task_fair() {
0) | update_curr() {
0) 0.000 us | calc_delta_mine();
0) 0.000 us | update_min_vruntime();
0) + 61.035 us | }
0) 0.000 us | place_entity();
0) 0.000 us | __enqueue_entity();
0) + 91.552 us | }
0) ! 122.070 us | }
0) ! 152.588 us | }
0) | check_preempt_wakeup() {
0) 0.000 us | update_curr();
0) 0.000 us | wakeup_preempt_entity();
0) + 61.035 us | }
...

Clearly, my clock stinks, but that's a separate issue.

>>...
>> +extern void return_to_handler(void);
> AFAIK extern doesn't change anything for function declarations. I
> wouldn't write it myself, and not argue if you considered it
> important/correct/whatever.

It just feels a bit more natural to me. My mind reads it as
'this function is not in this file, go look elsewhere'.
You're right that it doesn't change anything.
If this is preferred, I have no problem removing it.

...
>> --- a/arch/arm/kernel/Makefile
>> +++ b/arch/arm/kernel/Makefile
>> @@ -4,7 +4,7 @@
>>
>> AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
>>
>> -ifdef CONFIG_DYNAMIC_FTRACE
>> +ifdef CONFIG_FUNCTION_TRACER
>> CFLAGS_REMOVE_ftrace.o = -pg
>> endif
> Why not simply remove the ifdef? CFLAGS_REMOVE_ftrace.o shouldn't hurt
> if ftrace.o isn't compiled.

Good point. I'll check this in the latest code, and remove
the #ifdef if it's still there. I can't imagine
any circumstances when you'd want to trace any of
the routines in ftrace.c


>> @@ -23,6 +23,7 @@ obj-$(CONFIG_ISA_DMA) += dma-isa.o
>> obj-$(CONFIG_PCI) += bios32.o isa.o
>> obj-$(CONFIG_SMP) += smp.o
>> obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
>> +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
> Is there a reason that CONFIG_DYNAMIC_FTRACE and
> CONFIG_FUNCTION_GRAPH_TRACER share the same source file?
ftrace.c has stuff from both, so you want it "turned on"
if one or the other (or both) are configed on.

I'll look at whether it makes sense to split these two into
separate files. This issue may be resolved by the code
movement that Frederic Weisbecker mentioned in his e-mail.


>> --- a/arch/arm/kernel/entry-common.S
>> +++ b/arch/arm/kernel/entry-common.S
>> @@ -135,8 +135,16 @@ ENTRY(mcount)
>> adr r0, ftrace_stub
>> cmp r0, r2
>> bne trace
>> +
>> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
>> + ldr r1, =ftrace_graph_return
>> + ldr r2, [r1]
>> + cmp r0, r2 @ if *ftrace_graph_return != ftrace_stub
>> + bne ftrace_graph_caller
>> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
>> +
>> ldr lr, [fp, #-4] @ restore lr
>> - ldmia sp!, {r0-r3, pc}
>> + ldmia sp!, {r0-r3, pc} @ return doing nothing
> If ftrace_trace_function != ftrace_stub then ftrace_graph_caller isn't
> called. Is this correct?
My comment is possibly misleading here. The sense of the
'!=' includes the 'n' portion of the 'bne' on the following line.
So the logic is:
if *ftrace_graph_return != ftrace stub, branch to ftrace_graph_caller
or in other words
if *ftrace_graph_return == ftrace_stub, return doing nothing

Maybe I should just remove the comment and let the code
speak for itself? (or maybe move the comment down one line?)

...
>> +/* Add a function return address to the trace stack on thread info.*/
> nitpick: add a space between "." and "*/" please.

OK - nitpicks appreciated as well :-)

>> --- a/arch/arm/kernel/vmlinux.lds.S
>> +++ b/arch/arm/kernel/vmlinux.lds.S
>> @@ -95,6 +95,7 @@ SECTIONS
>> SCHED_TEXT
>> LOCK_TEXT
>> KPROBES_TEXT
>> + IRQENTRY_TEXT
>> #ifdef CONFIG_MMU
>> *(.fixup)
>> #endif
> Is this only needed for graph tracer? If not this should be broken out.

As far as I can tell, yes.
This evaporates into nothing if the graph tracer is not present.

Thanks very much for the feedback! I'll try to update the patch
based on your and Frederic Weisbecker's feedback shortly.
-- Tim

=============================
Tim Bird
Architecture Group Chair, CE Linux Forum
Senior Staff Engineer, Sony Corporation of America
=============================

2009-04-23 22:15:37

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH] Add function graph tracer support for ARM

On Thu, Apr 23, 2009 at 02:49:56PM -0700, Tim Bird wrote:
> Uwe Kleine-König wrote:
> >> Signed-off-by: <[email protected]>
> > According to Documentation/SubmittingPatches you need to provide your
> > real name in the S-o-b line.
>
> OK -that's embarrassing - I'll fix this.
> scripts/checkpatch.pl didn't catch this. I may look at adding
> something to checkpatch.pl to catch empty names.
>
> > For the lazy of us, can you point me^Uus to some documentation how to
> > use the graph tracer?
>
> It looks like Documentation/ftrace.txt is missing anything about
> function graph tracing.
>
> Should I add a section?



There is already a section in Documentation/ftrace.txt as well in 2.6.30 :-)



>
> Here are some quick steps:
>
> $ mount -t debugfs none /debug
> $ cd /debug/tracing/
> $ cat available_tracers
> function_graph function sched_switch nop
> $ echo function_graph >current_tracer
> $ cat trace
> # tracer: function_graph
> #
> # CPU OVERHEAD/DURATION FUNCTION CALLS
> # | | | | | | |
> ------------------------------------------
> 0) --1 => events/-5
> ------------------------------------------
>
> 0) | activate_task() {
> 0) | enqueue_task() {
> 0) | enqueue_task_fair() {
> 0) | update_curr() {
> 0) 0.000 us | calc_delta_mine();
> 0) 0.000 us | update_min_vruntime();
> 0) + 61.035 us | }
> 0) 0.000 us | place_entity();
> 0) 0.000 us | __enqueue_entity();
> 0) + 91.552 us | }
> 0) ! 122.070 us | }
> 0) ! 152.588 us | }
> 0) | check_preempt_wakeup() {
> 0) 0.000 us | update_curr();
> 0) 0.000 us | wakeup_preempt_entity();
> 0) + 61.035 us | }
>
> Clearly, my clock stinks, but that's a separate issue.


Oh, that's the first function graph trace from Arm I can see.
/me feels a bit moved...

Just one childish caprice...it would be nice to see a part of a
trace which shows specific Arm arch functions in the next changelog :-)


Thanks,
Frederic.

2009-04-23 22:34:20

by Tim Bird

[permalink] [raw]
Subject: Re: [PATCH] Add function graph tracer support for ARM

Frederic Weisbecker wrote:
> On Thu, Apr 23, 2009 at 11:08:34AM -0700, Tim Bird wrote:
>> Add ftrace function-graph tracer support for ARM.
...

>> This works for me with a gcc 4.1.1 compiler on an
>> OMAP OSK board, with kernel 2.6.29.
>
>
> There have been a lot of changes on the function graph tracer
> since 2.6.29 :)

LOL. It serves me right for going on vacation for a week. ;-)

> I don't know which tree would be the best to rebase this work on,
> whether ARM or -tip. But anyway it should really be based against
> a 2.6.30 development tree.
Are you talking about -tip of Linus' tree, or is there a
tracing tree I should getting stuff from?

As far as I can tell, the ARM bits that this patch applies
against are relatively stable, so I'd prefer to stay
in synch with an ftrace tree, since that appears to be
the more mobile target at the moment.

...
>> --- a/arch/arm/include/asm/ftrace.h
>> +++ b/arch/arm/include/asm/ftrace.h
>> @@ -7,8 +7,32 @@
>>
>> #ifndef __ASSEMBLY__
>> extern void mcount(void);
>> -#endif
>> +#endif /* __ASSEMBLY__ */
>>
>> -#endif
>> +#endif /* CONFIG_FUNCTION_TRACER */
>> +
>> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
>> +
>> +#ifndef __ASSEMBLY__
>> +
>> +/*
>> + * Stack of return addresses for functions of a thread.
>> + * Used in struct thread_info
>> + */
>> +struct ftrace_ret_stack {
>> + unsigned long ret;
>> + unsigned long func;
>> + unsigned long long calltime;
>> +};
>
>
> This one is now on <linux/ftrace.h>

OK - I'll update this relative to an appropriate
more recent version. Thanks.

>> --- a/arch/arm/kernel/ftrace.c
>> +++ b/arch/arm/kernel/ftrace.c
>> @@ -16,6 +16,8 @@
>> #include <asm/cacheflush.h>
>> #include <asm/ftrace.h>
>>
>> +#ifdef CONFIG_DYNAMIC_FTRACE
>> +
>> #define PC_OFFSET 8
>> #define BL_OPCODE 0xeb000000
>> #define BL_OFFSET_MASK 0x00ffffff
>> @@ -101,3 +103,147 @@ int __init ftrace_dyn_arch_init(void *da
>> ftrace_mcount_set(data);
>> return 0;
>> }
>> +
>> +#endif /* CONFIG_DYNAMIC_FTRACE */
>> +
>> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
>> +
>> +/* Add a function return address to the trace stack on thread info.*/
>> +static int push_return_trace(unsigned long ret, unsigned long long time,
>> + unsigned long func, int *depth)
>> +{
>> + int index;
>> +
>> + if (!current->ret_stack)
>> + return -EBUSY;
>> +
>> + /* The return trace stack is full */
>> + if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
>> + atomic_inc(&current->trace_overrun);
>> + return -EBUSY;
>> + }
>> +
>> + index = ++current->curr_ret_stack;
>> + barrier();
>> + current->ret_stack[index].ret = ret;
>> + current->ret_stack[index].func = func;
>> + current->ret_stack[index].calltime = time;
>> + *depth = index;
>> +
>> + return 0;
>> +}
>
>
> This part has been moved as generic code in kernel/trace/trace_function_graph.c

OK - same as above, and for the following dittos. I'll
check the latest and eliminate duplicate stuff.

>> + struct ftrace_graph_ret trace;
>> + unsigned long ret;
>> +
>> + /* guard against trace entry while handling a trace exit */
>> + already_here++;
>
> I don't understand the purpose of this variable. Also it's not atomic,
> you will rapidly run into an imbalance of its value due to concurrent
> inc/decrementations.

It as a quick workaround to solve recursion in the trace system
itself. More notes below.

>> +
>> + pop_return_trace(&trace, &ret);
>> + trace.rettime = cpu_clock(raw_smp_processor_id());
>> + ftrace_graph_return(&trace);
>> +
>> + if (unlikely(!ret)) {
>> + ftrace_graph_stop();
>> + WARN_ON(1);
>> + /* Might as well panic. Where else to go? */
>> + ret = (unsigned long)panic;
>> + }
>> +
>> + already_here--;
>> + return ret;
>> +}
>> +
>> +/*
>> + * Hook the return address and push it in the stack of return addrs
>> + * in current thread info.
>> + */
>> +
>> +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
>> +{
>> + unsigned long old;
>> + unsigned long long calltime;
>> +
>> + struct ftrace_graph_ent trace;
>> + unsigned long return_hooker = (unsigned long)
>> + &return_to_handler;
>> +
>> + if (already_here)
>> + return;
>> + already_here++;
>
>
>
> I really don't understand why you want this.
> Not only doesn't it protect anything because it's not atomic
> but moreover this function is supposed to be reentrant.

It's there for recursion, not (non-recursive) reentrancy.
If I don't have this, I walk immediately off the stack when
I enable the trace, because of nested trace entry.

This solution has, as you point out, several issues.
It works OK in a UP environment, but would need to be
per-cpu to be made a proper guard for SMP.

I don't mind doing something per-cpu, but last time I
looked at the atomic_inc stuff, it was super-expensive
on ARM. Is this still the case?

>
> It's perfectly safe against irq, preemption.
> It's also not reentrant but safe against NMI if you pick the
> protection we have in 2.6.30:
>
> if (unlikely(in_nmi()))
> return;
>
> Hmm, btw I should move this protection to a higher level,
> we should be able to trace NMI in some conditions.

Pardon my ignorance, but is there an NMI on ARM?

>
>> +
>> + if (unlikely(atomic_read(&current->tracing_graph_pause))) {
>> + already_here--;
>> + return;
>> + }
>> +
>> + /* FIXTHIS - need a local_irqsave here?, (to fend off ints?) */
>
>
> No you don't need to protect against irq here.
> This is not racy because an irq will push its return address to the task stack
> (in struct task_struct) and on return it will pop its return address.
> Then the stack of return addresses will be left intact for the current interrupted
> traced function.

OK - my already_here will interfere with this (eliminating the trace
capture of any irqs caught during a trace event). I'll have to think
about this more.

>
>> + /* FIXTHIS - x86 protects against a fault during this operation!*/
>> + old = *parent;
>> + *parent = return_hooker;
>
>
>
> Yeah we protect against fault for that. But we never had any fault problem
> actually. But it's better to keep it, it doesn't impact performances and
> it can help to debug possible future issues in the function graph tracer itself.

OK.

>
>> +
>> + if (unlikely(!__kernel_text_address(old))) {
>> + ftrace_graph_stop();
>> + *parent = old;
>> + WARN_ON(1);
>> + already_here--;
>> + return;
>> + }
>
>
> We removed this check. It impacts performances and we haven't seen
> any warnings from this place.

OK.

>
>> +
>> + /* FIXTHIS - trace entry appears to nest inside following */
>> + calltime = cpu_clock(raw_smp_processor_id());
>
>
>
> Now we are using the local_clock from kernel/trace/trace_clock.c
> It's just a wrapping to sched_clock() and sched_clock() is faster
> though a bit less accurate.

My personal preference is speed over accuracy, so this is
OK with me. It does mean, however, that if I get rid of
my guard variable, I'll need to add notrace down through
sched_clock().


> But that's a good beginning. It's great that you've made it work on
> Arm.
>
> It would be very nice if you could rebase it against latest -tip
> or Arm though I can't tell you which one would be the best.
>
> -tip is the most up to date with tracing, and Arm is more
> up to date with...heh Arm :-)

I'll work on rebasing against latest -tip. Let me know if
this means something other than the tip of Linus' tree.

Thanks very much for the feedback.
-- Tim

=============================
Tim Bird
Architecture Group Chair, CE Linux Forum
Senior Staff Engineer, Sony Corporation of America
=============================

2009-04-23 22:41:41

by Tim Bird

[permalink] [raw]
Subject: Re: [PATCH] Add function graph tracer support for ARM

Frederic Weisbecker wrote:
> On Thu, Apr 23, 2009 at 02:49:56PM -0700, Tim Bird wrote:
>> Uwe Kleine-König wrote:
>>>> Signed-off-by: <[email protected]>
>>> According to Documentation/SubmittingPatches you need to provide your
>>> real name in the S-o-b line.
>> OK -that's embarrassing - I'll fix this.
>> scripts/checkpatch.pl didn't catch this. I may look at adding
>> something to checkpatch.pl to catch empty names.
>>
>>> For the lazy of us, can you point me^Uus to some documentation how to
>>> use the graph tracer?
>> It looks like Documentation/ftrace.txt is missing anything about
>> function graph tracing.
>>
>> Should I add a section?
>
>
>
> There is already a section in Documentation/ftrace.txt as well in 2.6.30 :-)

LOL. :-)

> Oh, that's the first function graph trace from Arm I can see.
> /me feels a bit moved...
>
> Just one childish caprice...it would be nice to see a part of a
> trace which shows specific Arm arch functions in the next changelog :-)

Will do. Here's something to tide you over. I have pages and
pages of the following (repeating over and over):

0) | omap_pm_idle() {
0) + 30.518 us | omap_dma_running();
0) | __exception_text_start() {
0) | irq_enter() {
0) 0.000 us | idle_cpu();
0) 0.000 us | }
0) 0.000 us | irq_to_desc();
0) | handle_level_irq() {
0) + 30.517 us | omap_mask_irq();
0) | omap_mask_ack_irq() {
0) 0.000 us | omap_mask_irq();
0) + 30.518 us | }
0) | handle_IRQ_event() {
0) | omap_mpu_timer1_interrupt() {
0) | tick_handle_periodic() {
0) | tick_periodic() {
0) | do_timer() {
0) | update_wall_time() {
0) 0.000 us | mpu_read();
0) 0.000 us | update_xtime_cache();
0) + 30.517 us | clocksource_get_next();
0) ! 122.070 us | }
0) ! 183.106 us | }
0) | update_process_times() {
0) | account_process_tick() {
0) + 30.517 us | account_idle_time();
0) + 30.517 us | }
0) | run_local_timers() {
0) | hrtimer_run_queues() {
0) 0.000 us | current_kernel_time();
0) + 30.518 us | }
0) 0.000 us | raise_softirq();
0) | softlockup_tick() {
0) | get_timestamp() {
0) 0.000 us | sched_clock();
0) 0.000 us | }
0) + 61.035 us | }
0) ! 122.071 us | }
0) | rcu_pending() {
0) 0.000 us | __rcu_pending();
0) 0.000 us | __rcu_pending();
0) + 61.035 us | }
0) 0.000 us | printk_tick();
0) | scheduler_tick() {
0) 0.000 us | sched_clock();
0) + 30.518 us | task_tick_idle();
0) + 61.035 us | }
0) 0.000 us | run_posix_cpu_timers();
0) ! 305.176 us | }
0) 0.000 us | profile_tick();
0) ! 518.799 us | }
0) ! 518.799 us | }
0) ! 549.316 us | }
0) ! 549.316 us | }
0) 0.000 us | note_interrupt();
0) 0.000 us | omap_unmask_irq();
0) ! 671.386 us | }
0) | irq_exit() {
0) | __do_softirq() {
0) | run_timer_softirq() {
0) 0.000 us | hrtimer_run_pending();
0) + 30.518 us | }
0) 0.000 us | _local_bh_enable();
0) + 61.036 us | }
0) + 91.553 us | }
0) ! 823.975 us | }
0) ! 9979.248 us | }

Enjoy :-)
-- Tim

=============================
Tim Bird
Architecture Group Chair, CE Linux Forum
Senior Staff Engineer, Sony Corporation of America
=============================

2009-04-23 23:42:39

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH] Add function graph tracer support for ARM

On Thu, Apr 23, 2009 at 03:33:48PM -0700, Tim Bird wrote:
> Frederic Weisbecker wrote:
> > On Thu, Apr 23, 2009 at 11:08:34AM -0700, Tim Bird wrote:
> >> Add ftrace function-graph tracer support for ARM.
> ...
>
> >> This works for me with a gcc 4.1.1 compiler on an
> >> OMAP OSK board, with kernel 2.6.29.
> >
> >
> > There have been a lot of changes on the function graph tracer
> > since 2.6.29 :)
>
> LOL. It serves me right for going on vacation for a week. ;-)
>
> > I don't know which tree would be the best to rebase this work on,
> > whether ARM or -tip. But anyway it should really be based against
> > a 2.6.30 development tree.
> Are you talking about -tip of Linus' tree, or is there a
> tracing tree I should getting stuff from?
>
> As far as I can tell, the ARM bits that this patch applies
> against are relatively stable, so I'd prefer to stay
> in synch with an ftrace tree, since that appears to be
> the more mobile target at the moment.
>
> ...
> >> --- a/arch/arm/include/asm/ftrace.h
> >> +++ b/arch/arm/include/asm/ftrace.h
> >> @@ -7,8 +7,32 @@
> >>
> >> #ifndef __ASSEMBLY__
> >> extern void mcount(void);
> >> -#endif
> >> +#endif /* __ASSEMBLY__ */
> >>
> >> -#endif
> >> +#endif /* CONFIG_FUNCTION_TRACER */
> >> +
> >> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> >> +
> >> +#ifndef __ASSEMBLY__
> >> +
> >> +/*
> >> + * Stack of return addresses for functions of a thread.
> >> + * Used in struct thread_info
> >> + */
> >> +struct ftrace_ret_stack {
> >> + unsigned long ret;
> >> + unsigned long func;
> >> + unsigned long long calltime;
> >> +};
> >
> >
> > This one is now on <linux/ftrace.h>
>
> OK - I'll update this relative to an appropriate
> more recent version. Thanks.
>
> >> --- a/arch/arm/kernel/ftrace.c
> >> +++ b/arch/arm/kernel/ftrace.c
> >> @@ -16,6 +16,8 @@
> >> #include <asm/cacheflush.h>
> >> #include <asm/ftrace.h>
> >>
> >> +#ifdef CONFIG_DYNAMIC_FTRACE
> >> +
> >> #define PC_OFFSET 8
> >> #define BL_OPCODE 0xeb000000
> >> #define BL_OFFSET_MASK 0x00ffffff
> >> @@ -101,3 +103,147 @@ int __init ftrace_dyn_arch_init(void *da
> >> ftrace_mcount_set(data);
> >> return 0;
> >> }
> >> +
> >> +#endif /* CONFIG_DYNAMIC_FTRACE */
> >> +
> >> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> >> +
> >> +/* Add a function return address to the trace stack on thread info.*/
> >> +static int push_return_trace(unsigned long ret, unsigned long long time,
> >> + unsigned long func, int *depth)
> >> +{
> >> + int index;
> >> +
> >> + if (!current->ret_stack)
> >> + return -EBUSY;
> >> +
> >> + /* The return trace stack is full */
> >> + if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
> >> + atomic_inc(&current->trace_overrun);
> >> + return -EBUSY;
> >> + }
> >> +
> >> + index = ++current->curr_ret_stack;
> >> + barrier();
> >> + current->ret_stack[index].ret = ret;
> >> + current->ret_stack[index].func = func;
> >> + current->ret_stack[index].calltime = time;
> >> + *depth = index;
> >> +
> >> + return 0;
> >> +}
> >
> >
> > This part has been moved as generic code in kernel/trace/trace_function_graph.c
>
> OK - same as above, and for the following dittos. I'll
> check the latest and eliminate duplicate stuff.
>
> >> + struct ftrace_graph_ret trace;
> >> + unsigned long ret;
> >> +
> >> + /* guard against trace entry while handling a trace exit */
> >> + already_here++;
> >
> > I don't understand the purpose of this variable. Also it's not atomic,
> > you will rapidly run into an imbalance of its value due to concurrent
> > inc/decrementations.
>
> It as a quick workaround to solve recursion in the trace system
> itself. More notes below.
>
> >> +
> >> + pop_return_trace(&trace, &ret);
> >> + trace.rettime = cpu_clock(raw_smp_processor_id());
> >> + ftrace_graph_return(&trace);
> >> +
> >> + if (unlikely(!ret)) {
> >> + ftrace_graph_stop();
> >> + WARN_ON(1);
> >> + /* Might as well panic. Where else to go? */
> >> + ret = (unsigned long)panic;
> >> + }
> >> +
> >> + already_here--;
> >> + return ret;
> >> +}
> >> +
> >> +/*
> >> + * Hook the return address and push it in the stack of return addrs
> >> + * in current thread info.
> >> + */
> >> +
> >> +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)

> >> +{
> >> + unsigned long old;
> >> + unsigned long long calltime;
> >> +
> >> + struct ftrace_graph_ent trace;
> >> + unsigned long return_hooker = (unsigned long)
> >> + &return_to_handler;
> >> +
> >> + if (already_here)
> >> + return;
> >> + already_here++;
> >
> >
> >
> > I really don't understand why you want this.
> > Not only doesn't it protect anything because it's not atomic
> > but moreover this function is supposed to be reentrant.
>
> It's there for recursion, not (non-recursive) reentrancy.
> If I don't have this, I walk immediately off the stack when
> I enable the trace, because of nested trace entry.
>
> This solution has, as you point out, several issues.
> It works OK in a UP environment, but would need to be
> per-cpu to be made a proper guard for SMP.
>
> I don't mind doing something per-cpu, but last time I
> looked at the atomic_inc stuff, it was super-expensive
> on ARM. Is this still the case?


I don't know. But now I understand the point of this guard.
On x86, all these low level functions, such as prepare_ftrace_return,
return_to_handler are using un-traceable functions.

As an example, sched_clock() is protected because x86/kernel/tsc.c
has CFLAGS_REMOVE_tsc.o = -pg
native_sched_clock() (an alias to sched_clock()) is then not traced.
Also it doesn't call any extern traceable functions so this is safe.

There are other CFLAGS_REMOVE_* = -pg around the kernel for critical
places. For example kernel_text_address was previously protected when
it was used inside prepare_ftrace_return.

The generic kernel code will of course be the same for arm and
x86 so the files and functions you should care about are in arch/arm.

Just check that you aren't calling one of these from the function graph
tracing code and also do the same check for those which have been moved
in kernel/trace/trace_function_graph.c:

- ftrace_push_return_trace()
- ftrace_pop_return_trace()
- ftrace_return_to_handler()

As an example:

ftrace_push_return_trace() -> trace_clock_local() -> sched_clock()
-> native_sched_clock() -> .... recursion on arch code?

Three ways to protect a function from tracing:

- __notrace annotations (individual functions), applies on function tracer
and function graph tracer.
- CFLAGS_REMOVE_tsc.o = -pg (whole files)
- __notrace_funcgraph, same as __notrace but is a nop if !CONFIG_FUNCTION_GRAPH_TRACER
It is useful in rare cases when we only want to deactivate tracing for
functions on the function graph tracer but not the function tracer.
It is rare, I almost only used it for kernel_text_address(). Really
in doubt it's better to choose __notrace.

Also, the nasty things to debug often happen when you protect a function
but you haven't seen that this function called an extern function which
is traced :-)

Another horrid thing comes when inlined functions are referenced.
It's safe to assume that inline functions are not traced, but if
their address is used as a pointer, then they become uninlined
and get traced. It's rare but we have met one of these cases.

Anyway inside trace_graph_entry() or trace_graph_return(),
you can stop worrying about recursion, because these functions
are self protected against recursion.

Concerning the arch functions that you should worry about, you can
start grepping arch/x86 for __notrace and "-pg". But it's only
a start, I guess there are unexpected traps somewhere :)


>
> >
> > It's perfectly safe against irq, preemption.
> > It's also not reentrant but safe against NMI if you pick the
> > protection we have in 2.6.30:
> >
> > if (unlikely(in_nmi()))
> > return;
> >
> > Hmm, btw I should move this protection to a higher level,
> > we should be able to trace NMI in some conditions.
>
> Pardon my ignorance, but is there an NMI on ARM?


I don't know either :)


> >
> >> +
> >> + if (unlikely(atomic_read(&current->tracing_graph_pause))) {
> >> + already_here--;
> >> + return;
> >> + }
> >> +
> >> + /* FIXTHIS - need a local_irqsave here?, (to fend off ints?) */
> >
> >
> > No you don't need to protect against irq here.
> > This is not racy because an irq will push its return address to the task stack
> > (in struct task_struct) and on return it will pop its return address.
> > Then the stack of return addresses will be left intact for the current interrupted
> > traced function.
>
> OK - my already_here will interfere with this (eliminating the trace
> capture of any irqs caught during a trace event). I'll have to think
> about this more.
>
> >
> >> + /* FIXTHIS - x86 protects against a fault during this operation!*/
> >> + old = *parent;
> >> + *parent = return_hooker;
> >
> >
> >
> > Yeah we protect against fault for that. But we never had any fault problem
> > actually. But it's better to keep it, it doesn't impact performances and
> > it can help to debug possible future issues in the function graph tracer itself.
>
> OK.
>
> >
> >> +
> >> + if (unlikely(!__kernel_text_address(old))) {
> >> + ftrace_graph_stop();
> >> + *parent = old;
> >> + WARN_ON(1);
> >> + already_here--;
> >> + return;
> >> + }
> >
> >
> > We removed this check. It impacts performances and we haven't seen
> > any warnings from this place.
>
> OK.
>
> >
> >> +
> >> + /* FIXTHIS - trace entry appears to nest inside following */
> >> + calltime = cpu_clock(raw_smp_processor_id());
> >
> >
> >
> > Now we are using the local_clock from kernel/trace/trace_clock.c
> > It's just a wrapping to sched_clock() and sched_clock() is faster
> > though a bit less accurate.
>
> My personal preference is speed over accuracy, so this is
> OK with me. It does mean, however, that if I get rid of
> my guard variable, I'll need to add notrace down through
> sched_clock().



Yeah, or may remove -pg on a whole file. I don't know much the arch/arm
place so, it depends on how native_sched_clock() is implemented,
if it calls another functions, whether they are inline, on the same
file or extern...



>
> > But that's a good beginning. It's great that you've made it work on
> > Arm.
> >
> > It would be very nice if you could rebase it against latest -tip
> > or Arm though I can't tell you which one would be the best.
> >
> > -tip is the most up to date with tracing, and Arm is more
> > up to date with...heh Arm :-)
>
> I'll work on rebasing against latest -tip. Let me know if
> this means something other than the tip of Linus' tree.



It's not the mainline, it's a tree which handles the tracing
branch and also another things (x86, scheduler, etc...):

http://people.redhat.com/mingo/tip.git/readme.txt


Another thing. How do you handle 64 bits return values?
You get the return value in r0 for most calls but
I suspect a pair of registers (r1:r0 ?) is used for
64 bits return values, which includes a lot of
clock related functions.

In x86, we solved it by always returning the edx:eax pair.

Thanks,
Frederic.

2009-04-24 06:33:28

by Uwe Kleine-König

[permalink] [raw]
Subject: Re: [PATCH] Add function graph tracer support for ARM

On Thu, Apr 23, 2009 at 03:33:48PM -0700, Tim Bird wrote:
> > I don't know which tree would be the best to rebase this work on,
> > whether ARM or -tip. But anyway it should really be based against
> > a 2.6.30 development tree.
> Are you talking about -tip of Linus' tree, or is there a
> tracing tree I should getting stuff from?
git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git

Best regards
Uwe

--
Pengutronix e.K. | Uwe Kleine-König |
Industrial Linux Solutions | http://www.pengutronix.de/ |

2009-04-24 06:44:18

by Uwe Kleine-König

[permalink] [raw]
Subject: Re: [PATCH] Add function graph tracer support for ARM

Hello,

On Thu, Apr 23, 2009 at 02:49:56PM -0700, Tim Bird wrote:
> >> @@ -23,6 +23,7 @@ obj-$(CONFIG_ISA_DMA) += dma-isa.o
> >> obj-$(CONFIG_PCI) += bios32.o isa.o
> >> obj-$(CONFIG_SMP) += smp.o
> >> obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
> >> +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
> > Is there a reason that CONFIG_DYNAMIC_FTRACE and
> > CONFIG_FUNCTION_GRAPH_TRACER share the same source file?
> ftrace.c has stuff from both, so you want it "turned on"
> if one or the other (or both) are configed on.
I got this ...
>
> I'll look at whether it makes sense to split these two into
> separate files. This issue may be resolved by the code
> movement that Frederic Weisbecker mentioned in his e-mail.
... and this was my intention.

> >> --- a/arch/arm/kernel/entry-common.S
> >> +++ b/arch/arm/kernel/entry-common.S
> >> @@ -135,8 +135,16 @@ ENTRY(mcount)
> >> adr r0, ftrace_stub
> >> cmp r0, r2
> >> bne trace
> >> +
> >> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> >> + ldr r1, =ftrace_graph_return
> >> + ldr r2, [r1]
> >> + cmp r0, r2 @ if *ftrace_graph_return != ftrace_stub
> >> + bne ftrace_graph_caller
> >> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> >> +
> >> ldr lr, [fp, #-4] @ restore lr
> >> - ldmia sp!, {r0-r3, pc}
> >> + ldmia sp!, {r0-r3, pc} @ return doing nothing
> > If ftrace_trace_function != ftrace_stub then ftrace_graph_caller isn't
> > called. Is this correct?
> My comment is possibly misleading here. The sense of the
> '!=' includes the 'n' portion of the 'bne' on the following line.
> So the logic is:
> if *ftrace_graph_return != ftrace stub, branch to ftrace_graph_caller
> or in other words
> if *ftrace_graph_return == ftrace_stub, return doing nothing
>
> Maybe I should just remove the comment and let the code
> speak for itself? (or maybe move the comment down one line?)
Yes, the comments are as good as the comments for ftrace_trace_function.
What I meant is that with your code using both ftrace_trace_function and
ftrace_graph_caller doesn't work, because if ftrace_trace_function !=
ftrace_stub then the check for ftrace_trace_function is simply skipped.

And I just noticed something else: There is a 2nd implementation of
mcount in arch/arm/kernel/entry-common.S for CONFIG_DYNAMIC_FTRACE
(which is currently not selectable for ARM). Maybe add a note to this
mcount that it needs fixing for graph tracing when it is revived?

Best regards
Uwe

--
Pengutronix e.K. | Uwe Kleine-König |
Industrial Linux Solutions | http://www.pengutronix.de/ |

2009-04-24 18:01:06

by Tim Bird

[permalink] [raw]
Subject: Re: [PATCH] Add function graph tracer support for ARM

Uwe Kleine-König wrote:
>>>> --- a/arch/arm/kernel/entry-common.S
>>>> +++ b/arch/arm/kernel/entry-common.S
>>>> @@ -135,8 +135,16 @@ ENTRY(mcount)
>>>> adr r0, ftrace_stub
>>>> cmp r0, r2
>>>> bne trace
>>>> +
>>>> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
>>>> + ldr r1, =ftrace_graph_return
>>>> + ldr r2, [r1]
>>>> + cmp r0, r2 @ if *ftrace_graph_return != ftrace_stub
>>>> + bne ftrace_graph_caller
>>>> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
>>>> +
>>>> ldr lr, [fp, #-4] @ restore lr
>>>> - ldmia sp!, {r0-r3, pc}
>>>> + ldmia sp!, {r0-r3, pc} @ return doing nothing
>>> If ftrace_trace_function != ftrace_stub then ftrace_graph_caller isn't
>>> called. Is this correct?
...
> Yes, the comments are as good as the comments for ftrace_trace_function.
> What I meant is that with your code using both ftrace_trace_function and
> ftrace_graph_caller doesn't work, because if ftrace_trace_function !=
> ftrace_stub then the check for ftrace_trace_function is simply skipped.

OK - I understand now. I didn't think of this case, but you
are correct. This has the same behavior as the x86 case.
Whether that's good or not is hard to say. I wouldn't want
to add extra tests (since this is very much a hot path), but
maybe this should be noted somewhere. I'll test to see
how this impacts the user-visible operation of the tracers.
(I assume this means you would have to reset the function
tracer, if it was active, before invoking a function graph
tracer.)

Alternatively, maybe the logic of installing the function graph
tracer should automatically reset the function tracer?

Steve,

Should both a function trace and a function graph tracer
be allowed to be active at the same time? I would think
not, but maybe there's a use case I'm missing.

> And I just noticed something else: There is a 2nd implementation of
> mcount in arch/arm/kernel/entry-common.S for CONFIG_DYNAMIC_FTRACE
> (which is currently not selectable for ARM). Maybe add a note to this
> mcount that it needs fixing for graph tracing when it is revived?

Good catch. I'll put something in.

Thanks!!
-- Tim

=============================
Tim Bird
Architecture Group Chair, CE Linux Forum
Senior Staff Engineer, Sony Corporation of America
=============================