2008-12-01 23:21:07

by Frederic Weisbecker

Subject: [PATCH] tracing/function-branch-tracer: support for x86-64

Small note: Ingo, I have only one test box and I had to install a 64-bit distro to
make this patch, so I can't verify whether it breaks anything on x86-32. I don't know
what could be broken here, but you never know.
For future patches, I will use a virtual machine to test on 32-bit.

--
This patch implements support for the function branch tracer on x86-64.
Both static and dynamic tracing are supported.

This adds a bit of CPP-conditional asm in arch/x86/kernel/ftrace.c.
I wanted to use probe_kernel_read/write to make the return address saving/patching
code more generic, but that causes tracing recursion.

It would perhaps be useful to implement notrace versions of these functions for
other arch ports.
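For illustration, such a notrace variant could look roughly like the current
probe_kernel_read() in mm/maccess.c with a notrace annotation (the name below is
made up and this is untested, just to show the idea):

static long notrace ftrace_probe_read(void *dst, void *src, size_t size)
{
	long ret;
	mm_segment_t old_fs = get_fs();

	set_fs(KERNEL_DS);
	pagefault_disable();
	/* notrace keeps mcount out of this path, so the tracing hooks can use it */
	ret = __copy_from_user_inatomic(dst, (__force const void __user *)src, size);
	pagefault_enable();
	set_fs(old_fs);

	return ret ? -EFAULT : 0;
}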

Note that arch/x86/kernel/process_64.c is not traced, just as on x86-32. I first thought
__switch_to() was responsible for crashes during tracing because I believed the current
task was changed inside it, but that's actually not the case (well, it is, but not the
"current" pointer).

So I still have to investigate and find the functions that actually cause the harm, so
that the other functions in that file can be traced (there is no issue for now, as long
as process_64.c stays out of the -pg flags).

A small possible race condition is also fixed in this patch. When the tracer allocates
a return stack dynamically, the current depth is initialized only after the stack is
assigned. An interrupt could occur in that window and, seeing that the return stack is
allocated, the tracer could try to trace it with a random, uninitialized depth.
This is only a precaution; I never actually hit the problem.
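To make the ordering issue concrete, here is a simplified, untested sketch of the
entry-side hook that may run from an interrupt (the real push of a return-trace
entry in the ftrace code differs in detail):

static int notrace push_return_trace_sketch(unsigned long ret, unsigned long func)
{
	int index;

	/* Bail out if this task has no return stack allocated yet. */
	if (!current->ret_stack)
		return -EBUSY;

	/*
	 * If ret_stack became visible before curr_ret_stack was set to -1,
	 * this increment would start from uninitialized garbage.
	 */
	index = ++current->curr_ret_stack;
	current->ret_stack[index].ret = ret;
	current->ret_stack[index].func = func;

	return 0;
}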

Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/x86/Kconfig | 2 +-
arch/x86/kernel/Makefile | 1 +
arch/x86/kernel/entry_64.S | 74 ++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kernel/ftrace.c | 11 ++++++-
kernel/trace/ftrace.c | 2 +-
5 files changed, 87 insertions(+), 3 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e7c0a1b..c216882 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,7 +29,7 @@ config X86
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
- select HAVE_FUNCTION_GRAPH_TRACER if X86_32
+ select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACE_MCOUNT_TEST
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
select HAVE_ARCH_KGDB if !X86_VOYAGER
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 6bc8aef..5ee1ba6 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -18,6 +18,7 @@ endif
ifdef CONFIG_FUNCTION_GRAPH_TRACER
# Don't trace __switch_to() but let it for function tracer
CFLAGS_REMOVE_process_32.o = -pg
+CFLAGS_REMOVE_process_64.o = -pg
endif

#
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 5e63b53..c433d86 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -97,6 +97,12 @@ ftrace_call:
movq (%rsp), %rax
addq $0x38, %rsp

+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ jmp ftrace_stub
+#endif
+
.globl ftrace_stub
ftrace_stub:
retq
@@ -109,6 +115,12 @@ ENTRY(mcount)

cmpq $ftrace_stub, ftrace_trace_function
jnz trace
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ cmpq $ftrace_stub, ftrace_graph_return
+ jnz ftrace_graph_caller
+#endif
+
.globl ftrace_stub
ftrace_stub:
retq
@@ -144,6 +156,68 @@ END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */

+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
+ subq $0x38, %rsp
+ movq %rax, (%rsp)
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rsi, 24(%rsp)
+ movq %rdi, 32(%rsp)
+ movq %r8, 40(%rsp)
+ movq %r9, 48(%rsp)
+
+ leaq 8(%rbp), %rdi
+ movq 0x38(%rsp), %rsi
+
+ call prepare_ftrace_return
+
+ movq 48(%rsp), %r9
+ movq 40(%rsp), %r8
+ movq 32(%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 16(%rsp), %rdx
+ movq 8(%rsp), %rcx
+ movq (%rsp), %rax
+ addq $0x38, %rsp
+ retq
+END(ftrace_graph_caller)
+
+
+.globl return_to_handler
+return_to_handler:
+ subq $80, %rsp
+
+ movq %rax, (%rsp)
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rsi, 24(%rsp)
+ movq %rdi, 32(%rsp)
+ movq %r8, 40(%rsp)
+ movq %r9, 48(%rsp)
+ movq %r10, 56(%rsp)
+ movq %r11, 64(%rsp)
+
+ call ftrace_return_to_handler
+
+ movq %rax, 72(%rsp)
+ movq 64(%rsp), %r11
+ movq 56(%rsp), %r10
+ movq 48(%rsp), %r9
+ movq 40(%rsp), %r8
+ movq 32(%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 16(%rsp), %rdx
+ movq 8(%rsp), %rcx
+ movq (%rsp), %rax
+ addq $72, %rsp
+ retq
+#endif
+
+
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 7ef914e..5883247 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -467,8 +467,13 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
* ignore such a protection.
*/
asm volatile(
+#ifdef CONFIG_X86_64
+ "1: movq (%[parent_old]), %[old]\n"
+ "2: movq %[return_hooker], (%[parent_replaced])\n"
+#else
"1: movl (%[parent_old]), %[old]\n"
"2: movl %[return_hooker], (%[parent_replaced])\n"
+#endif
" movl $0, %[faulted]\n"

".section .fixup, \"ax\"\n"
@@ -476,8 +481,13 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
".previous\n"

".section __ex_table, \"a\"\n"
+#ifdef CONFIG_X86_64
+ " .quad 1b, 3b\n"
+ " .quad 2b, 3b\n"
+#else
" .long 1b, 3b\n"
" .long 2b, 3b\n"
+#endif
".previous\n"

: [parent_replaced] "=r" (parent), [old] "=r" (old),
@@ -509,5 +519,4 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
ftrace_graph_entry(&trace);

}
-
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 08b536a..1e9379d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1673,8 +1673,8 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
}

if (t->ret_stack == NULL) {
- t->ret_stack = ret_stack_list[start++];
t->curr_ret_stack = -1;
+ t->ret_stack = ret_stack_list[start++];
atomic_set(&t->trace_overrun, 0);
}
} while_each_thread(g, t);
--
1.5.6.3


2008-12-02 09:02:34

by Ingo Molnar

Subject: Re: [PATCH] tracing/function-branch-tracer: support for x86-64


* Frederic Weisbecker <[email protected]> wrote:

> This patch implements the support for function branch tracer under x86-64.
> Both static and dynamic tracing are supported.

Fantastic stuff! :-)

> Small note: Ingo, I have only one test box and I had to install a 64
> bits distro to make this patch. So I can't verify if it breaks
> something in x86-32. I don't know what could be broken here but we
> never know. For further patches, I will use a virtual machine to test
> under 32.

that's OK. The patch looks fairly safe on the 32-bit side.

> This causes some small CPP conditional asm on arch/x86/kernel/ftrace.c
> I wanted to use probe_kernel_read/write to make the return address
> saving/patching code more generic but it causes tracing recursion.

it's this bit:

> +#ifdef CONFIG_X86_64
> + "1: movq (%[parent_old]), %[old]\n"
> + "2: movq %[return_hooker], (%[parent_replaced])\n"
> +#else
> "1: movl (%[parent_old]), %[old]\n"
> "2: movl %[return_hooker], (%[parent_replaced])\n"
> +#endif
> " movl $0, %[faulted]\n"
>
> ".section .fixup, \"ax\"\n"
> @@ -476,8 +481,13 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
> ".previous\n"
>
> ".section __ex_table, \"a\"\n"
> +#ifdef CONFIG_X86_64
> + " .quad 1b, 3b\n"
> + " .quad 2b, 3b\n"
> +#else
> " .long 1b, 3b\n"
> " .long 2b, 3b\n"
> +#endif

i think we might want to introduce a few assembly helpers/defines to
standardize such constructs - they are quite frequent. Something like:

" .ip_ptr 1b, 3b\n"
" .ip_ptr 2b, 3b\n"

(Cc:-ed Alexander and Cyrill who have done work in this area recently)

we might also introduce instruction helpers:

"1: mov_ptr (%[parent_old]), %[old]\n"
"2: mov_ptr %[return_hooker], (%[parent_replaced])\n"

and avoid the #ifdefs altogether.
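
a minimal sketch of such helpers (names made up here - arch/x86/include/asm/asm.h
already carries selectors in this spirit, e.g. _ASM_PTR):

#ifdef CONFIG_X86_64
# define IP_PTR		" .quad "
# define MOV_PTR	" movq "
#else
# define IP_PTR		" .long "
# define MOV_PTR	" movl "
#endif

which would let the ftrace.c bit read, without #ifdefs:

	"1:" MOV_PTR "(%[parent_old]), %[old]\n"
	"2:" MOV_PTR "%[return_hooker], (%[parent_replaced])\n"
	...
	IP_PTR "1b, 3b\n"
	IP_PTR "2b, 3b\n"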

> Note that arch/x86/process_64.c is not traced, as in X86-32. I first
> thought __switch_to() was responsible of crashes during tracing because
> I believed current task were changed inside but that's actually not the
> case (actually yes, but not the "current" pointer).
>
> So I will have to investigate to find the functions that harm here, to
> enable tracing of the other functions inside (but there is no issue at
> this time, while process_64.c stays out of -pg flags).

ok. You should take a look at arch/x86/include/asm/system.h's switch_to()
macros - it has special stack switching smarts for context-switching.

the other special stack layout case is the starting of kernel threads -
ret_from_fork and its details in process*.c.
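
roughly speaking, the 64-bit switch_to() swaps %rsp around the call to __switch_to(),
which is why anything traced in that window is on shaky ground. A stripped-down
sketch, not the real macro (operand details simplified/assumed):

#define switch_to_sketch(prev, next, last)				\
	asm volatile(							\
		"movq %%rsp, %[prev_sp]\n\t"	/* save prev's kernel stack */	\
		"movq %[next_sp], %%rsp\n\t"	/* switch to next's stack */	\
		"call __switch_to\n\t"		/* returns prev in %rax */	\
		: [prev_sp] "=m" (prev->thread.sp), "=a" (last)		\
		: [next_sp] "m" (next->thread.sp)			\
		: "memory", "cc")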

> A little possible race condition is fixed inside this patch too. When
> the tracer allocate a return stack dynamically, the current depth is
> not initialized before but after. An interrupt could occur at this time
> and, after seeing that the return stack is allocated, the tracer could
> try to trace it with a random uninitialized depth. It's a prevention,
> even if I hadn't problems with it.

> index 08b536a..1e9379d 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -1673,8 +1673,8 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
> }
>
> if (t->ret_stack == NULL) {
> - t->ret_stack = ret_stack_list[start++];
> t->curr_ret_stack = -1;
> + t->ret_stack = ret_stack_list[start++];
> atomic_set(&t->trace_overrun, 0);
> }
> } while_each_thread(g, t);

okay - the (optimization-)safe way to tell the compiler about such local
CPU ordering information is:

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 08b536a..f724996 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1673,8 +1673,10 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
}

if (t->ret_stack == NULL) {
- t->ret_stack = ret_stack_list[start++];
t->curr_ret_stack = -1;
+ /* Make sure IRQs see the -1 first: */
+ barrier();
+ t->ret_stack = ret_stack_list[start++];
atomic_set(&t->trace_overrun, 0);
}
} while_each_thread(g, t);

i changed the patch to do that.
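
for reference, barrier() is just a compiler barrier (roughly the following, from
include/linux/compiler-gcc.h), which is enough here because the racing reader is
an interrupt on the same CPU, so no SMP fence is needed:

/* compiler-only barrier: stops the compiler from reordering or caching
 * memory accesses across it; no CPU fence instruction is emitted. */
#define barrier() __asm__ __volatile__("" : : : "memory")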

All in one, great stuff!

Ingo

2008-12-02 09:06:35

by Ingo Molnar

Subject: Re: [PATCH] tracing/function-graph-tracer: support for x86-64


* Ingo Molnar <[email protected]> wrote:

> > This patch implements the support for function graph tracer under
> > x86-64. Both static and dynamic tracing are supported.
>
> Fantastic stuff! :-)

Graph tracer output from a 64-bit testbox:

 0)               |  cache_grow() {
 0)   0.323 us    |    _spin_lock();
 0)   0.314 us    |    kmem_flagcheck();
 0)               |    kmem_getpages() {
 0)               |      __alloc_pages_internal() {
 0)               |        get_page_from_freelist() {
 0)   0.295 us    |          next_zones_zonelist();
 0)   0.306 us    |          next_zones_zonelist();
 0)   0.394 us    |          zone_watermark_ok();
 0)   0.415 us    |          _spin_lock_irqsave();
 0)               |          __rmqueue() {
 0)               |            __rmqueue_smallest() {
 0)   0.387 us    |              __mod_zone_page_state();
 0)   1.514 us    |            }
 0)   2.369 us    |          }
 0)               |          zone_statistics() {
 0)   0.297 us    |            __inc_zone_state();
 0)   0.290 us    |            __inc_zone_state();
 0)   1.448 us    |          }
 0)   7.315 us    |        }
 0)   7.967 us    |      }
 0)               |      mod_zone_page_state() {
 0)   0.285 us    |        __mod_zone_page_state();
 0)   0.931 us    |      }
 0)   9.993 us    |    }
 0)               |    kmem_cache_alloc_node() {
 0)               |      cache_alloc_refill() {

nice :)

Ingo

2008-12-02 11:10:41

by Frederic Weisbecker

Subject: Re: [PATCH] tracing/function-branch-tracer: support for x86-64

2008/12/2 Ingo Molnar <[email protected]>:
> it's this bit:
>
>> +#ifdef CONFIG_X86_64
>> + "1: movq (%[parent_old]), %[old]\n"
>> + "2: movq %[return_hooker], (%[parent_replaced])\n"
>> +#else
>> "1: movl (%[parent_old]), %[old]\n"
>> "2: movl %[return_hooker], (%[parent_replaced])\n"
>> +#endif
>> " movl $0, %[faulted]\n"
>>
>> ".section .fixup, \"ax\"\n"
>> @@ -476,8 +481,13 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
>> ".previous\n"
>>
>> ".section __ex_table, \"a\"\n"
>> +#ifdef CONFIG_X86_64
>> + " .quad 1b, 3b\n"
>> + " .quad 2b, 3b\n"
>> +#else
>> " .long 1b, 3b\n"
>> " .long 2b, 3b\n"
>> +#endif
>
> i think we might want to introduce a few assembly helpers/defines to
> standardize such constructs - they are quite frequent. Something like:
>
> " .ip_ptr 1b, 3b\n"
> " .ip_ptr 2b, 3b\n"


Yeah. Note that in this particular case I could use the EX_TABLE() macro
too (if it is defined for x86; I can't verify that yet).

> (Cc:-ed Alexander and Cyrill who have done work in this area recently)
>
> we might also introduce instruction helpers:
>
> "1: mov_ptr (%[parent_old]), %[old]\n"
> "2: mov_ptr %[return_hooker], (%[parent_replaced])\n"
>
> and avoid the #ifdefs altogether.


Good idea.

>
>> Note that arch/x86/process_64.c is not traced, as in X86-32. I first
>> thought __switch_to() was responsible of crashes during tracing because
>> I believed current task were changed inside but that's actually not the
>> case (actually yes, but not the "current" pointer).
>>
>> So I will have to investigate to find the functions that harm here, to
>> enable tracing of the other functions inside (but there is no issue at
>> this time, while process_64.c stays out of -pg flags).
>
> ok. You should take a look at arch/x86/include/asm/system.h's switch_to()
> macros - it has special stack switching smarts for context-switching.
>
> the other special stack layout case is the starting of kernel threads -
> ret_from_fork and its details in process*.c.


Ok, I will have a look inside.

>> A little possible race condition is fixed inside this patch too. When
>> the tracer allocate a return stack dynamically, the current depth is
>> not initialized before but after. An interrupt could occur at this time
>> and, after seeing that the return stack is allocated, the tracer could
>> try to trace it with a random uninitialized depth. It's a prevention,
>> even if I hadn't problems with it.
>
>> index 08b536a..1e9379d 100644
>> --- a/kernel/trace/ftrace.c
>> +++ b/kernel/trace/ftrace.c
>> @@ -1673,8 +1673,8 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
>> }
>>
>> if (t->ret_stack == NULL) {
>> - t->ret_stack = ret_stack_list[start++];
>> t->curr_ret_stack = -1;
>> + t->ret_stack = ret_stack_list[start++];
>> atomic_set(&t->trace_overrun, 0);
>> }
>> } while_each_thread(g, t);
>
> okay - the (optimization-)safe way to tell the compiler about such local
> CPU ordering information is:
>
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 08b536a..f724996 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -1673,8 +1673,10 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
> }
>
> if (t->ret_stack == NULL) {
> - t->ret_stack = ret_stack_list[start++];
> t->curr_ret_stack = -1;
> + /* Make sure IRQs see the -1 first: */
> + barrier();
> + t->ret_stack = ret_stack_list[start++];
> atomic_set(&t->trace_overrun, 0);
> }
> } while_each_thread(g, t);
>
> i changed the patch to do that.


Oops, I forgot it one more time.

That reminds me: I don't know why I always confuse the name of this
tracer: function-branch-tracer instead of function-graph-tracer.
Weird :-)


> All in one, great stuff!


Thanks :-)

2008-12-02 12:03:37

by Frederic Weisbecker

Subject: Re: [PATCH] tracing/function-graph-tracer: support for x86-64

2008/12/2 Ingo Molnar <[email protected]>:
> Graph tracer output from a 64-bit testbox:
>
>  0)               |  cache_grow() {
>  0)   0.323 us    |    _spin_lock();
>  0)   0.314 us    |    kmem_flagcheck();
>  0)               |    kmem_getpages() {
>  0)               |      __alloc_pages_internal() {
>  0)               |        get_page_from_freelist() {
>  0)   0.295 us    |          next_zones_zonelist();
>  0)   0.306 us    |          next_zones_zonelist();
>  0)   0.394 us    |          zone_watermark_ok();
>  0)   0.415 us    |          _spin_lock_irqsave();
>  0)               |          __rmqueue() {
>  0)               |            __rmqueue_smallest() {
>  0)   0.387 us    |              __mod_zone_page_state();
>  0)   1.514 us    |            }
>  0)   2.369 us    |          }
>  0)               |          zone_statistics() {
>  0)   0.297 us    |            __inc_zone_state();
>  0)   0.290 us    |            __inc_zone_state();
>  0)   1.448 us    |          }
>  0)   7.315 us    |        }
>  0)   7.967 us    |      }
>  0)               |      mod_zone_page_state() {
>  0)   0.285 us    |        __mod_zone_page_state();
>  0)   0.931 us    |      }
>  0)   9.993 us    |    }
>  0)               |    kmem_cache_alloc_node() {
>  0)               |      cache_alloc_refill() {
>
> nice :)
>
> Ingo
>

:-)

Yesterday, I noticed some rare and weird things like this:

leaf_func();
leaf_func();
leaf_func();
leaf_func() {
leaf_func();
leaf_func();

I'm not sure of the origin. I will investigate....

2008-12-02 21:25:59

by Steven Rostedt

Subject: Re: [PATCH] tracing/function-branch-tracer: support for x86-64


Hmm, I had issues with my mail server so I just received this. I was
porting it to x86-64 last night too.

On Tue, 2 Dec 2008, Ingo Molnar wrote:

>
> * Frederic Weisbecker <[email protected]> wrote:
>
> > This patch implements the support for function branch tracer under x86-64.
> > Both static and dynamic tracing are supported.
>
> Fantastic stuff! :-)
>
> > Small note: Ingo, I have only one test box and I had to install a 64
> > bits distro to make this patch. So I can't verify if it breaks
> > something in x86-32. I don't know what could be broken here but we
> > never know. For further patches, I will use a virtual machine to test
> > under 32.
>
> that's OK. The patch looks fairly safe on the 32-bit side.
>
> > This causes some small CPP conditional asm on arch/x86/kernel/ftrace.c
> > I wanted to use probe_kernel_read/write to make the return address
> > saving/patching code more generic but it causes tracing recursion.
>
> it's this bit:
>
> > +#ifdef CONFIG_X86_64
> > + "1: movq (%[parent_old]), %[old]\n"
> > + "2: movq %[return_hooker], (%[parent_replaced])\n"
> > +#else
> > "1: movl (%[parent_old]), %[old]\n"
> > "2: movl %[return_hooker], (%[parent_replaced])\n"
> > +#endif
> > " movl $0, %[faulted]\n"
> >
> > ".section .fixup, \"ax\"\n"
> > @@ -476,8 +481,13 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
> > ".previous\n"
> >
> > ".section __ex_table, \"a\"\n"
> > +#ifdef CONFIG_X86_64
> > + " .quad 1b, 3b\n"
> > + " .quad 2b, 3b\n"
> > +#else
> > " .long 1b, 3b\n"
> > " .long 2b, 3b\n"
> > +#endif
>
> i think we might want to introduce a few assembly helpers/defines to
> standardize such constructs - they are quite frequent. Something like:
>
> " .ip_ptr 1b, 3b\n"
> " .ip_ptr 2b, 3b\n"
>
> (Cc:-ed Alexander and Cyrill who have done work in this area recently)
>
> we might also introduce instruction helpers:
>
> "1: mov_ptr (%[parent_old]), %[old]\n"
> "2: mov_ptr %[return_hooker], (%[parent_replaced])\n"
>
> and avoid the #ifdefs altogether.

I fixed this in my last patch queue.

>
> > Note that arch/x86/process_64.c is not traced, as in X86-32. I first
> > thought __switch_to() was responsible of crashes during tracing because
> > I believed current task were changed inside but that's actually not the
> > case (actually yes, but not the "current" pointer).
> >
> > So I will have to investigate to find the functions that harm here, to
> > enable tracing of the other functions inside (but there is no issue at
> > this time, while process_64.c stays out of -pg flags).
>
> ok. You should take a look at arch/x86/include/asm/system.h's switch_to()
> macros - it has special stack switching smarts for context-switching.
>
> the other special stack layout case is the starting of kernel threads -
> ret_from_fork and its details in process*.c.

I'm hitting some crashes but it does not seem to be related to this.
I'm still investigating, but it looks like it is due to some strange race
because I can run for hours sometimes and other times it crashes right
away.

>
> > A little possible race condition is fixed inside this patch too. When
> > the tracer allocate a return stack dynamically, the current depth is
> > not initialized before but after. An interrupt could occur at this time
> > and, after seeing that the return stack is allocated, the tracer could
> > try to trace it with a random uninitialized depth. It's a prevention,
> > even if I hadn't problems with it.
>
> > index 08b536a..1e9379d 100644
> > --- a/kernel/trace/ftrace.c
> > +++ b/kernel/trace/ftrace.c
> > @@ -1673,8 +1673,8 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
> > }
> >
> > if (t->ret_stack == NULL) {
> > - t->ret_stack = ret_stack_list[start++];
> > t->curr_ret_stack = -1;
> > + t->ret_stack = ret_stack_list[start++];
> > atomic_set(&t->trace_overrun, 0);
> > }
> > } while_each_thread(g, t);
>
> okay - the (optimization-)safe way to tell the compiler about such local
> CPU ordering information is:
>
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 08b536a..f724996 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -1673,8 +1673,10 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
> }
>
> if (t->ret_stack == NULL) {
> - t->ret_stack = ret_stack_list[start++];
> t->curr_ret_stack = -1;
> + /* Make sure IRQs see the -1 first: */
> + barrier();
> + t->ret_stack = ret_stack_list[start++];
> atomic_set(&t->trace_overrun, 0);
> }
> } while_each_thread(g, t);
>
> i changed the patch to do that.
>
> All in one, great stuff!

Agree, this is really awesome. I'm also working on a way to trigger
specific functions to trace instead of tracing all functions.

-- Steve

2008-12-02 21:40:27

by Frederic Weisbecker

Subject: Re: [PATCH] tracing/function-branch-tracer: support for x86-64

2008/12/2 Steven Rostedt <[email protected]>:
> Hmm, I had issues with my mail server so I just received this. I was
> porting it to x86-64 last night too.

Oops...


> I'm also working on a way to trigger
> specific functions to trace instead of tracing all functions.

You mean tracing only one function and its tree of calls (by defining a kind of
tracing root?), as Arjan suggested?
Yeah, that would be great.

To define it through the trace filter, perhaps we could use the "^" sign before a
function name to mean "all calls starting from this function". I hope that wouldn't
be confused with the real meaning "^" has in regular expressions.

2008-12-03 21:30:28

by Alexander van Heukelum

Subject: Re: [PATCH] tracing/function-branch-tracer: support for x86-64

On Tue, 02 Dec 2008 00:20:39 +0100, "Frederic Weisbecker"
<[email protected]> said:
> Small note: Ingo, I have only one test box and I had to install a 64-bit distro
> to make this patch, so I can't verify whether it breaks anything on x86-32.
> I don't know what could be broken here, but you never know.
> For future patches, I will use a virtual machine to test on 32-bit.
>
> --
> This patch implements support for the function branch tracer on x86-64.
> Both static and dynamic tracing are supported.
>
> This adds a bit of CPP-conditional asm in arch/x86/kernel/ftrace.c.
> I wanted to use probe_kernel_read/write to make the return address
> saving/patching code more generic, but that causes tracing recursion.
>
> It would perhaps be useful to implement notrace versions of these functions
> for other arch ports.
>
> Note that arch/x86/kernel/process_64.c is not traced, just as on x86-32. I first
> thought __switch_to() was responsible for crashes during tracing because I
> believed the current task was changed inside it, but that's actually not the
> case (well, it is, but not the "current" pointer).
>
> So I still have to investigate and find the functions that actually cause the
> harm, so that the other functions in that file can be traced (there is no issue
> for now, as long as process_64.c stays out of the -pg flags).
>
> A small possible race condition is also fixed in this patch. When the tracer
> allocates a return stack dynamically, the current depth is initialized only
> after the stack is assigned. An interrupt could occur in that window and,
> seeing that the return stack is allocated, the tracer could try to trace it
> with a random, uninitialized depth. This is only a precaution; I never
> actually hit the problem.
>
> Signed-off-by: Frederic Weisbecker <[email protected]>
> ---
> arch/x86/Kconfig | 2 +-
> arch/x86/kernel/Makefile | 1 +
> arch/x86/kernel/entry_64.S | 74 ++++++++++++++++++++++++++++++++++++++++++++
> arch/x86/kernel/ftrace.c | 11 ++++++-
> kernel/trace/ftrace.c | 2 +-
> 5 files changed, 87 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index e7c0a1b..c216882 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -29,7 +29,7 @@ config X86
> select HAVE_FTRACE_MCOUNT_RECORD
> select HAVE_DYNAMIC_FTRACE
> select HAVE_FUNCTION_TRACER
> - select HAVE_FUNCTION_GRAPH_TRACER if X86_32
> + select HAVE_FUNCTION_GRAPH_TRACER
> select HAVE_FUNCTION_TRACE_MCOUNT_TEST
> select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
> select HAVE_ARCH_KGDB if !X86_VOYAGER
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index 6bc8aef..5ee1ba6 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -18,6 +18,7 @@ endif
> ifdef CONFIG_FUNCTION_GRAPH_TRACER
> # Don't trace __switch_to() but let it for function tracer
> CFLAGS_REMOVE_process_32.o = -pg
> +CFLAGS_REMOVE_process_64.o = -pg
> endif
>
> #
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index 5e63b53..c433d86 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -97,6 +97,12 @@ ftrace_call:
> movq (%rsp), %rax
> addq $0x38, %rsp
>
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +.globl ftrace_graph_call
> +ftrace_graph_call:
> + jmp ftrace_stub
> +#endif
> +
> .globl ftrace_stub
> ftrace_stub:
> retq
> @@ -109,6 +115,12 @@ ENTRY(mcount)
>
> cmpq $ftrace_stub, ftrace_trace_function
> jnz trace
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> + cmpq $ftrace_stub, ftrace_graph_return
> + jnz ftrace_graph_caller
> +#endif
> +
> .globl ftrace_stub
> ftrace_stub:
> retq
> @@ -144,6 +156,68 @@ END(mcount)
> #endif /* CONFIG_DYNAMIC_FTRACE */
> #endif /* CONFIG_FUNCTION_TRACER */
>
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +ENTRY(ftrace_graph_caller)
> + cmpl $0, function_trace_stop
> + jne ftrace_stub
> +
> + subq $0x38, %rsp
> + movq %rax, (%rsp)
> + movq %rcx, 8(%rsp)
> + movq %rdx, 16(%rsp)
> + movq %rsi, 24(%rsp)
> + movq %rdi, 32(%rsp)
> + movq %r8, 40(%rsp)
> + movq %r9, 48(%rsp)
> +
> + leaq 8(%rbp), %rdi
> + movq 0x38(%rsp), %rsi
> +
> + call prepare_ftrace_return
> +
> + movq 48(%rsp), %r9
> + movq 40(%rsp), %r8
> + movq 32(%rsp), %rdi
> + movq 24(%rsp), %rsi
> + movq 16(%rsp), %rdx
> + movq 8(%rsp), %rcx
> + movq (%rsp), %rax
> + addq $0x38, %rsp
> + retq
> +END(ftrace_graph_caller)
> +
> +
> +.globl return_to_handler
> +return_to_handler:
> + subq $80, %rsp
> +
> + movq %rax, (%rsp)
> + movq %rcx, 8(%rsp)
> + movq %rdx, 16(%rsp)
> + movq %rsi, 24(%rsp)
> + movq %rdi, 32(%rsp)
> + movq %r8, 40(%rsp)
> + movq %r9, 48(%rsp)
> + movq %r10, 56(%rsp)
> + movq %r11, 64(%rsp)
> +
> + call ftrace_return_to_handler
> +
> + movq %rax, 72(%rsp)
> + movq 64(%rsp), %r11
> + movq 56(%rsp), %r10
> + movq 48(%rsp), %r9
> + movq 40(%rsp), %r8
> + movq 32(%rsp), %rdi
> + movq 24(%rsp), %rsi
> + movq 16(%rsp), %rdx
> + movq 8(%rsp), %rcx
> + movq (%rsp), %rax
> + addq $72, %rsp
> + retq
> +#endif
> +
> +
> #ifndef CONFIG_PREEMPT
> #define retint_kernel retint_restore_args
> #endif

Hi Frederic,

The changes to entry_64.S look simple enough. The order of
saving the registers on the stack is not important in any
way, right?

> diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> index 7ef914e..5883247 100644
> --- a/arch/x86/kernel/ftrace.c
> +++ b/arch/x86/kernel/ftrace.c
> @@ -467,8 +467,13 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
> * ignore such a protection.
> */
> asm volatile(
> +#ifdef CONFIG_X86_64
> + "1: movq (%[parent_old]), %[old]\n"
> + "2: movq %[return_hooker], (%[parent_replaced])\n"
> +#else
> "1: movl (%[parent_old]), %[old]\n"
> "2: movl %[return_hooker], (%[parent_replaced])\n"
> +#endif

The following should work in both cases, right?

"1: mov (%[parent_old]), %[old]\n"
"2: mov %[return_hooker], (%[parent_replaced])\n"

> " movl $0, %[faulted]\n"
>
> ".section .fixup, \"ax\"\n"
> @@ -476,8 +481,13 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
> ".previous\n"

> ".section __ex_table, \"a\"\n"
> +#ifdef CONFIG_X86_64
> + " .quad 1b, 3b\n"
> + " .quad 2b, 3b\n"
> +#else
> " .long 1b, 3b\n"
> " .long 2b, 3b\n"
> +#endif
> ".previous\n"

I think this can be done with:

_ASM_EXTABLE(1b,3b)
_ASM_EXTABLE(2b,3b)

(from: arch/x86/include/asm/asm.h)

Greetings,
Alexander

> : [parent_replaced] "=r" (parent), [old] "=r" (old),
> @@ -509,5 +519,4 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
> ftrace_graph_entry(&trace);
>
> }
> -
> #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 08b536a..1e9379d 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -1673,8 +1673,8 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
> }
>
> if (t->ret_stack == NULL) {
> - t->ret_stack = ret_stack_list[start++];
> t->curr_ret_stack = -1;
> + t->ret_stack = ret_stack_list[start++];
> atomic_set(&t->trace_overrun, 0);
> }
> } while_each_thread(g, t);
--
Alexander van Heukelum
[email protected]


2008-12-03 23:33:19

by Frederic Weisbecker

Subject: Re: [PATCH] tracing/function-branch-tracer: support for x86-64

2008/12/3 Alexander van Heukelum <[email protected]>:
> Hi Frederic,
>
> The changes to entry_64.S look simple enough. The order of
> saving the registers on the stack is not important in any
> way, right?
>[...]

Right, the order doesn't matter.
All argument registers have to be saved in ftrace_graph_caller so they aren't lost
while hooking the call entry.

On return (return_to_handler), I thought I only needed to save %rax (the return
value); the other scratch registers shouldn't matter at that point, and the rest
will be saved by the return hooker anyway.

But when I save only %rax, my system crashes. I don't know why yet, so I kept
saving all the scratch registers.

> I think this can be done with:
>
> _ASM_EXTABLE(1b,3b)
> _ASM_EXTABLE(2b,3b)
>
> (from: arch/x86/include/asm/asm.h)

Thanks. That and the generic move have actually been implemented by
Steven very recently:
http://lkml.org/lkml/2008/12/2/280
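
With those, the whole block should collapse to something like this (just a rough
sketch - Steven's patch at the link above is the authoritative version, and the mov
helper may be spelled _ASM_MOV or _ASM_MOV_UL depending on the tree):

	asm volatile(
		"1: " _ASM_MOV " (%[parent_old]), %[old]\n"
		"2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n"
		"   movl $0, %[faulted]\n"

		".section .fixup, \"ax\"\n"
		"3: movl $1, %[faulted]\n"
		".previous\n"

		_ASM_EXTABLE(1b, 3b)
		_ASM_EXTABLE(2b, 3b)

		/* operand list unchanged from the current code */
		: [parent_replaced] "=r" (parent), [old] "=r" (old),
		  ...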